@sjcrh/proteinpaint-server 2.30.5 → 2.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/routes/gdc.mafBuild.ts +22 -28
- package/routes/gdc.topVariablyExpressedGenes.ts +39 -23
- package/routes/termdb.singlecellSamples.ts +45 -0
- package/server.js +1 -1
- package/shared/common.js +10 -12
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.30.5",
|
|
3
|
+
"version": "2.31.0",
|
|
4
4
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"bin": "start.js",
|
|
@@ -56,7 +56,7 @@
|
|
|
56
56
|
},
|
|
57
57
|
"dependencies": {
|
|
58
58
|
"@sjcrh/augen": "2.27.0",
|
|
59
|
-
"@sjcrh/proteinpaint-rust": "2.
|
|
59
|
+
"@sjcrh/proteinpaint-rust": "2.31.0",
|
|
60
60
|
"better-sqlite3": "^7.5.3",
|
|
61
61
|
"body-parser": "^1.15.2",
|
|
62
62
|
"canvas": "~2.9.3",
|
package/routes/gdc.mafBuild.ts
CHANGED
|
@@ -3,6 +3,8 @@ import path from 'path'
|
|
|
3
3
|
import fs from 'fs'
|
|
4
4
|
import { run_rust } from '@sjcrh/proteinpaint-rust'
|
|
5
5
|
import serverconfig from '#src/serverconfig.js'
|
|
6
|
+
import Readable from 'stream'
|
|
7
|
+
import { GdcMafBuildRequest } from '#shared/types/routes/gdc.mafBuild.ts'
|
|
6
8
|
|
|
7
9
|
const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
|
|
8
10
|
const maxTotalSizeCompressed = serverconfig.features.gdcMafMaxFileSize || 50000000 // 50Mb
|
|
@@ -11,64 +13,56 @@ export const api = {
|
|
|
11
13
|
endpoint: 'gdc/mafBuild',
|
|
12
14
|
methods: {
|
|
13
15
|
all: {
|
|
14
|
-
init
|
|
15
|
-
return async (req: any, res: any): Promise<void> => {
|
|
16
|
-
try {
|
|
17
|
-
await buildMaf(req, res)
|
|
18
|
-
} catch (e: any) {
|
|
19
|
-
if (e.stack) console.log(e.stack)
|
|
20
|
-
res.send({ status: 'error', error: e.message || e })
|
|
21
|
-
}
|
|
22
|
-
}
|
|
23
|
-
},
|
|
16
|
+
init,
|
|
24
17
|
request: {
|
|
25
|
-
typeId:
|
|
26
|
-
//valid: default to type checker
|
|
18
|
+
typeId: 'GdcMafBuildRequest'
|
|
27
19
|
},
|
|
28
20
|
response: {
|
|
29
|
-
typeId: 'GdcMafBuildResponse'
|
|
30
|
-
// will combine this with type checker
|
|
31
|
-
//valid: (t) => {}
|
|
21
|
+
typeId: null // 'GdcMafBuildResponse'
|
|
32
22
|
}
|
|
33
23
|
}
|
|
34
24
|
}
|
|
35
25
|
}
|
|
36
26
|
|
|
37
|
-
|
|
38
|
-
req
|
|
39
|
-
|
|
27
|
+
function init({ genomes }) {
|
|
28
|
+
return async (req: any, res: any): Promise<void> => {
|
|
29
|
+
try {
|
|
30
|
+
await buildMaf(req.query as GdcMafBuildRequest, res)
|
|
31
|
+
} catch (e: any) {
|
|
32
|
+
if (e.stack) console.log(e.stack)
|
|
33
|
+
res.send({ status: 'error', error: e.message || e })
|
|
34
|
+
}
|
|
35
|
+
}
|
|
40
36
|
}
|
|
41
37
|
|
|
38
|
+
/*
|
|
39
|
+
q{}
|
|
42
40
|
res{}
|
|
43
41
|
*/
|
|
44
|
-
async function buildMaf(
|
|
42
|
+
async function buildMaf(q: GdcMafBuildRequest, res: any) {
|
|
45
43
|
const t0 = Date.now()
|
|
46
44
|
|
|
47
|
-
const fileLst2 = (await getFileLstUnderSizeLimit(
|
|
45
|
+
const fileLst2 = (await getFileLstUnderSizeLimit(q.fileIdLst)) as string[]
|
|
48
46
|
console.log('test gdc maf sizes', Date.now() - t0)
|
|
49
47
|
|
|
50
|
-
const outFile = path.join(serverconfig.cachedir, 'gdcMaf.' + Math.random().toString()) // should be a gzipped file. does it need to end with '.gz' or it's auto-added?
|
|
51
|
-
|
|
52
48
|
const arg = {
|
|
53
49
|
fileIdLst: fileLst2,
|
|
54
|
-
host: path.join(apihost, 'data')
|
|
55
|
-
outFile
|
|
50
|
+
host: path.join(apihost, 'data') // must use the /data/ endpoint from current host
|
|
56
51
|
}
|
|
57
52
|
|
|
58
|
-
await run_rust('gdcmaf', JSON.stringify(arg))
|
|
53
|
+
const dataGzipped = await run_rust('gdcmaf', JSON.stringify(arg))
|
|
59
54
|
|
|
60
55
|
console.log('rust gdcmaf', Date.now() - t0)
|
|
61
56
|
|
|
62
|
-
const data =
|
|
57
|
+
const data = JSON.parse(dataGzipped)
|
|
63
58
|
|
|
64
59
|
// by directly returning a blob, it won't tell client how many files are used
|
|
65
|
-
|
|
66
60
|
res.writeHead(200, {
|
|
67
61
|
'Content-Type': 'application/octet-stream',
|
|
68
62
|
'Content-Disposition': 'attachment; filename=cohort.maf.gz',
|
|
69
63
|
'Content-Length': data.length
|
|
70
64
|
})
|
|
71
|
-
res.end(Buffer.from(data
|
|
65
|
+
res.end(Buffer.from(data, 'binary'))
|
|
72
66
|
}
|
|
73
67
|
|
|
74
68
|
/*
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
import { GdcTopVariablyExpressedGenesResponse } from '#shared/types/routes/gdc.topVariablyExpressedGenes.ts'
|
|
2
2
|
import { getCasesWithExressionDataFromCohort } from '../src/mds3.gdc.js'
|
|
3
|
-
|
|
3
|
+
import path from 'path'
|
|
4
4
|
import got from 'got'
|
|
5
5
|
import serverconfig from '#src/serverconfig.js'
|
|
6
6
|
|
|
7
7
|
// TODO change when api is released to prod
|
|
8
8
|
//const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
|
|
9
9
|
const apihost = 'https://uat-portal.gdc.cancer.gov/auth/api/v0/gene_expression/gene_selection'
|
|
10
|
+
// temporarily hardcode to use the direct API URL,
|
|
11
|
+
// previously hardcoded to use 'https://uat-portal.gdc.cancer.gov/auth/api/v0/'
|
|
12
|
+
const geneExpHost = 'https://uat-api.gdc.cancer.gov'
|
|
10
13
|
|
|
11
14
|
const gdcGenome = 'hg38'
|
|
12
15
|
const gdcDslabel = 'GDC'
|
|
@@ -23,7 +26,7 @@ export const api = {
|
|
|
23
26
|
if (!genome) throw 'hg38 genome missing'
|
|
24
27
|
const ds = genome.datasets?.[gdcDslabel]
|
|
25
28
|
if (!ds) throw 'gdc dataset missing'
|
|
26
|
-
const genes = await getGenes(req.query, ds)
|
|
29
|
+
const genes = await getGenes(req.query, ds, genome)
|
|
27
30
|
const payload = { genes } as GdcTopVariablyExpressedGenesResponse
|
|
28
31
|
res.send(payload)
|
|
29
32
|
} catch (e: any) {
|
|
@@ -53,12 +56,16 @@ req.query {
|
|
|
53
56
|
|
|
54
57
|
ds { } // server-side ds object
|
|
55
58
|
|
|
59
|
+
genome {}
|
|
56
60
|
*/
|
|
57
|
-
async function getGenes(q: any, ds: any) {
|
|
61
|
+
async function getGenes(q: any, ds: any, genome: any) {
|
|
58
62
|
if (serverconfig.features.gdcGenes) {
|
|
59
63
|
// for testing only; delete when api issue is resolved
|
|
60
64
|
return serverconfig.features.gdcGenes as string[]
|
|
61
65
|
}
|
|
66
|
+
if (!ds.__gdc.doneCaching) {
|
|
67
|
+
throw `The server has not finished caching the case IDs: try again in ~2 minutes`
|
|
68
|
+
}
|
|
62
69
|
|
|
63
70
|
// based on current cohort, get list of cases with exp data, as input of next api query
|
|
64
71
|
const caseLst = await getCasesWithExressionDataFromCohort(q, ds)
|
|
@@ -68,30 +75,39 @@ async function getGenes(q: any, ds: any) {
|
|
|
68
75
|
}
|
|
69
76
|
|
|
70
77
|
// change to this when api is available on prod
|
|
71
|
-
|
|
78
|
+
const url = path.join(geneExpHost, '/gene_expression/gene_selection')
|
|
72
79
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
80
|
+
try {
|
|
81
|
+
const response = await got.post(url, {
|
|
82
|
+
headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
|
|
83
|
+
body: JSON.stringify({
|
|
84
|
+
// !!! temporarily limit the case_ids length, otherwise the request times out !!!
|
|
85
|
+
case_ids: caseLst.slice(0, 20),
|
|
86
|
+
//gene_ids: [] // this should not be a required parameter
|
|
87
|
+
gene_type: 'protein_coding',
|
|
88
|
+
selection_size: Number(q.maxGenes || 100)
|
|
89
|
+
})
|
|
79
90
|
})
|
|
80
|
-
})
|
|
81
91
|
|
|
82
|
-
|
|
83
|
-
|
|
92
|
+
const re = JSON.parse(response.body)
|
|
93
|
+
// {"gene_selection":[{"gene_id":"ENSG00000141510","log2_uqfpkm_median":3.103430497010492,"log2_uqfpkm_stddev":0.8692021350485105,"symbol":"TP53"}, ... ]}
|
|
84
94
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
95
|
+
const genes = [] as string[]
|
|
96
|
+
if (!Array.isArray(re.gene_selection)) throw 're.gene_selection[] is not array'
|
|
97
|
+
for (const i of re.gene_selection) {
|
|
98
|
+
if (i.gene_id && typeof i.gene_id == 'string') {
|
|
99
|
+
// is ensg, convert to symbol
|
|
100
|
+
const t = genome.genedb.getNameByAlias.get(i.gene_id)
|
|
101
|
+
if (t) genes.push(t.name) // ensg
|
|
102
|
+
} else if (i.symbol && typeof i.symbol == 'string') {
|
|
103
|
+
genes.push(i.symbol)
|
|
104
|
+
} else {
|
|
105
|
+
throw 'one of re.gene_selection[] is missing both gene_id and symbol'
|
|
106
|
+
}
|
|
94
107
|
}
|
|
108
|
+
return genes
|
|
109
|
+
} catch (e) {
|
|
110
|
+
console.log(e.stack || e)
|
|
111
|
+
throw e
|
|
95
112
|
}
|
|
96
|
-
return genes
|
|
97
113
|
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import {
|
|
2
|
+
TermdbSinglecellsamplesRequest,
|
|
3
|
+
TermdbSinglecellsamplesResponse
|
|
4
|
+
} from '#shared/types/routes/termdb.singlecellSamples.ts'
|
|
5
|
+
|
|
6
|
+
/* route returns list of samples with sc data
|
|
7
|
+
this is due to the fact that sometimes not all samples in a dataset has sc data
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
export const api: any = {
|
|
11
|
+
endpoint: 'termdb/singlecellSamples',
|
|
12
|
+
methods: {
|
|
13
|
+
get: {
|
|
14
|
+
init,
|
|
15
|
+
request: {
|
|
16
|
+
typeId: 'TermdbSinglecellsamplesRequest'
|
|
17
|
+
},
|
|
18
|
+
response: {
|
|
19
|
+
typeId: 'TermdbSinglecellsamplesResponse'
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
post: {
|
|
23
|
+
alternativeFor: 'get',
|
|
24
|
+
init
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
function init({ genomes }) {
|
|
30
|
+
return async (req: any, res: any): Promise<void> => {
|
|
31
|
+
const q = req.query as TermdbSinglecellsamplesRequest
|
|
32
|
+
try {
|
|
33
|
+
const g = genomes[q.genome]
|
|
34
|
+
if (!g) throw 'invalid genome name'
|
|
35
|
+
const ds = g.datasets[q.dslabel]
|
|
36
|
+
if (!ds) throw 'invalid dataset name'
|
|
37
|
+
if (!ds.queries?.singleCell) throw 'no singlecell data on this dataset'
|
|
38
|
+
const result = (await ds.queries.singleCell.samples.get(q)) as TermdbSinglecellsamplesResponse
|
|
39
|
+
res.send(result)
|
|
40
|
+
} catch (e: any) {
|
|
41
|
+
if (e instanceof Error && e.stack) console.log(e)
|
|
42
|
+
res.send({ error: e.message || e })
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|