@sjcrh/proteinpaint-server 2.31.0 → 2.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/genome/hg38.js +2 -2
- package/package.json +4 -3
- package/routes/gdc.mafBuild.ts +12 -10
- package/routes/gdc.topVariablyExpressedGenes.ts +50 -35
- package/routes/termdb.singlecellData.ts +45 -0
- package/routes/termdb.singlecellSamples.ts +124 -2
- package/server.js +1 -1
package/genome/hg38.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.33.0",
|
|
4
4
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"bin": "start.js",
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
"pretest:integration": "tsc --esModuleInterop genome/*.ts dataset/*.ts",
|
|
26
26
|
"test:integration": "echo 'TODO: server integration tests'",
|
|
27
27
|
"prepack": "tsc --esModuleInterop genome/*.ts dataset/*.ts && webpack --env NODE_ENV=production",
|
|
28
|
+
"test:tsc": "tsc --esModuleInterop --noEmit --allowImportingTsExtensions ./shared/types/test/*.type.spec.ts",
|
|
28
29
|
"response": "nodemon modules/test/test.server.js --watch src",
|
|
29
30
|
"getconf": "../build/getConfigProp.js",
|
|
30
31
|
"doc": "../augen/build.sh routes shared/types/routes shared/checkers ../public/docs/server"
|
|
@@ -55,8 +56,8 @@
|
|
|
55
56
|
"webpack-notifier": "^1.15.0"
|
|
56
57
|
},
|
|
57
58
|
"dependencies": {
|
|
58
|
-
"@sjcrh/augen": "2.
|
|
59
|
-
"@sjcrh/proteinpaint-rust": "2.
|
|
59
|
+
"@sjcrh/augen": "2.32.2-0",
|
|
60
|
+
"@sjcrh/proteinpaint-rust": "2.33.0",
|
|
60
61
|
"better-sqlite3": "^7.5.3",
|
|
61
62
|
"body-parser": "^1.15.2",
|
|
62
63
|
"canvas": "~2.9.3",
|
package/routes/gdc.mafBuild.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import got from 'got'
|
|
2
2
|
import path from 'path'
|
|
3
3
|
import fs from 'fs'
|
|
4
|
-
import {
|
|
4
|
+
import { run_rust_stream } from '@sjcrh/proteinpaint-rust'
|
|
5
5
|
import serverconfig from '#src/serverconfig.js'
|
|
6
6
|
import Readable from 'stream'
|
|
7
7
|
import { GdcMafBuildRequest } from '#shared/types/routes/gdc.mafBuild.ts'
|
|
@@ -50,19 +50,21 @@ async function buildMaf(q: GdcMafBuildRequest, res: any) {
|
|
|
50
50
|
host: path.join(apihost, 'data') // must use the /data/ endpoint from current host
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
-
const
|
|
53
|
+
const rustStream = run_rust_stream('gdcmaf', JSON.stringify(arg))
|
|
54
|
+
res.setHeader('Content-Type', 'application/octet-stream')
|
|
55
|
+
res.setHeader('Content-Disposition', 'attachment; filename=cohort.maf.gz')
|
|
56
|
+
rustStream.pipe(res)
|
|
54
57
|
|
|
55
58
|
console.log('rust gdcmaf', Date.now() - t0)
|
|
56
59
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
'
|
|
60
|
+
rustStream.on('end', () => {
|
|
61
|
+
res.end()
|
|
62
|
+
})
|
|
63
|
+
rustStream.on('error', err => {
|
|
64
|
+
console.error(err)
|
|
65
|
+
res.statusCode = 500
|
|
66
|
+
res.end('Internal Server Error')
|
|
64
67
|
})
|
|
65
|
-
res.end(Buffer.from(data, 'binary'))
|
|
66
68
|
}
|
|
67
69
|
|
|
68
70
|
/*
|
|
@@ -1,15 +1,21 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
GdcTopVariablyExpressedGenesRequest,
|
|
3
|
+
GdcTopVariablyExpressedGenesResponse
|
|
4
|
+
} from '#shared/types/routes/gdc.topVariablyExpressedGenes.ts'
|
|
2
5
|
import { getCasesWithExressionDataFromCohort } from '../src/mds3.gdc.js'
|
|
3
6
|
import path from 'path'
|
|
4
7
|
import got from 'got'
|
|
5
8
|
import serverconfig from '#src/serverconfig.js'
|
|
6
9
|
|
|
10
|
+
// TODO make it general purpose based on ds.queries.geneExpression.topVariablyExpressedGenes{}; wait till case/gene link changes are done
|
|
11
|
+
|
|
7
12
|
// TODO change when api is released to prod
|
|
8
13
|
//const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
|
|
9
14
|
const apihost = 'https://uat-portal.gdc.cancer.gov/auth/api/v0/gene_expression/gene_selection'
|
|
10
15
|
// temporarily hardcode to use the direct API URL,
|
|
11
16
|
// previously hardcoded to use 'https://uat-portal.gdc.cancer.gov/auth/api/v0/'
|
|
12
17
|
const geneExpHost = 'https://uat-api.gdc.cancer.gov'
|
|
18
|
+
// https://github.com/NCI-GDC/gdcapi/blob/develop/openapi/gene-expression.yaml
|
|
13
19
|
|
|
14
20
|
const gdcGenome = 'hg38'
|
|
15
21
|
const gdcDslabel = 'GDC'
|
|
@@ -18,47 +24,37 @@ export const api = {
|
|
|
18
24
|
endpoint: 'gdc/topVariablyExpressedGenes',
|
|
19
25
|
methods: {
|
|
20
26
|
get: {
|
|
21
|
-
init
|
|
22
|
-
return async (req: any, res: any): Promise<void> => {
|
|
23
|
-
try {
|
|
24
|
-
// following logic requires hg38 gdc dataset
|
|
25
|
-
const genome = genomes[gdcGenome]
|
|
26
|
-
if (!genome) throw 'hg38 genome missing'
|
|
27
|
-
const ds = genome.datasets?.[gdcDslabel]
|
|
28
|
-
if (!ds) throw 'gdc dataset missing'
|
|
29
|
-
const genes = await getGenes(req.query, ds, genome)
|
|
30
|
-
const payload = { genes } as GdcTopVariablyExpressedGenesResponse
|
|
31
|
-
res.send(payload)
|
|
32
|
-
} catch (e: any) {
|
|
33
|
-
res.send({ status: 'error', error: e.message || e })
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
},
|
|
27
|
+
init,
|
|
37
28
|
request: {
|
|
38
|
-
typeId:
|
|
39
|
-
//valid: default to type checker
|
|
29
|
+
typeId: 'GdcTopVariablyExpressedGenesRequest'
|
|
40
30
|
},
|
|
41
31
|
response: {
|
|
42
32
|
typeId: 'GdcTopVariablyExpressedGenesResponse'
|
|
43
|
-
// will combine this with type checker
|
|
44
|
-
//valid: (t) => {}
|
|
45
33
|
}
|
|
46
34
|
}
|
|
47
35
|
}
|
|
48
36
|
}
|
|
49
37
|
|
|
50
|
-
|
|
51
|
-
req
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
38
|
+
function init({ genomes }) {
|
|
39
|
+
return async (req: any, res: any): Promise<void> => {
|
|
40
|
+
try {
|
|
41
|
+
// following logic requires hg38 gdc dataset
|
|
42
|
+
const genome = genomes[gdcGenome]
|
|
43
|
+
if (!genome) throw 'hg38 genome missing'
|
|
44
|
+
const ds = genome.datasets?.[gdcDslabel]
|
|
45
|
+
if (!ds) throw 'gdc dataset missing'
|
|
46
|
+
const genes = await getGenes(req.query as GdcTopVariablyExpressedGenesRequest, ds, genome)
|
|
47
|
+
const payload = { genes } as GdcTopVariablyExpressedGenesResponse
|
|
48
|
+
res.send(payload)
|
|
49
|
+
} catch (e: any) {
|
|
50
|
+
res.send({ status: 'error', error: e.message || e })
|
|
51
|
+
}
|
|
52
|
+
}
|
|
55
53
|
}
|
|
56
54
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
genome {
|
|
60
|
-
*/
|
|
61
|
-
async function getGenes(q: any, ds: any, genome: any) {
|
|
55
|
+
/*
|
|
56
|
+
*/
|
|
57
|
+
async function getGenes(q: GdcTopVariablyExpressedGenesRequest, ds: any, genome: any) {
|
|
62
58
|
if (serverconfig.features.gdcGenes) {
|
|
63
59
|
// for testing only; delete when api issue is resolved
|
|
64
60
|
return serverconfig.features.gdcGenes as string[]
|
|
@@ -83,9 +79,14 @@ async function getGenes(q: any, ds: any, genome: any) {
|
|
|
83
79
|
body: JSON.stringify({
|
|
84
80
|
// !!! temporarily limit the case_ids length, otherwise the request times out !!!
|
|
85
81
|
case_ids: caseLst.slice(0, 20),
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
82
|
+
|
|
83
|
+
// temporary!! restrict pool to cgc due to slow api. delete when new api is online
|
|
84
|
+
gene_ids: tempGetCGCgenes(genome),
|
|
85
|
+
|
|
86
|
+
// when gene_ids is deleted, enable this
|
|
87
|
+
//gene_type: 'protein_coding',
|
|
88
|
+
|
|
89
|
+
selection_size: Number(q.maxGenes) // FIXME it's defined as number but why it's string??
|
|
89
90
|
})
|
|
90
91
|
})
|
|
91
92
|
|
|
@@ -106,8 +107,22 @@ async function getGenes(q: any, ds: any, genome: any) {
|
|
|
106
107
|
}
|
|
107
108
|
}
|
|
108
109
|
return genes
|
|
109
|
-
} catch (e) {
|
|
110
|
+
} catch (e: any) {
|
|
110
111
|
console.log(e.stack || e)
|
|
111
112
|
throw e
|
|
112
113
|
}
|
|
113
114
|
}
|
|
115
|
+
|
|
116
|
+
function tempGetCGCgenes(genome: any) {
|
|
117
|
+
const lst = [] as string[] // list of ENSG ids from cgc genes
|
|
118
|
+
// don't think there's need to preparse genome.geneset, as this function is only temporary
|
|
119
|
+
for (const s of genome.geneset[0].lst) {
|
|
120
|
+
const a = genome.genedb.getAliasByName.all(s)
|
|
121
|
+
if (a) {
|
|
122
|
+
for (const b of a) {
|
|
123
|
+
if (b.alias.startsWith('ENSG')) lst.push(b.alias)
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
return lst
|
|
128
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import {
|
|
2
|
+
TermdbSinglecellDataRequest,
|
|
3
|
+
TermdbSinglecellDataResponse
|
|
4
|
+
} from '#shared/types/routes/termdb.singlecellData.ts'
|
|
5
|
+
|
|
6
|
+
/*
|
|
7
|
+
given a sample, return its singlecell data from the dataset
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
export const api: any = {
|
|
11
|
+
endpoint: 'termdb/singlecellData',
|
|
12
|
+
methods: {
|
|
13
|
+
get: {
|
|
14
|
+
init,
|
|
15
|
+
request: {
|
|
16
|
+
typeId: 'TermdbSinglecellDataRequest'
|
|
17
|
+
},
|
|
18
|
+
response: {
|
|
19
|
+
typeId: 'TermdbSinglecellDataResponse'
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
post: {
|
|
23
|
+
alternativeFor: 'get',
|
|
24
|
+
init
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
function init({ genomes }) {
|
|
30
|
+
return async (req: any, res: any): Promise<void> => {
|
|
31
|
+
const q = req.query as TermdbSinglecellDataRequest
|
|
32
|
+
try {
|
|
33
|
+
const g = genomes[q.genome]
|
|
34
|
+
if (!g) throw 'invalid genome name'
|
|
35
|
+
const ds = g.datasets[q.dslabel]
|
|
36
|
+
if (!ds) throw 'invalid dataset name'
|
|
37
|
+
if (!ds.queries?.singleCell) throw 'no singlecell data on this dataset'
|
|
38
|
+
const result = (await ds.queries.singleCell.data.get(q)) as TermdbSinglecellDataResponse
|
|
39
|
+
res.send(result)
|
|
40
|
+
} catch (e: any) {
|
|
41
|
+
if (e instanceof Error && e.stack) console.log(e)
|
|
42
|
+
res.send({ error: e.message || e })
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
@@ -1,7 +1,27 @@
|
|
|
1
|
+
import fs from 'fs'
|
|
2
|
+
import path from 'path'
|
|
3
|
+
import { read_file } from '#src/utils.js'
|
|
4
|
+
import serverconfig from '#src/serverconfig.js'
|
|
1
5
|
import {
|
|
6
|
+
SingleCellQuery,
|
|
7
|
+
SingleCellSamplesNative,
|
|
8
|
+
SingleCellSamplesGdc,
|
|
9
|
+
SingleCellDataNative,
|
|
10
|
+
SingleCellDataGdc
|
|
11
|
+
} from '#shared/types/dataset.ts'
|
|
12
|
+
import {
|
|
13
|
+
Sample,
|
|
2
14
|
TermdbSinglecellsamplesRequest,
|
|
3
15
|
TermdbSinglecellsamplesResponse
|
|
4
16
|
} from '#shared/types/routes/termdb.singlecellSamples.ts'
|
|
17
|
+
import {
|
|
18
|
+
Cell,
|
|
19
|
+
Plot,
|
|
20
|
+
HasdataResponse,
|
|
21
|
+
NodataResponse,
|
|
22
|
+
ErrorResponse
|
|
23
|
+
} from '#shared/types/routes/termdb.singlecellData.ts'
|
|
24
|
+
import { gdc_validate_query_singleCell_samples, gdc_validate_query_singleCell_data } from '#src/mds3.gdc.js'
|
|
5
25
|
|
|
6
26
|
/* route returns list of samples with sc data
|
|
7
27
|
this is because sometimes not all samples in a dataset have sc data
|
|
@@ -35,11 +55,113 @@ function init({ genomes }) {
|
|
|
35
55
|
const ds = g.datasets[q.dslabel]
|
|
36
56
|
if (!ds) throw 'invalid dataset name'
|
|
37
57
|
if (!ds.queries?.singleCell) throw 'no singlecell data on this dataset'
|
|
38
|
-
const
|
|
39
|
-
res.send(
|
|
58
|
+
const samples = (await ds.queries.singleCell.samples.get(q)) as TermdbSinglecellsamplesResponse
|
|
59
|
+
res.send({ samples })
|
|
40
60
|
} catch (e: any) {
|
|
41
61
|
if (e instanceof Error && e.stack) console.log(e)
|
|
42
62
|
res.send({ error: e.message || e })
|
|
43
63
|
}
|
|
44
64
|
}
|
|
45
65
|
}
|
|
66
|
+
|
|
67
|
+
/////////////////// ds query validator
|
|
68
|
+
export async function validate_query_singleCell(ds: any, genome: any) {
|
|
69
|
+
const q = ds.queries.singleCell as SingleCellQuery
|
|
70
|
+
if (!q) return
|
|
71
|
+
|
|
72
|
+
if (q.samples.src == 'gdcapi') {
|
|
73
|
+
gdc_validate_query_singleCell_samples(ds, genome)
|
|
74
|
+
} else {
|
|
75
|
+
validateSamplesNative(q.samples as SingleCellSamplesNative, ds)
|
|
76
|
+
}
|
|
77
|
+
// q.samples.get() added
|
|
78
|
+
|
|
79
|
+
if (q.data.src == 'gdcapi') {
|
|
80
|
+
gdc_validate_query_singleCell_data(ds, genome)
|
|
81
|
+
} else {
|
|
82
|
+
validateDataNative(q.data as SingleCellDataNative, ds)
|
|
83
|
+
}
|
|
84
|
+
// q.data.get() added
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
function validateSamplesNative(S: SingleCellSamplesNative, ds: any) {
|
|
88
|
+
// for now use this quick fix method to pull sample ids annotated by this term
|
|
89
|
+
// to support the situation where not all samples from a dataset have sc data
|
|
90
|
+
const samples = [] as Sample[] // list of sample ids with sc data
|
|
91
|
+
const s = ds.cohort.termdb.q.getAllValues4term(S.isSampleTerm)
|
|
92
|
+
for (const id of s.keys()) {
|
|
93
|
+
samples.push({ sample: ds.cohort.termdb.q.id2sampleName(id) })
|
|
94
|
+
}
|
|
95
|
+
if (samples.length == 0) throw 'no sample with sc data'
|
|
96
|
+
// getter returns array of {sample:<samplename>, files:[]} where files is gdc-specific. each sample is an object, which allows adding ds-specific properties
|
|
97
|
+
S.get = () => samples
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
function validateDataNative(D: SingleCellDataNative, ds: any) {
|
|
101
|
+
const nameSet = new Set() // guard against duplicating plot names
|
|
102
|
+
for (const plot of D.plots) {
|
|
103
|
+
if (nameSet.has(plot.name)) throw 'duplicate plot.name'
|
|
104
|
+
nameSet.add(plot.name)
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// scoped and cached for runtime
|
|
108
|
+
const _terms = [] as any
|
|
109
|
+
const _tid2cellvalue = {} as any
|
|
110
|
+
|
|
111
|
+
for (const tid of D.termIds) {
|
|
112
|
+
const t = ds.cohort.termdb.q.termjsonByOneid(tid)
|
|
113
|
+
if (!t) throw 'invalid term id from queries.singleCell.data.termIds[]'
|
|
114
|
+
_terms.push(t)
|
|
115
|
+
_tid2cellvalue[tid] = ds.cohort.termdb.q.getAllValues4term(tid)
|
|
116
|
+
}
|
|
117
|
+
D.get = async sample => {
|
|
118
|
+
// if sample is int, may convert to string
|
|
119
|
+
try {
|
|
120
|
+
const tid2cellvalue = {}
|
|
121
|
+
for (const tid of D.termIds) tid2cellvalue[tid] = {} // k: cell id, v: cell value for this term
|
|
122
|
+
|
|
123
|
+
const plots = [] as Plot[] // given a sample name, collect every plot data for this sample and return
|
|
124
|
+
for (const plot of D.plots) {
|
|
125
|
+
const tsvfile = path.join(serverconfig.tpmasterdir, plot.folder, sample, plot.fileSuffix)
|
|
126
|
+
try {
|
|
127
|
+
await fs.promises.stat(tsvfile)
|
|
128
|
+
} catch (e: any) {
|
|
129
|
+
if (e.code == 'ENOENT') {
|
|
130
|
+
// no file found for this sample; allowed because sampleView tests if that sample has sc data or not
|
|
131
|
+
continue
|
|
132
|
+
}
|
|
133
|
+
if (e.code == 'EACCES') throw 'cannot read file, permission denied'
|
|
134
|
+
throw 'failed to load sc data file'
|
|
135
|
+
}
|
|
136
|
+
const lines = (await read_file(tsvfile)).trim().split('\n')
|
|
137
|
+
// 1st line is header
|
|
138
|
+
const cells = [] as Cell[]
|
|
139
|
+
for (let i = 1; i < lines.length; i++) {
|
|
140
|
+
// each line is a cell
|
|
141
|
+
const l = lines[i].split('\t')
|
|
142
|
+
const cellId = l[0],
|
|
143
|
+
x = Number(l[4]), // FIXME standardize, or define idx in plot
|
|
144
|
+
y = Number(l[5])
|
|
145
|
+
if (!cellId) throw 'cell id missing'
|
|
146
|
+
if (!Number.isFinite(x) || !Number.isFinite(y)) throw 'x/y not number'
|
|
147
|
+
cells.push({ cellId, x, y })
|
|
148
|
+
|
|
149
|
+
for (const tid of D.termIds) {
|
|
150
|
+
if (_tid2cellvalue[tid].has(cellId)) {
|
|
151
|
+
tid2cellvalue[tid][cellId] = _tid2cellvalue[tid].get(cellId)
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
plots.push({ name: plot.name, cells })
|
|
156
|
+
}
|
|
157
|
+
if (plots.length == 0) {
|
|
158
|
+
// no data available for this sample
|
|
159
|
+
return { nodata: true }
|
|
160
|
+
}
|
|
161
|
+
return { plots, terms: _terms, tid2cellvalue }
|
|
162
|
+
} catch (e: any) {
|
|
163
|
+
if (e.stack) console.log(e.stack)
|
|
164
|
+
return { error: e.message || e }
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}
|