@sjcrh/proteinpaint-server 2.34.1-0 → 2.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/routes/termdb.singleSampleMutation.ts +87 -0
- package/routes/termdb.singlecellData.ts +8 -4
- package/routes/termdb.singlecellSamples.ts +28 -21
- package/routes/termdb.topVariablyExpressedGenes.ts +161 -0
- package/server.js +2 -1
- package/server.js.map +1 -0
- package/routes/gdc.topVariablyExpressedGenes.ts +0 -122
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.35.0",
|
|
4
4
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"bin": "start.js",
|
|
@@ -56,7 +56,7 @@
|
|
|
56
56
|
"webpack-notifier": "^1.15.0"
|
|
57
57
|
},
|
|
58
58
|
"dependencies": {
|
|
59
|
-
"@sjcrh/augen": "2.
|
|
59
|
+
"@sjcrh/augen": "2.35.0",
|
|
60
60
|
"@sjcrh/proteinpaint-rust": "2.34.0",
|
|
61
61
|
"better-sqlite3": "^7.5.3",
|
|
62
62
|
"body-parser": "^1.15.2",
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import fs from 'fs'
|
|
2
|
+
import path from 'path'
|
|
3
|
+
import { read_file } from '#src/utils.js'
|
|
4
|
+
import serverconfig from '#src/serverconfig.js'
|
|
5
|
+
import {
|
|
6
|
+
TermdbSingleSampleMutationRequest,
|
|
7
|
+
TermdbSingleSampleMutationResponse
|
|
8
|
+
} from '#shared/types/routes/termdb.singleSampleMutation.ts'
|
|
9
|
+
import { gdcValidate_query_singleSampleMutation } from '#src/mds3.gdc.js'
|
|
10
|
+
|
|
11
|
+
export const api: any = {
|
|
12
|
+
endpoint: 'termdb/singleSampleMutation',
|
|
13
|
+
methods: {
|
|
14
|
+
get: {
|
|
15
|
+
init,
|
|
16
|
+
request: {
|
|
17
|
+
typeId: 'TermdbSingleSampleMutationRequest'
|
|
18
|
+
},
|
|
19
|
+
response: {
|
|
20
|
+
typeId: 'TermdbSingleSampleMutationResponse'
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
function init({ genomes }) {
|
|
27
|
+
return async (req: any, res: any): Promise<void> => {
|
|
28
|
+
const q = req.query as TermdbSingleSampleMutationRequest
|
|
29
|
+
let result
|
|
30
|
+
try {
|
|
31
|
+
const g = genomes[q.genome]
|
|
32
|
+
if (!g) throw 'invalid genome name'
|
|
33
|
+
const ds = g.datasets[q.dslabel]
|
|
34
|
+
if (!ds) throw 'invalid dataset name'
|
|
35
|
+
if (!ds.queries?.singleSampleMutation) throw 'not supported on this dataset'
|
|
36
|
+
result = (await ds.queries.singleSampleMutation.get(q)) as TermdbSingleSampleMutationResponse
|
|
37
|
+
} catch (e: any) {
|
|
38
|
+
if (e.stack) console.log(e.stack)
|
|
39
|
+
result = {
|
|
40
|
+
status: e.status || 400,
|
|
41
|
+
error: e.message || e
|
|
42
|
+
} as TermdbSingleSampleMutationResponse
|
|
43
|
+
}
|
|
44
|
+
res.send(result)
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/////////////////// ds query validator
|
|
49
|
+
export async function validate_query_singleSampleMutation(ds: any, genome: any) {
|
|
50
|
+
const _q = ds.queries.singleSampleMutation
|
|
51
|
+
if (!_q) return
|
|
52
|
+
if (_q.src == 'gdcapi') {
|
|
53
|
+
gdcValidate_query_singleSampleMutation(ds, genome)
|
|
54
|
+
} else if (_q.src == 'native') {
|
|
55
|
+
// using a folder to store text files for individual samples
|
|
56
|
+
// file names are integer sample id
|
|
57
|
+
_q.get = async (q: TermdbSingleSampleMutationRequest) => {
|
|
58
|
+
/* as mds3 client may not be using integer sample id for now,
|
|
59
|
+
the argument is string id and has to be mapped to integer id
|
|
60
|
+
*/
|
|
61
|
+
let fileName = q.sample
|
|
62
|
+
if (ds.cohort?.termdb?.q?.sampleName2id) {
|
|
63
|
+
// has name-to-id converter
|
|
64
|
+
fileName = ds.cohort.termdb.q.sampleName2id(q.sample)
|
|
65
|
+
if (fileName == undefined) {
|
|
66
|
+
// unable to convert string id to integer
|
|
67
|
+
return []
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
const file = path.join(serverconfig.tpmasterdir, _q.folder, fileName.toString())
|
|
72
|
+
try {
|
|
73
|
+
await fs.promises.stat(file)
|
|
74
|
+
} catch (e: any) {
|
|
75
|
+
if (e.code == 'EACCES') throw 'cannot read file, permission denied'
|
|
76
|
+
if (e.code == 'ENOENT') throw 'no data for this sample'
|
|
77
|
+
throw 'failed to load data'
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const data = await read_file(file)
|
|
81
|
+
// object wraps around mlst[] so it's possible to add other attr e.g. total number of mutations that exceeds viewing limit
|
|
82
|
+
return { mlst: JSON.parse(data) }
|
|
83
|
+
}
|
|
84
|
+
} else {
|
|
85
|
+
throw 'unknown singleSampleMutation.src'
|
|
86
|
+
}
|
|
87
|
+
}
|
|
@@ -29,17 +29,21 @@ export const api: any = {
|
|
|
29
29
|
function init({ genomes }) {
|
|
30
30
|
return async (req: any, res: any): Promise<void> => {
|
|
31
31
|
const q = req.query as TermdbSinglecellDataRequest
|
|
32
|
+
let result
|
|
32
33
|
try {
|
|
33
34
|
const g = genomes[q.genome]
|
|
34
35
|
if (!g) throw 'invalid genome name'
|
|
35
36
|
const ds = g.datasets[q.dslabel]
|
|
36
37
|
if (!ds) throw 'invalid dataset name'
|
|
37
38
|
if (!ds.queries?.singleCell) throw 'no singlecell data on this dataset'
|
|
38
|
-
|
|
39
|
-
res.send(result)
|
|
39
|
+
result = (await ds.queries.singleCell.data.get(q)) as TermdbSinglecellDataResponse
|
|
40
40
|
} catch (e: any) {
|
|
41
|
-
if (e
|
|
42
|
-
|
|
41
|
+
if (e.stack) console.log(e)
|
|
42
|
+
result = {
|
|
43
|
+
status: e.status || 400,
|
|
44
|
+
error: e.message || e
|
|
45
|
+
} as TermdbSinglecellDataResponse
|
|
43
46
|
}
|
|
47
|
+
res.send(result)
|
|
44
48
|
}
|
|
45
49
|
}
|
|
@@ -14,13 +14,7 @@ import {
|
|
|
14
14
|
TermdbSinglecellsamplesRequest,
|
|
15
15
|
TermdbSinglecellsamplesResponse
|
|
16
16
|
} from '#shared/types/routes/termdb.singlecellSamples.ts'
|
|
17
|
-
import {
|
|
18
|
-
Cell,
|
|
19
|
-
Plot,
|
|
20
|
-
HasdataResponse,
|
|
21
|
-
NodataResponse,
|
|
22
|
-
ErrorResponse
|
|
23
|
-
} from '#shared/types/routes/termdb.singlecellData.ts'
|
|
17
|
+
import { Cell, Plot } from '#shared/types/routes/termdb.singlecellData.ts'
|
|
24
18
|
import { gdc_validate_query_singleCell_samples, gdc_validate_query_singleCell_data } from '#src/mds3.gdc.js'
|
|
25
19
|
|
|
26
20
|
/* route returns list of samples with sc data
|
|
@@ -49,18 +43,22 @@ export const api: any = {
|
|
|
49
43
|
function init({ genomes }) {
|
|
50
44
|
return async (req: any, res: any): Promise<void> => {
|
|
51
45
|
const q = req.query as TermdbSinglecellsamplesRequest
|
|
46
|
+
let result
|
|
52
47
|
try {
|
|
53
48
|
const g = genomes[q.genome]
|
|
54
49
|
if (!g) throw 'invalid genome name'
|
|
55
50
|
const ds = g.datasets[q.dslabel]
|
|
56
51
|
if (!ds) throw 'invalid dataset name'
|
|
57
52
|
if (!ds.queries?.singleCell) throw 'no singlecell data on this dataset'
|
|
58
|
-
|
|
59
|
-
res.send({ samples })
|
|
53
|
+
result = (await ds.queries.singleCell.samples.get(q)) as TermdbSinglecellsamplesResponse
|
|
60
54
|
} catch (e: any) {
|
|
61
|
-
if (e
|
|
62
|
-
|
|
55
|
+
if (e.stack) console.log(e.stack)
|
|
56
|
+
result = {
|
|
57
|
+
status: e.status || 400,
|
|
58
|
+
error: e.message || e
|
|
59
|
+
} as TermdbSinglecellsamplesResponse
|
|
63
60
|
}
|
|
61
|
+
res.send(result)
|
|
64
62
|
}
|
|
65
63
|
}
|
|
66
64
|
|
|
@@ -71,15 +69,19 @@ export async function validate_query_singleCell(ds: any, genome: any) {
|
|
|
71
69
|
|
|
72
70
|
if (q.samples.src == 'gdcapi') {
|
|
73
71
|
gdc_validate_query_singleCell_samples(ds, genome)
|
|
74
|
-
} else {
|
|
72
|
+
} else if (q.samples.src == 'native') {
|
|
75
73
|
validateSamplesNative(q.samples as SingleCellSamplesNative, ds)
|
|
74
|
+
} else {
|
|
75
|
+
throw 'unknown singleCell.samples.src'
|
|
76
76
|
}
|
|
77
77
|
// q.samples.get() added
|
|
78
78
|
|
|
79
79
|
if (q.data.src == 'gdcapi') {
|
|
80
80
|
gdc_validate_query_singleCell_data(ds, genome)
|
|
81
|
-
} else {
|
|
81
|
+
} else if (q.data.src == 'native') {
|
|
82
82
|
validateDataNative(q.data as SingleCellDataNative, ds)
|
|
83
|
+
} else {
|
|
84
|
+
throw 'unknown singleCell.data.src'
|
|
83
85
|
}
|
|
84
86
|
// q.data.get() added
|
|
85
87
|
}
|
|
@@ -94,7 +96,9 @@ function validateSamplesNative(S: SingleCellSamplesNative, ds: any) {
|
|
|
94
96
|
}
|
|
95
97
|
if (samples.length == 0) throw 'no sample with sc data'
|
|
96
98
|
// getter returns array of {sample:<samplename>, files:[]} where files is gdc specific. each sample is an obj and allows to add ds-specific stuff
|
|
97
|
-
S.get = () =>
|
|
99
|
+
S.get = () => {
|
|
100
|
+
return { samples, fields: ['sample'], columnNames: ['Sample'] }
|
|
101
|
+
}
|
|
98
102
|
}
|
|
99
103
|
|
|
100
104
|
function validateDataNative(D: SingleCellDataNative, ds: any) {
|
|
@@ -112,17 +116,22 @@ function validateDataNative(D: SingleCellDataNative, ds: any) {
|
|
|
112
116
|
const t = ds.cohort.termdb.q.termjsonByOneid(tid)
|
|
113
117
|
if (!t) throw 'invalid term id from queries.singleCell.data.termIds[]'
|
|
114
118
|
_terms.push(t)
|
|
115
|
-
_tid2cellvalue[tid] =
|
|
119
|
+
// _tid2cellvalue[tid] = {}
|
|
120
|
+
// const clusterMap = ds.cohort.termdb.q.getAllValues4term(tid)
|
|
121
|
+
// for(const [id, cluster] of clusterMap)
|
|
122
|
+
// {
|
|
123
|
+
// const name = ds.cohort.termdb.q.id2sampleName(id)
|
|
124
|
+
// _tid2cellvalue[tid][name] = cluster
|
|
125
|
+
// }
|
|
116
126
|
}
|
|
117
|
-
D.get = async
|
|
127
|
+
D.get = async q => {
|
|
118
128
|
// if sample is int, may convert to string
|
|
119
129
|
try {
|
|
120
130
|
const tid2cellvalue = {}
|
|
121
131
|
for (const tid of D.termIds) tid2cellvalue[tid] = {} // k: cell id, v: cell value for this term
|
|
122
|
-
|
|
123
132
|
const plots = [] as Plot[] // given a sample name, collect every plot data for this sample and return
|
|
124
133
|
for (const plot of D.plots) {
|
|
125
|
-
const tsvfile = path.join(serverconfig.tpmasterdir, plot.folder, sample
|
|
134
|
+
const tsvfile = path.join(serverconfig.tpmasterdir, plot.folder, q.sample + plot.fileSuffix)
|
|
126
135
|
try {
|
|
127
136
|
await fs.promises.stat(tsvfile)
|
|
128
137
|
} catch (e: any) {
|
|
@@ -147,9 +156,7 @@ function validateDataNative(D: SingleCellDataNative, ds: any) {
|
|
|
147
156
|
cells.push({ cellId, x, y })
|
|
148
157
|
|
|
149
158
|
for (const tid of D.termIds) {
|
|
150
|
-
|
|
151
|
-
tid2cellvalue[tid][cellId] = _tid2cellvalue[tid].get(cellId)
|
|
152
|
-
}
|
|
159
|
+
tid2cellvalue[tid][cellId] = l[1]
|
|
153
160
|
}
|
|
154
161
|
}
|
|
155
162
|
plots.push({ name: plot.name, cells })
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import {
|
|
2
|
+
TermdbTopVariablyExpressedGenesRequest,
|
|
3
|
+
TermdbTopVariablyExpressedGenesResponse
|
|
4
|
+
} from '#shared/types/routes/termdb.topVariablyExpressedGenes.ts'
|
|
5
|
+
import { gdcGetCasesWithExressionDataFromCohort, apihost, geneExpHost } from '../src/mds3.gdc.js'
|
|
6
|
+
import path from 'path'
|
|
7
|
+
import got from 'got'
|
|
8
|
+
import serverconfig from '#src/serverconfig.js'
|
|
9
|
+
|
|
10
|
+
export const api = {
|
|
11
|
+
endpoint: 'termdb/topVariablyExpressedGenes',
|
|
12
|
+
methods: {
|
|
13
|
+
get: {
|
|
14
|
+
init,
|
|
15
|
+
request: {
|
|
16
|
+
typeId: 'TermdbTopVariablyExpressedGenesRequest'
|
|
17
|
+
},
|
|
18
|
+
response: {
|
|
19
|
+
typeId: 'TermdbTopVariablyExpressedGenesResponse'
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
function init({ genomes }) {
|
|
26
|
+
return async (req: any, res: any): Promise<void> => {
|
|
27
|
+
try {
|
|
28
|
+
const q = req.query as TermdbTopVariablyExpressedGenesRequest
|
|
29
|
+
const genome = genomes[q.genome]
|
|
30
|
+
if (!genome) throw 'invalid genome'
|
|
31
|
+
const ds = genome.datasets?.[q.dslabel]
|
|
32
|
+
if (!ds) throw 'invalid dslabel'
|
|
33
|
+
if (!ds.queries?.topVariablyExpressedGenes) throw 'not supported on dataset'
|
|
34
|
+
const genes = await ds.queries.topVariablyExpressedGenes.getGenes(q)
|
|
35
|
+
res.send({ genes } as TermdbTopVariablyExpressedGenesResponse)
|
|
36
|
+
} catch (e: any) {
|
|
37
|
+
res.send({ status: 'error', error: e.message || e })
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export function validate_query_TopVariablyExpressedGenes(ds: any, genome: any) {
|
|
43
|
+
const q = ds.queries.topVariablyExpressedGenes
|
|
44
|
+
if (!q) return
|
|
45
|
+
if (q.src == 'gdcapi') {
|
|
46
|
+
gdcValidateQuery(ds, genome)
|
|
47
|
+
} else if (q.src == 'native') {
|
|
48
|
+
nativeValidateQuery(ds, genome)
|
|
49
|
+
} else {
|
|
50
|
+
throw 'unknown topVariablyExpressedGenes.src'
|
|
51
|
+
}
|
|
52
|
+
// added getter: q.getGenes()
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function nativeValidateQuery(ds: any, genome: any) {
|
|
56
|
+
ds.queries.topVariablyExpressedGenes.getGenes = async (
|
|
57
|
+
q: TermdbTopVariablyExpressedGenesRequest,
|
|
58
|
+
ds: any,
|
|
59
|
+
genome: any
|
|
60
|
+
) => {
|
|
61
|
+
// get list of samples that are used in current analysis
|
|
62
|
+
const samples = [] as string[]
|
|
63
|
+
// call rust to compute top genes on these samples
|
|
64
|
+
const genes = await computeGenes4nativeDs(q, ds, samples)
|
|
65
|
+
return genes
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
async function computeGenes4nativeDs(q: TermdbTopVariablyExpressedGenesRequest, ds: any, samples: string[]) {
|
|
70
|
+
return []
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
function gdcValidateQuery(ds: any, genome: any) {
|
|
74
|
+
ds.queries.topVariablyExpressedGenes.getGenes = async (q: TermdbTopVariablyExpressedGenesRequest) => {
|
|
75
|
+
if (serverconfig.features.gdcGenes) {
|
|
76
|
+
// for testing on dev, must not set on prod!! delete to trigger api query
|
|
77
|
+
console.log('!!GDC!! using serverconfig.features.gdcGenes[]')
|
|
78
|
+
return serverconfig.features.gdcGenes as string[]
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// disable when caching is incomplete (particularly cases with gene exp data); to prevent showing wrong data on client
|
|
82
|
+
if (!ds.__gdc.doneCaching) throw 'The server has not finished caching the case IDs: try again in ~2 minutes'
|
|
83
|
+
|
|
84
|
+
// based on current cohort, get list of cases with exp data, as input of next api query
|
|
85
|
+
const caseLst = await gdcGetCasesWithExressionDataFromCohort(q, ds)
|
|
86
|
+
if (caseLst.length == 0) {
|
|
87
|
+
// there are no cases with gene exp data
|
|
88
|
+
return [] as string[]
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// change to this when api is available on prod
|
|
92
|
+
const url = path.join(geneExpHost, '/gene_expression/gene_selection')
|
|
93
|
+
|
|
94
|
+
try {
|
|
95
|
+
const response = await got.post(url, {
|
|
96
|
+
headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
|
|
97
|
+
body: JSON.stringify(getGeneSelectionArg(q, caseLst))
|
|
98
|
+
})
|
|
99
|
+
|
|
100
|
+
const re = JSON.parse(response.body)
|
|
101
|
+
// {"gene_selection":[{"gene_id":"ENSG00000141510","log2_uqfpkm_median":3.103430497010492,"log2_uqfpkm_stddev":0.8692021350485105,"symbol":"TP53"}, ... ]}
|
|
102
|
+
|
|
103
|
+
const genes = [] as string[]
|
|
104
|
+
if (!Array.isArray(re.gene_selection)) throw 're.gene_selection[] is not array'
|
|
105
|
+
for (const i of re.gene_selection) {
|
|
106
|
+
if (i.gene_id && typeof i.gene_id == 'string') {
|
|
107
|
+
// is ensg, convert to symbol
|
|
108
|
+
const t = genome.genedb.getNameByAlias.get(i.gene_id)
|
|
109
|
+
if (t) genes.push(t.name) // ensg
|
|
110
|
+
} else if (i.symbol && typeof i.symbol == 'string') {
|
|
111
|
+
genes.push(i.symbol)
|
|
112
|
+
} else {
|
|
113
|
+
throw 'one of re.gene_selection[] is missing both gene_id and symbol'
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
return genes
|
|
117
|
+
} catch (e: any) {
|
|
118
|
+
console.log(e.stack || e)
|
|
119
|
+
throw e
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
function getGeneSelectionArg(q: any, caseLst: any) {
|
|
124
|
+
//to hide messy logic during testing phase
|
|
125
|
+
|
|
126
|
+
/* when api performance issue is resolved, return this
|
|
127
|
+
return {
|
|
128
|
+
case_ids: caseLst,
|
|
129
|
+
gene_type:'protein_coding',
|
|
130
|
+
selection_size: Number(q.maxGenes)
|
|
131
|
+
}
|
|
132
|
+
*/
|
|
133
|
+
|
|
134
|
+
//////////////////////////////////////////////////
|
|
135
|
+
//
|
|
136
|
+
// !!!!!!!!!!!!!!!! TEMPORARY !!!!!!!!!!!!!!!!!!!!
|
|
137
|
+
//
|
|
138
|
+
//////////////////////////////////////////////////
|
|
139
|
+
// limit the case_ids length, and restrict pool to CGC genes, otherwise the request times out !!!
|
|
140
|
+
// must revert asap
|
|
141
|
+
return {
|
|
142
|
+
case_ids: caseLst.slice(0, 20),
|
|
143
|
+
gene_ids: tempGetCGCgenes(genome),
|
|
144
|
+
selection_size: Number(q.maxGenes)
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
function tempGetCGCgenes(genome: any) {
|
|
150
|
+
const lst = [] as string[] // list of ENSG ids from cgc genes
|
|
151
|
+
// don't think there's need to preparse genome.geneset, as this function is only temporary
|
|
152
|
+
for (const s of genome.geneset[0].lst) {
|
|
153
|
+
const a = genome.genedb.getAliasByName.all(s)
|
|
154
|
+
if (a) {
|
|
155
|
+
for (const b of a) {
|
|
156
|
+
if (b.alias.startsWith('ENSG')) lst.push(b.alias)
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
return lst
|
|
161
|
+
}
|