@sjcrh/proteinpaint-server 2.34.1-0 → 2.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/routes/termdb.singleSampleMutation.ts +87 -0
- package/routes/termdb.singlecellData.ts +8 -4
- package/routes/termdb.singlecellSamples.ts +28 -21
- package/routes/termdb.topVariablyExpressedGenes.ts +161 -0
- package/server.js +2 -1
- package/server.js.map +1 -0
- package/routes/gdc.topVariablyExpressedGenes.ts +0 -122
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
GdcTopVariablyExpressedGenesRequest,
|
|
3
|
-
GdcTopVariablyExpressedGenesResponse
|
|
4
|
-
} from '#shared/types/routes/gdc.topVariablyExpressedGenes.ts'
|
|
5
|
-
import { getCasesWithExressionDataFromCohort } from '../src/mds3.gdc.js'
|
|
6
|
-
import path from 'path'
|
|
7
|
-
import got from 'got'
|
|
8
|
-
import serverconfig from '#src/serverconfig.js'
|
|
9
|
-
|
|
10
|
-
// TODO make it general purpose based on ds.queries.geneExpression.topVariablyExpressedGenes{}; wait till case/gene link changes are done
|
|
11
|
-
// https://github.com/NCI-GDC/gdcapi/blob/develop/openapi/gene-expression.yaml
|
|
12
|
-
// Base URL of the GDC API; the PP_GDC_HOST environment variable overrides the public default.
const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'

// Keys used to look up the GDC dataset: genomes[gdcGenome].datasets[gdcDslabel].
const gdcGenome = 'hg38',
	gdcDslabel = 'GDC'
|
|
16
|
-
|
|
17
|
-
/**
 * Route descriptor for the `gdc/topVariablyExpressedGenes` endpoint.
 * Declares a single GET method whose handler is produced by `init`, together
 * with the type ids of its request and response payloads.
 */
export const api = {
	endpoint: 'gdc/topVariablyExpressedGenes',
	methods: {
		get: {
			init: init,
			request: { typeId: 'GdcTopVariablyExpressedGenesRequest' },
			response: { typeId: 'GdcTopVariablyExpressedGenesResponse' }
		}
	}
}
|
|
31
|
-
|
|
32
|
-
/**
 * Creates the request handler for this route.
 *
 * The handler looks up the hg38 genome and its GDC dataset in `genomes`,
 * asks `getGenes` for the gene list matching `req.query`, and sends
 * `{ genes }` back. Any thrown value (string or Error) is reported to the
 * client as `{ status: 'error', error }` instead of propagating.
 *
 * @param genomes - map of genome name to genome object, as supplied by the caller
 * @returns an async (req, res) handler
 */
function init({ genomes }) {
	return async (req: any, res: any): Promise<void> => {
		try {
			// this route only works against the hg38 GDC dataset
			const hg38 = genomes[gdcGenome]
			if (!hg38) throw 'hg38 genome missing'

			const gdcDataset = hg38.datasets?.[gdcDslabel]
			if (!gdcDataset) throw 'gdc dataset missing'

			const query = req.query as GdcTopVariablyExpressedGenesRequest
			const genes = await getGenes(query, gdcDataset, hg38)
			res.send({ genes } as GdcTopVariablyExpressedGenesResponse)
		} catch (err: any) {
			// thrown values may be plain strings or Error objects
			const error = err.message || err
			res.send({ status: 'error', error })
		}
	}
}
|
|
48
|
-
|
|
49
|
-
/*
|
|
50
|
-
*/
|
|
51
|
-
/**
 * Returns the gene names reported by the GDC gene-selection API for the
 * cohort described by `q`.
 *
 * Steps:
 * 1. If `serverconfig.features.gdcGenes` is set, return it directly (testing override).
 * 2. Refuse to run until the dataset has finished caching case ids.
 * 3. Resolve the cohort to the cases that have expression data; an empty list yields `[]`.
 * 4. POST the case ids to `<apihost>/gene_expression/gene_selection` and map each
 *    returned entry to a gene name.
 *
 * @param q - request parameters; `q.maxGenes` is coerced with `Number()` before sending
 * @param ds - the GDC dataset object; `ds.__gdc.doneCaching` must be truthy
 * @param genome - genome object providing `genedb` lookups and the gene set used to restrict the pool
 * @returns list of gene names
 * @throws a string when caching is unfinished or the API response is malformed;
 *         rethrows any request/parse error after logging it
 */
async function getGenes(q: GdcTopVariablyExpressedGenesRequest, ds: any, genome: any) {
	if (serverconfig.features.gdcGenes) {
		// for testing only; delete when api issue is resolved
		return serverconfig.features.gdcGenes as string[]
	}
	if (!ds.__gdc.doneCaching) {
		throw `The server has not finished caching the case IDs: try again in ~2 minutes`
	}

	// based on current cohort, get list of cases with exp data, as input of next api query
	const caseLst = await getCasesWithExressionDataFromCohort(q, ds)
	if (caseLst.length == 0) {
		// there are no cases with gene exp data
		return [] as string[]
	}

	// Build the URL by string concatenation rather than path.join(): path.join() is meant
	// for filesystem paths, collapses "https://" into "https:/" and uses "\" separators on
	// Windows. Trailing slashes on apihost are trimmed so the result has exactly one "/".
	const url = apihost.replace(/\/+$/, '') + '/gene_expression/gene_selection'

	try {
		const response = await got.post(url, {
			headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
			body: JSON.stringify({
				// !!! temporarily limit the case_ids length, otherwise the request times out !!!
				case_ids: caseLst.slice(0, 20),

				// temporary!! restrict pool to cgc due to slow api. delete when new api is online
				gene_ids: tempGetCGCgenes(genome),

				// when gene_ids is deleted, enable this
				//gene_type: 'protein_coding',

				// FIXME maxGenes is typed as number but arrives as a string, hence the coercion
				selection_size: Number(q.maxGenes)
			})
		})

		const re = JSON.parse(response.body)
		// {"gene_selection":[{"gene_id":"ENSG00000141510","log2_uqfpkm_median":3.103430497010492,"log2_uqfpkm_stddev":0.8692021350485105,"symbol":"TP53"}, ... ]}

		const genes = [] as string[]
		if (!Array.isArray(re.gene_selection)) throw 're.gene_selection[] is not array'
		for (const i of re.gene_selection) {
			if (i.gene_id && typeof i.gene_id == 'string') {
				// gene_id is an ENSG accession: convert to the name known to genedb.
				// Entries whose accession is unknown to genedb are skipped.
				const t = genome.genedb.getNameByAlias.get(i.gene_id)
				if (t) genes.push(t.name)
			} else if (i.symbol && typeof i.symbol == 'string') {
				genes.push(i.symbol)
			} else {
				throw 'one of re.gene_selection[] is missing both gene_id and symbol'
			}
		}
		return genes
	} catch (e: any) {
		// log for server-side diagnosis, then let the caller report it to the client
		console.log(e.stack || e)
		throw e
	}
}
|
|
109
|
-
|
|
110
|
-
/**
 * Temporary helper: collects the ENSG accessions of the genes listed in
 * `genome.geneset[0].lst` (presumably the CGC gene set — confirm against genome setup).
 *
 * Each gene name is looked up via `genome.genedb.getAliasByName.all(name)`; every
 * returned row whose `alias` is a string starting with "ENSG" is kept. Ids are
 * returned in first-seen order without duplicates.
 *
 * The gene set is not pre-parsed/cached because this function is only temporary.
 *
 * @param genome - genome object exposing `geneset[0].lst` and `genedb.getAliasByName`
 * @returns unique ENSG ids
 * @throws a string when `genome.geneset[0].lst` is missing or not an array
 */
function tempGetCGCgenes(genome: any) {
	const geneNames = genome?.geneset?.[0]?.lst
	if (!Array.isArray(geneNames)) throw 'genome.geneset[0].lst is not array'

	// Set keeps insertion order and drops ids reachable through more than one name/alias row
	const ensgIds = new Set<string>()
	for (const name of geneNames) {
		const rows = genome.genedb.getAliasByName.all(name)
		if (!rows) continue
		for (const row of rows) {
			// guard against rows without a usable alias string
			if (typeof row?.alias == 'string' && row.alias.startsWith('ENSG')) ensgIds.add(row.alias)
		}
	}
	return [...ensgIds]
}
|