@sjcrh/proteinpaint-server 2.34.1-0 → 2.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,122 +0,0 @@
1
- import {
2
- GdcTopVariablyExpressedGenesRequest,
3
- GdcTopVariablyExpressedGenesResponse
4
- } from '#shared/types/routes/gdc.topVariablyExpressedGenes.ts'
5
- import { getCasesWithExressionDataFromCohort } from '../src/mds3.gdc.js'
6
- import path from 'path'
7
- import got from 'got'
8
- import serverconfig from '#src/serverconfig.js'
9
-
// TODO make it general purpose based on ds.queries.geneExpression.topVariablyExpressedGenes{}; wait till case/gene link changes are done
// https://github.com/NCI-GDC/gdcapi/blob/develop/openapi/gene-expression.yaml

// Base URL of the GDC API; the PP_GDC_HOST environment variable, when set and non-empty, takes precedence.
const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'

// Key into `genomes` and key into `genome.datasets` used to locate the GDC dataset.
const gdcGenome = 'hg38',
	gdcDslabel = 'GDC'
16
-
/**
 * Route descriptor for the GDC "top variably expressed genes" endpoint.
 *
 * Declares one GET method. `init` is the factory that builds the request
 * handler; the `typeId` strings carry the names of the request and response
 * types imported at the top of this file.
 */
export const api = {
	endpoint: 'gdc/topVariablyExpressedGenes',
	methods: {
		get: {
			init,
			request: { typeId: 'GdcTopVariablyExpressedGenesRequest' },
			response: { typeId: 'GdcTopVariablyExpressedGenesResponse' }
		}
	}
}
31
-
/**
 * Build the request handler for the `gdc/topVariablyExpressedGenes` route.
 *
 * @param genomes - genome objects keyed by genome name. The handler needs the
 *   `hg38` entry and, inside it, the `GDC` dataset.
 * @returns An async `(req, res)` handler. It always answers through
 *   `res.send()`: `{ genes }` on success, `{ status: 'error', error }` when
 *   anything throws. It never rejects for errors raised inside the try block.
 */
function init({ genomes }: { genomes: Record<string, any> }) {
	return async (req: any, res: any): Promise<void> => {
		try {
			// following logic requires hg38 gdc dataset
			const genome = genomes[gdcGenome]
			if (!genome) throw new Error('hg38 genome missing')
			const ds = genome.datasets?.[gdcDslabel]
			if (!ds) throw new Error('gdc dataset missing')

			// req.query is passed through as-is; it is only cast, not converted
			const genes = await getGenes(req.query as GdcTopVariablyExpressedGenesRequest, ds, genome)
			const payload = { genes } as GdcTopVariablyExpressedGenesResponse
			res.send(payload)
		} catch (e: unknown) {
			// Accept Error objects, bare strings (still thrown by older code) and
			// even null/undefined: reading `.message` through optional chaining
			// cannot throw, so a response is always sent.
			const message = (e as { message?: unknown } | null | undefined)?.message
			res.send({ status: 'error', error: message || e })
		}
	}
}
48
-
/**
 * Get gene names of the top variably expressed genes for the current cohort.
 *
 * Steps:
 * 1. If `serverconfig.features.gdcGenes` is set, return it directly (testing shortcut).
 * 2. Refuse to run until `ds.__gdc.doneCaching` is truthy.
 * 3. Resolve the cohort to the list of cases that have expression data.
 * 4. POST those cases (temporarily only the first 20) and the CGC gene pool to
 *    the GDC `gene_expression/gene_selection` API.
 * 5. Convert each returned entry to a gene name.
 *
 * @param q - request parameters; forwarded to the cohort lookup, and `q.maxGenes` is sent as `selection_size`
 * @param ds - the GDC dataset object
 * @param genome - the genome object; its `genedb` and `geneset` are used for id/name conversion
 * @returns gene names; an empty array when no case in the cohort has expression data
 * @throws Error when case-id caching is unfinished, when the API request fails,
 *   or when the API response does not have the expected shape
 */
async function getGenes(q: GdcTopVariablyExpressedGenesRequest, ds: any, genome: any): Promise<string[]> {
	if (serverconfig.features.gdcGenes) {
		// for testing only; delete when api issue is resolved
		return serverconfig.features.gdcGenes as string[]
	}
	if (!ds.__gdc.doneCaching) {
		throw new Error(`The server has not finished caching the case IDs: try again in ~2 minutes`)
	}

	// based on current cohort, get list of cases with exp data, as input of next api query
	const caseLst = await getCasesWithExressionDataFromCohort(q, ds)
	if (caseLst.length == 0) {
		// there are no cases with gene exp data
		return [] as string[]
	}

	// change to this when api is available on prod
	// Built by string concatenation rather than path.join(): path.join() is a
	// filesystem helper that normalizes "https://" to "https:/" and uses
	// backslashes on Windows. Trailing slashes on apihost are trimmed so the
	// result has exactly one separator; any path prefix in apihost is kept.
	const url = apihost.replace(/\/+$/, '') + '/gene_expression/gene_selection'

	try {
		const response = await got.post(url, {
			headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
			body: JSON.stringify({
				// !!! temporarily limit the case_ids length, otherwise the request times out !!!
				case_ids: caseLst.slice(0, 20),

				// temporary!! restrict pool to cgc due to slow api. delete when new api is online
				gene_ids: tempGetCGCgenes(genome),

				// when gene_ids is deleted, enable this
				//gene_type: 'protein_coding',

				// FIXME typed as number but arrives as a string, presumably because the
				// caller passes the raw req.query through a cast; convert here.
				selection_size: Number(q.maxGenes)
			})
		})

		const re = JSON.parse(response.body)
		// {"gene_selection":[{"gene_id":"ENSG00000141510","log2_uqfpkm_median":3.103430497010492,"log2_uqfpkm_stddev":0.8692021350485105,"symbol":"TP53"}, ... ]}
		if (!Array.isArray(re.gene_selection)) throw new Error('re.gene_selection[] is not array')

		const genes: string[] = []
		for (const entry of re.gene_selection) {
			if (entry.gene_id && typeof entry.gene_id == 'string') {
				// is ensg, convert to symbol; an id with no match in genedb is skipped
				const hit = genome.genedb.getNameByAlias.get(entry.gene_id)
				if (hit) genes.push(hit.name)
			} else if (entry.symbol && typeof entry.symbol == 'string') {
				genes.push(entry.symbol)
			} else {
				throw new Error('one of re.gene_selection[] is missing both gene_id and symbol')
			}
		}
		return genes
	} catch (e: unknown) {
		// log for server-side diagnosis, then let the route handler report it
		console.log(e instanceof Error && e.stack ? e.stack : e)
		throw e
	}
}
109
-
/**
 * Temporary helper: collect the ENSG ids of the genes in `genome.geneset[0]`
 * (the CGC gene set, per the call site), used to restrict the gene pool
 * sent to the GDC API.
 *
 * Each entry of `genome.geneset[0].lst` is looked up through
 * `genome.genedb.getAliasByName.all()`; every returned alias that starts
 * with "ENSG" is kept.
 *
 * @param genome - genome object providing `geneset` and `genedb`
 * @returns unique ENSG ids, in first-seen order
 * @throws Error when `genome.geneset[0].lst` is missing
 */
function tempGetCGCgenes(genome: any): string[] {
	const geneNames = genome.geneset?.[0]?.lst
	if (geneNames == null) throw new Error('genome.geneset[0].lst[] is missing')

	// don't think there's need to preparse genome.geneset, as this function is only temporary
	// A Set drops repeated ids (e.g. the same gene listed twice) while keeping insertion order.
	const ensgIds = new Set<string>()
	for (const name of geneNames) {
		const aliases = genome.genedb.getAliasByName.all(name)
		if (!aliases) continue
		for (const { alias } of aliases) {
			if (alias.startsWith('ENSG')) ensgIds.add(alias)
		}
	}
	return [...ensgIds]
}