@sjcrh/proteinpaint-server 2.32.2-0 → 2.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,9 +24,9 @@ exports.default = {
24
24
  }
25
25
  ]
26
26
  },
27
- variantUrl: {
27
+ ssmUrl: {
28
28
  base: 'https://www.ncbi.nlm.nih.gov/clinvar/variation/',
29
- key: 'vcf_id',
29
+ namekey: 'vcf_id',
30
30
  linkText: 'ClinVar',
31
31
  shownSeparately: true
32
32
  },
@@ -42,9 +42,9 @@ exports.default = {
42
42
  }
43
43
  ]
44
44
  },
45
- variantUrl: {
45
+ ssmUrl: {
46
46
  base: 'https://www.ncbi.nlm.nih.gov/clinvar/variation/',
47
- key: 'vcf_id',
47
+ namekey: 'vcf_id',
48
48
  linkText: 'ClinVar',
49
49
  shownSeparately: true
50
50
  },
package/genome/hg38.js CHANGED
@@ -9,9 +9,9 @@ exports.default = {
9
9
  },
10
10
  termdbs: {
11
11
  msigdb: {
12
- label: 'MSigDB',
12
+ label: 'MSigDB (2023.2.Hs)',
13
13
  cohort: {
14
- db: { file: 'anno/msigdb/db' },
14
+ db: { file: 'anno/msigdb/db_2023.2.Hs' },
15
15
  termdb: {
16
16
  isGeneSetTermdb: true
17
17
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.32.2-0",
3
+ "version": "2.34.0",
4
4
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
5
5
  "main": "server.js",
6
6
  "bin": "start.js",
@@ -57,7 +57,7 @@
57
57
  },
58
58
  "dependencies": {
59
59
  "@sjcrh/augen": "2.32.2-0",
60
- "@sjcrh/proteinpaint-rust": "2.31.0",
60
+ "@sjcrh/proteinpaint-rust": "2.34.0",
61
61
  "better-sqlite3": "^7.5.3",
62
62
  "body-parser": "^1.15.2",
63
63
  "canvas": "~2.9.3",
@@ -1,7 +1,7 @@
1
1
  import got from 'got'
2
2
  import path from 'path'
3
3
  import fs from 'fs'
4
- import { run_rust } from '@sjcrh/proteinpaint-rust'
4
+ import { run_rust_stream } from '@sjcrh/proteinpaint-rust'
5
5
  import serverconfig from '#src/serverconfig.js'
6
6
  import Readable from 'stream'
7
7
  import { GdcMafBuildRequest } from '#shared/types/routes/gdc.mafBuild.ts'
@@ -50,19 +50,21 @@ async function buildMaf(q: GdcMafBuildRequest, res: any) {
50
50
  host: path.join(apihost, 'data') // must use the /data/ endpoint from current host
51
51
  }
52
52
 
53
- const dataGzipped = await run_rust('gdcmaf', JSON.stringify(arg))
53
+ const rustStream = run_rust_stream('gdcmaf', JSON.stringify(arg))
54
+ res.setHeader('Content-Type', 'application/octet-stream')
55
+ res.setHeader('Content-Disposition', 'attachment; filename=cohort.maf.gz')
56
+ rustStream.pipe(res)
54
57
 
55
58
  console.log('rust gdcmaf', Date.now() - t0)
56
59
 
57
- const data = JSON.parse(dataGzipped)
58
-
59
- // by directly returning a blob, it won't tell client how many files are used
60
- res.writeHead(200, {
61
- 'Content-Type': 'application/octet-stream',
62
- 'Content-Disposition': 'attachment; filename=cohort.maf.gz',
63
- 'Content-Length': data.length
60
+ rustStream.on('end', () => {
61
+ res.end()
62
+ })
63
+ rustStream.on('error', err => {
64
+ console.error(err)
65
+ res.statusCode = 500
66
+ res.end('Internal Server Error')
64
67
  })
65
- res.end(Buffer.from(data, 'binary'))
66
68
  }
67
69
 
68
70
  /*
@@ -1,15 +1,21 @@
1
- import { GdcTopVariablyExpressedGenesResponse } from '#shared/types/routes/gdc.topVariablyExpressedGenes.ts'
1
+ import {
2
+ GdcTopVariablyExpressedGenesRequest,
3
+ GdcTopVariablyExpressedGenesResponse
4
+ } from '#shared/types/routes/gdc.topVariablyExpressedGenes.ts'
2
5
  import { getCasesWithExressionDataFromCohort } from '../src/mds3.gdc.js'
3
6
  import path from 'path'
4
7
  import got from 'got'
5
8
  import serverconfig from '#src/serverconfig.js'
6
9
 
10
+ // TODO make it general purpose based on ds.queries.geneExpression.topVariablyExpressedGenes{}; wait till case/gene link changes are done
11
+
7
12
  // TODO change when api is released to prod
8
13
  //const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
9
14
  const apihost = 'https://uat-portal.gdc.cancer.gov/auth/api/v0/gene_expression/gene_selection'
10
15
  // temporarily hardcode to use the direct API URL,
11
16
  // previously hardcoded to use 'https://uat-portal.gdc.cancer.gov/auth/api/v0/'
12
17
  const geneExpHost = 'https://uat-api.gdc.cancer.gov'
18
+ // https://github.com/NCI-GDC/gdcapi/blob/develop/openapi/gene-expression.yaml
13
19
 
14
20
  const gdcGenome = 'hg38'
15
21
  const gdcDslabel = 'GDC'
@@ -18,47 +24,37 @@ export const api = {
18
24
  endpoint: 'gdc/topVariablyExpressedGenes',
19
25
  methods: {
20
26
  get: {
21
- init({ genomes }) {
22
- return async (req: any, res: any): Promise<void> => {
23
- try {
24
- // following logic requires hg38 gdc dataset
25
- const genome = genomes[gdcGenome]
26
- if (!genome) throw 'hg38 genome missing'
27
- const ds = genome.datasets?.[gdcDslabel]
28
- if (!ds) throw 'gdc dataset missing'
29
- const genes = await getGenes(req.query, ds, genome)
30
- const payload = { genes } as GdcTopVariablyExpressedGenesResponse
31
- res.send(payload)
32
- } catch (e: any) {
33
- res.send({ status: 'error', error: e.message || e })
34
- }
35
- }
36
- },
27
+ init,
37
28
  request: {
38
- typeId: null
39
- //valid: default to type checker
29
+ typeId: 'GdcTopVariablyExpressedGenesRequest'
40
30
  },
41
31
  response: {
42
32
  typeId: 'GdcTopVariablyExpressedGenesResponse'
43
- // will combine this with type checker
44
- //valid: (t) => {}
45
33
  }
46
34
  }
47
35
  }
48
36
  }
49
37
 
50
- /*
51
- req.query {
52
- filter0 // optional gdc GFF cohort filter, invisible and read only
53
- FIXME should there be pp filter too?
54
- maxGenes: int
38
+ function init({ genomes }) {
39
+ return async (req: any, res: any): Promise<void> => {
40
+ try {
41
+ // following logic requires hg38 gdc dataset
42
+ const genome = genomes[gdcGenome]
43
+ if (!genome) throw 'hg38 genome missing'
44
+ const ds = genome.datasets?.[gdcDslabel]
45
+ if (!ds) throw 'gdc dataset missing'
46
+ const genes = await getGenes(req.query as GdcTopVariablyExpressedGenesRequest, ds, genome)
47
+ const payload = { genes } as GdcTopVariablyExpressedGenesResponse
48
+ res.send(payload)
49
+ } catch (e: any) {
50
+ res.send({ status: 'error', error: e.message || e })
51
+ }
52
+ }
55
53
  }
56
54
 
57
- ds { } // server-side ds object
58
-
59
- genome {}
60
- */
61
- async function getGenes(q: any, ds: any, genome: any) {
55
+ /*
56
+ */
57
+ async function getGenes(q: GdcTopVariablyExpressedGenesRequest, ds: any, genome: any) {
62
58
  if (serverconfig.features.gdcGenes) {
63
59
  // for testing only; delete when api issue is resolved
64
60
  return serverconfig.features.gdcGenes as string[]
@@ -83,9 +79,14 @@ async function getGenes(q: any, ds: any, genome: any) {
83
79
  body: JSON.stringify({
84
80
  // !!! temporarily limit the case_ids length, otherwise the request times out !!!
85
81
  case_ids: caseLst.slice(0, 20),
86
- //gene_ids: [] // this should not be a required parameter
87
- gene_type: 'protein_coding',
88
- selection_size: Number(q.maxGenes || 100)
82
+
83
+ // temporary!! restrict pool to cgc due to slow api. delete when new api is online
84
+ gene_ids: tempGetCGCgenes(genome),
85
+
86
+ // when gene_ids is deleted, enable this
87
+ //gene_type: 'protein_coding',
88
+
89
+ selection_size: Number(q.maxGenes) // FIXME it's defined as number but why it's string??
89
90
  })
90
91
  })
91
92
 
@@ -111,3 +112,17 @@ async function getGenes(q: any, ds: any, genome: any) {
111
112
  throw e
112
113
  }
113
114
  }
115
+
116
+ function tempGetCGCgenes(genome: any) {
117
+ const lst = [] as string[] // list of ENSG ids from cgc genes
118
+ // don't think there's need to preparse genome.geneset, as this function is only temporary
119
+ for (const s of genome.geneset[0].lst) {
120
+ const a = genome.genedb.getAliasByName.all(s)
121
+ if (a) {
122
+ for (const b of a) {
123
+ if (b.alias.startsWith('ENSG')) lst.push(b.alias)
124
+ }
125
+ }
126
+ }
127
+ return lst
128
+ }