@sjcrh/proteinpaint-server 2.30.5 → 2.32.2-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.30.5",
3
+ "version": "2.32.2-0",
4
4
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
5
5
  "main": "server.js",
6
6
  "bin": "start.js",
@@ -25,6 +25,7 @@
25
25
  "pretest:integration": "tsc --esModuleInterop genome/*.ts dataset/*.ts",
26
26
  "test:integration": "echo 'TODO: server integration tests'",
27
27
  "prepack": "tsc --esModuleInterop genome/*.ts dataset/*.ts && webpack --env NODE_ENV=production",
28
+ "test:tsc": "tsc --esModuleInterop --noEmit --allowImportingTsExtensions ./shared/types/test/*.type.spec.ts",
28
29
  "response": "nodemon modules/test/test.server.js --watch src",
29
30
  "getconf": "../build/getConfigProp.js",
30
31
  "doc": "../augen/build.sh routes shared/types/routes shared/checkers ../public/docs/server"
@@ -55,8 +56,8 @@
55
56
  "webpack-notifier": "^1.15.0"
56
57
  },
57
58
  "dependencies": {
58
- "@sjcrh/augen": "2.27.0",
59
- "@sjcrh/proteinpaint-rust": "2.30.2",
59
+ "@sjcrh/augen": "2.32.2-0",
60
+ "@sjcrh/proteinpaint-rust": "2.31.0",
60
61
  "better-sqlite3": "^7.5.3",
61
62
  "body-parser": "^1.15.2",
62
63
  "canvas": "~2.9.3",
@@ -3,6 +3,8 @@ import path from 'path'
3
3
  import fs from 'fs'
4
4
  import { run_rust } from '@sjcrh/proteinpaint-rust'
5
5
  import serverconfig from '#src/serverconfig.js'
6
+ import Readable from 'stream'
7
+ import { GdcMafBuildRequest } from '#shared/types/routes/gdc.mafBuild.ts'
6
8
 
7
9
  const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
8
10
  const maxTotalSizeCompressed = serverconfig.features.gdcMafMaxFileSize || 50000000 // 50Mb
@@ -11,64 +13,56 @@ export const api = {
11
13
  endpoint: 'gdc/mafBuild',
12
14
  methods: {
13
15
  all: {
14
- init({ genomes }) {
15
- return async (req: any, res: any): Promise<void> => {
16
- try {
17
- await buildMaf(req, res)
18
- } catch (e: any) {
19
- if (e.stack) console.log(e.stack)
20
- res.send({ status: 'error', error: e.message || e })
21
- }
22
- }
23
- },
16
+ init,
24
17
  request: {
25
- typeId: null
26
- //valid: default to type checker
18
+ typeId: 'GdcMafBuildRequest'
27
19
  },
28
20
  response: {
29
- typeId: 'GdcMafBuildResponse'
30
- // will combine this with type checker
31
- //valid: (t) => {}
21
+ typeId: null // 'GdcMafBuildResponse'
32
22
  }
33
23
  }
34
24
  }
35
25
  }
36
26
 
37
- /*
38
- req.query {
39
- fileIdLst [] // list of maf file uuids
27
+ function init({ genomes }) {
28
+ return async (req: any, res: any): Promise<void> => {
29
+ try {
30
+ await buildMaf(req.query as GdcMafBuildRequest, res)
31
+ } catch (e: any) {
32
+ if (e.stack) console.log(e.stack)
33
+ res.send({ status: 'error', error: e.message || e })
34
+ }
35
+ }
40
36
  }
41
37
 
38
+ /*
39
+ q{}
42
40
  res{}
43
41
  */
44
- async function buildMaf(req: any, res: any) {
42
+ async function buildMaf(q: GdcMafBuildRequest, res: any) {
45
43
  const t0 = Date.now()
46
44
 
47
- const fileLst2 = (await getFileLstUnderSizeLimit(req.query.fileIdLst)) as string[]
45
+ const fileLst2 = (await getFileLstUnderSizeLimit(q.fileIdLst)) as string[]
48
46
  console.log('test gdc maf sizes', Date.now() - t0)
49
47
 
50
- const outFile = path.join(serverconfig.cachedir, 'gdcMaf.' + Math.random().toString()) // should be a gzipped file. does it need to end with '.gz' or it's auto-added?
51
-
52
48
  const arg = {
53
49
  fileIdLst: fileLst2,
54
- host: path.join(apihost, 'data'), // must use the /data/ endpoint from current host
55
- outFile
50
+ host: path.join(apihost, 'data') // must use the /data/ endpoint from current host
56
51
  }
57
52
 
58
- await run_rust('gdcmaf', JSON.stringify(arg))
53
+ const dataGzipped = await run_rust('gdcmaf', JSON.stringify(arg))
59
54
 
60
55
  console.log('rust gdcmaf', Date.now() - t0)
61
56
 
62
- const data = await fs.promises.readFile(outFile)
57
+ const data = JSON.parse(dataGzipped)
63
58
 
64
59
  // by directly returning a blob, it won't tell client how many files are used
65
-
66
60
  res.writeHead(200, {
67
61
  'Content-Type': 'application/octet-stream',
68
62
  'Content-Disposition': 'attachment; filename=cohort.maf.gz',
69
63
  'Content-Length': data.length
70
64
  })
71
- res.end(Buffer.from(data as any, 'binary'))
65
+ res.end(Buffer.from(data, 'binary'))
72
66
  }
73
67
 
74
68
  /*
@@ -1,12 +1,15 @@
1
1
  import { GdcTopVariablyExpressedGenesResponse } from '#shared/types/routes/gdc.topVariablyExpressedGenes.ts'
2
2
  import { getCasesWithExressionDataFromCohort } from '../src/mds3.gdc.js'
3
- //import path from 'path'
3
+ import path from 'path'
4
4
  import got from 'got'
5
5
  import serverconfig from '#src/serverconfig.js'
6
6
 
7
7
  // TODO change when api is released to prod
8
8
  //const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
9
9
  const apihost = 'https://uat-portal.gdc.cancer.gov/auth/api/v0/gene_expression/gene_selection'
10
+ // temporarily hardcode to use the direct API URL,
11
+ // previously hardcoded to use 'https://uat-portal.gdc.cancer.gov/auth/api/v0/'
12
+ const geneExpHost = 'https://uat-api.gdc.cancer.gov'
10
13
 
11
14
  const gdcGenome = 'hg38'
12
15
  const gdcDslabel = 'GDC'
@@ -23,7 +26,7 @@ export const api = {
23
26
  if (!genome) throw 'hg38 genome missing'
24
27
  const ds = genome.datasets?.[gdcDslabel]
25
28
  if (!ds) throw 'gdc dataset missing'
26
- const genes = await getGenes(req.query, ds)
29
+ const genes = await getGenes(req.query, ds, genome)
27
30
  const payload = { genes } as GdcTopVariablyExpressedGenesResponse
28
31
  res.send(payload)
29
32
  } catch (e: any) {
@@ -53,12 +56,16 @@ req.query {
53
56
 
54
57
  ds { } // server-side ds object
55
58
 
59
+ genome {}
56
60
  */
57
- async function getGenes(q: any, ds: any) {
61
+ async function getGenes(q: any, ds: any, genome: any) {
58
62
  if (serverconfig.features.gdcGenes) {
59
63
  // for testing only; delete when api issue is resolved
60
64
  return serverconfig.features.gdcGenes as string[]
61
65
  }
66
+ if (!ds.__gdc.doneCaching) {
67
+ throw `The server has not finished caching the case IDs: try again in ~2 minutes`
68
+ }
62
69
 
63
70
  // based on current cohort, get list of cases with exp data, as input of next api query
64
71
  const caseLst = await getCasesWithExressionDataFromCohort(q, ds)
@@ -68,30 +75,39 @@ async function getGenes(q: any, ds: any) {
68
75
  }
69
76
 
70
77
  // change to this when api is available on prod
71
- // const url = path.join(apihost, '/gene_expression/gene_selection')
78
+ const url = path.join(geneExpHost, '/gene_expression/gene_selection')
72
79
 
73
- const response = await got.post(apihost, {
74
- headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
75
- body: JSON.stringify({
76
- case_ids: caseLst,
77
- //gene_ids: [] // this should not be a required parameter
78
- size: q.maxGenes || 100
80
+ try {
81
+ const response = await got.post(url, {
82
+ headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
83
+ body: JSON.stringify({
84
+ // !!! temporarily limit the case_ids length, otherwise the request times out !!!
85
+ case_ids: caseLst.slice(0, 20),
86
+ //gene_ids: [] // this should not be a required parameter
87
+ gene_type: 'protein_coding',
88
+ selection_size: Number(q.maxGenes || 100)
89
+ })
79
90
  })
80
- })
81
91
 
82
- const re = JSON.parse(response.body)
83
- // {"gene_selection":[{"gene_id":"ENSG00000141510","log2_uqfpkm_median":3.103430497010492,"log2_uqfpkm_stddev":0.8692021350485105,"symbol":"TP53"}, ... ]}
92
+ const re = JSON.parse(response.body)
93
+ // {"gene_selection":[{"gene_id":"ENSG00000141510","log2_uqfpkm_median":3.103430497010492,"log2_uqfpkm_stddev":0.8692021350485105,"symbol":"TP53"}, ... ]}
84
94
 
85
- const genes = [] as string[]
86
- if (!Array.isArray(re.gene_selection)) throw 're.gene_selection[] is not array'
87
- for (const i of re.gene_selection) {
88
- if (i.gene_id && typeof i.gene_id == 'string') {
89
- genes.push(i.gene_id) // ensg
90
- } else if (i.symbol && typeof i.symbol == 'string') {
91
- genes.push(i.symbol)
92
- } else {
93
- throw 'one of re.gene_selection[] is missing both gene_id and symbol'
95
+ const genes = [] as string[]
96
+ if (!Array.isArray(re.gene_selection)) throw 're.gene_selection[] is not array'
97
+ for (const i of re.gene_selection) {
98
+ if (i.gene_id && typeof i.gene_id == 'string') {
99
+ // is ensg, convert to symbol
100
+ const t = genome.genedb.getNameByAlias.get(i.gene_id)
101
+ if (t) genes.push(t.name) // ensg
102
+ } else if (i.symbol && typeof i.symbol == 'string') {
103
+ genes.push(i.symbol)
104
+ } else {
105
+ throw 'one of re.gene_selection[] is missing both gene_id and symbol'
106
+ }
94
107
  }
108
+ return genes
109
+ } catch (e: any) {
110
+ console.log(e.stack || e)
111
+ throw e
95
112
  }
96
- return genes
97
113
  }
@@ -0,0 +1,45 @@
1
+ import {
2
+ TermdbSinglecellDataRequest,
3
+ TermdbSinglecellDataResponse
4
+ } from '#shared/types/routes/termdb.singlecellData.ts'
5
+
6
+ /*
7
+ given a sample, return its singlecell data from the dataset
8
+ */
9
+
10
+ export const api: any = {
11
+ endpoint: 'termdb/singlecellData',
12
+ methods: {
13
+ get: {
14
+ init,
15
+ request: {
16
+ typeId: 'TermdbSinglecellDataRequest'
17
+ },
18
+ response: {
19
+ typeId: 'TermdbSinglecellDataResponse'
20
+ }
21
+ },
22
+ post: {
23
+ alternativeFor: 'get',
24
+ init
25
+ }
26
+ }
27
+ }
28
+
29
+ function init({ genomes }) {
30
+ return async (req: any, res: any): Promise<void> => {
31
+ const q = req.query as TermdbSinglecellDataRequest
32
+ try {
33
+ const g = genomes[q.genome]
34
+ if (!g) throw 'invalid genome name'
35
+ const ds = g.datasets[q.dslabel]
36
+ if (!ds) throw 'invalid dataset name'
37
+ if (!ds.queries?.singleCell) throw 'no singlecell data on this dataset'
38
+ const result = (await ds.queries.singleCell.data.get(q)) as TermdbSinglecellDataResponse
39
+ res.send(result)
40
+ } catch (e: any) {
41
+ if (e instanceof Error && e.stack) console.log(e)
42
+ res.send({ error: e.message || e })
43
+ }
44
+ }
45
+ }
@@ -0,0 +1,167 @@
1
+ import fs from 'fs'
2
+ import path from 'path'
3
+ import { read_file } from '#src/utils.js'
4
+ import serverconfig from '#src/serverconfig.js'
5
+ import {
6
+ SingleCellQuery,
7
+ SingleCellSamplesNative,
8
+ SingleCellSamplesGdc,
9
+ SingleCellDataNative,
10
+ SingleCellDataGdc
11
+ } from '#shared/types/dataset.ts'
12
+ import {
13
+ Sample,
14
+ TermdbSinglecellsamplesRequest,
15
+ TermdbSinglecellsamplesResponse
16
+ } from '#shared/types/routes/termdb.singlecellSamples.ts'
17
+ import {
18
+ Cell,
19
+ Plot,
20
+ HasdataResponse,
21
+ NodataResponse,
22
+ ErrorResponse
23
+ } from '#shared/types/routes/termdb.singlecellData.ts'
24
+ import { gdc_validate_query_singleCell_samples, gdc_validate_query_singleCell_data } from '#src/mds3.gdc.js'
25
+
26
+ /* route returns list of samples with sc data
27
+ this is due to the fact that sometimes not all samples in a dataset have sc data
28
+ */
29
+
30
+ export const api: any = {
31
+ endpoint: 'termdb/singlecellSamples',
32
+ methods: {
33
+ get: {
34
+ init,
35
+ request: {
36
+ typeId: 'TermdbSinglecellsamplesRequest'
37
+ },
38
+ response: {
39
+ typeId: 'TermdbSinglecellsamplesResponse'
40
+ }
41
+ },
42
+ post: {
43
+ alternativeFor: 'get',
44
+ init
45
+ }
46
+ }
47
+ }
48
+
49
+ function init({ genomes }) {
50
+ return async (req: any, res: any): Promise<void> => {
51
+ const q = req.query as TermdbSinglecellsamplesRequest
52
+ try {
53
+ const g = genomes[q.genome]
54
+ if (!g) throw 'invalid genome name'
55
+ const ds = g.datasets[q.dslabel]
56
+ if (!ds) throw 'invalid dataset name'
57
+ if (!ds.queries?.singleCell) throw 'no singlecell data on this dataset'
58
+ const samples = (await ds.queries.singleCell.samples.get(q)) as TermdbSinglecellsamplesResponse
59
+ res.send({ samples })
60
+ } catch (e: any) {
61
+ if (e instanceof Error && e.stack) console.log(e)
62
+ res.send({ error: e.message || e })
63
+ }
64
+ }
65
+ }
66
+
67
+ /////////////////// ds query validator
68
+ export async function validate_query_singleCell(ds: any, genome: any) {
69
+ const q = ds.queries.singleCell as SingleCellQuery
70
+ if (!q) return
71
+
72
+ if (q.samples.src == 'gdcapi') {
73
+ gdc_validate_query_singleCell_samples(ds, genome)
74
+ } else {
75
+ validateSamplesNative(q.samples as SingleCellSamplesNative, ds)
76
+ }
77
+ // q.samples.get() added
78
+
79
+ if (q.data.src == 'gdcapi') {
80
+ gdc_validate_query_singleCell_data(ds, genome)
81
+ } else {
82
+ validateDataNative(q.data as SingleCellDataNative, ds)
83
+ }
84
+ // q.data.get() added
85
+ }
86
+
87
+ function validateSamplesNative(S: SingleCellSamplesNative, ds: any) {
88
+ // for now use this quick fix method to pull sample ids annotated by this term
89
+ // to support situation where not all samples from a dataset has sc data
90
+ const samples = [] as Sample[] // list of sample ids with sc data
91
+ const s = ds.cohort.termdb.q.getAllValues4term(S.isSampleTerm)
92
+ for (const id of s.keys()) {
93
+ samples.push({ sample: ds.cohort.termdb.q.id2sampleName(id) })
94
+ }
95
+ if (samples.length == 0) throw 'no sample with sc data'
96
+ // getter returns an array of {sample:<samplename>, files:[]} where files is gdc-specific. each sample is an object, which allows adding ds-specific properties
97
+ S.get = () => samples
98
+ }
99
+
100
+ function validateDataNative(D: SingleCellDataNative, ds: any) {
101
+ const nameSet = new Set() // guard against duplicating plot names
102
+ for (const plot of D.plots) {
103
+ if (nameSet.has(plot.name)) throw 'duplicate plot.name'
104
+ nameSet.add(plot.name)
105
+ }
106
+
107
+ // scoped and cached for runtime
108
+ const _terms = [] as any
109
+ const _tid2cellvalue = {} as any
110
+
111
+ for (const tid of D.termIds) {
112
+ const t = ds.cohort.termdb.q.termjsonByOneid(tid)
113
+ if (!t) throw 'invalid term id from queries.singleCell.data.termIds[]'
114
+ _terms.push(t)
115
+ _tid2cellvalue[tid] = ds.cohort.termdb.q.getAllValues4term(tid)
116
+ }
117
+ D.get = async sample => {
118
+ // if sample is int, may convert to string
119
+ try {
120
+ const tid2cellvalue = {}
121
+ for (const tid of D.termIds) tid2cellvalue[tid] = {} // k: cell id, v: cell value for this term
122
+
123
+ const plots = [] as Plot[] // given a sample name, collect every plot data for this sample and return
124
+ for (const plot of D.plots) {
125
+ const tsvfile = path.join(serverconfig.tpmasterdir, plot.folder, sample, plot.fileSuffix)
126
+ try {
127
+ await fs.promises.stat(tsvfile)
128
+ } catch (e: any) {
129
+ if (e.code == 'ENOENT') {
130
+ // no file found for this sample; allowed because sampleView tests if that sample has sc data or not
131
+ continue
132
+ }
133
+ if (e.code == 'EACCES') throw 'cannot read file, permission denied'
134
+ throw 'failed to load sc data file'
135
+ }
136
+ const lines = (await read_file(tsvfile)).trim().split('\n')
137
+ // 1st line is header
138
+ const cells = [] as Cell[]
139
+ for (let i = 1; i < lines.length; i++) {
140
+ // each line is a cell
141
+ const l = lines[i].split('\t')
142
+ const cellId = l[0],
143
+ x = Number(l[4]), // FIXME standardize, or define idx in plot
144
+ y = Number(l[5])
145
+ if (!cellId) throw 'cell id missing'
146
+ if (!Number.isFinite(x) || !Number.isFinite(y)) throw 'x/y not number'
147
+ cells.push({ cellId, x, y })
148
+
149
+ for (const tid of D.termIds) {
150
+ if (_tid2cellvalue[tid].has(cellId)) {
151
+ tid2cellvalue[tid][cellId] = _tid2cellvalue[tid].get(cellId)
152
+ }
153
+ }
154
+ }
155
+ plots.push({ name: plot.name, cells })
156
+ }
157
+ if (plots.length == 0) {
158
+ // no data available for this sample
159
+ return { nodata: true }
160
+ }
161
+ return { plots, terms: _terms, tid2cellvalue }
162
+ } catch (e: any) {
163
+ if (e.stack) console.log(e.stack)
164
+ return { error: e.message || e }
165
+ }
166
+ }
167
+ }