@sjcrh/proteinpaint-server 2.34.1 → 2.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.34.1",
3
+ "version": "2.35.0",
4
4
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
5
5
  "main": "server.js",
6
6
  "bin": "start.js",
@@ -56,7 +56,7 @@
56
56
  "webpack-notifier": "^1.15.0"
57
57
  },
58
58
  "dependencies": {
59
- "@sjcrh/augen": "2.32.2-0",
59
+ "@sjcrh/augen": "2.35.0",
60
60
  "@sjcrh/proteinpaint-rust": "2.34.0",
61
61
  "better-sqlite3": "^7.5.3",
62
62
  "body-parser": "^1.15.2",
@@ -0,0 +1,87 @@
1
+ import fs from 'fs'
2
+ import path from 'path'
3
+ import { read_file } from '#src/utils.js'
4
+ import serverconfig from '#src/serverconfig.js'
5
+ import {
6
+ TermdbSingleSampleMutationRequest,
7
+ TermdbSingleSampleMutationResponse
8
+ } from '#shared/types/routes/termdb.singleSampleMutation.ts'
9
+ import { gdcValidate_query_singleSampleMutation } from '#src/mds3.gdc.js'
10
+
11
/**
 * Route descriptor for `termdb/singleSampleMutation`.
 * Registers a single GET method: `init` builds the request handler, and the two `typeId`
 * strings name the request/response types declared in
 * `#shared/types/routes/termdb.singleSampleMutation.ts`.
 */
export const api: any = {
	endpoint: 'termdb/singleSampleMutation',
	methods: {
		get: {
			init,
			request: { typeId: 'TermdbSingleSampleMutationRequest' },
			response: { typeId: 'TermdbSingleSampleMutationResponse' }
		}
	}
}
25
+
26
/**
 * Builds the request handler for the `termdb/singleSampleMutation` route.
 *
 * The handler looks up the genome and dataset named in `req.query`, delegates to the dataset's
 * `queries.singleSampleMutation.get()` getter, and answers with exactly one `res.send()`:
 * the getter's payload on success, or `{ status, error }` when anything fails.
 *
 * @param genomes - map of genome name to genome object; each genome holds `datasets` keyed by label
 * @returns async handler taking `(req, res)`
 */
function init({ genomes }) {
	return async (req: any, res: any): Promise<void> => {
		const q = req.query as TermdbSingleSampleMutationRequest
		let result
		try {
			const g = genomes[q.genome]
			if (!g) throw 'invalid genome name'
			// optional chaining: a genome without `datasets` is reported as an invalid dataset, not as a TypeError
			const ds = g.datasets?.[q.dslabel]
			if (!ds) throw 'invalid dataset name'
			if (!ds.queries?.singleSampleMutation) throw 'not supported on this dataset'
			result = (await ds.queries.singleSampleMutation.get(q)) as TermdbSingleSampleMutationResponse
		} catch (e: any) {
			// plain strings are thrown above as well as Error objects, and a promise can reject with
			// no reason at all; guard every access so the catch block itself cannot throw and skip the response
			if (e?.stack) console.log(e.stack)
			result = {
				status: e?.status || 400,
				error: e?.message || e || 'unknown error'
			} as TermdbSingleSampleMutationResponse
		}
		res.send(result)
	}
}
47
+
48
/////////////////// ds query validator
/**
 * Installs the `get()` getter on `ds.queries.singleSampleMutation`, depending on its `src`.
 *
 * - no `singleSampleMutation` query: does nothing
 * - `src == 'gdcapi'`: delegates to `gdcValidate_query_singleSampleMutation()` (defined in
 *   mds3.gdc.js; presumably it attaches the getter, not verifiable from this file)
 * - `src == 'native'`: attaches a getter that reads one JSON file per sample from
 *   `<tpmasterdir>/<folder>/<sample id>` and resolves to `{ mlst }`
 *
 * @param ds - dataset object; `ds.queries.singleSampleMutation` is mutated in place
 * @param genome - genome object, only forwarded to the GDC validator
 * @throws 'unknown singleSampleMutation.src' for any other `src`
 */
export async function validate_query_singleSampleMutation(ds: any, genome: any) {
	const _q = ds.queries.singleSampleMutation
	if (!_q) return
	if (_q.src == 'gdcapi') {
		gdcValidate_query_singleSampleMutation(ds, genome)
	} else if (_q.src == 'native') {
		// using a folder to store text files for individual samples
		// file names are integer sample id
		_q.get = async (q: TermdbSingleSampleMutationRequest) => {
			/* as mds3 client may not be using integer sample id for now,
			the argument is string id and has to be mapped to integer id
			*/
			let fileName = q.sample
			if (ds.cohort?.termdb?.q?.sampleName2id) {
				// has name-to-id converter
				fileName = ds.cohort.termdb.q.sampleName2id(q.sample)
				if (fileName == undefined) {
					// unable to convert string id to integer
					// NOTE(review): this is a bare array while the success path returns { mlst: [] } — confirm what the client expects
					return []
				}
			}
			if (fileName == undefined) throw 'invalid sample name'

			// q.sample comes from the request; without a converter it is used verbatim as the file name,
			// so verify the joined path is still inside the dataset folder (rejects "../" and absolute paths)
			const folder = path.join(serverconfig.tpmasterdir, _q.folder)
			const file = path.join(folder, fileName.toString())
			const rel = path.relative(folder, file)
			if (!rel || rel == '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel)) {
				throw 'invalid sample name'
			}

			try {
				await fs.promises.stat(file)
			} catch (e: any) {
				if (e.code == 'EACCES') throw 'cannot read file, permission denied'
				if (e.code == 'ENOENT') throw 'no data for this sample'
				throw 'failed to load data'
			}

			const data = await read_file(file)
			// object wraps around mlst[] so it's possible to add other attr e.g. total number of mutations that exceeds viewing limit
			return { mlst: JSON.parse(data) }
		}
	} else {
		throw 'unknown singleSampleMutation.src'
	}
}
@@ -29,17 +29,21 @@ export const api: any = {
29
29
  function init({ genomes }) {
30
30
  return async (req: any, res: any): Promise<void> => {
31
31
  const q = req.query as TermdbSinglecellDataRequest
32
+ let result
32
33
  try {
33
34
  const g = genomes[q.genome]
34
35
  if (!g) throw 'invalid genome name'
35
36
  const ds = g.datasets[q.dslabel]
36
37
  if (!ds) throw 'invalid dataset name'
37
38
  if (!ds.queries?.singleCell) throw 'no singlecell data on this dataset'
38
- const result = (await ds.queries.singleCell.data.get(q)) as TermdbSinglecellDataResponse
39
- res.send(result)
39
+ result = (await ds.queries.singleCell.data.get(q)) as TermdbSinglecellDataResponse
40
40
  } catch (e: any) {
41
- if (e instanceof Error && e.stack) console.log(e)
42
- res.send({ error: e.message || e })
41
+ if (e.stack) console.log(e)
42
+ result = {
43
+ status: e.status || 400,
44
+ error: e.message || e
45
+ } as TermdbSinglecellDataResponse
43
46
  }
47
+ res.send(result)
44
48
  }
45
49
  }
@@ -14,13 +14,7 @@ import {
14
14
  TermdbSinglecellsamplesRequest,
15
15
  TermdbSinglecellsamplesResponse
16
16
  } from '#shared/types/routes/termdb.singlecellSamples.ts'
17
- import {
18
- Cell,
19
- Plot,
20
- HasdataResponse,
21
- NodataResponse,
22
- ErrorResponse
23
- } from '#shared/types/routes/termdb.singlecellData.ts'
17
+ import { Cell, Plot } from '#shared/types/routes/termdb.singlecellData.ts'
24
18
  import { gdc_validate_query_singleCell_samples, gdc_validate_query_singleCell_data } from '#src/mds3.gdc.js'
25
19
 
26
20
  /* route returns list of samples with sc data
@@ -49,18 +43,22 @@ export const api: any = {
49
43
  function init({ genomes }) {
50
44
  return async (req: any, res: any): Promise<void> => {
51
45
  const q = req.query as TermdbSinglecellsamplesRequest
46
+ let result
52
47
  try {
53
48
  const g = genomes[q.genome]
54
49
  if (!g) throw 'invalid genome name'
55
50
  const ds = g.datasets[q.dslabel]
56
51
  if (!ds) throw 'invalid dataset name'
57
52
  if (!ds.queries?.singleCell) throw 'no singlecell data on this dataset'
58
- const samples = (await ds.queries.singleCell.samples.get(q)) as TermdbSinglecellsamplesResponse
59
- res.send({ samples })
53
+ result = (await ds.queries.singleCell.samples.get(q)) as TermdbSinglecellsamplesResponse
60
54
  } catch (e: any) {
61
- if (e instanceof Error && e.stack) console.log(e)
62
- res.send({ error: e.message || e })
55
+ if (e.stack) console.log(e.stack)
56
+ result = {
57
+ status: e.status || 400,
58
+ error: e.message || e
59
+ } as TermdbSinglecellsamplesResponse
63
60
  }
61
+ res.send(result)
64
62
  }
65
63
  }
66
64
 
@@ -71,15 +69,19 @@ export async function validate_query_singleCell(ds: any, genome: any) {
71
69
 
72
70
  if (q.samples.src == 'gdcapi') {
73
71
  gdc_validate_query_singleCell_samples(ds, genome)
74
- } else {
72
+ } else if (q.samples.src == 'native') {
75
73
  validateSamplesNative(q.samples as SingleCellSamplesNative, ds)
74
+ } else {
75
+ throw 'unknown singleCell.samples.src'
76
76
  }
77
77
  // q.samples.get() added
78
78
 
79
79
  if (q.data.src == 'gdcapi') {
80
80
  gdc_validate_query_singleCell_data(ds, genome)
81
- } else {
81
+ } else if (q.data.src == 'native') {
82
82
  validateDataNative(q.data as SingleCellDataNative, ds)
83
+ } else {
84
+ throw 'unknown singleCell.data.src'
83
85
  }
84
86
  // q.data.get() added
85
87
  }
@@ -94,7 +96,9 @@ function validateSamplesNative(S: SingleCellSamplesNative, ds: any) {
94
96
  }
95
97
  if (samples.length == 0) throw 'no sample with sc data'
96
98
  // getter returns array of {sample:<samplename>, files:[]} where files is gdc specific. each sample is an obj and allows to add ds-specific stuff
97
- S.get = () => samples
99
+ S.get = () => {
100
+ return { samples, fields: ['sample'], columnNames: ['Sample'] }
101
+ }
98
102
  }
99
103
 
100
104
  function validateDataNative(D: SingleCellDataNative, ds: any) {
@@ -112,17 +116,22 @@ function validateDataNative(D: SingleCellDataNative, ds: any) {
112
116
  const t = ds.cohort.termdb.q.termjsonByOneid(tid)
113
117
  if (!t) throw 'invalid term id from queries.singleCell.data.termIds[]'
114
118
  _terms.push(t)
115
- _tid2cellvalue[tid] = ds.cohort.termdb.q.getAllValues4term(tid)
119
+ // _tid2cellvalue[tid] = {}
120
+ // const clusterMap = ds.cohort.termdb.q.getAllValues4term(tid)
121
+ // for(const [id, cluster] of clusterMap)
122
+ // {
123
+ // const name = ds.cohort.termdb.q.id2sampleName(id)
124
+ // _tid2cellvalue[tid][name] = cluster
125
+ // }
116
126
  }
117
- D.get = async sample => {
127
+ D.get = async q => {
118
128
  // if sample is int, may convert to string
119
129
  try {
120
130
  const tid2cellvalue = {}
121
131
  for (const tid of D.termIds) tid2cellvalue[tid] = {} // k: cell id, v: cell value for this term
122
-
123
132
  const plots = [] as Plot[] // given a sample name, collect every plot data for this sample and return
124
133
  for (const plot of D.plots) {
125
- const tsvfile = path.join(serverconfig.tpmasterdir, plot.folder, sample, plot.fileSuffix)
134
+ const tsvfile = path.join(serverconfig.tpmasterdir, plot.folder, q.sample + plot.fileSuffix)
126
135
  try {
127
136
  await fs.promises.stat(tsvfile)
128
137
  } catch (e: any) {
@@ -147,9 +156,7 @@ function validateDataNative(D: SingleCellDataNative, ds: any) {
147
156
  cells.push({ cellId, x, y })
148
157
 
149
158
  for (const tid of D.termIds) {
150
- if (_tid2cellvalue[tid].has(cellId)) {
151
- tid2cellvalue[tid][cellId] = _tid2cellvalue[tid].get(cellId)
152
- }
159
+ tid2cellvalue[tid][cellId] = l[1]
153
160
  }
154
161
  }
155
162
  plots.push({ name: plot.name, cells })
@@ -0,0 +1,161 @@
1
+ import {
2
+ TermdbTopVariablyExpressedGenesRequest,
3
+ TermdbTopVariablyExpressedGenesResponse
4
+ } from '#shared/types/routes/termdb.topVariablyExpressedGenes.ts'
5
+ import { gdcGetCasesWithExressionDataFromCohort, apihost, geneExpHost } from '../src/mds3.gdc.js'
6
+ import path from 'path'
7
+ import got from 'got'
8
+ import serverconfig from '#src/serverconfig.js'
9
+
10
/**
 * Route descriptor for `termdb/topVariablyExpressedGenes`.
 * Registers a single GET method: `init` builds the request handler, and the two `typeId`
 * strings name the request/response types declared in
 * `#shared/types/routes/termdb.topVariablyExpressedGenes.ts`.
 */
export const api = {
	endpoint: 'termdb/topVariablyExpressedGenes',
	methods: {
		get: {
			init,
			request: { typeId: 'TermdbTopVariablyExpressedGenesRequest' },
			response: { typeId: 'TermdbTopVariablyExpressedGenesResponse' }
		}
	}
}
24
+
25
/**
 * Builds the request handler for the `termdb/topVariablyExpressedGenes` route.
 *
 * The handler looks up the genome and dataset named in `req.query`, calls the dataset's
 * `queries.topVariablyExpressedGenes.getGenes(q)` getter, and answers with exactly one
 * `res.send()`: `{ genes }` on success, or `{ status: 'error', error }` when anything fails.
 *
 * @param genomes - map of genome name to genome object; each genome holds `datasets` keyed by label
 * @returns async handler taking `(req, res)`
 */
function init({ genomes }) {
	return async (req: any, res: any): Promise<void> => {
		let result
		try {
			const q = req.query as TermdbTopVariablyExpressedGenesRequest
			const genome = genomes[q.genome]
			if (!genome) throw 'invalid genome'
			const ds = genome.datasets?.[q.dslabel]
			if (!ds) throw 'invalid dslabel'
			if (!ds.queries?.topVariablyExpressedGenes) throw 'not supported on dataset'
			const genes = await ds.queries.topVariablyExpressedGenes.getGenes(q)
			result = { genes } as TermdbTopVariablyExpressedGenesResponse
		} catch (e: any) {
			// plain strings are thrown above as well as Error objects, and a promise can reject with
			// no reason at all; guard the access so the catch block itself cannot throw and skip the response
			result = { status: 'error', error: e?.message || e || 'unknown error' }
		}
		res.send(result)
	}
}
41
+
42
/**
 * Prepares `ds.queries.topVariablyExpressedGenes` by dispatching on its `src`.
 * Does nothing when the dataset has no such query. Both source-specific validators
 * add the `getGenes()` getter to the query object.
 *
 * @param ds - dataset object whose query is prepared in place
 * @param genome - genome object, forwarded to the source-specific validator
 * @throws 'unknown topVariablyExpressedGenes.src' when `src` is neither 'gdcapi' nor 'native'
 */
export function validate_query_TopVariablyExpressedGenes(ds: any, genome: any) {
	const query = ds.queries.topVariablyExpressedGenes
	if (!query) return

	const isGdc = query.src == 'gdcapi'
	if (!isGdc && query.src != 'native') throw 'unknown topVariablyExpressedGenes.src'

	// added getter: q.getGenes()
	if (isGdc) gdcValidateQuery(ds, genome)
	else nativeValidateQuery(ds, genome)
}
54
+
55
/**
 * Attaches `getGenes()` to a native dataset's `topVariablyExpressedGenes` query.
 *
 * The getter takes only the request object, because the route calls `getGenes(q)` with a single
 * argument. The dataset is taken from this function's closure; the previous getter declared its
 * own `ds`/`genome` parameters, which shadowed the closure and were always `undefined`.
 *
 * @param ds - dataset object; `ds.queries.topVariablyExpressedGenes` is mutated in place
 * @param genome - genome object (currently unused by the native getter)
 */
function nativeValidateQuery(ds: any, genome: any) {
	ds.queries.topVariablyExpressedGenes.getGenes = async (q: TermdbTopVariablyExpressedGenesRequest) => {
		// get list of samples that are used in current analysis
		const samples = [] as string[]
		// call rust to compute top genes on these samples
		const genes = await computeGenes4nativeDs(q, ds, samples)
		return genes
	}
}
68
+
69
/**
 * Placeholder for computing the top variably expressed genes of a native dataset.
 * None of the arguments are used yet; the promise always resolves to an empty list.
 *
 * @param q - route request
 * @param ds - dataset object
 * @param samples - names of the samples in the current analysis
 * @returns promise of an empty array
 */
async function computeGenes4nativeDs(q: TermdbTopVariablyExpressedGenesRequest, ds: any, samples: string[]) {
	// not implemented yet: no gene is ever selected
	const noGenes = []
	return noGenes
}
72
+
73
/**
 * Attaches `getGenes()` to a GDC dataset's `topVariablyExpressedGenes` query.
 *
 * The getter resolves to a list of gene symbols:
 * 1. if `serverconfig.features.gdcGenes` is set, that list is returned as is (dev shortcut)
 * 2. it throws while `ds.__gdc.doneCaching` is falsy
 * 3. it collects the cases with expression data for the current cohort; none means `[]`
 * 4. otherwise it POSTs to `<geneExpHost>/gene_expression/gene_selection` and maps each
 *    returned entry to a symbol (via `genome.genedb.getNameByAlias` when `gene_id` is present)
 *
 * @param ds - dataset object; `ds.queries.topVariablyExpressedGenes` is mutated in place
 * @param genome - genome object providing `genedb` and, for the temporary CGC restriction, `geneset`
 */
function gdcValidateQuery(ds: any, genome: any) {
	ds.queries.topVariablyExpressedGenes.getGenes = async (q: TermdbTopVariablyExpressedGenesRequest) => {
		if (serverconfig.features.gdcGenes) {
			// for testing on dev, must not set on prod!! delete to trigger api query
			console.log('!!GDC!! using serverconfig.features.gdcGenes[]')
			return serverconfig.features.gdcGenes as string[]
		}

		// disable when caching is incomplete (particularly cases with gene exp data); to prevent showing wrong data on client
		if (!ds.__gdc.doneCaching) throw 'The server has not finished caching the case IDs: try again in ~2 minutes'

		// based on current cohort, get list of cases with exp data, as input of next api query
		const caseLst = await gdcGetCasesWithExressionDataFromCohort(q, ds)
		if (caseLst.length == 0) {
			// there are no cases with gene exp data
			return [] as string[]
		}

		// change to this when api is available on prod
		// built by string concatenation: path.join() is for filesystem paths, it collapses the "//"
		// after the scheme and uses "\" on Windows
		const url = geneExpHost.replace(/\/+$/, '') + '/gene_expression/gene_selection'

		try {
			const response = await got.post(url, {
				headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
				body: JSON.stringify(getGeneSelectionArg(q, caseLst))
			})

			const re = JSON.parse(response.body)
			// {"gene_selection":[{"gene_id":"ENSG00000141510","log2_uqfpkm_median":3.103430497010492,"log2_uqfpkm_stddev":0.8692021350485105,"symbol":"TP53"}, ... ]}

			const genes = [] as string[]
			if (!Array.isArray(re.gene_selection)) throw 're.gene_selection[] is not array'
			for (const i of re.gene_selection) {
				if (i.gene_id && typeof i.gene_id == 'string') {
					// is ensg, convert to symbol; entries unknown to the gene db are skipped
					const t = genome.genedb.getNameByAlias.get(i.gene_id)
					if (t) genes.push(t.name) // ensg
				} else if (i.symbol && typeof i.symbol == 'string') {
					genes.push(i.symbol)
				} else {
					throw 'one of re.gene_selection[] is missing both gene_id and symbol'
				}
			}
			return genes
		} catch (e: any) {
			// log here for server-side diagnosis, then let the route handler build the error response
			console.log(e.stack || e)
			throw e
		}
	}

	/** Builds the JSON body of the gene_selection request. */
	function getGeneSelectionArg(q: any, caseLst: any) {
		//to hide messy logic during testing phase

		/* when api performance issue is resolved, return this
		return {
			case_ids: caseLst,
			gene_type:'protein_coding',
			selection_size: Number(q.maxGenes)
		}
		*/

		//////////////////////////////////////////////////
		//
		// !!!!!!!!!!!!!!!! TEMPORARY !!!!!!!!!!!!!!!!!!!!
		//
		//////////////////////////////////////////////////
		// limit the case_ids length, and restrict pool to CGC genes, otherwise the request times out !!!
		// must revert asap
		return {
			case_ids: caseLst.slice(0, 20),
			gene_ids: tempGetCGCgenes(genome),
			selection_size: Number(q.maxGenes)
		}
	}
}
148
+
149
/**
 * Temporary helper: collects the ENSG aliases of every gene in the genome's first gene set
 * (`genome.geneset[0].lst`), to restrict the GDC gene_selection query to that pool.
 *
 * @param genome - genome object providing `geneset[0].lst` (gene names) and
 *   `genedb.getAliasByName.all(name)` returning rows with an `alias` field
 * @returns aliases that start with 'ENSG', in lookup order
 * @throws when the genome has no `geneset[0].lst`, instead of failing with an opaque TypeError
 */
function tempGetCGCgenes(genome: any) {
	const geneNames = genome.geneset?.[0]?.lst
	if (!geneNames) throw 'genome.geneset[0].lst is missing, cannot restrict to CGC genes'

	const lst = [] as string[] // list of ENSG ids from cgc genes
	// don't think there's need to preparse genome.geneset, as this function is only temporary
	for (const s of geneNames) {
		const a = genome.genedb.getAliasByName.all(s)
		if (!a) continue
		for (const b of a) {
			// skip rows without a string alias rather than crashing on startsWith()
			if (typeof b.alias == 'string' && b.alias.startsWith('ENSG')) lst.push(b.alias)
		}
	}
	return lst
}