@sjcrh/proteinpaint-server 2.39.2 → 2.39.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.39.2",
3
+ "version": "2.39.5",
4
4
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
5
5
  "main": "server.js",
6
6
  "bin": "start.js",
@@ -1,62 +1,288 @@
1
- import { GdcTopMutatedGeneResponse } from '#shared/types/routes/gdc.topMutatedGenes.ts'
1
+ import { GdcTopMutatedGeneRequest, GdcTopMutatedGeneResponse } from '#shared/types/routes/gdc.topMutatedGenes.ts'
2
2
  import path from 'path'
3
3
  import got from 'got'
4
4
 
5
+ // TODO change to /termdb/topMutatedGenes
6
+
5
7
  const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
8
+ const apihostGraphql = apihost + (apihost.includes('/v0') ? '' : '/v0') + '/graphql'
6
9
 
7
10
  export const api = {
8
11
  endpoint: 'gdc/topMutatedGenes',
9
12
  methods: {
10
- get: {
11
- init({ genomes }) {
12
- /*
13
- genomes parameter is currently not used
14
- could be used later to:
15
- - verify hg38/GDC is on this instance and otherwise disable this route..
16
- - perform conversion on gene name/id for future on needs
17
- */
18
-
19
- return async (req: any, res: any): Promise<void> => {
20
- try {
21
- const genes = await getGenes(req.query)
22
- const payload = { genes } as GdcTopMutatedGeneResponse
23
- res.send(payload)
24
- } catch (e: any) {
25
- res.send({ status: 'error', error: e.message || e })
26
- }
27
- }
28
- },
13
+ all: {
14
+ init,
29
15
  request: {
30
- typeId: null
31
- //valid: default to type checker
16
+ typeId: 'GdcTopMutatedGeneRequest'
32
17
  },
33
18
  response: {
34
19
  typeId: 'GdcTopMutatedGeneResponse'
35
- // will combine this with type checker
36
- //valid: (t) => {}
37
20
  }
38
21
  }
39
22
  }
40
23
  }
41
24
 
42
- /*
43
- req.query {
44
- filter0 // optional gdc GFF cohort filter, invisible and read only
45
- FIXME should there be pp filter too?
46
- geneFilter?: str
47
- maxGenes: int
25
+ function init() {
26
+ /*
27
+ genomes parameter is currently not used
28
+ could be used later to:
29
+ - verify hg38/GDC is on this instance and otherwise disable this route..
30
+ - perform conversion on gene name/id for future on needs
31
+ */
32
+ return async (req: any, res: any): Promise<void> => {
33
+ const q: GdcTopMutatedGeneRequest = req.query
34
+ try {
35
+ const genes = await getGenesGraphql(q)
36
+ const payload: GdcTopMutatedGeneResponse = { genes }
37
+ res.send(payload)
38
+ } catch (e: any) {
39
+ res.send({ status: 'error', error: e.message || e })
40
+ }
41
+ }
48
42
  }
49
43
 
50
- mayAddCGC2filter() are copied to
51
- /utils/gdc/topSSMgenes.js
52
- and hosted on https://proteinpaint.stjude.org/GDC/
53
- */
54
- async function getGenes(q: any) {
55
- let _f = { op: 'and', content: [] } // allow blank filter to test geneset edit ui (without filter)
44
+ const query = `
45
+ query GenesTable_relayQuery(
46
+ $genesTable_filters: FiltersArgument
47
+ $genesTable_size: Int
48
+ $genesTable_offset: Int
49
+ $score: String
50
+ $ssmCase: FiltersArgument
51
+ $geneCaseFilter: FiltersArgument
52
+ $ssmTested: FiltersArgument
53
+ $cnvTested: FiltersArgument
54
+ $cnvGainFilters: FiltersArgument
55
+ $cnvLossFilters: FiltersArgument
56
+ ) {
57
+ genesTableViewer: viewer {
58
+ explore {
59
+ cases {
60
+ hits(first: 0, filters: $ssmTested) {
61
+ total
62
+ }
63
+ }
64
+ filteredCases: cases {
65
+ hits(first: 0, filters: $geneCaseFilter) {
66
+ total
67
+ }
68
+ }
69
+ cnvCases: cases {
70
+ hits(first: 0, filters: $cnvTested) {
71
+ total
72
+ }
73
+ }
74
+ genes {
75
+ hits(first: $genesTable_size, offset: $genesTable_offset, filters: $genesTable_filters, score: $score) {
76
+ total
77
+ edges {
78
+ node {
79
+ id
80
+ numCases: score
81
+ symbol
82
+ name
83
+ cytoband
84
+ biotype
85
+ gene_id
86
+ is_cancer_gene_census
87
+ ssm_case: case {
88
+ hits(first: 0, filters: $ssmCase) {
89
+ total
90
+ }
91
+ }
92
+ cnv_case: case {
93
+ hits(first: 0, filters: $cnvTested) {
94
+ total
95
+ }
96
+ }
97
+ case_cnv_gain: case {
98
+ hits(first: 0, filters: $cnvGainFilters) {
99
+ total
100
+ }
101
+ }
102
+ case_cnv_loss: case {
103
+ hits(first: 0, filters: $cnvLossFilters) {
104
+ total
105
+ }
106
+ }
107
+ }
108
+ }
109
+ }
110
+ }
111
+ }
112
+ }
113
+ }
114
+ `
115
+
116
+ async function getGenesGraphql(q: GdcTopMutatedGeneRequest) {
117
+ // set type "any" to avoid complains
118
+ const variables: any = {
119
+ genesTable_filters: {
120
+ op: 'and',
121
+ content: []
122
+ },
123
+ genesTable_size: q.maxGenes || 50,
124
+ genesTable_offset: 0,
125
+ score: 'case.project.project_id',
126
+ ssmCase: {
127
+ op: 'and',
128
+ content: [
129
+ {
130
+ op: 'in',
131
+ content: {
132
+ field: 'cases.available_variation_data',
133
+ value: ['ssm']
134
+ }
135
+ },
136
+ {
137
+ op: 'NOT',
138
+ content: {
139
+ field: 'genes.case.ssm.observation.observation_id',
140
+ value: 'MISSING'
141
+ }
142
+ }
143
+ ]
144
+ },
145
+ geneCaseFilter: {
146
+ content: [
147
+ {
148
+ content: {
149
+ field: 'cases.available_variation_data',
150
+ value: ['ssm']
151
+ },
152
+ op: 'in'
153
+ }
154
+ ],
155
+ op: 'and'
156
+ },
157
+ ssmTested: {
158
+ content: [
159
+ {
160
+ content: {
161
+ field: 'cases.available_variation_data',
162
+ value: ['ssm']
163
+ },
164
+ op: 'in'
165
+ }
166
+ ],
167
+ op: 'and'
168
+ },
169
+ cnvTested: {
170
+ op: 'and',
171
+ content: [
172
+ {
173
+ content: {
174
+ field: 'cases.available_variation_data',
175
+ value: ['cnv']
176
+ },
177
+ op: 'in'
178
+ }
179
+ ]
180
+ },
181
+ cnvGainFilters: {
182
+ op: 'and',
183
+ content: [
184
+ {
185
+ content: {
186
+ field: 'cases.available_variation_data',
187
+ value: ['cnv']
188
+ },
189
+ op: 'in'
190
+ },
191
+ {
192
+ content: {
193
+ field: 'cnvs.cnv_change',
194
+ value: ['Gain']
195
+ },
196
+ op: 'in'
197
+ }
198
+ ]
199
+ },
200
+ cnvLossFilters: {
201
+ op: 'and',
202
+ content: [
203
+ {
204
+ content: {
205
+ field: 'cases.available_variation_data',
206
+ value: ['cnv']
207
+ },
208
+ op: 'in'
209
+ },
210
+ {
211
+ content: {
212
+ field: 'cnvs.cnv_change',
213
+ value: ['Loss']
214
+ },
215
+ op: 'in'
216
+ }
217
+ ]
218
+ }
219
+ }
220
+
56
221
  if (q.filter0) {
57
- if (typeof q.filter0 != 'object') throw 'filter0 not object'
58
- _f = q.filter0
222
+ variables.genesTable_filters.content.push(JSON.parse(JSON.stringify(q.filter0)))
223
+ variables.geneCaseFilter.content.push(JSON.parse(JSON.stringify(q.filter0)))
224
+ variables.cnvTested.content.push(JSON.parse(JSON.stringify(q.filter0)))
225
+ variables.cnvGainFilters.content.push(JSON.parse(JSON.stringify(q.filter0)))
226
+ variables.cnvLossFilters.content.push(JSON.parse(JSON.stringify(q.filter0)))
227
+ }
228
+
229
+ if (q.geneFilter == 'CGC') {
230
+ variables.genesTable_filters.content.push(geneCGC())
231
+ variables.geneCaseFilter.content.push(geneCGC())
232
+ variables.cnvTested.content.push(geneCGC())
233
+ variables.cnvGainFilters.content.push(geneCGC())
234
+ variables.cnvLossFilters.content.push(geneCGC())
235
+ }
236
+
237
+ const response = await got.post(apihostGraphql, {
238
+ headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
239
+ body: JSON.stringify({ query, variables })
240
+ })
241
+
242
+ const re: any = JSON.parse(response.body)
243
+ const genes: string[] = []
244
+ for (const g of re.data.genesTableViewer.explore.genes.hits.edges) {
245
+ /*
246
+ {
247
+ node: {
248
+ biotype: 'protein_coding',
249
+ case_cnv_gain: { hits: [Object] },
250
+ case_cnv_loss: { hits: [Object] },
251
+ cnv_case: { hits: [Object] },
252
+ cytoband: [ '17q11.2' ],
253
+ gene_id: 'ENSG00000196712',
254
+ is_cancer_gene_census: true,
255
+ name: 'neurofibromin 1',
256
+ numCases: 93,
257
+ ssm_case: { hits: [Object] },
258
+ symbol: 'NF1'
259
+ }
260
+ }
261
+ */
262
+ genes.push(g.node.symbol)
59
263
  }
264
+ return genes
265
+ }
266
+
267
+ function geneCGC() {
268
+ // return a copy of cgc filter obj each time
269
+ return {
270
+ content: {
271
+ field: 'genes.is_cancer_gene_census',
272
+ value: ['true']
273
+ },
274
+ op: 'in'
275
+ } as object
276
+ }
277
+
278
+ /*************************************
279
+ old method to use rest api
280
+ **************************************
281
+ this api only gets ssm-cases and does not account for cnv cases, will not return any gene for ssm-less cohort e.g. APOLLO-LUAD
282
+ thus is replaced by getGenesGraphql
283
+ */
284
+ async function getGenes(q: GdcTopMutatedGeneRequest) {
285
+ const _f = q.filter0 || { op: 'and', content: [] } // allow blank filter to test geneset edit ui (without filter)
60
286
  const response = await got.post(path.join(apihost, '/analysis/top_mutated_genes_by_project'), {
61
287
  headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
62
288
  body: JSON.stringify({
@@ -1,11 +1,18 @@
1
- import { TermdbClusterRequest, TermdbClusterResponse } from '#shared/types/routes/termdb.cluster.ts'
2
1
  import path from 'path'
2
+ import fs from 'fs'
3
+ import lines2R from '#src/lines2R.js'
4
+ import {
5
+ TermdbClusterRequest,
6
+ TermdbClusterResponse,
7
+ Clustering,
8
+ ValidResponse,
9
+ SinglegeneResponse
10
+ } from '#shared/types/routes/termdb.cluster.ts'
3
11
  import * as utils from '#src/utils.js'
4
12
  import serverconfig from '#src/serverconfig.js'
5
13
  import { GeneExpressionQuery, GeneExpressionQueryNative } from '#shared/types/dataset.ts'
6
14
  import { gdc_validate_query_geneExpression } from '#src/mds3.gdc.js'
7
15
  import { mayLimitSamples } from '#src/mds3.filter.js'
8
- import { doClustering } from '#src/doClustering.js' // unable to convert this to ts yet, when converted, move all code here
9
16
  import { dtgeneexpression } from '#shared/common.js'
10
17
 
11
18
  export const api = {
@@ -57,14 +64,87 @@ async function getResult(q: TermdbClusterRequest, ds: any) {
57
64
  if (gene2sample2value.size == 1) {
58
65
  // get data for only 1 gene; still return data, may create violin plot later
59
66
  const g = Array.from(gene2sample2value.keys())[0]
60
- return { gene: g, data: gene2sample2value.get(g) }
67
+ return { gene: g, data: gene2sample2value.get(g) } as SinglegeneResponse
61
68
  }
62
69
 
63
70
  // have data for multiple genes, run clustering
64
71
  const t = Date.now() // use "t=new Date()" will lead to tsc error
65
- const clustering = await doClustering(gene2sample2value, q, ds)
72
+ const clustering: Clustering = await doClustering(gene2sample2value, q)
66
73
  if (serverconfig.debugmode) console.log('clustering done:', Date.now() - t, 'ms')
67
- return { clustering, byTermId, bySampleId }
74
+ return { clustering, byTermId, bySampleId } as ValidResponse
75
+ }
76
+
77
+ async function doClustering(data: any, q: TermdbClusterRequest) {
78
+ // get set of unique sample names, to generate col_names dimension
79
+ const sampleSet = new Set()
80
+ for (const o of data.values()) {
81
+ // {sampleId: value}
82
+ for (const s in o) sampleSet.add(s)
83
+ break
84
+ }
85
+
86
+ const inputData = {
87
+ matrix: [] as number[][],
88
+ row_names: [] as string[], // genes
89
+ col_names: [...sampleSet] as string[], // samples
90
+ cluster_method: q.clusterMethod as string,
91
+ plot_image: false // When true causes cluster.rs to plot the image into a png file (EXPERIMENTAL)
92
+ }
93
+
94
+ // compose "data{}" into a matrix
95
+ for (const [gene, o] of data) {
96
+ inputData.row_names.push(gene)
97
+ const row: number[] = []
98
+ for (const s of inputData.col_names) {
99
+ row.push(o[s] || 0)
100
+ }
101
+ inputData.matrix.push(getZscore(row))
102
+ }
103
+
104
+ const Rinputfile = path.join(serverconfig.cachedir, Math.random().toString() + '.json')
105
+ await utils.write_file(Rinputfile, JSON.stringify(inputData))
106
+ const Routput = JSON.parse(await lines2R(path.join(serverconfig.binpath, 'utils/hclust.R'), [], [Rinputfile]))
107
+ fs.unlink(Rinputfile, (arg: any) => {
108
+ return
109
+ })
110
+
111
+ const row_names_index: number[] = Routput.RowOrder.map(row => inputData.row_names.indexOf(row.name)) // sorted rows. value is array index in input data
112
+ const col_names_index: number[] = Routput.ColOrder.map(col => inputData.col_names.indexOf(col.name)) // sorted columns, value is array index from input array
113
+
114
+ // generated sorted matrix based on row/col clustering order
115
+ const output_matrix: number[][] = []
116
+ for (const rowI of row_names_index) {
117
+ const newRow: number[] = []
118
+ for (const colI of col_names_index) {
119
+ newRow.push(inputData.matrix[rowI][colI])
120
+ }
121
+ output_matrix.push(newRow)
122
+ }
123
+
124
+ return {
125
+ row: {
126
+ merge: Routput.RowMerge,
127
+ height: Routput.RowHeight,
128
+ order: Routput.RowOrder,
129
+ inputOrder: inputData.row_names
130
+ },
131
+ col: {
132
+ merge: Routput.ColumnMerge,
133
+ height: Routput.ColumnHeight,
134
+ order: Routput.ColOrder,
135
+ inputOrder: inputData.col_names
136
+ },
137
+ matrix: output_matrix
138
+ }
139
+ }
140
+ function getZscore(l: number[]) {
141
+ const mean: number = l.reduce((sum, v) => sum + v, 0) / l.length
142
+ const sd: number = Math.sqrt(l.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / l.length)
143
+
144
+ if (sd == 0) {
145
+ return l
146
+ }
147
+ return l.map(v => (v - mean) / sd)
68
148
  }
69
149
 
70
150
  export async function validate_query_geneExpression(ds: any, genome: any) {