@sjcrh/proteinpaint-server 2.39.2 → 2.39.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/routes/gdc.topMutatedGenes.ts +264 -38
- package/routes/termdb.cluster.ts +85 -5
- package/server.js +1 -1
- package/server.js.map +1 -1
- package/utils/hclust.R +29 -47
package/package.json
CHANGED
|
@@ -1,62 +1,288 @@
|
|
|
1
|
-
import { GdcTopMutatedGeneResponse } from '#shared/types/routes/gdc.topMutatedGenes.ts'
|
|
1
|
+
import { GdcTopMutatedGeneRequest, GdcTopMutatedGeneResponse } from '#shared/types/routes/gdc.topMutatedGenes.ts'
|
|
2
2
|
import path from 'path'
|
|
3
3
|
import got from 'got'
|
|
4
4
|
|
|
5
|
+
// TODO change to /termdb/topMutatedGenes
|
|
6
|
+
|
|
5
7
|
const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
|
|
8
|
+
const apihostGraphql = apihost + (apihost.includes('/v0') ? '' : '/v0') + '/graphql'
|
|
6
9
|
|
|
7
10
|
export const api = {
|
|
8
11
|
endpoint: 'gdc/topMutatedGenes',
|
|
9
12
|
methods: {
|
|
10
|
-
|
|
11
|
-
init
|
|
12
|
-
/*
|
|
13
|
-
genomes parameter is currently not used
|
|
14
|
-
could be used later to:
|
|
15
|
-
- verify hg38/GDC is on this instance and otherwise disable this route..
|
|
16
|
-
- perform conversion on gene name/id for future on needs
|
|
17
|
-
*/
|
|
18
|
-
|
|
19
|
-
return async (req: any, res: any): Promise<void> => {
|
|
20
|
-
try {
|
|
21
|
-
const genes = await getGenes(req.query)
|
|
22
|
-
const payload = { genes } as GdcTopMutatedGeneResponse
|
|
23
|
-
res.send(payload)
|
|
24
|
-
} catch (e: any) {
|
|
25
|
-
res.send({ status: 'error', error: e.message || e })
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
},
|
|
13
|
+
all: {
|
|
14
|
+
init,
|
|
29
15
|
request: {
|
|
30
|
-
typeId:
|
|
31
|
-
//valid: default to type checker
|
|
16
|
+
typeId: 'GdcTopMutatedGeneRequest'
|
|
32
17
|
},
|
|
33
18
|
response: {
|
|
34
19
|
typeId: 'GdcTopMutatedGeneResponse'
|
|
35
|
-
// will combine this with type checker
|
|
36
|
-
//valid: (t) => {}
|
|
37
20
|
}
|
|
38
21
|
}
|
|
39
22
|
}
|
|
40
23
|
}
|
|
41
24
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
25
|
+
/**
 * Creates the request handler for the `gdc/topMutatedGenes` route.
 *
 * No genomes parameter is taken at the moment. One could be added later to:
 * - verify that hg38/GDC is available on this instance and disable the route otherwise
 * - convert gene names/ids if that becomes necessary
 *
 * @returns an async (req, res) handler that replies with `{ genes }` on success,
 *          or `{ status: 'error', error }` when the gene lookup throws
 */
function init() {
	const handler = async (req: any, res: any): Promise<void> => {
		const q: GdcTopMutatedGeneRequest = req.query
		try {
			const payload: GdcTopMutatedGeneResponse = { genes: await getGenesGraphql(q) }
			res.send(payload)
		} catch (e: any) {
			// report the failure in the response body instead of letting the rejection escape
			res.send({ status: 'error', error: e.message || e })
		}
	}
	return handler
}
|
|
49
43
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
44
|
+
const query = `
|
|
45
|
+
query GenesTable_relayQuery(
|
|
46
|
+
$genesTable_filters: FiltersArgument
|
|
47
|
+
$genesTable_size: Int
|
|
48
|
+
$genesTable_offset: Int
|
|
49
|
+
$score: String
|
|
50
|
+
$ssmCase: FiltersArgument
|
|
51
|
+
$geneCaseFilter: FiltersArgument
|
|
52
|
+
$ssmTested: FiltersArgument
|
|
53
|
+
$cnvTested: FiltersArgument
|
|
54
|
+
$cnvGainFilters: FiltersArgument
|
|
55
|
+
$cnvLossFilters: FiltersArgument
|
|
56
|
+
) {
|
|
57
|
+
genesTableViewer: viewer {
|
|
58
|
+
explore {
|
|
59
|
+
cases {
|
|
60
|
+
hits(first: 0, filters: $ssmTested) {
|
|
61
|
+
total
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
filteredCases: cases {
|
|
65
|
+
hits(first: 0, filters: $geneCaseFilter) {
|
|
66
|
+
total
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
cnvCases: cases {
|
|
70
|
+
hits(first: 0, filters: $cnvTested) {
|
|
71
|
+
total
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
genes {
|
|
75
|
+
hits(first: $genesTable_size, offset: $genesTable_offset, filters: $genesTable_filters, score: $score) {
|
|
76
|
+
total
|
|
77
|
+
edges {
|
|
78
|
+
node {
|
|
79
|
+
id
|
|
80
|
+
numCases: score
|
|
81
|
+
symbol
|
|
82
|
+
name
|
|
83
|
+
cytoband
|
|
84
|
+
biotype
|
|
85
|
+
gene_id
|
|
86
|
+
is_cancer_gene_census
|
|
87
|
+
ssm_case: case {
|
|
88
|
+
hits(first: 0, filters: $ssmCase) {
|
|
89
|
+
total
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
cnv_case: case {
|
|
93
|
+
hits(first: 0, filters: $cnvTested) {
|
|
94
|
+
total
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
case_cnv_gain: case {
|
|
98
|
+
hits(first: 0, filters: $cnvGainFilters) {
|
|
99
|
+
total
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
case_cnv_loss: case {
|
|
103
|
+
hits(first: 0, filters: $cnvLossFilters) {
|
|
104
|
+
total
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
`
|
|
115
|
+
|
|
116
|
+
/**
 * Queries the GDC GraphQL API for the top mutated genes and returns their symbols.
 *
 * The request reuses the module-level `query` document and posts it to `apihostGraphql`.
 * When `q.filter0` is set, a fresh copy of it is appended to each gene/cnv filter set;
 * when `q.geneFilter` is 'CGC', a Cancer Gene Census clause is appended to the same sets.
 * The `ssmCase` and `ssmTested` filter sets are sent as declared and never receive these clauses.
 *
 * @param q request parameters; reads `maxGenes` (defaults to 50 when falsy), `filter0` and `geneFilter`
 * @returns gene symbols, in the order of the edges returned by the API
 * @throws Error when the response does not contain the expected `edges` array
 *         (any messages from a GraphQL `errors` array are included), or when the HTTP request fails
 */
async function getGenesGraphql(q: GdcTopMutatedGeneRequest) {
	// typed as "any" so filter clauses of different shapes can be pushed into the content arrays
	const variables: any = {
		genesTable_filters: {
			op: 'and',
			content: []
		},
		genesTable_size: q.maxGenes || 50,
		genesTable_offset: 0,
		score: 'case.project.project_id',
		ssmCase: {
			op: 'and',
			content: [
				{
					op: 'in',
					content: {
						field: 'cases.available_variation_data',
						value: ['ssm']
					}
				},
				{
					op: 'NOT',
					content: {
						field: 'genes.case.ssm.observation.observation_id',
						value: 'MISSING'
					}
				}
			]
		},
		geneCaseFilter: {
			content: [
				{
					content: {
						field: 'cases.available_variation_data',
						value: ['ssm']
					},
					op: 'in'
				}
			],
			op: 'and'
		},
		ssmTested: {
			content: [
				{
					content: {
						field: 'cases.available_variation_data',
						value: ['ssm']
					},
					op: 'in'
				}
			],
			op: 'and'
		},
		cnvTested: {
			op: 'and',
			content: [
				{
					content: {
						field: 'cases.available_variation_data',
						value: ['cnv']
					},
					op: 'in'
				}
			]
		},
		cnvGainFilters: {
			op: 'and',
			content: [
				{
					content: {
						field: 'cases.available_variation_data',
						value: ['cnv']
					},
					op: 'in'
				},
				{
					content: {
						field: 'cnvs.cnv_change',
						value: ['Gain']
					},
					op: 'in'
				}
			]
		},
		cnvLossFilters: {
			op: 'and',
			content: [
				{
					content: {
						field: 'cases.available_variation_data',
						value: ['cnv']
					},
					op: 'in'
				},
				{
					content: {
						field: 'cnvs.cnv_change',
						value: ['Loss']
					},
					op: 'in'
				}
			]
		}
	}

	// the filter sets that receive the optional cohort filter and CGC clause
	const narrowedFilters = [
		variables.genesTable_filters,
		variables.geneCaseFilter,
		variables.cnvTested,
		variables.cnvGainFilters,
		variables.cnvLossFilters
	]

	if (q.filter0) {
		// serialize once, parse per target so every filter set owns an independent copy
		const serialized = JSON.stringify(q.filter0)
		for (const f of narrowedFilters) f.content.push(JSON.parse(serialized))
	}

	if (q.geneFilter == 'CGC') {
		for (const f of narrowedFilters) f.content.push(geneCGC())
	}

	const response = await got.post(apihostGraphql, {
		headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
		body: JSON.stringify({ query, variables })
	})

	const re: any = JSON.parse(response.body)

	/*
	each edge looks like:
	{
		node: {
			biotype: 'protein_coding',
			case_cnv_gain: { hits: [Object] },
			case_cnv_loss: { hits: [Object] },
			cnv_case: { hits: [Object] },
			cytoband: [ '17q11.2' ],
			gene_id: 'ENSG00000196712',
			is_cancer_gene_census: true,
			name: 'neurofibromin 1',
			numCases: 93,
			ssm_case: { hits: [Object] },
			symbol: 'NF1'
		}
	}
	*/
	const edges = re?.data?.genesTableViewer?.explore?.genes?.hits?.edges
	if (!Array.isArray(edges)) {
		// fail with a readable message rather than a TypeError from a deep property access
		const detail =
			Array.isArray(re?.errors) && re.errors.length
				? ': ' + re.errors.map((e: any) => e?.message).join('; ')
				: ''
		throw new Error('unexpected response from GDC GraphQL API' + detail)
	}

	const genes: string[] = []
	for (const g of edges) {
		genes.push(g.node.symbol)
	}
	return genes
}
|
|
266
|
+
|
|
267
|
+
/**
 * Builds the filter clause that restricts genes to those flagged `is_cancer_gene_census`.
 *
 * A new object is created on every call, so each caller can push the clause into its own
 * filter set without sharing references.
 *
 * @returns a fresh `in` clause on the `genes.is_cancer_gene_census` field
 */
function geneCGC(): { content: { field: string; value: string[] }; op: string } {
	return {
		content: {
			field: 'genes.is_cancer_gene_census',
			value: ['true']
		},
		op: 'in'
	}
}
|
|
277
|
+
|
|
278
|
+
/*************************************
|
|
279
|
+
old method to use rest api
|
|
280
|
+
**************************************
|
|
281
|
+
this api only gets ssm-cases and does not account for cnv cases, will not return any gene for ssm-less cohort e.g. APOLLO-LUAD
|
|
282
|
+
thus is replaced by getGenesGraphql
|
|
283
|
+
*/
|
|
284
|
+
async function getGenes(q: GdcTopMutatedGeneRequest) {
|
|
285
|
+
const _f = q.filter0 || { op: 'and', content: [] } // allow blank filter to test geneset edit ui (without filter)
|
|
60
286
|
const response = await got.post(path.join(apihost, '/analysis/top_mutated_genes_by_project'), {
|
|
61
287
|
headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
|
|
62
288
|
body: JSON.stringify({
|
package/routes/termdb.cluster.ts
CHANGED
|
@@ -1,11 +1,18 @@
|
|
|
1
|
-
import { TermdbClusterRequest, TermdbClusterResponse } from '#shared/types/routes/termdb.cluster.ts'
|
|
2
1
|
import path from 'path'
|
|
2
|
+
import fs from 'fs'
|
|
3
|
+
import lines2R from '#src/lines2R.js'
|
|
4
|
+
import {
|
|
5
|
+
TermdbClusterRequest,
|
|
6
|
+
TermdbClusterResponse,
|
|
7
|
+
Clustering,
|
|
8
|
+
ValidResponse,
|
|
9
|
+
SinglegeneResponse
|
|
10
|
+
} from '#shared/types/routes/termdb.cluster.ts'
|
|
3
11
|
import * as utils from '#src/utils.js'
|
|
4
12
|
import serverconfig from '#src/serverconfig.js'
|
|
5
13
|
import { GeneExpressionQuery, GeneExpressionQueryNative } from '#shared/types/dataset.ts'
|
|
6
14
|
import { gdc_validate_query_geneExpression } from '#src/mds3.gdc.js'
|
|
7
15
|
import { mayLimitSamples } from '#src/mds3.filter.js'
|
|
8
|
-
import { doClustering } from '#src/doClustering.js' // unable to convert this to ts yet, when converted, move all code here
|
|
9
16
|
import { dtgeneexpression } from '#shared/common.js'
|
|
10
17
|
|
|
11
18
|
export const api = {
|
|
@@ -57,14 +64,87 @@ async function getResult(q: TermdbClusterRequest, ds: any) {
|
|
|
57
64
|
if (gene2sample2value.size == 1) {
|
|
58
65
|
// get data for only 1 gene; still return data, may create violin plot later
|
|
59
66
|
const g = Array.from(gene2sample2value.keys())[0]
|
|
60
|
-
return { gene: g, data: gene2sample2value.get(g) }
|
|
67
|
+
return { gene: g, data: gene2sample2value.get(g) } as SinglegeneResponse
|
|
61
68
|
}
|
|
62
69
|
|
|
63
70
|
// have data for multiple genes, run clustering
|
|
64
71
|
const t = Date.now() // use "t=new Date()" will lead to tsc error
|
|
65
|
-
const clustering = await doClustering(gene2sample2value, q
|
|
72
|
+
const clustering: Clustering = await doClustering(gene2sample2value, q)
|
|
66
73
|
if (serverconfig.debugmode) console.log('clustering done:', Date.now() - t, 'ms')
|
|
67
|
-
return { clustering, byTermId, bySampleId }
|
|
74
|
+
return { clustering, byTermId, bySampleId } as ValidResponse
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/**
 * Runs hierarchical clustering on a gene-by-sample matrix through the `utils/hclust.R` script.
 *
 * Steps: build a z-score matrix from `data`, write it to a temporary JSON file in the cache
 * directory, run the R script on that file, then reorder the matrix by the returned row and
 * column order.
 *
 * @param data iterable of [gene, {sampleId: value}] entries that also exposes `.values()`
 *             (presumably a Map; confirm against the caller)
 * @param q    request; only `clusterMethod` is read here
 * @returns row and column dendrogram info (merge, height, order, inputOrder) plus the reordered matrix
 * @throws whatever `utils.write_file`, `lines2R` or `JSON.parse` throw; the temp file is removed either way
 */
async function doClustering(data: any, q: TermdbClusterRequest) {
	// column names come from the first entry only
	// NOTE(review): this assumes every gene carries the same set of samples; confirm
	const sampleSet = new Set<string>()
	for (const o of data.values()) {
		// {sampleId: value}
		for (const s in o) sampleSet.add(s)
		break
	}

	const inputData = {
		matrix: [] as number[][],
		row_names: [] as string[], // genes
		col_names: [...sampleSet], // samples
		cluster_method: q.clusterMethod as string,
		plot_image: false // When true causes cluster.rs to plot the image into a png file (EXPERIMENTAL)
	}

	// compose "data{}" into a matrix; missing or falsy values are entered as 0
	for (const [gene, o] of data) {
		inputData.row_names.push(gene)
		const row: number[] = inputData.col_names.map(s => o[s] || 0)
		inputData.matrix.push(getZscore(row))
	}

	const Rinputfile = path.join(serverconfig.cachedir, Math.random().toString() + '.json')
	let Routput: any
	try {
		await utils.write_file(Rinputfile, JSON.stringify(inputData))
		Routput = JSON.parse(await lines2R(path.join(serverconfig.binpath, 'utils/hclust.R'), [], [Rinputfile]))
	} finally {
		// best-effort cleanup that also runs when writing, the R call or parsing fails;
		// the unlink error is deliberately ignored
		fs.unlink(Rinputfile, () => {
			return
		})
	}

	// maps each ordered item to its index in the input names with one Map lookup per item;
	// yields -1 for an unknown name, the same value indexOf() would give
	const toInputIndices = (ordered: any[], names: string[]): number[] => {
		const indexByName = new Map<string, number>()
		names.forEach((name, i) => {
			if (!indexByName.has(name)) indexByName.set(name, i)
		})
		return ordered.map(item => indexByName.get(item.name) ?? -1)
	}

	const row_names_index = toInputIndices(Routput.RowOrder, inputData.row_names) // sorted rows, value is array index in input data
	const col_names_index = toInputIndices(Routput.ColOrder, inputData.col_names) // sorted columns, value is array index in input data

	// generate sorted matrix based on row/col clustering order
	const output_matrix: number[][] = []
	for (const rowI of row_names_index) {
		const newRow: number[] = []
		for (const colI of col_names_index) {
			newRow.push(inputData.matrix[rowI][colI])
		}
		output_matrix.push(newRow)
	}

	return {
		row: {
			merge: Routput.RowMerge,
			height: Routput.RowHeight,
			order: Routput.RowOrder,
			inputOrder: inputData.row_names
		},
		col: {
			merge: Routput.ColumnMerge,
			height: Routput.ColumnHeight,
			order: Routput.ColOrder,
			inputOrder: inputData.col_names
		},
		matrix: output_matrix
	}
}
|
|
140
|
+
/**
 * Converts a list of numbers to z-scores using the population standard deviation
 * (the squared deviations are divided by `l.length`).
 *
 * A row with no spread has no defined z-score; such rows are returned as all zeros so the
 * result stays on the same scale as every other row. Constant rows are detected by comparing
 * the values directly, because rounding error in the mean can leave a tiny non-zero standard
 * deviation (e.g. [0.1, 0.1, 0.1]) that would otherwise turn every value into -1.
 *
 * @param l values of one row; not modified
 * @returns a new array of the same length holding the z-scores
 */
function getZscore(l: number[]) {
	// also true for an empty list, which then yields []
	const isConstant = l.every(v => v === l[0])
	if (isConstant) {
		return l.map(() => 0)
	}

	const mean = l.reduce((sum, v) => sum + v, 0) / l.length
	const sd = Math.sqrt(l.reduce((sum, v) => sum + (v - mean) ** 2, 0) / l.length)

	if (sd === 0) {
		// differences too small to register in the variance; treat as no spread
		return l.map(() => 0)
	}
	return l.map(v => (v - mean) / sd)
}
|
|
69
149
|
|
|
70
150
|
export async function validate_query_geneExpression(ds: any, genome: any) {
|