@sjcrh/proteinpaint-server 2.27.0 → 2.27.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cards/databrowser.json +1 -1
- package/cards/disco.json +69 -0
- package/cards/hic.json +1 -0
- package/cards/index.json +12 -2
- package/package.json +2 -1
- package/routes/{gdcMaf.ts → gdc.maf.ts} +2 -2
- package/routes/gdc.topMutatedGenes.ts +130 -0
- package/routes/gdc.topVariablyExpressedGenes.ts +97 -0
- package/server.js +1 -1
- package/utils/fastclust.R +8 -1
package/cards/databrowser.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
"intro": "
|
|
2
|
+
"intro": "By uploading a dictionary in the Data Browser, one can explore the structure of their custom data. <br><br>Options to upload sample annotation and other data types are in development.",
|
|
3
3
|
"ppcalls":[
|
|
4
4
|
{
|
|
5
5
|
"isUi": true,
|
package/cards/disco.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
{
|
|
2
|
+
"ribbonMessage": "We're excited to announce our newest app, the <b>Disco Plot!</b>",
|
|
3
|
+
"ppcalls": [
|
|
4
|
+
{
|
|
5
|
+
"isUi": true,
|
|
6
|
+
"runargs": {
|
|
7
|
+
"noheader": true,
|
|
8
|
+
"nobox": 1,
|
|
9
|
+
"parseurl": false,
|
|
10
|
+
"tkui": "disco"
|
|
11
|
+
}
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"message": "In this plot, all chromosomes of the genome are arranged in a circle. Multiple rings along the circle indicates example data points of mutation, copy number change, and structural variation/fusion events. Gene labels are based on mutation and sv/fusion events.",
|
|
15
|
+
"runargs": {
|
|
16
|
+
"noheader": true,
|
|
17
|
+
"nobox": true,
|
|
18
|
+
"genome": "hg38",
|
|
19
|
+
"disco": {
|
|
20
|
+
"mlst": [
|
|
21
|
+
{
|
|
22
|
+
"alt": "T",
|
|
23
|
+
"chr": "chr1",
|
|
24
|
+
"class": "M",
|
|
25
|
+
"dt": 1,
|
|
26
|
+
"gene": "H3F3A",
|
|
27
|
+
"isoform": "NM_002107",
|
|
28
|
+
"mname": "K28M",
|
|
29
|
+
"position": 226252135,
|
|
30
|
+
"ref": "A"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"dt": 4,
|
|
34
|
+
"chr": "chr1",
|
|
35
|
+
"start": 1,
|
|
36
|
+
"stop": 100000000,
|
|
37
|
+
"value": 0.5
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"dt": 4,
|
|
41
|
+
"chr": "chr1",
|
|
42
|
+
"start": 100000000,
|
|
43
|
+
"stop": 200000000,
|
|
44
|
+
"value": -0.5
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"chrA": "chr6",
|
|
48
|
+
"posA": 3067605,
|
|
49
|
+
"geneA": "MDC1",
|
|
50
|
+
"chrB": "chr12",
|
|
51
|
+
"posB": 61521661,
|
|
52
|
+
"geneB": "KMT2D",
|
|
53
|
+
"dt": 2,
|
|
54
|
+
"strandA": "+",
|
|
55
|
+
"strandB": "-"
|
|
56
|
+
}
|
|
57
|
+
]
|
|
58
|
+
}
|
|
59
|
+
},
|
|
60
|
+
"testSpec": {
|
|
61
|
+
"button": 1,
|
|
62
|
+
"expected": {
|
|
63
|
+
"svg": 1,
|
|
64
|
+
"g": 2
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
]
|
|
69
|
+
}
|
package/cards/hic.json
CHANGED
package/cards/index.json
CHANGED
|
@@ -106,6 +106,7 @@
|
|
|
106
106
|
"section": "tracks",
|
|
107
107
|
"description": "Chromatin interaction at a locus",
|
|
108
108
|
"image": "https://proteinpaint.stjude.org/ppdemo/images/hic-square.png",
|
|
109
|
+
"ribbon": { "text": "updated", "expireDate": "2023-10-31" },
|
|
109
110
|
"sandboxJson": "hic",
|
|
110
111
|
"searchterms": ["hic", "hicstraw", "chromatin interaction", "conformation"]
|
|
111
112
|
},
|
|
@@ -285,7 +286,6 @@
|
|
|
285
286
|
"section": "apps",
|
|
286
287
|
"description": "2D scatter plot of samples & metadata",
|
|
287
288
|
"image": "https://proteinpaint.stjude.org/ppdemo/images/scatterplot-square.png",
|
|
288
|
-
"ribbon": { "text": "updated", "expireDate": "2023-03-01" },
|
|
289
289
|
"sandboxJson": "scatterplot",
|
|
290
290
|
"searchterms": ["tSNE", "lasso"]
|
|
291
291
|
},
|
|
@@ -307,6 +307,16 @@
|
|
|
307
307
|
"ribbon": {"text":"beta"},
|
|
308
308
|
"sandboxJson": "databrowser",
|
|
309
309
|
"searchterms": ["clinical", "dictionary"]
|
|
310
|
+
},
|
|
311
|
+
{
|
|
312
|
+
"type": "card",
|
|
313
|
+
"name": "Disco Plot",
|
|
314
|
+
"section": "apps",
|
|
315
|
+
"description": "Circos-like plot of genome-wide mutational events",
|
|
316
|
+
"image": "https://proteinpaint.stjude.org/ppdemo/images/disco-square.png",
|
|
317
|
+
"ribbon": { "text":"new", "expireDate": "2023-10-31" },
|
|
318
|
+
"sandboxJson": "disco",
|
|
319
|
+
"searchterms": ["disco"]
|
|
310
320
|
}
|
|
311
321
|
]
|
|
312
|
-
}
|
|
322
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.27.
|
|
3
|
+
"version": "2.27.2",
|
|
4
4
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"bin": "start.js",
|
|
@@ -71,6 +71,7 @@
|
|
|
71
71
|
"jsonwebtoken": "^9.0.0",
|
|
72
72
|
"jstat": "^1.9.3",
|
|
73
73
|
"lazy": "^1.0.11",
|
|
74
|
+
"micromatch": "^4.0.5",
|
|
74
75
|
"minimatch": "^3.1.2",
|
|
75
76
|
"node-fetch": "^2.6.1",
|
|
76
77
|
"partjson": "^0.58.1",
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { GdcMafResponse, File } from '#shared/types/routes/
|
|
1
|
+
import { GdcMafResponse, File } from '#shared/types/routes/gdc.maf.ts'
|
|
2
2
|
import { fileSize } from '#shared/fileSize.js'
|
|
3
3
|
import path from 'path'
|
|
4
4
|
import got from 'got'
|
|
@@ -6,7 +6,7 @@ import got from 'got'
|
|
|
6
6
|
const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
|
|
7
7
|
|
|
8
8
|
export const api = {
|
|
9
|
-
endpoint: '
|
|
9
|
+
endpoint: 'gdc/maf',
|
|
10
10
|
methods: {
|
|
11
11
|
get: {
|
|
12
12
|
init({ genomes }) {
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { GdcTopMutatedGeneResponse } from '#shared/types/routes/gdc.topMutatedGenes.ts'
|
|
2
|
+
import path from 'path'
|
|
3
|
+
import got from 'got'
|
|
4
|
+
|
|
5
|
+
const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
|
|
6
|
+
|
|
7
|
+
export const api = {
	endpoint: 'gdc/topMutatedGenes',
	methods: {
		get: {
			/**
			 * Creates the request handler for GET requests on this endpoint.
			 *
			 * The `genomes` argument is accepted but not read at the moment.
			 * It could later be used to:
			 * - verify hg38/GDC is on this instance and otherwise disable this route
			 * - perform gene name/id conversion if that is ever needed
			 *
			 * The returned handler always answers through res.send(): either
			 * `{ genes }` on success or `{ status: 'error', error }` on failure.
			 */
			init({ genomes }) {
				return async (req: any, res: any): Promise<void> => {
					try {
						res.send({ genes: await getGenes(req.query) } as GdcTopMutatedGeneResponse)
					} catch (err: any) {
						// the thrown value may be an Error (has .message) or a plain string
						const error = err.message || err
						res.send({ status: 'error', error })
					}
				}
			},
			request: {
				typeId: null
				//valid: default to type checker
			},
			response: {
				typeId: 'GdcTopMutatedGeneResponse'
				// will combine this with type checker
				//valid: (t) => {}
			}
		}
	}
}
|
|
41
|
+
|
|
42
|
+
/**
 * Queries the GDC "top mutated genes by project" analysis endpoint and
 * returns the gene symbols found in the response.
 *
 * @param q req.query, with these recognized properties:
 *   - filter0     optional gdc GFF cohort filter object, invisible and read only
 *                 (FIXME should there be pp filter too?)
 *   - geneFilter  optional string, passed on to mayAddCGC2filter()
 *   - maxGenes    optional integer, number of genes to request; defaults to 50
 * @returns gene symbols in the order the API reports them; hits lacking a symbol are skipped
 * @throws a string when filter0 is given but is not an object, or when the
 *   API response does not carry a data.hits[] array
 *
 * mayAddCGC2filter() is copied to /utils/gdc/topSSMgenes.js
 * and hosted on https://proteinpaint.stjude.org/GDC/
 */
async function getGenes(q: any): Promise<string[]> {
	let _f: any = { op: 'and', content: [] } // allow blank filter to test geneset edit ui (without filter)
	if (q.filter0) {
		if (typeof q.filter0 != 'object') throw 'filter0 not object'
		_f = q.filter0
	}

	// Join host and route as plain strings. path.join() is meant for filesystem
	// paths: it normalizes "https://" to "https:/" and uses the platform path
	// separator, so it must not be used to build a URL.
	const url = apihost.replace(/\/+$/, '') + '/analysis/top_mutated_genes_by_project'

	const response = await got.post(url, {
		headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
		body: JSON.stringify({
			size: q.maxGenes || 50,
			fields: 'symbol',
			filters: mayAddCGC2filter(_f, q.geneFilter)
		})
	})

	const re = JSON.parse(response.body)
	// validate the response shape so an unexpected payload gives a readable error
	// rather than a TypeError from iterating undefined
	if (!Array.isArray(re?.data?.hits)) throw 're.data.hits[] is not array'

	const genes = [] as string[]
	for (const hit of re.data.hits) {
		if (!hit?.symbol) continue
		genes.push(hit.symbol)
	}
	return genes
}
|
|
76
|
+
|
|
77
|
+
/**
 * Builds the filter object sent to the GDC top-mutated-genes query.
 *
 * @param f gdc filter object whose `op` is either "in" or "and"; it should not
 *   already include the "genes.is_cancer_gene_census" filter element.
 *   This object is never modified.
 * @param geneFilter when equal to 'CGC', the following element is appended to
 *   the returned filter so that only Cancer Gene Census genes are considered:
 *
 *     { "op":"in", "content":{ "field":"genes.is_cancer_gene_census", "value":["true"] } }
 *
 * @returns a new "and" filter object holding the clauses of `f` followed by
 *   the additional clauses added here
 * @throws a string when f.op is neither "in" nor "and", or when an "and"
 *   filter does not carry a content array
 */
function mayAddCGC2filter(f: any, geneFilter?: string) {
	// reformulate f into f2
	// f may be "in" or "and". f2 is always "and", in order to join in additional filters
	let f2: { op: string; content: any[] }

	if (f.op == 'in') {
		// wrap f into f2
		f2 = { op: 'and', content: [f] }
	} else if (f.op == 'and') {
		if (!Array.isArray(f.content)) throw 'f.content not array'
		// no need to wrap, but copy the content array so that the pushes below
		// do not alter the caller's filter (e.g. req.query.filter0)
		f2 = { ...f, content: [...f.content] }
	} else {
		throw 'f.op not "in" or "and"'
	}

	// per Phil on 12/16/2022, following filters ensure to return IDH1 as 1st gene for gliomas
	f2.content.push({
		op: 'NOT',
		content: {
			field: 'ssms.consequence.transcript.annotation.vep_impact',
			value: 'missing'
		}
	})
	f2.content.push({
		op: 'in',
		content: {
			field: 'ssms.consequence.transcript.consequence_type',
			value: ['missense_variant', 'frameshift_variant', 'start_lost', 'stop_lost', 'stop_gained']
		}
	})

	if (geneFilter == 'CGC') {
		// using CGC genes, add in filter
		f2.content.push({ op: 'in', content: { field: 'genes.is_cancer_gene_census', value: ['true'] } })
	}

	return f2
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { GdcTopVariablyExpressedGenesResponse } from '#shared/types/routes/gdc.topVariablyExpressedGenes.ts'
|
|
2
|
+
import { getCasesWithExressionDataFromCohort } from '../src/mds3.gdc.js'
|
|
3
|
+
//import path from 'path'
|
|
4
|
+
import got from 'got'
|
|
5
|
+
import serverconfig from '../src/serverconfig.js'
|
|
6
|
+
|
|
7
|
+
// TODO change when api is released to prod
|
|
8
|
+
//const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
|
|
9
|
+
const apihost = 'https://uat-portal.gdc.cancer.gov/auth/api/v0/gene_expression/gene_selection'
|
|
10
|
+
|
|
11
|
+
const gdcGenome = 'hg38'
|
|
12
|
+
const gdcDslabel = 'GDC'
|
|
13
|
+
|
|
14
|
+
export const api = {
	endpoint: 'gdc/topVariablyExpressedGenes',
	methods: {
		get: {
			/**
			 * Creates the request handler for GET requests on this endpoint.
			 *
			 * On every request the handler looks up the genome keyed by gdcGenome
			 * and, within it, the dataset keyed by gdcDslabel; either one missing
			 * is reported as an error. The reply is sent through res.send() as
			 * `{ genes }` on success or `{ status: 'error', error }` on failure.
			 */
			init({ genomes }) {
				return async (req: any, res: any): Promise<void> => {
					try {
						// following logic requires hg38 gdc dataset
						const genome = genomes[gdcGenome]
						if (!genome) throw 'hg38 genome missing'

						const ds = genome.datasets?.[gdcDslabel]
						if (!ds) throw 'gdc dataset missing'

						res.send({ genes: await getGenes(req.query, ds) } as GdcTopVariablyExpressedGenesResponse)
					} catch (err: any) {
						// the thrown value may be an Error (has .message) or a plain string
						const error = err.message || err
						res.send({ status: 'error', error })
					}
				}
			},
			request: {
				typeId: null
				//valid: default to type checker
			},
			response: {
				typeId: 'GdcTopVariablyExpressedGenesResponse'
				// will combine this with type checker
				//valid: (t) => {}
			}
		}
	}
}
|
|
46
|
+
|
|
47
|
+
/*
Collects gene identifiers from the gene-selection api for the cases of the current cohort.

q: req.query {
	filter0 // optional gdc GFF cohort filter, invisible and read only
		FIXME should there be pp filter too?
	maxGenes: int // optional, 100 is requested when absent
}

ds { } // server-side ds object

Returns an array with one entry per element of the api's gene_selection[]:
the gene_id (ensg) when present, otherwise the symbol.
Throws a string when gene_selection is not an array or an element has neither.
*/
async function getGenes(q: any, ds: any) {
	const presetGenes = serverconfig.features.gdcGenes
	if (presetGenes) {
		// for testing only; delete when api issue is resolved
		return presetGenes as string[]
	}

	// based on current cohort, get list of cases with exp data, as input of next api query
	const caseLst = await getCasesWithExressionDataFromCohort(q, ds)
	// there are no cases with gene exp data
	if (caseLst.length == 0) return [] as string[]

	// change to this when api is available on prod
	// const url = path.join(apihost, '/gene_expression/gene_selection')

	const requestBody = JSON.stringify({
		case_ids: caseLst,
		//gene_ids: [] // this should not be a required parameter
		size: q.maxGenes || 100
	})
	const response = await got.post(apihost, {
		headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
		body: requestBody
	})

	// example response body:
	// {"gene_selection":[{"gene_id":"ENSG00000141510","log2_uqfpkm_median":3.103430497010492,"log2_uqfpkm_stddev":0.8692021350485105,"symbol":"TP53"}, ... ]}
	const re = JSON.parse(response.body)
	if (!Array.isArray(re.gene_selection)) throw 're.gene_selection[] is not array'

	return re.gene_selection.map((entry: any): string => {
		// prefer gene_id (ensg), fall back to symbol
		if (entry.gene_id && typeof entry.gene_id == 'string') return entry.gene_id
		if (entry.symbol && typeof entry.symbol == 'string') return entry.symbol
		throw 'one of re.gene_selection[] is missing both gene_id and symbol'
	})
}
|