@sjcrh/proteinpaint-server 2.38.0 → 2.39.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/routes/gdc.mafBuild.ts +7 -2
- package/routes/termdb.categories.ts +8 -6
- package/routes/termdb.cluster.ts +181 -0
- package/routes/termdb.getdescrstats.ts +3 -3
- package/routes/termdb.getnumericcategories.ts +99 -0
- package/routes/termdb.getpercentile.ts +7 -6
- package/routes/termdb.topVariablyExpressedGenes.ts +60 -11
- package/routes/termdb.violin.ts +3 -3
- package/server.js +1 -1
- package/server.js.map +1 -1
- package/src/mds3.gdc.filter.js +9 -6
- package/utils/hclust.R +25 -116
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.39.0",
|
|
4
4
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"bin": "start.js",
|
|
@@ -57,7 +57,7 @@
|
|
|
57
57
|
},
|
|
58
58
|
"dependencies": {
|
|
59
59
|
"@sjcrh/augen": "2.35.0",
|
|
60
|
-
"@sjcrh/proteinpaint-rust": "2.
|
|
60
|
+
"@sjcrh/proteinpaint-rust": "2.39.0",
|
|
61
61
|
"better-sqlite3": "^7.5.3",
|
|
62
62
|
"body-parser": "^1.15.2",
|
|
63
63
|
"canvas": "~2.9.3",
|
package/routes/gdc.mafBuild.ts
CHANGED
|
@@ -43,10 +43,15 @@ async function buildMaf(q: GdcMafBuildRequest, res: any) {
|
|
|
43
43
|
const t0 = Date.now()
|
|
44
44
|
|
|
45
45
|
const fileLst2 = (await getFileLstUnderSizeLimit(q.fileIdLst)) as string[]
|
|
46
|
-
|
|
46
|
+
if (serverconfig.debugmode)
|
|
47
|
+
console.log(
|
|
48
|
+
`${fileLst2.length} out of ${q.fileIdLst.length} input MAF files accepted by size limit`,
|
|
49
|
+
Date.now() - t0
|
|
50
|
+
)
|
|
47
51
|
|
|
48
52
|
const arg = {
|
|
49
53
|
fileIdLst: fileLst2,
|
|
54
|
+
columns: q.columns,
|
|
50
55
|
host: path.join(apihost, 'data') // must use the /data/ endpoint from current host
|
|
51
56
|
}
|
|
52
57
|
|
|
@@ -55,7 +60,7 @@ async function buildMaf(q: GdcMafBuildRequest, res: any) {
|
|
|
55
60
|
res.setHeader('Content-Disposition', 'attachment; filename=cohort.maf.gz')
|
|
56
61
|
rustStream.pipe(res)
|
|
57
62
|
|
|
58
|
-
console.log('rust gdcmaf', Date.now() - t0)
|
|
63
|
+
if (serverconfig.debugmode) console.log('rust gdcmaf', Date.now() - t0)
|
|
59
64
|
|
|
60
65
|
rustStream.on('end', () => {
|
|
61
66
|
res.end()
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
import { getcategoriesRequest, getcategoriesResponse } from '#shared/types/routes/termdb.categories.ts'
|
|
2
2
|
import { getOrderedLabels } from '#src/termdb.barchart.js'
|
|
3
3
|
import { getData } from '#src/termdb.matrix.js'
|
|
4
4
|
|
|
@@ -68,7 +68,7 @@ export const api: any = {
|
|
|
68
68
|
|
|
69
69
|
function init({ genomes }) {
|
|
70
70
|
return async (req: any, res: any): Promise<void> => {
|
|
71
|
-
const q = req.query
|
|
71
|
+
const q = req.query as getcategoriesRequest
|
|
72
72
|
try {
|
|
73
73
|
const g = genomes[req.query.genome]
|
|
74
74
|
if (!g) throw 'invalid genome name'
|
|
@@ -88,8 +88,8 @@ function init({ genomes }) {
|
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
async function trigger_getcategories(
|
|
91
|
-
q: { tid: string | number; type: string; filter: any; term1_q: any; currentGeneNames
|
|
92
|
-
res: { send: (arg0: { lst: any[]; orderedLabels
|
|
91
|
+
q: { tid: string | number; type: string; filter: any; term1_q: any; currentGeneNames?: string[]; rglst?: any },
|
|
92
|
+
res: { send: (arg0: { lst: any[]; orderedLabels?: any }) => void },
|
|
93
93
|
tdb: { q: { termjsonByOneid: (arg0: any) => any } },
|
|
94
94
|
ds: { assayAvailability: { byDt: { [s: string]: any } | ArrayLike<any> } },
|
|
95
95
|
genome: any
|
|
@@ -99,13 +99,15 @@ async function trigger_getcategories(
|
|
|
99
99
|
if (!q.tid) throw '.tid missing'
|
|
100
100
|
const term =
|
|
101
101
|
q.type == 'geneVariant' ? { name: q.tid, type: 'geneVariant', isleaf: true } : tdb.q.termjsonByOneid(q.tid)
|
|
102
|
+
|
|
102
103
|
const arg = {
|
|
103
104
|
filter: q.filter,
|
|
104
105
|
terms:
|
|
105
106
|
q.type == 'geneVariant'
|
|
106
107
|
? [{ term: term, q: { isAtomic: true } }]
|
|
107
108
|
: [{ id: q.tid, term, q: q.term1_q || getDefaultQ(term, q) }],
|
|
108
|
-
currentGeneNames: q.currentGeneNames
|
|
109
|
+
currentGeneNames: q.currentGeneNames, // optional, from mds3 mayAddGetCategoryArgs()
|
|
110
|
+
rglst: q.rglst // optional, from mds3 mayAddGetCategoryArgs()
|
|
109
111
|
}
|
|
110
112
|
|
|
111
113
|
const data = await getData(arg, ds, genome)
|
|
@@ -200,7 +202,7 @@ async function trigger_getcategories(
|
|
|
200
202
|
res.send({
|
|
201
203
|
lst,
|
|
202
204
|
orderedLabels
|
|
203
|
-
})
|
|
205
|
+
} as getcategoriesResponse)
|
|
204
206
|
}
|
|
205
207
|
|
|
206
208
|
function getDefaultQ(
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import { TermdbClusterRequest, TermdbClusterResponse } from '#shared/types/routes/termdb.cluster.ts'
|
|
2
|
+
import fs from 'fs'
|
|
3
|
+
import path from 'path'
|
|
4
|
+
import * as utils from '#src/utils.js'
|
|
5
|
+
import serverconfig from '#src/serverconfig.js'
|
|
6
|
+
import { GeneExpressionQuery, GeneExpressionQueryNative } from '#shared/types/dataset.ts'
|
|
7
|
+
import { gdc_validate_query_geneExpression } from '#src/mds3.gdc.js'
|
|
8
|
+
import { mayLimitSamples } from '#src/mds3.filter.js'
|
|
9
|
+
import { doClustering } from '#src/doClustering.js' // unable to convert this to ts yet, when converted, move all code here
|
|
10
|
+
import { dtgeneexpression } from '#shared/common.js'
|
|
11
|
+
|
|
12
|
+
/*
route descriptor for the termdb/cluster endpoint
- GET carries the request/response type ids
- POST is declared as an alternative for GET and reuses the same init()
*/
export const api = {
	endpoint: 'termdb/cluster',
	methods: {
		get: {
			init,
			request: { typeId: 'TermdbClusterRequest' },
			response: { typeId: 'TermdbClusterResponse' }
		},
		post: { alternativeFor: 'get', init }
	}
}
|
|
30
|
+
|
|
31
|
+
/*
build the request handler for termdb/cluster

genomes{}: genome name -> genome object; each genome has .datasets{} keyed by dslabel

the handler always answers through res.send():
- on success, the value returned by getResult()
- on failure, { status, error } built from whatever was thrown (string or Error)
*/
function init({ genomes }) {
	return async (req: any, res: any): Promise<void> => {
		const q = req.query as TermdbClusterRequest
		let result
		try {
			const genome = genomes[q.genome]
			if (!genome) throw 'invalid genome name'

			const ds = genome.datasets[q.dslabel]
			if (!ds) throw 'invalid dataset name'

			// only applies to datasets carrying a __gdc object
			if (ds.__gdc && !ds.__gdc.doneCaching) {
				throw 'The server has not finished caching the case IDs: try again in ~2 minutes'
			}

			// gene expression is the only data type handled here
			if (q.dataType != dtgeneexpression) throw 'unknown q.dataType ' + q.dataType
			if (!ds.queries?.geneExpression) throw 'no geneExpression data on this dataset'

			result = (await getResult(q, ds)) as TermdbClusterResponse
		} catch (e: any) {
			// only Error-like objects have a stack worth logging; thrown strings are sent as-is
			if (e.stack) console.log(e.stack)
			result = { status: e.status || 400, error: e.message || e } as TermdbClusterResponse
		}
		res.send(result)
	}
}
|
|
58
|
+
|
|
59
|
+
/*
query gene expression values via ds.queries.geneExpression.get() and cluster them

q: the cluster request, passed through to the dataset getter and to doClustering()
ds: dataset object; ds.queries.geneExpression.get must exist

returns
- { gene, data } when only one gene has data (no clustering possible)
- { clustering, byTermId, bySampleId } when multiple genes have data

throws 'no data' when the getter yields no genes. this includes a getter that
returns something without a gene2sample2value map (e.g. an empty collection when
no sample passes the filter); without the guard that case surfaced as a TypeError
*/
async function getResult(q: TermdbClusterRequest, ds: any) {
	const { gene2sample2value, byTermId, bySampleId } = (await ds.queries.geneExpression.get(q)) || {}
	if (!gene2sample2value || gene2sample2value.size == 0) throw 'no data'

	if (gene2sample2value.size == 1) {
		// get data for only 1 gene; still return data, may create violin plot later
		const [gene, data] = gene2sample2value.entries().next().value
		return { gene, data }
	}

	// have data for multiple genes, run clustering
	const t = Date.now() // use "t=new Date()" will lead to tsc error
	const clustering = await doClustering(gene2sample2value, q, ds)
	if (serverconfig.debugmode) console.log('clustering done:', Date.now() - t, 'ms')
	return { clustering, byTermId, bySampleId }
}
|
|
74
|
+
|
|
75
|
+
/*
validate ds.queries.geneExpression (if present) and prepare it for use

ds: dataset object
genome: genome object, forwarded to the source-specific validator

dispatches on q.src:
- 'gdcapi': gdc_validate_query_geneExpression()
- 'native': validateNative(), which is async and is awaited here, so that a
  validation failure rejects this function's promise (instead of becoming an
  unhandled rejection) and so that q.get is attached by the time this resolves

throws on any other q.src value; returns silently when the query is absent
*/
export async function validate_query_geneExpression(ds: any, genome: any) {
	const q = ds.queries.geneExpression as GeneExpressionQuery
	if (!q) return

	if (q.src == 'gdcapi') {
		gdc_validate_query_geneExpression(ds, genome)
		// q.get() added
		return
	}
	if (q.src == 'native') {
		await validateNative(q, ds, genome)
		return
	}
	throw 'unknown queries.geneExpression.src'
}
|
|
90
|
+
|
|
91
|
+
/*
validate a native (tabix file based) gene expression query and attach q.get()

q: the native geneExpression query object; mutated in place:
   .file    rewritten to a path under serverconfig.tpmasterdir
   .nochr   result of utils.tabix_is_nochr()
   .samples integer sample ids, in the column order of the file header
   .get     async getter, see below
ds: dataset object; uses ds.cohort.termdb.q.sampleName2id / id2sampleName and ds.label
genome: genome object; uses genome.genedb.getjsonbyname to locate genes lacking coordinates

throws if the header line is missing, columns 1-4 are not "#chr start stop gene",
or a sample name in the header cannot be mapped to an id
*/
async function validateNative(q: GeneExpressionQueryNative, ds: any, genome: any) {
	q.file = path.join(serverconfig.tpmasterdir, q.file)
	await utils.validate_tabixfile(q.file)
	q.nochr = await utils.tabix_is_nochr(q.file, null, genome)
	q.samples = [] as number[]

	{
		// is a gene-by-sample matrix file
		const lines = await utils.get_header_tabix(q.file)
		if (!lines[0]) throw 'header line missing from ' + q.file
		const l = lines[0].split('\t')
		if (l.slice(0, 4).join('\t') != '#chr\tstart\tstop\tgene') throw 'header line has wrong content for columns 1-4'
		for (let i = 4; i < l.length; i++) {
			const id = ds.cohort.termdb.q.sampleName2id(l[i])
			if (id == undefined) throw 'unknown sample from header'
			q.samples.push(id)
		}
		console.log(q.samples.length, 'samples from geneExpression of', ds.label)
	}

	/*
	query exp data one gene at a time
	param{}
	.genes[{}]
		.gene=str
		.chr=str
		.start=int
		.stop=int
	.filterObj{}

	always resolves to { gene2sample2value, byTermId, bySampleId }
	- gene2sample2value: Map, k: gene symbol, v: { sampleId : value }
	- when no sample passes the filter, the map and both objects are empty, so the
	  caller can test gene2sample2value.size rather than receive a different type
	*/
	q.get = async (param: TermdbClusterRequest) => {
		const limitSamples = await mayLimitSamples(param, q.samples, ds)
		if (limitSamples?.size == 0) {
			// got 0 sample after filtering, return blank structure for no data
			return { gene2sample2value: new Map(), byTermId: {}, bySampleId: {} }
		}

		// has at least 1 sample passing filter and with exp data
		// TODO what if there's just 1 sample not enough for clustering?
		const bySampleId = {}
		// limitSamples, when given, restricts the samples; otherwise use all samples with exp data
		for (const sid of limitSamples || q.samples) {
			bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) }
		}

		const gene2sample2value = new Map() // k: gene symbol, v: { sampleId : value }

		for (const g of param.genes) {
			// FIXME newly added geneVariant terms from client to be changed to {gene} but not {name}
			if (!g.gene) continue

			if (!g.chr) {
				// quick fix: newly added gene from client will lack chr/start/stop
				const lst = genome.genedb.getjsonbyname.all(g.gene)
				if (lst.length == 0) continue
				// prefer the model flagged isdefault; fall back to the first model when none is flagged
				// (calling .genemodel directly on the find() result throws when nothing is flagged)
				const j = JSON.parse(lst.find(i => i.isdefault)?.genemodel || lst[0].genemodel)
				g.start = j.start
				g.stop = j.stop
				g.chr = j.chr
			}

			const sample2value = {}
			gene2sample2value.set(g.gene, sample2value)
			await utils.get_lines_bigfile({
				args: [q.file, (q.nochr ? g.chr?.replace('chr', '') : g.chr) + ':' + g.start + '-' + g.stop], // must do g.chr?.replace to avoid tsc error
				callback: line => {
					const l = line.split('\t')
					// case-insensitive match! FIXME if g.gene is alias won't work
					if (l[3].toLowerCase() != g.gene.toLowerCase()) return
					for (let i = 4; i < l.length; i++) {
						// column i of a data line corresponds to header column i, hence q.samples[i - 4]
						const sampleId = q.samples[i - 4]
						if (limitSamples && !limitSamples.has(sampleId)) continue // doing filtering and sample of current column is not used
						// NOTE(review): a blank field converts to 0 via Number(''), it is not rejected - confirm this is intended
						const v = Number(l[i])
						if (Number.isNaN(v)) throw 'exp value not number'
						sample2value[sampleId] = v
					}
				}
			} as any)
			// Above!! add "as any" to suppress a npx tsc alert
		}
		// pass blank byTermId to match with expected output structure
		const byTermId = {}
		return { gene2sample2value, byTermId, bySampleId }
	}
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
import { getdescrstatsRequest, getdescrstatsResponse } from '#shared/types/routes/termdb.getdescrstats.ts'
|
|
2
2
|
import roundValue from '#shared/roundValue.js'
|
|
3
3
|
import computePercentile from '#shared/compute.percentile.js'
|
|
4
4
|
import * as termdbsql from '#src/termdb.sql.js'
|
|
@@ -62,7 +62,7 @@ export const api: any = {
|
|
|
62
62
|
|
|
63
63
|
function init({ genomes }) {
|
|
64
64
|
return async (req: any, res: any): Promise<void> => {
|
|
65
|
-
const q = req.query
|
|
65
|
+
const q = req.query as getdescrstatsRequest
|
|
66
66
|
try {
|
|
67
67
|
const g = genomes[req.query.genome]
|
|
68
68
|
if (!g) throw 'invalid genome name'
|
|
@@ -141,5 +141,5 @@ async function trigger_getdescrstats(q: any, res: any, ds: any) {
|
|
|
141
141
|
{ id: 'max', label: 'Maximum', value: roundValue(max, 2) },
|
|
142
142
|
{ id: 'sd', label: 'Standard deviation', value: roundValue(sd, 2) }
|
|
143
143
|
]
|
|
144
|
-
})
|
|
144
|
+
} as getdescrstatsResponse)
|
|
145
145
|
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import {
|
|
2
|
+
getnumericcategoriesRequest,
|
|
3
|
+
getnumericcategoriesResponse
|
|
4
|
+
} from '#shared/types/routes/termdb.getnumericcategories.ts'
|
|
5
|
+
import * as termdbsql from '#src/termdb.sql.js'
|
|
6
|
+
|
|
7
|
+
/*
route descriptor for the termdb/numericcategories endpoint
- GET carries the request/response type ids plus one example request
- POST is declared as an alternative for GET and reuses the same init()
*/
export const api: any = {
	endpoint: 'termdb/numericcategories',
	methods: {
		get: {
			init,
			request: { typeId: 'getnumericcategoriesRequest' },
			response: { typeId: 'getnumericcategoriesResponse' },
			examples: [
				{
					// example: one term id, restricted by a single cohort filter entry
					request: {
						body: {
							genome: 'hg38-test',
							dslabel: 'TermdbTest',
							embedder: 'localhost',
							tid: 'aaclassic_5',
							filter: {
								type: 'tvslst',
								in: true,
								join: '',
								lst: [
									{
										tag: 'cohortFilter',
										type: 'tvs',
										tvs: {
											term: {
												name: 'Cohort',
												type: 'categorical',
												values: { ABC: { label: 'ABC' }, XYZ: { label: 'XYZ' } },
												id: 'subcohort',
												isleaf: false,
												groupsetting: { disabled: true }
											},
											values: [{ key: 'ABC', label: 'ABC' }]
										}
									}
								]
							}
						}
					},
					response: { header: { status: 200 } }
				}
			]
		},
		post: { alternativeFor: 'get', init }
	}
}
|
|
62
|
+
|
|
63
|
+
/*
build the request handler for termdb/numericcategories

genomes{}: genome name -> genome object; each genome has .datasets{} keyed by dslabel

the handler resolves genome, dataset and termdb from req.query, then delegates to
trigger_getnumericcategories(), which sends the response itself.
any failure is reported as { error } through res.send(); Error instances are also logged

a dataset lacking .cohort is reported as 'invalid termdb object' (optional chaining),
rather than leaking a raw "cannot read properties of undefined" message
*/
function init({ genomes }) {
	return async (req: any, res: any): Promise<void> => {
		const q = req.query as getnumericcategoriesRequest
		try {
			const g = genomes[req.query.genome]
			if (!g) throw 'invalid genome name'
			const ds = g.datasets[req.query.dslabel]
			if (!ds) throw 'invalid dataset name'
			const tdb = ds.cohort?.termdb
			if (!tdb) throw 'invalid termdb object'

			await trigger_getnumericcategories(q, res, tdb, ds) // as getnumericcategoriesResponse
		} catch (e: any) {
			// thrown values are either strings or Error objects
			res.send({ error: e?.message || e })
			if (e instanceof Error && e.stack) console.log(e)
		}
	}
}
|
|
83
|
+
|
|
84
|
+
/*
summarize the numeric categories of one term and send them as { lst }

q: .tid is the term id (required), .filter is optional and passed through
res: response object; only .send() is used
tdb: termdb object; only tdb.q.termjsonByOneid() is used
ds: dataset object, forwarded to the sql helper

throws '.tid missing' when q.tid is falsy
*/
async function trigger_getnumericcategories(
	q: { tid: any; filter?: any },
	res: { send: (arg0: { lst: any }) => void },
	tdb: { q: { termjsonByOneid: (arg0: any) => any } },
	ds: any
) {
	const { tid, filter } = q
	if (!tid) throw '.tid missing'

	// the term json is looked up but its value is not used by this function
	tdb.q.termjsonByOneid(tid)

	const lst = await termdbsql.get_summary_numericcategories({ ds, term_id: tid, filter })
	res.send({ lst } as getnumericcategoriesResponse)
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
import * as termdbsql from '
|
|
3
|
-
import computePercentile from '
|
|
1
|
+
import { getpercentileRequest, getpercentileResponse } from '#shared/types/routes/termdb.getpercentile.ts'
|
|
2
|
+
import * as termdbsql from '../src/termdb.sql.js'
|
|
3
|
+
import computePercentile from '../shared/compute.percentile.js'
|
|
4
|
+
import { Filter } from '../shared/types/filter'
|
|
4
5
|
|
|
5
6
|
export const api: any = {
|
|
6
7
|
endpoint: 'termdb/getpercentile',
|
|
@@ -61,7 +62,7 @@ export const api: any = {
|
|
|
61
62
|
|
|
62
63
|
function init({ genomes }) {
|
|
63
64
|
return async (req: any, res: any): Promise<void> => {
|
|
64
|
-
const q = req.query
|
|
65
|
+
const q = req.query as getpercentileRequest
|
|
65
66
|
try {
|
|
66
67
|
const g = genomes[req.query.genome]
|
|
67
68
|
if (!g) throw 'invalid genome name'
|
|
@@ -78,7 +79,7 @@ function init({ genomes }) {
|
|
|
78
79
|
}
|
|
79
80
|
|
|
80
81
|
async function trigger_getpercentile(
|
|
81
|
-
q: { tid: string; getpercentile: number[]; filter:
|
|
82
|
+
q: { tid: string; getpercentile: number[]; filter: Filter },
|
|
82
83
|
res: { send: (arg0: { values: number[] }) => void },
|
|
83
84
|
ds: { cohort: { termdb: { q: { termjsonByOneid: (arg0: any) => any } } } }
|
|
84
85
|
) {
|
|
@@ -113,5 +114,5 @@ async function trigger_getpercentile(
|
|
|
113
114
|
const perc_value = computePercentile(values, percentile)
|
|
114
115
|
perc_values.push(perc_value)
|
|
115
116
|
}
|
|
116
|
-
res.send({ values: perc_values })
|
|
117
|
+
res.send({ values: perc_values } as getpercentileResponse)
|
|
117
118
|
}
|
|
@@ -4,8 +4,10 @@ import {
|
|
|
4
4
|
} from '#shared/types/routes/termdb.topVariablyExpressedGenes.ts'
|
|
5
5
|
import { gdcGetCasesWithExressionDataFromCohort, apihost, geneExpHost } from '../src/mds3.gdc.js'
|
|
6
6
|
import path from 'path'
|
|
7
|
+
import { run_rust } from '@sjcrh/proteinpaint-rust'
|
|
7
8
|
import got from 'got'
|
|
8
9
|
import serverconfig from '#src/serverconfig.js'
|
|
10
|
+
import { get_samples } from '#src/termdb.sql.js'
|
|
9
11
|
|
|
10
12
|
export const api = {
|
|
11
13
|
endpoint: 'termdb/topVariablyExpressedGenes',
|
|
@@ -32,9 +34,9 @@ function init({ genomes }) {
|
|
|
32
34
|
if (!ds) throw 'invalid dslabel'
|
|
33
35
|
if (!ds.queries?.topVariablyExpressedGenes) throw 'not supported on dataset'
|
|
34
36
|
|
|
35
|
-
const t
|
|
37
|
+
const t = Date.now()
|
|
36
38
|
const genes = await ds.queries.topVariablyExpressedGenes.getGenes(q)
|
|
37
|
-
if (serverconfig.debugmode) console.log('topVariablyExpressedGenes',
|
|
39
|
+
if (serverconfig.debugmode) console.log('topVariablyExpressedGenes', Date.now() - t, 'ms')
|
|
38
40
|
|
|
39
41
|
res.send({ genes } as TermdbTopVariablyExpressedGenesResponse)
|
|
40
42
|
} catch (e: any) {
|
|
@@ -57,21 +59,68 @@ export function validate_query_TopVariablyExpressedGenes(ds: any, genome: any) {
|
|
|
57
59
|
}
|
|
58
60
|
|
|
59
61
|
/*
attach getGenes() to ds.queries.topVariablyExpressedGenes for a native dataset

ds: dataset object; requires ds.queries.geneExpression with src='native'
genome: not used here

throws when the geneExpression query is missing or is not native

getGenes(q) collects the string names of the samples to analyze, then calls
computeGenes4nativeDs() on the geneExpression matrix file:
- with q.filter: samples passing the filter that also have exp data
- without q.filter: all samples with exp data
it throws when a sample id cannot be converted to a name
*/
function nativeValidateQuery(ds: any, genome: any) {
	const gE = ds.queries.geneExpression // a separate query required to supply data for computing top genes
	if (!gE) throw 'topVariablyExpressedGenes query given but geneExpression missing'
	if (gE.src != 'native') throw 'topVariablyExpressedGenes is native but geneExpression.src is not native'

	// convert an integer sample id to its string name
	const id2name = (id: number): string => {
		const n: string = ds.cohort.termdb.q.id2sampleName(id)
		if (!n) throw 'sample id cannot convert to string name'
		return n
	}

	ds.queries.topVariablyExpressedGenes.getGenes = async (q: TermdbTopVariablyExpressedGenesRequest) => {
		// get list of samples that are used in current analysis; gE.samples[] contains all sample integer ids with exp data
		// gE.samples is read at call time, not at validation time
		const samples = [] as string[]
		if (q.filter) {
			// get all samples passing pp filter, may contain those without exp data
			const sidlst = await get_samples(q.filter, ds)
			// [{id:int}]
			// keep only those with exp data; a Set avoids rescanning gE.samples for every filtered sample
			const withExpData = new Set(gE.samples)
			for (const i of sidlst) {
				if (withExpData.has(i.id)) samples.push(id2name(i.id))
			}
		} else {
			// no filter, use all samples with exp data
			// elements of gE.samples are the integer ids themselves (not {id} objects)
			for (const id of gE.samples) {
				samples.push(id2name(id))
			}
		}

		// call rust to compute top genes on these samples
		const genes = await computeGenes4nativeDs(q, ds, gE.file, samples)
		return genes
	}
}
|
|
72
96
|
|
|
73
|
-
async function computeGenes4nativeDs(
|
|
74
|
-
|
|
97
|
+
/*
run the rust 'topGeneByExpressionVariance' analysis and return the gene symbols it reports

q: request; q.maxGenes is converted to a number and sent as num_genes
ds: not used here
matrixFile: path of the expression matrix file, sent as input_file
samples: sample names, sent as one comma-joined string

the rust output is read line by line: the line containing 'output_json:' carries
the JSON result, every other line is logged to the console.
throws when no usable output_json array is found in the output
*/
async function computeGenes4nativeDs(
	q: TermdbTopVariablyExpressedGenesRequest,
	ds: any,
	matrixFile: string,
	samples: string[]
) {
	// The param option in input JSON is very important. It instructs what method will be used to calculate
	// variation in the counts for a particular gene. It supports variance as well as interquartile region.
	// This is based on the recommendation of this article
	// https://www.frontiersin.org/articles/10.3389/fgene.2021.632620/full
	// which recommends using interquartile region over variance.
	// NOTE(review): 'var' is what is sent below, despite that recommendation - confirm intended
	const input_json = {
		input_file: matrixFile,
		samples: samples.join(','),
		filter_extreme_values: true,
		num_genes: Number(q.maxGenes),
		param: 'var'
	}
	const rust_output = await run_rust('topGeneByExpressionVariance', JSON.stringify(input_json))

	let output_json
	for (const item of rust_output.split('\n')) {
		if (item.includes('output_json:')) {
			output_json = JSON.parse(item.replace('output_json:', ''))
		} else {
			console.log(item)
		}
	}

	// without this check a missing result line surfaces as an opaque TypeError on .map()
	if (!Array.isArray(output_json)) throw 'topGeneByExpressionVariance did not return an output_json array'

	const varGenes = output_json.map(i => i.gene_symbol)
	return varGenes
}
|
|
76
125
|
|
|
77
126
|
function gdcValidateQuery(ds: any, genome: any) {
|
package/routes/termdb.violin.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
import { getViolinRequest, getViolinResponse } from '#shared/types/routes/termdb.violin.ts'
|
|
2
2
|
import { trigger_getViolinPlotData } from '#src/termdb.violin.js'
|
|
3
3
|
|
|
4
4
|
export const api: any = {
|
|
@@ -61,13 +61,13 @@ export const api: any = {
|
|
|
61
61
|
|
|
62
62
|
function init({ genomes }) {
|
|
63
63
|
return async (req: any, res: any): Promise<void> => {
|
|
64
|
-
const q = req.query
|
|
64
|
+
const q = req.query as getViolinRequest
|
|
65
65
|
try {
|
|
66
66
|
const g = genomes[req.query.genome]
|
|
67
67
|
const ds = g.datasets[req.query.dslabel]
|
|
68
68
|
if (!g) throw 'invalid genome name'
|
|
69
69
|
const data = await trigger_getViolinPlotData(req.query, null, ds, g) // as getViolinResponse
|
|
70
|
-
res.send(data)
|
|
70
|
+
res.send(data as getViolinResponse)
|
|
71
71
|
} catch (e) {
|
|
72
72
|
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
|
|
73
73
|
// @ts-ignore
|