@sjcrh/proteinpaint-server 2.44.0 → 2.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/dataset/clinvar.hg19.js +53 -52
  2. package/dataset/clinvar.hg38.js +74 -73
  3. package/dataset/clinvar.js +164 -47
  4. package/dataset/termdb.test.js +257 -0
  5. package/genome/CriGri.js +1859 -27
  6. package/genome/cgc.js +743 -7
  7. package/genome/danRer10.js +1108 -46
  8. package/genome/dm3.js +71 -44
  9. package/genome/dm6.js +1926 -45
  10. package/genome/galGal5.js +23522 -46
  11. package/genome/galGal6.js +512 -46
  12. package/genome/hg19.js +293 -198
  13. package/genome/hg38.js +472 -105
  14. package/genome/hg38.test.js +406 -40
  15. package/genome/hgvirus.js +45 -20
  16. package/genome/mm10.js +135 -67
  17. package/genome/mm9.js +116 -79
  18. package/genome/rn6.js +1002 -47
  19. package/package.json +31 -35
  20. package/routes/_template_.js +30 -0
  21. package/routes/burden.js +149 -0
  22. package/routes/dataset.js +266 -0
  23. package/routes/dsdata.js +127 -0
  24. package/routes/gdc.maf.js +120 -0
  25. package/routes/gdc.mafBuild.js +106 -0
  26. package/routes/gdc.topMutatedGenes.js +465 -0
  27. package/routes/gene2canonicalisoform.js +41 -0
  28. package/routes/genelookup.js +52 -0
  29. package/routes/genomes.js +144 -0
  30. package/routes/healthcheck.js +30 -0
  31. package/routes/hicdata.js +98 -0
  32. package/routes/hicstat.js +55 -0
  33. package/routes/isoformlst.js +57 -0
  34. package/routes/ntseq.js +43 -0
  35. package/routes/pdomain.js +61 -0
  36. package/routes/snp.js +107 -0
  37. package/routes/termdb.categories.js +209 -0
  38. package/routes/termdb.cluster.js +228 -0
  39. package/routes/termdb.cohort.summary.js +38 -0
  40. package/routes/termdb.cohorts.js +49 -0
  41. package/routes/termdb.config.js +201 -0
  42. package/routes/termdb.getdescrstats.js +102 -0
  43. package/routes/termdb.getnumericcategories.js +92 -0
  44. package/routes/termdb.getpercentile.js +108 -0
  45. package/routes/termdb.getrootterm.js +65 -0
  46. package/routes/termdb.gettermchildren.js +67 -0
  47. package/routes/termdb.singleSampleMutation.js +80 -0
  48. package/routes/termdb.singlecellData.js +46 -0
  49. package/routes/termdb.singlecellSamples.js +160 -0
  50. package/routes/termdb.termsbyids.js +59 -0
  51. package/routes/termdb.topVariablyExpressedGenes.js +171 -0
  52. package/routes/termdb.violin.js +77 -0
  53. package/src/app.js +41498 -0
  54. package/src/serverconfig.js +14 -8
  55. package/start.js +3 -3
  56. package/routes/README.md +0 -84
  57. package/routes/burden.ts +0 -143
  58. package/routes/gdc.maf.ts +0 -195
  59. package/routes/gdc.mafBuild.ts +0 -114
  60. package/routes/gdc.topMutatedGenes.ts +0 -586
  61. package/routes/genelookup.ts +0 -50
  62. package/routes/healthcheck.ts +0 -29
  63. package/routes/hicdata.ts +0 -111
  64. package/routes/hicstat.ts +0 -55
  65. package/routes/termdb.categories.ts +0 -245
  66. package/routes/termdb.cluster.ts +0 -248
  67. package/routes/termdb.getdescrstats.ts +0 -102
  68. package/routes/termdb.getnumericcategories.ts +0 -99
  69. package/routes/termdb.getpercentile.ts +0 -118
  70. package/routes/termdb.getrootterm.ts +0 -73
  71. package/routes/termdb.gettermchildren.ts +0 -82
  72. package/routes/termdb.singleSampleMutation.ts +0 -87
  73. package/routes/termdb.singlecellData.ts +0 -49
  74. package/routes/termdb.singlecellSamples.ts +0 -175
  75. package/routes/termdb.termsbyids.ts +0 -63
  76. package/routes/termdb.topVariablyExpressedGenes.ts +0 -214
  77. package/routes/termdb.violin.ts +0 -77
  78. package/server.js +0 -2
  79. package/server.js.map +0 -1
  80. package/shared/common.js +0 -1080
  81. package/shared/termdb.initbinconfig.js +0 -96
  82. package/shared/vcf.js +0 -629
package/routes/hicdata.ts DELETED
@@ -1,111 +0,0 @@
1
- import { HicdataRequest, HicdataResponse, Item } from '#shared/types/routes/hicdata.ts'
2
- import { fileurl } from '#src/utils.js'
3
- import { spawn } from 'child_process'
4
- import readline from 'readline'
5
- import serverconfig from '#src/serverconfig.js'
6
-
7
/**
 * Route descriptor for the `hicdata` endpoint.
 * GET declares the request/response type ids; POST is registered as an alternative to GET
 * and shares the same handler factory. No `examples` are registered: the only example in
 * this file was commented out and carried termdb-style parameters (dslabel, gettermbyid).
 */
export const api: any = {
	endpoint: 'hicdata',
	methods: {
		get: {
			init,
			request: { typeId: 'HicdataRequest' },
			response: { typeId: 'HicdataResponse' }
		},
		post: { alternativeFor: 'get', init }
	}
}
42
-
43
/**
 * Builds the request handler for the `hicdata` route.
 *
 * The handler passes `req.query` to handle_hicdata() and sends whatever it resolves to.
 * On failure it always responds with `{ error: <message> }`, where the message is a string
 * whenever one can be extracted from the rejection value.
 */
function init() {
	return async (req: any, res: any): Promise<void> => {
		try {
			res.send(await handle_hicdata(req.query as HicdataRequest))
		} catch (e: any) {
			// The rejection value may be an Error, a plain string, or an `{ error }` wrapper object.
			// Unwrap the wrapper so the client never receives a nested `{ error: { error } }` payload.
			res.send({ error: e?.message || e?.error || e })
			if (e instanceof Error && e.stack) console.log(e)
		}
	}
}
56
-
57
/**
 * Runs the configured straw binary (`serverconfig.hicstraw`) for one query and collects its
 * tab-separated output (`pos1 \t pos2 \t value`) into `{ items: [pos1, pos2, value][] }`.
 *
 * @param q request parameters; `matrixType`, `nmeth`, `pos1`, `pos2`, `isfrag`, `resolution`
 *          and `mincutoff` are read here, the file/url is resolved through fileurl()
 * @returns a promise resolving to `{ items }`
 *
 * The promise rejects with a string message when the file name is illegal, when straw writes
 * anything to stderr, or when any output line is malformed. It rejects with the underlying
 * Error when the child process cannot be started.
 */
function handle_hicdata(q: HicdataRequest) {
	return new Promise((resolve, reject) => {
		const [e, file] = fileurl({ query: q })
		if (e) {
			// stop here: without a valid file there is nothing to hand to straw
			reject('illegal file name')
			return
		}

		/* The value passed from the client is not always a valid straw parameter:
		'log(oe)' is requested from straw as 'oe' and the log is applied to each value below.
		'observed' is the default when no matrix type is given. */
		const wantsLogOe = q.matrixType == 'log(oe)'
		const matrixType = wantsLogOe ? 'oe' : q.matrixType ? q.matrixType : 'observed'

		const par = [matrixType, q.nmeth || 'NONE', file, q.pos1, q.pos2, q.isfrag ? 'FRAG' : 'BP', q.resolution]

		const ps = spawn(serverconfig.hicstraw, par)
		const rl = readline.createInterface({ input: ps.stdout })

		const items = [] as Item[]
		const erroutput = [] as string[]
		let linenot3fields = 0
		let fieldnotnumerical = 0

		rl.on('line', line => {
			// straw output: pos1 \t pos2 \t value
			const l = line.split('\t')
			if (l.length != 3) {
				linenot3fields++
				return
			}
			const n1 = Number.parseInt(l[0])
			const n2 = Number.parseInt(l[1])
			const raw = Number.parseFloat(l[2])
			const v = wantsLogOe ? Math.log(raw) : raw
			if (Number.isNaN(n1) || Number.isNaN(n2) || Number.isNaN(v)) {
				fieldnotnumerical++
				return
			}
			// values at or below the optional cutoff are dropped
			if (q.mincutoff != undefined && v <= q.mincutoff) return
			items.push([n1, n2, v] as Item)
		})

		// Without this listener a failed spawn (e.g. missing binary) emits an unhandled 'error' event.
		ps.on('error', err => reject(err))
		ps.stderr.on('data', i => erroutput.push(i))
		ps.on('close', () => {
			// report the first problem found; otherwise resolve with the collected items
			const err = erroutput.join('')
			if (err) return reject(err)
			if (linenot3fields) return reject(linenot3fields + ' lines have other than 3 fields')
			if (fieldnotnumerical) {
				return reject(fieldnotnumerical + ' lines have non-numerical values in any of the 3 fields')
			}
			resolve({ items })
		})
	})
}
package/routes/hicstat.ts DELETED
@@ -1,55 +0,0 @@
1
- import { fileurl, file_is_readable } from '#src/utils.js'
2
- import { do_hicstat } from '#src/hicstat.ts'
3
- import { HicstatRequestWithValidation } from '#shared/types/routes/hicstat.ts'
4
-
5
/**
 * Route descriptor for the `hicstat` endpoint.
 * GET declares the request/response type ids and one example request against the hg19 demo
 * .hic file; POST is an alternative to GET and uses the same handler factory.
 */
export const api = {
	endpoint: 'hicstat',
	methods: {
		get: {
			init,
			request: { typeId: 'HicstatRequest' },
			response: { typeId: 'HicstatResponse' },
			examples: [
				{
					request: {
						body: { genome: 'hg19', file: 'proteinpaint_demo/hg19/hic/hic_demo.hic', embedder: 'localhost' }
					},
					response: { header: { status: 200 } }
				}
			]
		},
		post: { alternativeFor: 'get', init }
	}
}
37
-
38
/**
 * Creates the request handler for the `hicstat` route.
 *
 * The handler resolves the file/url from the request via fileurl(), checks readability when
 * fileurl() did not flag the input as a URL, and responds with `{ out }` from do_hicstat().
 * Any failure is answered with `{ error }`; Error instances with a stack are also logged.
 */
function init() {
	const handler = async (req: HicstatRequestWithValidation, res: any): Promise<void> => {
		try {
			const [invalid, hicfile, remote] = fileurl(req)
			if (invalid) throw 'illegal file name'
			if (!remote) await file_is_readable(hicfile)
			res.send({ out: await do_hicstat(hicfile, remote) })
		} catch (err: any) {
			res.send({ error: err?.message || err })
			if (err instanceof Error && err.stack) console.log(err)
		}
	}
	return handler
}
package/routes/termdb.categories.ts DELETED
@@ -1,245 +0,0 @@
1
- import { getcategoriesRequest, getcategoriesResponse } from '#shared/types/routes/termdb.categories.ts'
2
- import { getOrderedLabels } from '#src/termdb.barchart.js'
3
- import { getData } from '#src/termdb.matrix.js'
4
-
5
// Example request/response pair registered on the GET method below:
// categories of term `diaggrp` in the TermdbTest dataset, restricted to cohort ABC.
const getcategoriesExample = {
	request: {
		body: {
			genome: 'hg38-test',
			dslabel: 'TermdbTest',
			embedder: 'localhost',
			getcategories: 1,
			tid: 'diaggrp',
			term1_q: {
				isAtomic: true,
				hiddenValues: {},
				type: 'values',
				groupsetting: { disabled: true },
				mode: 'discrete'
			},
			filter: {
				type: 'tvslst',
				in: true,
				join: '',
				lst: [
					{
						tag: 'cohortFilter',
						type: 'tvs',
						tvs: {
							term: {
								name: 'Cohort',
								type: 'categorical',
								values: { ABC: { label: 'ABC' }, XYZ: { label: 'XYZ' } },
								id: 'subcohort',
								isleaf: false,
								groupsetting: { disabled: true }
							},
							values: [{ key: 'ABC', label: 'ABC' }]
						}
					}
				]
			}
		}
	},
	response: { header: { status: 200 } }
}

/**
 * Route descriptor for the `termdb/categories` endpoint.
 * GET declares the request/response type ids and one example; POST is an alternative to GET
 * and uses the same handler factory.
 */
export const api: any = {
	endpoint: 'termdb/categories',
	methods: {
		get: {
			init,
			request: { typeId: 'getcategoriesRequest' },
			response: { typeId: 'getcategoriesResponse' },
			examples: [getcategoriesExample]
		},
		post: { alternativeFor: 'get', init }
	}
}
68
-
69
/**
 * Creates the request handler for the `termdb/categories` route.
 *
 * @param genomes map of genome name to genome object; each genome exposes `datasets` by label
 *
 * The handler validates genome, dataset and termdb, then delegates to trigger_getcategories(),
 * which sends the response itself. Any thrown value is reported as `{ error }`; Error
 * instances with a stack are also logged.
 */
function init({ genomes }) {
	return async (req: any, res: any): Promise<void> => {
		const q = req.query as getcategoriesRequest
		try {
			const g = genomes[req.query.genome]
			if (!g) throw 'invalid genome name'
			const ds = g.datasets[req.query.dslabel]
			if (!ds) throw 'invalid dataset name'
			// optional chaining: a dataset without `cohort` must yield the message below, not a TypeError
			const tdb = ds.cohort?.termdb
			if (!tdb) throw 'invalid termdb object'

			await trigger_getcategories(q, res, tdb, ds, g) // as getcategoriesResponse
		} catch (e: any) {
			res.send({ error: e?.message || e })
			if (e instanceof Error && e.stack) console.log(e)
		}
	}
}
89
-
90
/**
 * Computes per-category sample counts for one term and sends `{ lst, orderedLabels }`.
 *
 * For `q.type == 'geneVariant'` the list holds one `{ dt, classes }` entry per data type,
 * otherwise one `{ samplecount, key, label }` entry per category key found in the data.
 *
 * @param q      request parameters; `tid` is required
 * @param res    response object; only `send` is used
 * @param tdb    termdb object; `q.termjsonByOneid` resolves the term for non-geneVariant requests
 * @param ds     dataset; `assayAvailability.byDt` is consulted for geneVariant terms
 * @param genome genome object, passed through to getData()
 * @throws '.tid missing' when no term id is given, or `data.error` when getData() reports one
 */
async function trigger_getcategories(
	q: { tid: string | number; type: string; filter: any; term1_q: any; currentGeneNames?: string[]; rglst?: any },
	res: { send: (arg0: { lst: any[]; orderedLabels?: any }) => void },
	tdb: { q: { termjsonByOneid: (arg0: any) => any } },
	ds: { assayAvailability: { byDt: { [s: string]: any } | ArrayLike<any> } },
	genome: any
) {
	if (!q.tid) throw '.tid missing'
	const isGeneVariant = q.type == 'geneVariant'
	const term = isGeneVariant ? { name: q.tid, type: 'geneVariant', isleaf: true } : tdb.q.termjsonByOneid(q.tid)

	const arg = {
		filter: q.filter,
		terms: isGeneVariant
			? [{ term: term, q: { isAtomic: true } }]
			: [{ id: q.tid, term, q: q.term1_q || getDefaultQ(term, q) }],
		currentGeneNames: q.currentGeneNames, // optional, from mds3 mayAddGetCategoryArgs()
		rglst: q.rglst // optional, from mds3 mayAddGetCategoryArgs()
	}

	const data = await getData(arg, ds, genome)
	if (data.error) throw data.error

	const lst: any[] = isGeneVariant
		? tallyGeneVariantClasses(data.samples, q.tid, ds.assayAvailability?.byDt)
		: tallyCategoryKeys(data, q.tid, term)

	const orderedLabels = getOrderedLabels(
		term,
		data.refs?.byTermId?.[q.tid]?.bins || [],
		data.refs?.byTermId?.[q.tid]?.events,
		q.term1_q
	)
	if (orderedLabels.length) {
		lst.sort((a, b) => orderedLabels.indexOf(a.label) - orderedLabels.indexOf(b.label))
	}
	res.send({
		lst,
		orderedLabels
	} as getcategoriesResponse)
}

/**
 * Counts, per dt (and per origin where the dataset declares `byOrigin` for that dt), how many
 * samples carry each mutation class. A sample contributes at most once to each class bucket.
 *
 * Each sample's `values` is an array of result entries, one or more per dt, e.g.
 *   { dt: 1, class: 'Blank', _SAMPLEID_: 1, origin: 'germline' }
 *   { dt: 2, class: 'Blank', _SAMPLEID_: 1 }
 */
function tallyGeneVariantClasses(samples: { [sampleId: string]: any }, tid: string | number, byDt: any): any[] {
	const dtClassMap = new Map()
	if (byDt) {
		for (const [dtType, dtValue] of Object.entries(byDt as { [s: string]: any })) {
			if (dtValue.byOrigin) {
				dtClassMap.set(parseInt(dtType), { byOrigin: { germline: {}, somatic: {} } })
			}
		}
	}

	const sampleCountedFor = new Set<string>() // buckets the current sample was already counted in
	for (const sampleData of Object.values(samples)) {
		const values = sampleData[tid]?.values
		if (!values) continue // sample has no entry for this term; nothing to count
		sampleCountedFor.clear()
		for (const value of values) {
			if (!dtClassMap.has(value.dt)) dtClassMap.set(value.dt, {})
			const dtClasses = dtClassMap.get(value.dt)
			const bucket = dtClasses.byOrigin ? dtClasses.byOrigin[value.origin] : dtClasses
			const countedKey = dtClasses.byOrigin
				? `${value.dt} ${value.origin} ${value.class}`
				: `${value.dt} ${value.class}`
			if (!bucket[value.class]) {
				bucket[value.class] = 1
				sampleCountedFor.add(countedKey)
			} else if (!sampleCountedFor.has(countedKey)) {
				sampleCountedFor.add(countedKey)
				bucket[value.class] += 1
			}
		}
	}

	const lst: any[] = []
	for (const [dt, classes] of dtClassMap) lst.push({ dt, classes })
	return lst
}

/**
 * Counts samples per category key of a term and attaches a display label to each key.
 * The label comes from a matching event in `data.refs`, then from `term.values`, then the key itself.
 */
function tallyCategoryKeys(data: any, tid: string | number, term: any): any[] {
	// k: category key, v: number of samples
	const key2count = new Map()
	for (const sid in data.samples) {
		const v = data.samples[sid][tid]
		if (!v) continue
		if (!('key' in v)) continue
		key2count.set(v.key, 1 + (key2count.get(v.key) || 0))
	}

	const events = data.refs?.byTermId?.[tid]?.events
	const lst: any[] = []
	for (const [key, count] of key2count) {
		lst.push({
			samplecount: count,
			key,
			// `?.label`: find() returns undefined when no event matches this key
			label: events?.find((e: { event: any; label?: any }) => e.event === key)?.label || term?.values?.[key]?.label || key
		})
	}
	return lst
}
207
-
208
/**
 * Returns the fallback `q` (term setting) used when the request carries no `term1_q`.
 *
 * - categorical, survival, geneVariant: empty object
 * - integer, float: the term's default bin configuration
 * - condition: the condition-related flags copied from the request
 *
 * @throws 'unknown term type' for any other term type
 */
function getDefaultQ(
	term: { type: string; bins: { default: any } },
	q: {
		mode?: any
		breaks?: any
		bar_by_grade?: any
		bar_by_children?: any
		value_by_max_grade?: any
		value_by_most_recent?: any
		value_by_computable_grade?: any
		tid?: string | number
		type?: string
		filter?: any
		term1_q?: any
		currentGeneNames?: any
	}
) {
	switch (term.type) {
		case 'categorical':
		case 'survival':
		case 'geneVariant':
			return {}
		case 'integer':
		case 'float':
			return term.bins.default
		case 'condition': {
			/* bar_by_children and value_by_computable_grade are taken from the request only.
			Falling back to term.subconditions stays disabled until the issue with term1_q not
			reaching getCategories is resolved (tvs condition tests fail with it). */
			const {
				mode,
				breaks,
				bar_by_grade,
				bar_by_children,
				value_by_max_grade,
				value_by_most_recent,
				value_by_computable_grade
			} = q
			return {
				mode,
				breaks,
				bar_by_grade,
				bar_by_children,
				value_by_max_grade,
				value_by_most_recent,
				value_by_computable_grade
			}
		}
		default:
			throw 'unknown term type'
	}
}
package/routes/termdb.cluster.ts DELETED
@@ -1,248 +0,0 @@
1
- import path from 'path'
2
- import fs from 'fs'
3
- import lines2R from '#src/lines2R.js'
4
- import {
5
- TermdbClusterRequest,
6
- TermdbClusterResponse,
7
- Clustering,
8
- ValidResponse,
9
- SinglegeneResponse
10
- } from '#shared/types/routes/termdb.cluster.ts'
11
- import * as utils from '#src/utils.js'
12
- import serverconfig from '#src/serverconfig.js'
13
- import { GeneExpressionQuery, GeneExpressionQueryNative } from '#shared/types/dataset.ts'
14
- import { gdc_validate_query_geneExpression } from '#src/mds3.gdc.js'
15
- import { mayLimitSamples } from '#src/mds3.filter.js'
16
- import { dtgeneexpression } from '#shared/common.js'
17
-
18
/**
 * Route descriptor for the `termdb/cluster` endpoint.
 * A single handler is registered under `all`, together with the request/response type ids.
 */
export const api = {
	endpoint: 'termdb/cluster',
	methods: {
		all: {
			init,
			request: { typeId: 'TermdbClusterRequest' },
			response: { typeId: 'TermdbClusterResponse' }
		}
	}
}
32
-
33
/**
 * Creates the request handler for the `termdb/cluster` route.
 *
 * @param genomes map of genome name to genome object; each genome exposes `datasets` by label
 *
 * The handler validates genome, dataset, GDC caching state and data type, then sends the
 * output of getResult(). On any failure it sends `{ status, error }` instead, with status
 * defaulting to 400; values that carry a stack are logged.
 */
function init({ genomes }) {
	return async (req: any, res: any): Promise<void> => {
		const q = req.query as TermdbClusterRequest
		let result
		try {
			const genome = genomes[q.genome]
			if (!genome) throw 'invalid genome name'
			const dataset = genome.datasets[q.dslabel]
			if (!dataset) throw 'invalid dataset name'
			const gdcStillCaching = dataset.__gdc && !dataset.__gdc.doneCaching
			if (gdcStillCaching) throw 'The server has not finished caching the case IDs: try again in ~2 minutes'
			// gene expression is the only data type handled here
			if (q.dataType != dtgeneexpression) throw 'unknown q.dataType ' + q.dataType
			if (!dataset.queries?.geneExpression) throw 'no geneExpression data on this dataset'
			result = (await getResult(q, dataset)) as TermdbClusterResponse
		} catch (err: any) {
			if (err.stack) console.log(err.stack)
			result = {
				status: err.status || 400,
				error: err.message || err
			} as TermdbClusterResponse
		}
		res.send(result)
	}
}
60
-
61
/**
 * Fetches gene expression data through the dataset's `queries.geneExpression.get()` and
 * shapes the response.
 *
 * - no gene with data: throws 'no data'
 * - exactly one gene: returns `{ gene, data }` without clustering (may feed a violin plot later)
 * - several genes: runs doClustering() and returns `{ clustering, byTermId, bySampleId }`
 */
async function getResult(q: TermdbClusterRequest, ds: any) {
	const { gene2sample2value, byTermId, bySampleId } = await ds.queries.geneExpression.get(q)

	const geneCount = gene2sample2value.size
	if (geneCount == 0) throw 'no data'
	if (geneCount == 1) {
		const [gene] = gene2sample2value.keys()
		return { gene, data: gene2sample2value.get(gene) } as SinglegeneResponse
	}

	// timestamps are taken with Date.now(); the original notes that `new Date()` trips tsc here
	const startedAt = Date.now()
	const clustering: Clustering = await doClustering(gene2sample2value, q)
	if (serverconfig.debugmode) console.log('clustering done:', Date.now() - startedAt, 'ms')
	return { clustering, byTermId, bySampleId } as ValidResponse
}
76
-
77
/**
 * Clusters a gene-by-sample expression matrix with the `utils/hclust.R` script.
 *
 * @param data map of gene symbol to `{ sampleId: value }`
 * @param q    request; `clusterMethod` is forwarded to R as `cluster_method`
 * @returns row/column dendrogram data (`merge`, `height`, `order`, `inputOrder`) and the
 *          z-scored matrix reordered by the clustering result
 *
 * Columns are the union of sample ids over all genes, in first-seen order, so a sample that
 * is missing from the first gene is still included. Missing values are filled with 0 before
 * each row is z-scored. The temporary JSON input file is removed whether or not R succeeds.
 */
async function doClustering(data: any, q: TermdbClusterRequest) {
	// collect the unique sample names to build the col_names dimension
	const sampleSet = new Set<string>()
	for (const o of data.values()) {
		// o is {sampleId: value}
		for (const s in o) sampleSet.add(s)
	}

	const inputData = {
		matrix: [] as number[][],
		row_names: [] as string[], // genes
		col_names: [...sampleSet] as string[], // samples
		cluster_method: q.clusterMethod as string,
		plot_image: false // When true causes cluster.rs to plot the image into a png file (EXPERIMENTAL)
	}

	// compose "data{}" into a matrix, one z-scored row per gene
	for (const [gene, o] of data) {
		inputData.row_names.push(gene)
		const row: number[] = inputData.col_names.map(s => o[s] || 0)
		inputData.matrix.push(getZscore(row))
	}

	const Rinputfile = path.join(serverconfig.cachedir, Math.random().toString() + '.json')
	await utils.write_file(Rinputfile, JSON.stringify(inputData))
	let Routput
	try {
		Routput = JSON.parse(await lines2R(path.join(serverconfig.binpath, 'utils/hclust.R'), [], [Rinputfile]))
	} catch (e) {
		// do not leave the input file behind when R or the JSON parsing fails; keep the original error
		await fs.promises.unlink(Rinputfile).catch(() => undefined)
		throw e
	}
	await fs.promises.unlink(Rinputfile)

	// name -> input index lookups replace repeated indexOf scans; -1 mirrors indexOf for unknown names
	const rowIndexByName = new Map<string, number>()
	inputData.row_names.forEach((name, i) => rowIndexByName.set(name, i))
	const colIndexByName = new Map<string, number>()
	inputData.col_names.forEach((name, i) => colIndexByName.set(name, i))

	// sorted rows/columns; each value is the array index in the input data
	const row_names_index: number[] = Routput.RowOrder.map(row => rowIndexByName.get(row.name) ?? -1)
	const col_names_index: number[] = Routput.ColOrder.map(col => colIndexByName.get(col.name) ?? -1)

	// generate the sorted matrix based on the row/col clustering order
	const output_matrix: number[][] = row_names_index.map(rowI =>
		col_names_index.map(colI => inputData.matrix[rowI][colI])
	)

	return {
		row: {
			merge: Routput.RowMerge,
			height: Routput.RowHeight,
			order: Routput.RowOrder,
			inputOrder: inputData.row_names
		},
		col: {
			merge: Routput.ColumnMerge,
			height: Routput.ColumnHeight,
			order: Routput.ColOrder,
			inputOrder: inputData.col_names
		},
		matrix: output_matrix
	}
}
138
/**
 * Converts a row of values to z-scores using the population standard deviation.
 *
 * @param l input values; not modified
 * @returns a new array of the same length. When all values are equal (sd is 0) every z-score
 *          is 0, so a constant row never carries raw values into a matrix of z-scores.
 */
function getZscore(l: number[]) {
	const mean: number = l.reduce((sum, v) => sum + v, 0) / l.length
	const sd: number = Math.sqrt(l.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / l.length)

	if (sd == 0) {
		// no variation: the z-score is undefined, report 0 for every position
		return l.map(() => 0)
	}
	return l.map(v => (v - mean) / sd)
}
147
-
148
/**
 * Validates and initializes `ds.queries.geneExpression`, if the dataset declares one.
 *
 * - src 'gdcapi': delegated to gdc_validate_query_geneExpression()
 * - src 'native': delegated to validateNative()
 *
 * Does nothing when the dataset has no `queries` or no `geneExpression` query.
 *
 * @throws 'unknown queries.geneExpression.src' for any other source
 */
export async function validate_query_geneExpression(ds: any, genome: any) {
	// `queries?.`: datasets without a `queries` object are skipped rather than crashing
	const q = ds.queries?.geneExpression as GeneExpressionQuery
	if (!q) return

	if (q.src == 'gdcapi') {
		gdc_validate_query_geneExpression(ds, genome)
		// q.get() added
		return
	}
	if (q.src == 'native') {
		await validateNative(q, ds, genome)
		return
	}
	throw 'unknown queries.geneExpression.src'
}
163
-
164
/**
 * Validates a native (tabix-indexed, gene-by-sample matrix) gene expression query and
 * attaches its `get()` method.
 *
 * Side effects on `q`: `file` is made absolute under `serverconfig.tpmasterdir`, `nochr` is
 * detected from the file, `samples` is rebuilt from the header line, and `get` is assigned.
 *
 * @throws when the header line is missing, columns 1-4 are not `#chr start stop gene`, or a
 *         header sample name is unknown to the dataset
 */
async function validateNative(q: GeneExpressionQueryNative, ds: any, genome: any) {
	if (!q.file.startsWith(serverconfig.tpmasterdir)) q.file = path.join(serverconfig.tpmasterdir, q.file)
	if (!q.samples) q.samples = []
	await utils.validate_tabixfile(q.file)
	q.nochr = await utils.tabix_is_nochr(q.file, null, genome)
	// the header line is the source of truth for the sample columns
	q.samples = [] as number[]

	{
		// is a gene-by-sample matrix file
		const lines = await utils.get_header_tabix(q.file)
		if (!lines[0]) throw 'header line missing from ' + q.file
		const l = lines[0].split('\t')
		if (l.slice(0, 4).join('\t') != '#chr\tstart\tstop\tgene') throw 'header line has wrong content for columns 1-4'
		for (let i = 4; i < l.length; i++) {
			const id = ds.cohort.termdb.q.sampleName2id(l[i])
			if (id == undefined) throw 'queries.geneExpression: unknown sample from header: ' + l[i]
			q.samples.push(id)
		}
		console.log(q.samples.length, 'samples from geneExpression of', ds.label)
	}

	/**
	 * Loads expression values for the requested genes.
	 * Returns `{ gene2sample2value: Map<gene, {sampleId: value}>, byTermId, bySampleId }`.
	 * Genes without data are left out, so fewer genes may come back than were requested.
	 */
	q.get = async (param: TermdbClusterRequest) => {
		const limitSamples = await mayLimitSamples(param, q.samples, ds)
		if (limitSamples?.size == 0) {
			// got 0 sample after filtering, must still return expected structure with no data
			// (a Map, same as the regular return below)
			return { gene2sample2value: new Map(), byTermId: {}, bySampleId: {} }
		}

		// has at least 1 sample passing filter and with exp data
		// TODO what if there's just 1 sample not enough for clustering?
		const bySampleId = {}
		const samples = q.samples || []
		// use the filtered samples when filtering applies, otherwise all samples with exp data
		for (const sid of limitSamples || samples) {
			bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) }
		}

		// only valid genes with data are added. invalid genes or genes missing from data file is not added. backend returned genes is allowed to be fewer than supplied by client
		const gene2sample2value = new Map() // k: gene symbol, v: { sampleId : value }

		for (const g of param.genes) {
			// FIXME newly added geneVariant terms from client to be changed to {gene} but not {name}
			if (!g.gene) continue

			if (!g.chr) {
				// quick fix: newly added gene from client will lack chr/start/stop
				const lst = genome.genedb.getjsonbyname.all(g.gene)
				if (lst.length == 0) continue
				// prefer the default model; fall back to the first one when none is flagged default
				const defaultModel = lst.find(i => i.isdefault)
				const j = JSON.parse(defaultModel?.genemodel || lst[0].genemodel)
				g.start = j.start
				g.stop = j.stop
				g.chr = j.chr
			}

			const s2v = {}
			await utils.get_lines_bigfile({
				args: [q.file, (q.nochr ? g.chr?.replace('chr', '') : g.chr) + ':' + g.start + '-' + g.stop], // must do g.chr?.replace to avoid tsc error
				callback: line => {
					const l = line.split('\t')
					// case-insensitive match! FIXME if g.gene is alias won't work
					if (l[3].toLowerCase() != g.gene.toLowerCase()) return
					for (let i = 4; i < l.length; i++) {
						const sampleId = samples[i - 4]
						if (limitSamples && !limitSamples.has(sampleId)) continue // doing filtering and sample of current column is not used
						if (!l[i]) continue // blank string
						const v = Number(l[i])
						if (Number.isNaN(v)) throw 'exp value not number'
						s2v[sampleId] = v
					}
				}
			} as any)
			// Above!! add "as any" to suppress a npx tsc alert
			if (Object.keys(s2v).length) gene2sample2value.set(g.gene, s2v) // only add gene if has data
		}
		// pass blank byTermId to match with expected output structure
		const byTermId = {}
		return { gene2sample2value, byTermId, bySampleId }
	}
}