@sjcrh/proteinpaint-shared 2.78.0-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +18 -0
- package/src/bulk.cnv.js +86 -0
- package/src/bulk.del.js +124 -0
- package/src/bulk.itd.js +123 -0
- package/src/bulk.js +197 -0
- package/src/bulk.snv.js +234 -0
- package/src/bulk.sv.js +276 -0
- package/src/bulk.svjson.js +162 -0
- package/src/bulk.trunc.js +126 -0
- package/src/clustering.js +66 -0
- package/src/common.js +1297 -0
- package/src/compute.percentile.js +8 -0
- package/src/descriptive.stats.js +62 -0
- package/src/doc.js +9 -0
- package/src/doc.ts +13 -0
- package/src/fileSize.js +6 -0
- package/src/filter.js +244 -0
- package/src/helpers.js +31 -0
- package/src/index.js +23 -0
- package/src/mds.termdb.termvaluesetting.js +81 -0
- package/src/mds3tk.js +16 -0
- package/src/roundValue.js +48 -0
- package/src/termdb.bins.js +381 -0
- package/src/termdb.initbinconfig.js +96 -0
- package/src/termdb.usecase.js +207 -0
- package/src/terms.js +177 -0
- package/src/test/termdb.bins.unit.spec.js +759 -0
- package/src/test/termdb.initbinconfig.unit.spec.js +267 -0
- package/src/test/termdb.usecase.unit.spec.js +134 -0
- package/src/test/termdb.violin.unit.spec.js +47 -0
- package/src/test/urljson.unit.spec.ts +88 -0
- package/src/tree.js +138 -0
- package/src/urljson.ts +85 -0
- package/src/vcf.ann.js +62 -0
- package/src/vcf.csq.js +153 -0
- package/src/vcf.info.js +50 -0
- package/src/vcf.js +629 -0
- package/src/vcf.type.js +18 -0
- package/src/violin.bins.js +150 -0
package/src/bulk.snv.js
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
///////////////////////////////
|
|
2
|
+
//
|
|
3
|
+
// shared between client and server
|
|
4
|
+
//
|
|
5
|
+
///////////////////////////////
|
|
6
|
+
|
|
7
|
+
import * as common from './common.js'
|
|
8
|
+
import * as bulk from './bulk.js'
|
|
9
|
+
|
|
10
|
+
/*
Map the column names of a tab-delimited snv/indel header line onto the
internal field names that parseline() reads.

line: header line; it is lowercased and split on tab
flag: receives the resolved column list at flag.snv.header, and
      flag.snv.loaded is set to true

Returns false on success. Returns an error message string when the line has
fewer than two columns or when a required column cannot be found.
*/
export function parseheader(line, flag) {
	const columns = line.toLowerCase().split('\t')
	if (columns.length <= 1) return 'invalid file header for snv/indel'

	// position of the first alias that occurs in the header, or -1
	const locate = aliases => {
		for (const alias of aliases) {
			const at = columns.indexOf(alias)
			if (at != -1) return at
		}
		return -1
	}

	// [internal name, error when absent, accepted column names in priority order]
	// entries are applied in order, each rename is visible to the lookups after it
	const required = [
		[
			'gene',
			'gene missing from header',
			['annovar_gene', 'annovar_sj_gene', 'gene', 'genename', 'gene_symbol', 'hugo_symbol']
		],
		[
			'mname',
			'amino_acid_change missing from header',
			['annovar_aachange', 'amino_acid_change', 'annovar_sj_aachange', 'aachange', 'protein_change', 'variant']
		],
		[
			'class',
			'variant_class missing from header',
			['annovar_class', 'class', 'mclass', 'variant_class', 'variant_classification', 'annovar_sj_class']
		],
		['chr', 'chromosome missing from header', ['chromosome', 'chr']],
		['pos', 'start missing from header', ['wu_hg19_pos', 'start', 'start_position', 'chr_position', 'position']],
		[
			'isoform',
			'isoform missing from header',
			[
				'annovar_isoform',
				'mrna_accession',
				'mrna accession',
				'refseq_mrna_id',
				'annovar_sj_filter_isoform',
				'refseq',
				'isoform'
			]
		]
	]
	for (const [name, error, aliases] of required) {
		const at = locate(aliases)
		if (at == -1) return error
		columns[at] = name
	}

	// optional columns: renamed when present, silently skipped otherwise
	const optional = [
		['sample', ['sample', 'sample_name', 'tumor_sample_barcode']],
		['patient', ['patient', 'donor', 'target_case_id']],
		['qmset', ['quantitative_measurements']],
		// dna maf tumor
		['maf_tumor_v1', ['mutant_reads_in_case', 'mutant_in_tumor', 'tumor_readcount_alt']],
		['maf_tumor_v2', ['total_reads_in_case', 'total_in_tumor', 'tumor_readcount_total']],
		// dna maf normal
		['maf_normal_v1', ['mutant_reads_in_control', 'mutant_in_normal', 'normal_readcount_alt']],
		['maf_normal_v2', ['total_reads_in_control', 'total_in_normal', 'normal_readcount_total']],
		// cdna
		['cdna_change', ['cdna_change']],
		['sampletype', ['sampletype', 'sample type', 'sample_type']],
		['origin', ['origin']],
		['disease', ['cancer', 'disease', 'diagnosis']]
	]
	for (const [name, aliases] of optional) {
		const at = locate(aliases)
		if (at != -1) columns[at] = name
	}

	flag.snv.header = columns
	flag.snv.loaded = true
	return false
}
|
|
78
|
+
|
|
79
|
+
/*
Parse one data line of an snv/indel file and store the resulting record under
its gene in flag.data.

linei: line number, recorded together with any rejection
line:  raw tab-delimited text; empty lines and lines starting with "#" are skipped
flag:  shared parsing state. Reads flag.snv.header, flag.mclasslabel2key and
       flag.geneToUpper; updates flag.snv.badlines, flag.good,
       flag.snv.missense, flag.snv.silent and flag.data

Returns nothing. A rejected line is appended to flag.snv.badlines as
[linei, reason, fields]. bulk.parsesample() is handed the same list, and a
truthy return from it also drops the line.
*/
export function parseline(linei, line, flag) {
	if (line == '' || line[0] == '#') return
	const fields = line.split('\t')
	const reject = reason => {
		flag.snv.badlines.push([linei, reason, fields])
	}

	// pair each header name with its cell, stopping at the first absent cell
	const m = {}
	const names = flag.snv.header
	for (let col = 0; col < names.length && fields[col] != undefined; col++) {
		m[names[col]] = fields[col]
	}

	if (!m.gene) return reject('missing gene')
	if (m.gene.toUpperCase() == 'UNKNOWN') return reject('gene name is UNKNOWN')
	if (!m.isoform) return reject('missing isoform')

	if (m.mname) {
		// drop a leading "p." from the amino acid change
		m.mname = m.mname.replace(/^p\./, '')
	} else {
		// no amino acid change given: fall back to the cdna change
		m.mname = m.cdna_change
		if (!m.mname) return reject('missing amino acid change')
	}

	if (!m.class) return reject('missing mutation class')
	// try the label lookup first, the tester in common.js is only consulted on a miss
	const classKey = flag.mclasslabel2key[m.class.toUpperCase()] || common.mclasstester(m.class)
	if (!classKey) return reject('wrong mutation class: ' + m.class)
	m.class = classKey

	if (bulk.parsesample(m, flag, linei, fields, flag.snv.badlines)) return

	if (!m.chr) return reject('missing chromosome')
	if (m.chr.toLowerCase().indexOf('chr') != 0) m.chr = 'chr' + m.chr

	if (!m.pos) return reject('missing chromosome position')
	const position = Number.parseInt(m.pos)
	if (Number.isNaN(position)) return reject('invalid chromosome position')
	// stored as file value minus one (presumably 1-based to 0-based, confirm with consumers)
	m.pos = position - 1

	// fold the <key>_v1 (mutant) / <key>_v2 (total) read counts into m[key]
	for (const key of ['maf_tumor', 'maf_normal']) {
		const mutantKey = key + '_v1'
		const totalKey = key + '_v2'
		if (m[mutantKey] == undefined || m[totalKey] == undefined) continue
		// an empty total means no value: nothing is parsed, the raw cells are still removed
		if (m[totalKey] != '') {
			const mutant = Number.parseInt(m[mutantKey])
			const total = Number.parseInt(m[totalKey])
			if (Number.isNaN(mutant) || Number.isNaN(total)) {
				return reject('invalid ' + key + ' mutant and/or total read count')
			}
			m[key] = { f: mutant / total, v1: mutant, v2: total }
		}
		delete m[mutantKey]
		delete m[totalKey]
	}

	// m.qmset (quantitative_measurements column) is kept as the raw cell text,
	// no JSON parsing or validation is applied to it here

	flag.good++
	// FIXME hard-coded M and S
	if (m.class == 'M') {
		flag.snv.missense++
	} else if (m.class == 'S') {
		flag.snv.silent++
	}

	m.dt = common.dtsnvindel
	const key = flag.geneToUpper ? m.gene.toUpperCase() : m.gene
	if (!flag.data[key]) flag.data[key] = []
	flag.data[key].push(m)
}
|
package/src/bulk.sv.js
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
import * as bulk from './bulk.js'
|
|
2
|
+
import * as common from './common.js'
|
|
3
|
+
|
|
4
|
+
/////////////////////////////////
|
|
5
|
+
//
|
|
6
|
+
// client/server shared
|
|
7
|
+
//
|
|
8
|
+
/////////////////////////////////
|
|
9
|
+
|
|
10
|
+
// work for both sv/fusion
|
|
11
|
+
// must tell if the data is fusion or sv
|
|
12
|
+
|
|
13
|
+
/*
Map the column names of a tab-delimited sv or fusion header line onto the
internal field names that parseline() reads.

line: header line; it is lowercased and split on tab
flag: the resolved column list and a loaded=true marker are written to
      flag.sv when issv is truthy, to flag.fusion otherwise
issv: truthy when the file holds sv data rather than fusion data

Returns false on success. Returns an error message string when the line has
fewer than two columns or when a required column cannot be found.
*/
export function parseheader(line, flag, issv) {
	const columns = line.toLowerCase().split('\t')
	if (columns.length <= 1) return 'invalid file header for fusions'

	// position of the first alias that occurs in the header, or -1
	const locate = aliases => {
		for (const alias of aliases) {
			const at = columns.indexOf(alias)
			if (at != -1) return at
		}
		return -1
	}

	// [internal name, accepted column names in priority order]
	// the first accepted name is the one quoted in the "missing" message
	const required = [
		['gene1', ['gene_a', 'gene1', 'genea']],
		['gene2', ['gene_b', 'gene2', 'geneb']],
		['chr1', ['chr_a', 'chr1', 'chra']],
		['chr2', ['chr_b', 'chr2', 'chrb']],
		['position1', ['pos_a', 'position_a', 'position1', 'posa']],
		['position2', ['pos_b', 'position_b', 'position2', 'posb']],
		['isoform1', ['isoform_a', 'refseq_a', 'refseq1', 'isoform1', 'sv_refseqa']],
		['isoform2', ['isoform_b', 'refseq_b', 'refseq2', 'isoform2', 'sv_refseqb']],
		['strand1', ['strand_a', 'orta']],
		['strand2', ['strand_b', 'ortb']]
	]
	for (const [name, aliases] of required) {
		const at = locate(aliases)
		if (at == -1) return aliases[0] + ' missing from header'
		columns[at] = name
	}

	// optional columns: renamed when present, silently skipped otherwise
	const optional = [
		['sample', ['sample', 'sample_name', 'tumor_sample_barcode']],
		['patient', ['patient', 'donor', 'target_case_id']],
		['sampletype', ['sampletype', 'sample type', 'sample_type']],
		['disease', ['disease']],
		['origin', ['origin']]
	]
	for (const [name, aliases] of optional) {
		const at = locate(aliases)
		if (at != -1) columns[at] = name
	}

	const target = issv ? flag.sv : flag.fusion
	target.loaded = true
	target.header = columns
	return false
}
|
|
73
|
+
|
|
74
|
+
/*
Parse one data line of an sv or fusion file and store a record under each
named gene in flag.data.

i:    line number, recorded together with any rejection
line: raw tab-delimited text; empty lines and lines starting with "#" are skipped
flag: shared parsing state. Header and badlines come from flag.sv when issv is
      truthy, from flag.fusion otherwise; flag.good and flag.data are updated
      and flag.geneToUpper decides whether gene keys are uppercased
issv: truthy for sv data, falsy for fusion data

Returns nothing. A rejected line is appended to the badlines list as
[i, reason, fields]. A line naming two different genes yields two records
(and two flag.good increments); a line naming no gene yields none.
*/
export function parseline(i, line, flag, issv) {
	if (line == '' || line[0] == '#') return
	const fields = line.split('\t')
	const source = issv ? flag.sv : flag.fusion
	const header = source.header
	const badlines = source.badlines
	const reject = reason => {
		badlines.push([i, reason, fields])
	}

	const m = {}
	for (const [col, name] of header.entries()) {
		m[name] = fields[col]
	}

	// both chromosomes are mandatory and get a "chr" prefix when lacking one
	for (const key of ['chr1', 'chr2']) {
		if (!m[key]) return reject('missing ' + key)
		if (m[key].toLowerCase().indexOf('chr') != 0) m[key] = 'chr' + m[key]
	}

	// both positions are mandatory and must parse to a positive integer
	for (const key of ['position1', 'position2']) {
		const raw = m[key]
		if (!raw) return reject('missing ' + key)
		const value = Number.parseInt(raw)
		if (Number.isNaN(value) || value <= 0) return reject('invalid value for ' + key)
		m[key] = value
	}

	if (bulk.parsesample(m, flag, i, fields, badlines)) return

	// a comma-joined isoform list collapses to its last non-empty entry
	for (const key of ['isoform1', 'isoform2']) {
		if (m[key] && m[key].indexOf(',') != -1) {
			let last
			for (const item of m[key].split(',')) {
				if (item != '') last = item
			}
			m[key] = last
		}
	}
	// an isoform is only kept when its gene is named
	if (!m.gene1) m.isoform1 = undefined
	if (!m.gene2) m.isoform2 = undefined

	// build a fresh record (with its own pairlst) as seen from one side of the event
	const record = (isoform, mname) => ({
		dt: issv ? common.dtsv : common.dtfusionrna,
		class: issv ? common.mclasssv : common.mclassfusionrna,
		isoform,
		mname,
		sample: m.sample,
		patient: m.patient,
		sampletype: m.sampletype,
		origin: m.origin,
		disease: m.disease,
		pairlst: [
			{
				a: {
					name: m.gene1,
					isoform: m.isoform1,
					strand: m.strand1,
					chr: m.chr1,
					position: m.position1
				},
				b: {
					name: m.gene2,
					isoform: m.isoform2,
					strand: m.strand2,
					chr: m.chr2,
					position: m.position2
				}
			}
		]
	})
	// count the record and append it to the list of the given gene
	const store = (gene, rec) => {
		flag.good++
		const key = flag.geneToUpper ? gene.toUpperCase() : gene
		if (!flag.data[key]) flag.data[key] = []
		flag.data[key].push(rec)
	}

	// put data under gene1, named after the partner gene (or its chromosome)
	if (m.gene1) store(m.gene1, record(m.isoform1, m.gene2 || m.chr2))
	// put data under gene2, unless it is the same gene as gene1
	if (m.gene2 && m.gene2 != m.gene1) store(m.gene2, record(m.isoform2, m.gene1 || m.chr1))
}
|
|
220
|
+
|
|
221
|
+
/*
Make a copy of a record that shares no a/b/interstitial objects with the
original.

m: record to copy

Returns a new object holding:
- every enumerable field of m whose typeof is not "object" (object-typed
  fields, null included, are left out)
- when m.pairlst is truthy, a new pairlst array. Each pair is rebuilt with its
  own fields copied as-is (object values are shared, not cloned) and with
  a / b / interstitial replaced by new objects that hold only their
  non-object fields
*/
export function duplicate(m) {
	// new object with the fields of src whose typeof is not "object"
	const scalarsOf = src => {
		const out = {}
		for (const key in src) {
			const value = src[key]
			if (typeof value != 'object') out[key] = value
		}
		return out
	}

	const copy = {}
	for (const key in m) {
		if (key == 'pairlst') continue
		const value = m[key]
		if (typeof value != 'object') copy[key] = value
	}
	if (!m.pairlst) return copy

	copy.pairlst = []
	for (const pair of m.pairlst) {
		const p = {}
		for (const key in pair) {
			if (key != 'a' && key != 'b' && key != 'interstitial') p[key] = pair[key]
		}
		for (const side of ['a', 'b', 'interstitial']) {
			if (pair[side]) p[side] = scalarsOf(pair[side])
		}
		copy.pairlst.push(p)
	}
	return copy
}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/////////////////////////////////
|
|
2
|
+
//
|
|
3
|
+
// client/server shared
|
|
4
|
+
//
|
|
5
|
+
/////////////////////////////////
|
|
6
|
+
|
|
7
|
+
import * as common from './common.js'
|
|
8
|
+
import * as bulk from './bulk.js'
|
|
9
|
+
|
|
10
|
+
// work for both sv/fusion
|
|
11
|
+
// must tell if the data is fusion or sv
|
|
12
|
+
|
|
13
|
+
/*
Resolve the header line of a tab-delimited svjson file.

line: header line; it is lowercased and split on tab
flag: not used here, kept so the signature matches the other header parsers

Returns an array in every case, so the result can always be destructured as
[error, header]:
- [message]      when the line has fewer than two columns, or has neither a
                 "json" nor a "jsontext" column
- [null, header] on success, where header is the lowercased column list with
                 the json column renamed to "jsontext"

The "sample", "sampletype" and "patient" columns are used by their own
(lowercased) names and need no renaming.
*/
export function parseheader(line, flag) {
	const header = line.toLowerCase().split('\t')
	// reported as an array like the other outcomes; a bare string here would
	// destructure into its first characters on the caller side
	if (header.length <= 1) return ['invalid file header for svjson']

	// "json" takes priority over "jsontext" when both are present
	let i = header.indexOf('json')
	if (i == -1) i = header.indexOf('jsontext')
	if (i == -1) return ['json missing from header']
	header[i] = 'jsontext'
	return [null, header]
}
|
|
34
|
+
|
|
35
|
+
/*
Parse one data line of an svjson file and store the resulting record(s) in
flag.data.

i:      line number, recorded together with any rejection
line:   raw tab-delimited text; empty lines and lines starting with "#" are skipped
flag:   shared parsing state; flag.svjson.badlines, flag.good and flag.data are updated
header: column names as returned by parseheader(), one of them being "jsontext"

The jsontext cell is parsed as JSON and handled by shape:
- array:  treated as a pair list. For every pair, each side (a, b) that has
          both a name and an isoform gets a fusion record stored under its
          uppercased name, with mname taken from the other side's name when
          that side exists
- object: a single-gene record. json.typecode is moved to json.dt and must be
          one of the itd / nloss / closs / del / sv codes from common.js;
          json.gene is required

All other columns of the line are copied onto each record.

Returns nothing. A rejected line is appended to flag.svjson.badlines as
[i, reason, fields].
*/
export function parseline(i, line, flag, header) {
	if (line == '' || line[0] == '#') return
	const lst = line.split('\t')
	const m = {}
	const badlines = flag.svjson.badlines

	for (let j = 0; j < header.length; j++) {
		m[header[j]] = lst[j]
	}
	if (!m.jsontext) {
		badlines.push([i, 'missing jsontext', lst])
		return
	}
	if (bulk.parsesample(m, flag, i, lst, badlines)) {
		return
	}
	let json
	try {
		json = JSON.parse(m.jsontext)
	} catch (e) {
		badlines.push([i, 'invalid JSON text', lst])
		return
	}
	// JSON.parse also yields null, numbers, strings and booleans for valid
	// text; none of them can hold a record
	if (json === null || typeof json != 'object') {
		badlines.push([i, 'JSON text must be an object or an array', lst])
		return
	}

	// duplicating logic in pediatric.js
	if (Array.isArray(json)) {
		// json is pairlst
		// every record carries a copy of the whole list, so one unusable item
		// spoils the line
		if (json.some(pair => pair === null || typeof pair != 'object')) {
			badlines.push([i, 'every item of the JSON array must be an object', lst])
			return
		}
		// store one fusion record under the gene of "self", named after "partner"
		const addRecord = (self, partner) => {
			if (!self || !self.name || !self.isoform) return
			flag.good++
			const m2 = {
				dt: common.dtfusionrna,
				class: common.mclassfusionrna,
				isoform: self.isoform,
				// the other side of the pair may be absent
				mname: partner ? partner.name : undefined
			}
			for (const k in m) {
				if (k != 'jsontext') m2[k] = m[k]
			}
			m2.pairlst = duplicate(json)
			const n = self.name.toUpperCase()
			if (!flag.data[n]) {
				flag.data[n] = []
			}
			flag.data[n].push(m2)
		}
		for (const pair of json) {
			addRecord(pair.a, pair.b)
			addRecord(pair.b, pair.a)
		}
		return
	}

	json.dt = json.typecode
	delete json.typecode
	switch (json.dt) {
		case common.dtitd:
			json.class = common.mclassitd
			json.mname = 'ITD'
			break
		case common.dtnloss:
			json.class = common.mclassnloss
			json.mname = 'N-loss'
			break
		case common.dtcloss:
			json.class = common.mclasscloss
			json.mname = 'C-loss'
			break
		case common.dtdel:
			json.class = common.mclassdel
			json.mname = 'Del'
			break
		case common.dtsv:
			json.class = common.mclasssv
			json.mname = 'SV'
			break
		default:
			badlines.push([i, 'unknown datatype', lst])
			return
	}
	// record only about a single gene
	if (!json.gene) {
		badlines.push([i, 'json.gene missing', lst])
		return
	}
	flag.good++
	for (const k in m) {
		if (k != 'jsontext') {
			json[k] = m[k]
		}
	}
	// NOTE(review): the key is uppercased regardless of flag.geneToUpper, as in
	// the pair list branch above; the snv and sv parsers honor that flag
	const n = json.gene.toUpperCase()
	if (!flag.data[n]) {
		flag.data[n] = []
	}
	flag.data[n].push(json)
}
|
|
145
|
+
|
|
146
|
+
/*
Copy a pair list one level deep.

lst: iterable of pair objects

Returns a new array with one new object per pair. Each copy always has its
own "a" and "b" objects, filled with the enumerable fields of the original
sides (left empty when a side is absent); every other field of the pair is
carried over as-is. Values inside a and b are not cloned.
*/
function duplicate(lst) {
	// new object holding the enumerable fields of src (none when src is undefined)
	const cloneFlat = src => {
		const out = {}
		for (const key in src) {
			out[key] = src[key]
		}
		return out
	}

	const copies = []
	for (const pair of lst) {
		const p = { a: cloneFlat(pair.a), b: cloneFlat(pair.b) }
		for (const key in pair) {
			if (key == 'a' || key == 'b') continue
			p[key] = pair[key]
		}
		copies.push(p)
	}
	return copies
}
|