@sjcrh/proteinpaint-shared 2.180.0 → 2.180.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/bulk.cnv.js +30 -30
- package/src/bulk.del.js +48 -48
- package/src/bulk.itd.js +48 -48
- package/src/bulk.js +31 -31
- package/src/bulk.snv.js +109 -72
- package/src/bulk.sv.js +78 -78
- package/src/bulk.svjson.js +33 -31
- package/src/bulk.trunc.js +53 -47
- package/src/clustering.js +27 -27
- package/src/common.js +665 -558
- package/src/compute.percentile.js +3 -1
- package/src/fetch-helpers.js +67 -42
- package/src/fileSize.js +4 -4
- package/src/filter.js +207 -179
- package/src/hash.js +8 -5
- package/src/helpers.js +17 -9
- package/src/index.js +24 -24
- package/src/mds3tk.js +14 -12
- package/src/roundValue.js +5 -4
- package/src/termdb.bins.js +151 -84
- package/src/termdb.initbinconfig.js +46 -18
- package/src/termdb.usecase.js +125 -116
- package/src/terms.js +281 -266
- package/src/tree.js +4 -4
- package/src/vcf.ann.js +9 -9
- package/src/vcf.csq.js +8 -8
- package/src/vcf.info.js +3 -3
- package/src/vcf.js +99 -74
- package/src/vcf.type.js +8 -2
package/src/vcf.csq.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { vepinfo } from
|
|
1
|
+
import { vepinfo } from "./common.js"
|
|
2
2
|
/*
|
|
3
3
|
parse csq field from a variant line, not header
|
|
4
4
|
CSQ header must have already been parsed
|
|
@@ -57,8 +57,8 @@ export function parse_CSQ(str, header, m) {
|
|
|
57
57
|
if (!header) {
|
|
58
58
|
return null
|
|
59
59
|
}
|
|
60
|
-
for (const thisannotation of str.split(
|
|
61
|
-
const lst = thisannotation.replace(/&/g,
|
|
60
|
+
for (const thisannotation of str.split(",")) {
|
|
61
|
+
const lst = thisannotation.replace(/&/g, ",").split("|")
|
|
62
62
|
|
|
63
63
|
const o = {}
|
|
64
64
|
|
|
@@ -86,7 +86,7 @@ export function parse_CSQ(str, header, m) {
|
|
|
86
86
|
}
|
|
87
87
|
}
|
|
88
88
|
if (!allele) {
|
|
89
|
-
if (o.Allele ==
|
|
89
|
+
if (o.Allele == "-") {
|
|
90
90
|
// deletion
|
|
91
91
|
if (m.mlst) {
|
|
92
92
|
if (m.mlst.length == 1) {
|
|
@@ -120,8 +120,8 @@ export function parse_CSQ(str, header, m) {
|
|
|
120
120
|
o._gene = o.SYMBOL || o.Gene
|
|
121
121
|
|
|
122
122
|
// isoform
|
|
123
|
-
if (o.Feature_type && o.Feature_type ==
|
|
124
|
-
o._isoform = o.Feature.split(
|
|
123
|
+
if (o.Feature_type && o.Feature_type == "Transcript") {
|
|
124
|
+
o._isoform = o.Feature.split(".")[0] // remove version
|
|
125
125
|
} else {
|
|
126
126
|
o._isoform = o._gene
|
|
127
127
|
}
|
|
@@ -139,11 +139,11 @@ export function parse_CSQ(str, header, m) {
|
|
|
139
139
|
}
|
|
140
140
|
// mname
|
|
141
141
|
if (o.HGVSp) {
|
|
142
|
-
o._mname = decodeURIComponent(o.HGVSp.substr(o.HGVSp.indexOf(
|
|
142
|
+
o._mname = decodeURIComponent(o.HGVSp.substr(o.HGVSp.indexOf(":") + 1))
|
|
143
143
|
} else if (o.Protein_position && o.Amino_acids) {
|
|
144
144
|
o._mname = decodeURIComponent(o.Protein_position + o.Amino_acids)
|
|
145
145
|
} else if (o.HGVSc) {
|
|
146
|
-
o._mname = o.HGVSc.substr(o.HGVSc.indexOf(
|
|
146
|
+
o._mname = o.HGVSc.substr(o.HGVSc.indexOf(":") + 1)
|
|
147
147
|
} else if (o.Existing_variation) {
|
|
148
148
|
o._name = o.Existing_variation
|
|
149
149
|
} else {
|
package/src/vcf.info.js
CHANGED
|
@@ -19,17 +19,17 @@ export function dissect_INFO(str) {
|
|
|
19
19
|
while (i < str.length) {
|
|
20
20
|
const c = str[i]
|
|
21
21
|
if (findequalorsemicolon) {
|
|
22
|
-
if (c ==
|
|
22
|
+
if (c == "=") {
|
|
23
23
|
findsemicolon = true
|
|
24
24
|
findequalorsemicolon = false
|
|
25
25
|
lastkey = str.substring(idx, i)
|
|
26
26
|
idx = i + 1
|
|
27
|
-
} else if (c ==
|
|
27
|
+
} else if (c == ";") {
|
|
28
28
|
// should be a flag
|
|
29
29
|
k2v[str.substring(idx, i)] = 1
|
|
30
30
|
idx = i + 1
|
|
31
31
|
}
|
|
32
|
-
} else if (findsemicolon && c ==
|
|
32
|
+
} else if (findsemicolon && c == ";") {
|
|
33
33
|
findequalorsemicolon = true
|
|
34
34
|
findsemicolon = false
|
|
35
35
|
k2v[lastkey] = str.substring(idx, i)
|
package/src/vcf.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { mclass } from
|
|
2
|
-
import { dissect_INFO } from
|
|
3
|
-
import { parse_CSQ } from
|
|
4
|
-
import { parse_ANN } from
|
|
5
|
-
import { getVariantType } from
|
|
1
|
+
import { mclass } from "./common.js"
|
|
2
|
+
import { dissect_INFO } from "./vcf.info.js"
|
|
3
|
+
import { parse_CSQ } from "./vcf.csq.js"
|
|
4
|
+
import { parse_ANN } from "./vcf.ann.js"
|
|
5
|
+
import { getVariantType } from "./vcf.type.js"
|
|
6
6
|
|
|
7
7
|
/*
|
|
8
8
|
Only for parsing vcf files
|
|
@@ -36,30 +36,30 @@ export function vcfparsemeta(lines) {
|
|
|
36
36
|
hasformat = false
|
|
37
37
|
|
|
38
38
|
for (const line of lines) {
|
|
39
|
-
if (!line.startsWith(
|
|
39
|
+
if (!line.startsWith("#")) {
|
|
40
40
|
continue
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
-
if (line.startsWith(
|
|
43
|
+
if (line.startsWith("#C")) {
|
|
44
44
|
// header, get samples
|
|
45
|
-
sample = line.split(
|
|
45
|
+
sample = line.split("\t").slice(9)
|
|
46
46
|
continue
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
if (line.startsWith(
|
|
49
|
+
if (line.startsWith("##INFO")) {
|
|
50
50
|
const e = tohash(line.substring(8, line.length - 1), info)
|
|
51
51
|
if (e) {
|
|
52
|
-
errlst.push(
|
|
52
|
+
errlst.push("INFO error: " + e)
|
|
53
53
|
} else {
|
|
54
54
|
hasinfo = true
|
|
55
55
|
}
|
|
56
56
|
continue
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
-
if (line.startsWith(
|
|
59
|
+
if (line.startsWith("##FORMAT")) {
|
|
60
60
|
const e = tohash(line.substring(10, line.length - 1), format)
|
|
61
61
|
if (e) {
|
|
62
|
-
errlst.push(
|
|
62
|
+
errlst.push("FORMAT error: " + e)
|
|
63
63
|
} else {
|
|
64
64
|
hasformat = true
|
|
65
65
|
}
|
|
@@ -77,9 +77,9 @@ export function vcfparsemeta(lines) {
|
|
|
77
77
|
|
|
78
78
|
// reserved INFO fields
|
|
79
79
|
if (info.CSQ) {
|
|
80
|
-
const lst = info.CSQ.Description.split(
|
|
80
|
+
const lst = info.CSQ.Description.split(" Format: ")
|
|
81
81
|
if (lst[1]) {
|
|
82
|
-
const lst2 = lst[1].split(
|
|
82
|
+
const lst2 = lst[1].split("|")
|
|
83
83
|
if (lst2.length > 1) {
|
|
84
84
|
// fix csq headers so to allow configuring show/hide of csq fields
|
|
85
85
|
info.CSQ.csqheader = []
|
|
@@ -88,17 +88,17 @@ export function vcfparsemeta(lines) {
|
|
|
88
88
|
info.CSQ.csqheader.push(attr)
|
|
89
89
|
}
|
|
90
90
|
} else {
|
|
91
|
-
errlst.push(
|
|
91
|
+
errlst.push("unknown format for CSQ header: " + info.CSQ.Description)
|
|
92
92
|
}
|
|
93
93
|
} else {
|
|
94
|
-
errlst.push(
|
|
94
|
+
errlst.push("unknown format for CSQ header: " + info.CSQ.Description)
|
|
95
95
|
}
|
|
96
96
|
}
|
|
97
97
|
|
|
98
98
|
if (info.ANN) {
|
|
99
99
|
const lst = info.ANN.Description.split("'")
|
|
100
100
|
if (lst[1]) {
|
|
101
|
-
const lst2 = lst[1].split(
|
|
101
|
+
const lst2 = lst[1].split(" | ")
|
|
102
102
|
if (lst2.length) {
|
|
103
103
|
info.ANN.annheader = []
|
|
104
104
|
for (const s of lst2) {
|
|
@@ -106,14 +106,25 @@ export function vcfparsemeta(lines) {
|
|
|
106
106
|
info.ANN.annheader.push(attr)
|
|
107
107
|
}
|
|
108
108
|
} else {
|
|
109
|
-
errlst.push(
|
|
109
|
+
errlst.push(
|
|
110
|
+
'no " | " joined annotation fields for ANN (snpEff annotation): ' +
|
|
111
|
+
info.ANN.Description
|
|
112
|
+
)
|
|
110
113
|
}
|
|
111
114
|
} else {
|
|
112
|
-
errlst.push(
|
|
115
|
+
errlst.push(
|
|
116
|
+
"no single-quote enclosed annotation fields for ANN (snpEff annotation): " +
|
|
117
|
+
info.ANN.Description
|
|
118
|
+
)
|
|
113
119
|
}
|
|
114
120
|
}
|
|
115
121
|
|
|
116
|
-
return [
|
|
122
|
+
return [
|
|
123
|
+
hasinfo ? info : null,
|
|
124
|
+
hasformat ? format : null,
|
|
125
|
+
sampleobjlst,
|
|
126
|
+
errlst.length ? errlst : null,
|
|
127
|
+
]
|
|
117
128
|
}
|
|
118
129
|
|
|
119
130
|
export function vcfparseline(line, vcf) {
|
|
@@ -140,22 +151,22 @@ export function vcfparseline(line, vcf) {
|
|
|
140
151
|
altinfo
|
|
141
152
|
*/
|
|
142
153
|
|
|
143
|
-
const lst = line.split(
|
|
154
|
+
const lst = line.split("\t")
|
|
144
155
|
if (lst.length < 8) {
|
|
145
156
|
// no good
|
|
146
|
-
return [
|
|
157
|
+
return ["line has less than 8 fields", null, null]
|
|
147
158
|
}
|
|
148
159
|
|
|
149
160
|
const rawpos = Number.parseInt(lst[2 - 1])
|
|
150
161
|
if (!Number.isInteger(rawpos)) {
|
|
151
|
-
return [
|
|
162
|
+
return ["invalid value for genomic position", null, null]
|
|
152
163
|
}
|
|
153
164
|
|
|
154
165
|
const refallele = lst[4 - 1]
|
|
155
166
|
|
|
156
167
|
const m = {
|
|
157
168
|
vcf_ID: lst[3 - 1],
|
|
158
|
-
chr: (vcf.nochr ?
|
|
169
|
+
chr: (vcf.nochr ? "chr" : "") + lst[1 - 1],
|
|
159
170
|
pos: rawpos - 1,
|
|
160
171
|
ref: refallele,
|
|
161
172
|
//refstr:refallele, // e.g. GA>GCC, ref:A, refstr:GA, "refstr" is required for matching in FORMAT
|
|
@@ -168,18 +179,18 @@ export function vcfparseline(line, vcf) {
|
|
|
168
179
|
also allows GT allele index to work
|
|
169
180
|
*/
|
|
170
181
|
allele: refallele,
|
|
171
|
-
sampledata: []
|
|
172
|
-
}
|
|
182
|
+
sampledata: [],
|
|
183
|
+
},
|
|
173
184
|
],
|
|
174
185
|
|
|
175
186
|
info: {}, // locus info, do not contain allele info
|
|
176
187
|
|
|
177
|
-
name: lst[3 - 1] ==
|
|
188
|
+
name: lst[3 - 1] == "." ? null : lst[3 - 1],
|
|
178
189
|
}
|
|
179
190
|
|
|
180
191
|
// parse alt
|
|
181
192
|
const altinvalid = []
|
|
182
|
-
for (const alt of lst[5 - 1].split(
|
|
193
|
+
for (const alt of lst[5 - 1].split(",")) {
|
|
183
194
|
const a = {
|
|
184
195
|
ref: m.ref, // may be corrected just below!
|
|
185
196
|
allele: alt,
|
|
@@ -187,10 +198,10 @@ export function vcfparseline(line, vcf) {
|
|
|
187
198
|
allele_original: alt,
|
|
188
199
|
sampledata: [],
|
|
189
200
|
_m: m,
|
|
190
|
-
info: {} // allele info, do not contain locus info
|
|
201
|
+
info: {}, // allele info, do not contain locus info
|
|
191
202
|
}
|
|
192
203
|
m.alleles.push(a)
|
|
193
|
-
if (alt[0] ==
|
|
204
|
+
if (alt[0] == "<") {
|
|
194
205
|
/*
|
|
195
206
|
symbolic allele, show text within <> as name
|
|
196
207
|
FIXME match INFO
|
|
@@ -225,7 +236,7 @@ export function vcfparseline(line, vcf) {
|
|
|
225
236
|
m.alleles.shift()
|
|
226
237
|
|
|
227
238
|
// info
|
|
228
|
-
const tmp = lst[8 - 1] ==
|
|
239
|
+
const tmp = lst[8 - 1] == "." ? [] : dissect_INFO(lst[8 - 1])
|
|
229
240
|
let badinfokeys = []
|
|
230
241
|
|
|
231
242
|
if (vcf.info) {
|
|
@@ -239,20 +250,20 @@ export function vcfparseline(line, vcf) {
|
|
|
239
250
|
for (const a of m.alleles) {
|
|
240
251
|
const m2 = {}
|
|
241
252
|
for (const k in m) {
|
|
242
|
-
if (k !=
|
|
253
|
+
if (k != "alleles") {
|
|
243
254
|
m2[k] = m[k]
|
|
244
255
|
}
|
|
245
256
|
}
|
|
246
257
|
for (const k in a) {
|
|
247
|
-
if (k ==
|
|
258
|
+
if (k == "allele") {
|
|
248
259
|
m2.alt = a[k]
|
|
249
|
-
} else if (k ==
|
|
260
|
+
} else if (k == "info") {
|
|
250
261
|
m2.altinfo = a[k]
|
|
251
262
|
} else {
|
|
252
263
|
m2[k] = a[k]
|
|
253
264
|
}
|
|
254
265
|
}
|
|
255
|
-
if (!m2.issymbolicallele && m2.alt !=
|
|
266
|
+
if (!m2.issymbolicallele && m2.alt != "NON_REF") {
|
|
256
267
|
m2.type = getVariantType(m2.ref, m2.alt)
|
|
257
268
|
/*
|
|
258
269
|
// valid alt allele, apply Dr. J's cool method
|
|
@@ -265,15 +276,19 @@ export function vcfparseline(line, vcf) {
|
|
|
265
276
|
mlst.push(m2)
|
|
266
277
|
}
|
|
267
278
|
return [
|
|
268
|
-
badinfokeys.length ?
|
|
279
|
+
badinfokeys.length ? "unknown info keys: " + badinfokeys.join(",") : null,
|
|
269
280
|
mlst,
|
|
270
|
-
altinvalid.length > 0 ? altinvalid : null
|
|
281
|
+
altinvalid.length > 0 ? altinvalid : null,
|
|
271
282
|
]
|
|
272
283
|
}
|
|
273
284
|
|
|
274
285
|
function correctRefAlt(p, ref, alt) {
|
|
275
286
|
// for oligos, always trim the last identical base
|
|
276
|
-
while (
|
|
287
|
+
while (
|
|
288
|
+
ref.length > 1 &&
|
|
289
|
+
alt.length > 1 &&
|
|
290
|
+
ref[ref.length - 1] == alt[alt.length - 1]
|
|
291
|
+
) {
|
|
277
292
|
ref = ref.substr(0, ref.length - 1)
|
|
278
293
|
alt = alt.substr(0, alt.length - 1)
|
|
279
294
|
}
|
|
@@ -296,17 +311,17 @@ function parse_FORMAT2(lst, m, vcf) {
|
|
|
296
311
|
.allele_original
|
|
297
312
|
.sampledata[] blank array
|
|
298
313
|
*/
|
|
299
|
-
const formatfields = lst[9 - 1].split(
|
|
314
|
+
const formatfields = lst[9 - 1].split(":")
|
|
300
315
|
|
|
301
316
|
for (let _sampleidx = 9; _sampleidx < lst.length; _sampleidx++) {
|
|
302
317
|
// for each sample
|
|
303
318
|
|
|
304
|
-
const valuelst = lst[_sampleidx].split(
|
|
319
|
+
const valuelst = lst[_sampleidx].split(":")
|
|
305
320
|
{
|
|
306
321
|
// tell if this sample have any data in this line (variant), if .:., then skip
|
|
307
322
|
let none = true
|
|
308
323
|
for (const v of valuelst) {
|
|
309
|
-
if (v !=
|
|
324
|
+
if (v != ".") {
|
|
310
325
|
none = false
|
|
311
326
|
break
|
|
312
327
|
}
|
|
@@ -336,10 +351,10 @@ function parse_FORMAT2(lst, m, vcf) {
|
|
|
336
351
|
sobj[k] = vcf.samples[sampleidx][k]
|
|
337
352
|
}
|
|
338
353
|
} else {
|
|
339
|
-
sobj.name =
|
|
354
|
+
sobj.name = "missing_samplename_from_vcf_header"
|
|
340
355
|
}
|
|
341
356
|
m.alleles[i].sampledata.push({
|
|
342
|
-
sampleobj: sobj
|
|
357
|
+
sampleobj: sobj,
|
|
343
358
|
})
|
|
344
359
|
}
|
|
345
360
|
|
|
@@ -348,18 +363,18 @@ function parse_FORMAT2(lst, m, vcf) {
|
|
|
348
363
|
|
|
349
364
|
const field = formatfields[fi]
|
|
350
365
|
const value = valuelst[fi]
|
|
351
|
-
if (value ==
|
|
366
|
+
if (value == ".") {
|
|
352
367
|
// no value for this field
|
|
353
368
|
continue
|
|
354
369
|
}
|
|
355
370
|
|
|
356
|
-
if (field ==
|
|
357
|
-
const splitter = value.indexOf(
|
|
371
|
+
if (field == "GT") {
|
|
372
|
+
const splitter = value.indexOf("/") != -1 ? "/" : "|"
|
|
358
373
|
let gtsum = 0 // for calculating gtallref=true, old
|
|
359
374
|
let unknowngt = false // if any is '.', then won't calculate gtallref
|
|
360
375
|
const gtalleles = []
|
|
361
376
|
for (const i of value.split(splitter)) {
|
|
362
|
-
if (i ==
|
|
377
|
+
if (i == ".") {
|
|
363
378
|
unknowngt = true
|
|
364
379
|
continue
|
|
365
380
|
}
|
|
@@ -399,20 +414,21 @@ function parse_FORMAT2(lst, m, vcf) {
|
|
|
399
414
|
if (!formatdesc) {
|
|
400
415
|
// unspecified field, put to all alt alleles
|
|
401
416
|
for (let i = 1; i < m.alleles.length; i++) {
|
|
402
|
-
m.alleles[i].sampledata[m.alleles[i].sampledata.length - 1][field] =
|
|
417
|
+
m.alleles[i].sampledata[m.alleles[i].sampledata.length - 1][field] =
|
|
418
|
+
value
|
|
403
419
|
}
|
|
404
420
|
continue
|
|
405
421
|
}
|
|
406
422
|
|
|
407
|
-
const isinteger = formatdesc.Type ==
|
|
408
|
-
const isfloat = formatdesc.Type ==
|
|
423
|
+
const isinteger = formatdesc.Type == "Integer"
|
|
424
|
+
const isfloat = formatdesc.Type == "Float"
|
|
409
425
|
|
|
410
|
-
if ((formatdesc.Number && formatdesc.Number ==
|
|
426
|
+
if ((formatdesc.Number && formatdesc.Number == "R") || field == "AD") {
|
|
411
427
|
/*
|
|
412
428
|
per-allele value, including ref
|
|
413
429
|
v4.1 has AD not with "R", must process as R
|
|
414
430
|
*/
|
|
415
|
-
const fvlst = value.split(
|
|
431
|
+
const fvlst = value.split(",").map((i) => {
|
|
416
432
|
if (isinteger) return Number.parseInt(i)
|
|
417
433
|
if (isfloat) return Number.parseFloat(i)
|
|
418
434
|
return i
|
|
@@ -430,9 +446,9 @@ function parse_FORMAT2(lst, m, vcf) {
|
|
|
430
446
|
}
|
|
431
447
|
continue
|
|
432
448
|
}
|
|
433
|
-
if (formatdesc.Number && formatdesc.Number ==
|
|
449
|
+
if (formatdesc.Number && formatdesc.Number == "A") {
|
|
434
450
|
// per alt-allele value
|
|
435
|
-
const fvlst = value.split(
|
|
451
|
+
const fvlst = value.split(",").map((i) => {
|
|
436
452
|
if (isinteger) return Number.parseInt(i)
|
|
437
453
|
if (isfloat) return Number.parseFloat(i)
|
|
438
454
|
return i
|
|
@@ -451,7 +467,8 @@ function parse_FORMAT2(lst, m, vcf) {
|
|
|
451
467
|
}
|
|
452
468
|
// otherwise, append this field to all alt
|
|
453
469
|
for (let i = 1; i < m.alleles.length; i++) {
|
|
454
|
-
m.alleles[i].sampledata[m.alleles[i].sampledata.length - 1][field] =
|
|
470
|
+
m.alleles[i].sampledata[m.alleles[i].sampledata.length - 1][field] =
|
|
471
|
+
value
|
|
455
472
|
}
|
|
456
473
|
}
|
|
457
474
|
}
|
|
@@ -489,17 +506,17 @@ function tohash(s, hash) {
|
|
|
489
506
|
h[k] = s.substring(thisstart, i)
|
|
490
507
|
k = null
|
|
491
508
|
} else {
|
|
492
|
-
err.push(
|
|
509
|
+
err.push("k undefined before double quotes")
|
|
493
510
|
}
|
|
494
511
|
prevdoublequote = true
|
|
495
512
|
continue
|
|
496
513
|
}
|
|
497
|
-
if (s[i] ==
|
|
514
|
+
if (s[i] == "=") {
|
|
498
515
|
k = s.substring(prev, i)
|
|
499
516
|
prev = i + 1
|
|
500
517
|
continue
|
|
501
518
|
}
|
|
502
|
-
if (s[i] ==
|
|
519
|
+
if (s[i] == ",") {
|
|
503
520
|
if (prevdoublequote) {
|
|
504
521
|
prevdoublequote = false
|
|
505
522
|
} else {
|
|
@@ -507,7 +524,7 @@ function tohash(s, hash) {
|
|
|
507
524
|
h[k] = s.substring(prev, i)
|
|
508
525
|
k = null
|
|
509
526
|
} else {
|
|
510
|
-
err.push(
|
|
527
|
+
err.push("k undefined")
|
|
511
528
|
}
|
|
512
529
|
}
|
|
513
530
|
prev = i + 1
|
|
@@ -520,9 +537,9 @@ function tohash(s, hash) {
|
|
|
520
537
|
if (h.ID) {
|
|
521
538
|
hash[h.ID] = h
|
|
522
539
|
} else {
|
|
523
|
-
return
|
|
540
|
+
return "no ID"
|
|
524
541
|
}
|
|
525
|
-
if (err.length) return err.join(
|
|
542
|
+
if (err.length) return err.join("\n")
|
|
526
543
|
}
|
|
527
544
|
|
|
528
545
|
function parse_INFO(tmp, m, vcf) {
|
|
@@ -546,14 +563,14 @@ function parse_INFO(tmp, m, vcf) {
|
|
|
546
563
|
|
|
547
564
|
////////////////// hard-coded fields
|
|
548
565
|
|
|
549
|
-
if (key ==
|
|
566
|
+
if (key == "CSQ") {
|
|
550
567
|
const okay = parse_CSQ(value, vcf.info.CSQ.csqheader, m)
|
|
551
568
|
if (!okay) {
|
|
552
569
|
m.info[key] = value
|
|
553
570
|
}
|
|
554
571
|
continue
|
|
555
572
|
}
|
|
556
|
-
if (key ==
|
|
573
|
+
if (key == "ANN") {
|
|
557
574
|
const okay = parse_ANN(value, vcf.info.ANN.annheader, m)
|
|
558
575
|
if (!okay) {
|
|
559
576
|
m.info[key] = value
|
|
@@ -563,17 +580,17 @@ function parse_INFO(tmp, m, vcf) {
|
|
|
563
580
|
|
|
564
581
|
////////////////// end of hardcoded fields
|
|
565
582
|
|
|
566
|
-
if (vcf.info[key].Type ==
|
|
583
|
+
if (vcf.info[key].Type == "Flag") {
|
|
567
584
|
// flag has no value
|
|
568
585
|
m.info[key] = key
|
|
569
586
|
continue
|
|
570
587
|
}
|
|
571
588
|
|
|
572
589
|
const __number = vcf.info[key].Number
|
|
573
|
-
const isinteger = vcf.info[key].Type ==
|
|
574
|
-
const isfloat = vcf.info[key].Type ==
|
|
590
|
+
const isinteger = vcf.info[key].Type == "Integer"
|
|
591
|
+
const isfloat = vcf.info[key].Type == "Float"
|
|
575
592
|
|
|
576
|
-
if (__number ==
|
|
593
|
+
if (__number == "0") {
|
|
577
594
|
/*
|
|
578
595
|
no value, should be a Flag
|
|
579
596
|
*/
|
|
@@ -581,31 +598,39 @@ function parse_INFO(tmp, m, vcf) {
|
|
|
581
598
|
continue
|
|
582
599
|
}
|
|
583
600
|
|
|
584
|
-
if (__number ==
|
|
601
|
+
if (__number == "A") {
|
|
585
602
|
/*
|
|
586
603
|
per alt allele
|
|
587
604
|
*/
|
|
588
|
-
const tt = value.split(
|
|
605
|
+
const tt = value.split(",")
|
|
589
606
|
for (let j = 0; j < tt.length; j++) {
|
|
590
607
|
if (m.alleles[j]) {
|
|
591
|
-
m.alleles[j].info[key] = isinteger
|
|
608
|
+
m.alleles[j].info[key] = isinteger
|
|
609
|
+
? Number.parseInt(tt[j])
|
|
610
|
+
: isfloat
|
|
611
|
+
? Number.parseFloat(tt[j])
|
|
612
|
+
: tt[j]
|
|
592
613
|
}
|
|
593
614
|
}
|
|
594
615
|
continue
|
|
595
616
|
}
|
|
596
617
|
|
|
597
|
-
if (__number ==
|
|
618
|
+
if (__number == "R") {
|
|
598
619
|
/*
|
|
599
620
|
FIXME "R" is not considered, m.alleles only contain alt, which .info{} for each
|
|
600
621
|
the current datastructure does not support info for ref allele!
|
|
601
622
|
*/
|
|
602
623
|
}
|
|
603
624
|
|
|
604
|
-
if (__number ==
|
|
625
|
+
if (__number == "1") {
|
|
605
626
|
/*
|
|
606
627
|
single value
|
|
607
628
|
*/
|
|
608
|
-
m.info[key] = isinteger
|
|
629
|
+
m.info[key] = isinteger
|
|
630
|
+
? Number.parseInt(value)
|
|
631
|
+
: isfloat
|
|
632
|
+
? Number.parseFloat(value)
|
|
633
|
+
: value
|
|
609
634
|
continue
|
|
610
635
|
}
|
|
611
636
|
|
|
@@ -616,7 +641,7 @@ function parse_INFO(tmp, m, vcf) {
|
|
|
616
641
|
|
|
617
642
|
// number of values unknown, "commas are permitted only as delimiters for lists of values"
|
|
618
643
|
|
|
619
|
-
const lst = value.split(
|
|
644
|
+
const lst = value.split(",") // value is always array!!
|
|
620
645
|
if (isinteger) {
|
|
621
646
|
m.info[key] = lst.map(Number.parseInt)
|
|
622
647
|
} else if (isfloat) {
|
package/src/vcf.type.js
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
mclassdeletion,
|
|
3
|
+
mclasssnv,
|
|
4
|
+
mclassmnv,
|
|
5
|
+
mclassinsertion,
|
|
6
|
+
mclassnonstandard,
|
|
7
|
+
} from "./common.js"
|
|
2
8
|
|
|
3
9
|
export function getVariantType(ref, alt) {
|
|
4
10
|
if (ref.length == 1 && alt.length == 1) {
|
|
5
11
|
// both alleles length of 1
|
|
6
|
-
if (alt ==
|
|
12
|
+
if (alt == ".") {
|
|
7
13
|
// alt is missing
|
|
8
14
|
return mclassdeletion
|
|
9
15
|
}
|