@sjcrh/proteinpaint-shared 2.78.0-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +18 -0
- package/src/bulk.cnv.js +86 -0
- package/src/bulk.del.js +124 -0
- package/src/bulk.itd.js +123 -0
- package/src/bulk.js +197 -0
- package/src/bulk.snv.js +234 -0
- package/src/bulk.sv.js +276 -0
- package/src/bulk.svjson.js +162 -0
- package/src/bulk.trunc.js +126 -0
- package/src/clustering.js +66 -0
- package/src/common.js +1297 -0
- package/src/compute.percentile.js +8 -0
- package/src/descriptive.stats.js +62 -0
- package/src/doc.js +9 -0
- package/src/doc.ts +13 -0
- package/src/fileSize.js +6 -0
- package/src/filter.js +244 -0
- package/src/helpers.js +31 -0
- package/src/index.js +23 -0
- package/src/mds.termdb.termvaluesetting.js +81 -0
- package/src/mds3tk.js +16 -0
- package/src/roundValue.js +48 -0
- package/src/termdb.bins.js +381 -0
- package/src/termdb.initbinconfig.js +96 -0
- package/src/termdb.usecase.js +207 -0
- package/src/terms.js +177 -0
- package/src/test/termdb.bins.unit.spec.js +759 -0
- package/src/test/termdb.initbinconfig.unit.spec.js +267 -0
- package/src/test/termdb.usecase.unit.spec.js +134 -0
- package/src/test/termdb.violin.unit.spec.js +47 -0
- package/src/test/urljson.unit.spec.ts +88 -0
- package/src/tree.js +138 -0
- package/src/urljson.ts +85 -0
- package/src/vcf.ann.js +62 -0
- package/src/vcf.csq.js +153 -0
- package/src/vcf.info.js +50 -0
- package/src/vcf.js +629 -0
- package/src/vcf.type.js +18 -0
- package/src/violin.bins.js +150 -0
package/src/vcf.js
ADDED
|
@@ -0,0 +1,629 @@
|
|
|
1
|
+
import { mclass } from './common.js'
|
|
2
|
+
import { dissect_INFO } from './vcf.info.js'
|
|
3
|
+
import { parse_CSQ } from './vcf.csq.js'
|
|
4
|
+
import { parse_ANN } from './vcf.ann.js'
|
|
5
|
+
import { getVariantType } from './vcf.type.js'
|
|
6
|
+
|
|
7
|
+
/*
|
|
8
|
+
Only for parsing vcf files
|
|
9
|
+
is not involved in creating vcf tracks
|
|
10
|
+
|
|
11
|
+
shared between client-server
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// Captures the text enclosed in the angle brackets of a symbolic allele, e.g. <*:DEL>
const getallelename = /<(.+)>/

// Reverse lookup table: upper-cased mclass label -> mclass key
const mclasslabel2key = {}
for (const classkey in mclass) {
	const label = mclass[classkey].label.toUpperCase()
	mclasslabel2key[label] = classkey
}
|
|
21
|
+
|
|
22
|
+
/**
 * Parse the meta lines and the header line of a VCF file.
 *
 * Lines that do not start with "#" are ignored.
 *   ##INFO   lines are registered into the info hash by tohash()
 *   ##FORMAT lines are registered into the format hash by tohash()
 *   #C...    (the #CHROM header line) supplies sample names from column 10 onward
 *
 * The reserved INFO fields CSQ and ANN get their sub-field names extracted from
 * the Description text and attached as .csqheader / .annheader ([ {name} ]).
 *
 * @param {string[]} lines - lines of text, already split on linebreak
 * @returns {Array} [info, format, samples, errors]
 *   info    {object|null}   ID -> parsed ##INFO attributes, null if no INFO line parsed cleanly
 *   format  {object|null}   ID -> parsed ##FORMAT attributes, null if no FORMAT line parsed cleanly
 *   samples {object[]}      [ {name} ], one per sample column of the header line
 *   errors  {string[]|null} collected error messages, null if there are none
 */
export function vcfparsemeta(lines) {
	const errlst = []
	const info = {}
	const format = {}
	let sample = []
	let hasinfo = false
	let hasformat = false

	for (const line of lines) {
		if (!line.startsWith('#')) continue

		if (line.startsWith('#C')) {
			// header line, sample names start at the 10th column
			sample = line.split('\t').slice(9)
			continue
		}

		if (line.startsWith('##INFO')) {
			// drop the leading '##INFO=<' and the last character
			const e = tohash(line.substring(8, line.length - 1), info)
			if (e) errlst.push('INFO error: ' + e)
			else hasinfo = true
			continue
		}

		if (line.startsWith('##FORMAT')) {
			// drop the leading '##FORMAT=<' and the last character
			const e = tohash(line.substring(10, line.length - 1), format)
			if (e) errlst.push('FORMAT error: ' + e)
			else hasformat = true
		}
	}

	// objects rather than bare names, this enables adding key4annotation to match with .ds.cohort.annotation
	const sampleobjlst = sample.map(name => ({ name }))

	// reserved INFO fields

	if (info.CSQ) {
		const description = info.CSQ.Description
		// a CSQ entry lacking Description is reported as an error rather than throwing on .split()
		const formatpart = typeof description == 'string' ? description.split(' Format: ')[1] : undefined
		const names = formatpart ? formatpart.split('|') : []
		if (names.length > 1) {
			// fix csq headers so to allow configuring show/hide of csq fields
			info.CSQ.csqheader = names.map(name => ({ name }))
		} else {
			errlst.push('unknown format for CSQ header: ' + description)
		}
	}

	if (info.ANN) {
		const description = info.ANN.Description
		// the field names are expected inside the first pair of single quotes
		const quoted = typeof description == 'string' ? description.split("'")[1] : undefined
		if (quoted) {
			// split() always returns at least one element, so a single un-delimited name is accepted as is
			info.ANN.annheader = quoted.split(' | ').map(name => ({ name }))
		} else {
			errlst.push('no single-quote enclosed annotation fields for ANN (snpEff annotation): ' + description)
		}
	}

	return [hasinfo ? info : null, hasformat ? format : null, sampleobjlst, errlst.length ? errlst : null]
}
|
|
118
|
+
|
|
119
|
+
/**
 * Parse one tab-delimited VCF data line into one variant object per ALT allele.
 *
 * @param {string} line - a VCF data line
 * @param {object} vcf - samples/info/format as generated by vcfparsemeta()
 *   .nochr   BOOL, when true "chr" is prepended to the chromosome name
 *   .samples [ {name} ]
 *   .info    {}  when absent, INFO is attached to each m unparsed
 *   .format  {}
 * @returns {Array} [error, mlst, altinvalid]
 *   error      STR or null; when the line is rejected the other two items are null
 *   mlst       [ m ], one m per alt allele, each with
 *              chr, pos, name, type, ref, alt, altstr, sampledata [], info, altinfo
 *   altinvalid [ STR ] of symbolic alt alleles that could not be parsed, or null
 */
export function vcfparseline(line, vcf) {
	const fields = line.split('\t')
	if (fields.length < 8) {
		return ['line has less than 8 fields', null, null]
	}

	const [chrom, posstr, id, refallele, altstr] = fields

	const rawpos = Number.parseInt(posstr)
	if (!Number.isInteger(rawpos)) {
		return ['invalid value for genomic position', null, null]
	}

	const m = {
		vcf_ID: id,
		chr: (vcf.nochr ? 'chr' : '') + chrom,
		pos: rawpos - 1,
		ref: refallele,
		altstr: altstr,
		// the first element is a placeholder for the ref allele so that GT allele indices
		// line up while FORMAT is parsed; it is removed afterwards
		alleles: [{ allele: refallele, sampledata: [] }],
		info: {}, // locus info, do not contain allele info
		name: id == '.' ? null : id
	}

	// parse alt
	const altinvalid = []
	for (const rawalt of altstr.split(',')) {
		const a = {
			ref: m.ref, // may be corrected just below
			allele: rawalt,
			// keep original allele for matching with csq which hardcodes original allele
			allele_original: rawalt,
			sampledata: [],
			_m: m,
			info: {} // allele info, do not contain locus info
		}
		m.alleles.push(a)

		if (rawalt[0] != '<') {
			// normal nucleotide
			const [p, ref, alt] = correctRefAlt(m.pos, m.ref, a.allele)
			a.pos = p
			a.ref = ref
			a.allele = alt
			continue
		}

		// symbolic allele, show text within <> as name
		// FIXME match INFO
		const matched = rawalt.match(getallelename)
		if (!matched) {
			altinvalid.push(rawalt)
			continue
		}
		a.type = matched[1]
		a.allele = matched[1]
		a.issymbolicallele = true
	}

	// FORMAT and at least one sample column are present
	if (fields[8] && fields[9]) {
		parse_FORMAT2(fields, m, vcf)
	}

	// remove ref allele so it only contain alternative alleles
	// so that parse_INFO can safely apply Number=A fields to m.alleles
	m.alleles.shift()

	// info
	const tmp = fields[7] == '.' ? [] : dissect_INFO(fields[7])
	let badinfokeys = []
	if (vcf.info) {
		badinfokeys = parse_INFO(tmp, m, vcf)
	} else {
		// vcf meta lines told nothing about INFO, do not parse
		m.info = tmp
	}

	// flatten: one m per alt allele, locus attributes first, then allele attributes on top
	const mlst = m.alleles.map(a => {
		const m2 = {}
		for (const k in m) {
			if (k != 'alleles') m2[k] = m[k]
		}
		for (const k in a) {
			switch (k) {
				case 'allele':
					m2.alt = a[k]
					break
				case 'info':
					m2.altinfo = a[k]
					break
				default:
					m2[k] = a[k]
			}
		}
		if (!m2.issymbolicallele && m2.alt != 'NON_REF') {
			m2.type = getVariantType(m2.ref, m2.alt)
		}
		return m2
	})

	return [
		badinfokeys.length ? 'unknown info keys: ' + badinfokeys.join(',') : null,
		mlst,
		altinvalid.length > 0 ? altinvalid : null
	]
}
|
|
273
|
+
|
|
274
|
+
/**
 * Trim the bases shared by a ref/alt allele pair, always keeping at least one base in each.
 * Shared trailing bases are removed first, then shared leading bases;
 * the position moves up by one for every leading base removed.
 *
 * @param {number} p - position of the first base of ref
 * @param {string} ref - reference allele
 * @param {string} alt - alternative allele
 * @returns {Array} [position, ref, alt] after trimming
 */
function correctRefAlt(p, ref, alt) {
	// number of identical bases to drop from the end
	let tail = 0
	while (
		ref.length - tail > 1 &&
		alt.length - tail > 1 &&
		ref[ref.length - 1 - tail] == alt[alt.length - 1 - tail]
	) {
		tail++
	}
	const refend = ref.length - tail
	const altend = alt.length - tail

	// number of identical bases to drop from the start
	let head = 0
	while (refend - head > 1 && altend - head > 1 && ref[head] == alt[head]) {
		head++
		p++
	}

	return [p, ref.slice(head, refend), alt.slice(head, altend)]
}
|
|
288
|
+
|
|
289
|
+
/**
 * Parse the FORMAT column and every sample column of one VCF line,
 * and store the per-sample values on the alt alleles of m.
 *
 * For every sample that has data, one record {sampleobj, ...} is appended to
 * .sampledata[] of every alt allele (the sample may not actually carry that allele),
 * then filled field by field:
 *   GT                 -> .GT (raw), .genotype (allele strings joined), .gtallref, .__gtalleles
 *   Number=R, or AD    -> {ref: value, alt: value} for this allele
 *   Number=A           -> {alt: value} for this allele
 *   anything else      -> raw string value, same on every alt allele
 * Finally .allele2readcount is derived from a per-allele .AD, for compatibility with old ds.
 *
 * @param {string[]} lst - tab-split fields of the line; lst[8] is FORMAT, lst[9...] are samples
 * @param {object} m - variant being built; m.alleles[0] is the ref allele placeholder,
 *   each alt allele has .ref, .allele, .allele_original and a .sampledata[] array
 * @param {object} vcf - .samples [ {name} ] and .format {} from the vcf header, both optional
 */
function parse_FORMAT2(lst, m, vcf) {
	const formatfields = lst[8].split(':')

	for (let col = 9; col < lst.length; col++) {
		// for each sample

		const valuelst = lst[col].split(':')
		// if every value is ".", this sample has no data in this line (variant)
		if (valuelst.every(v => v == '.')) continue

		const samplemeta = vcf.samples && vcf.samples[col - 9]

		// one fresh record of this sample per alt allele; records[i] belongs to m.alleles[i]
		const records = []
		for (let i = 1; i < m.alleles.length; i++) {
			const record = {
				sampleobj: samplemeta
					? Object.assign({}, samplemeta)
					: { name: 'missing_samplename_from_vcf_header' }
			}
			m.alleles[i].sampledata.push(record)
			records[i] = record
		}

		for (let fi = 0; fi < formatfields.length; fi++) {
			// for each field of this sample

			const field = formatfields[fi]
			const value = valuelst[fi]
			// "." is a missing value; undefined happens when a sample column
			// has fewer values than FORMAT has fields (trailing fields dropped)
			if (value === undefined || value == '.') continue

			if (field == 'GT') {
				const splitter = value.includes('/') ? '/' : '|'
				let gtsum = 0 // for calculating gtallref=true, old
				let unknowngt = false // if any index is not a number (e.g. "."), won't calculate gtallref
				const gtalleles = []
				for (const code of value.split(splitter)) {
					const idx = Number.parseInt(code, 10)
					if (Number.isNaN(idx)) {
						unknowngt = true
						continue
					}
					gtsum += idx
					const ale = m.alleles[idx]
					if (ale) gtalleles.push(ale.allele)
				}
				const gtallref = !unknowngt && gtsum == 0
				const genotype = gtalleles.join(splitter)

				for (let i = 1; i < m.alleles.length; i++) {
					const ms = records[i]
					ms.GT = value
					ms.genotype = genotype
					if (gtallref) ms.gtallref = true
					// for mds vcf to drop out samples that do not have this alt allele
					ms.__gtalleles = gtalleles
				}
				continue
			}

			// other data fields
			const formatdesc = vcf.format ? vcf.format[field] : null
			if (formatdesc) {
				const convert =
					formatdesc.Type == 'Integer'
						? v => Number.parseInt(v, 10)
						: formatdesc.Type == 'Float'
						? v => Number.parseFloat(v)
						: v => v

				if (formatdesc.Number == 'R' || field == 'AD') {
					// per-allele value, including ref
					// v4.1 has AD not with "R", must process as R
					const fvlst = value.split(',').map(convert)
					for (let i = 1; i < m.alleles.length; i++) {
						if (fvlst[i] == undefined) continue
						// use this allele's ref/alt (after nt trimming)
						const ale = m.alleles[i]
						const peralele = {}
						peralele[ale.ref] = fvlst[0]
						peralele[ale.allele] = fvlst[i]
						records[i][field] = peralele
					}
					continue
				}

				if (formatdesc.Number == 'A') {
					// per alt-allele value, fvlst[0] belongs to the first alt allele
					const fvlst = value.split(',').map(convert)
					for (let i = 1; i < m.alleles.length; i++) {
						if (fvlst[i - 1] == undefined) continue
						const peralele = {}
						peralele[m.alleles[i].allele] = fvlst[i - 1]
						records[i][field] = peralele
					}
					continue
				}
			}

			// unspecified field, or not a per-allele one: put the raw value to all alt alleles
			for (let i = 1; i < m.alleles.length; i++) {
				records[i][field] = value
			}
		}
	}

	// compatible with old ds: make allele2readcount from AD
	for (const a of m.alleles) {
		for (const s of a.sampledata) {
			// AD is a raw string when the header does not declare it; only a per-allele object can be copied
			if (s.AD && typeof s.AD == 'object') {
				s.allele2readcount = Object.assign({}, s.AD)
			}
		}
	}
}
|
|
471
|
+
|
|
472
|
+
/**
 * Parse the body of a ##INFO / ##FORMAT meta line (the text between "<" and ">")
 * made of comma-joined key=value pairs, where a value may be enclosed in double quotes
 * (commas and "=" inside the quotes belong to the value).
 * The parsed pairs are registered as hash[ID].
 *
 * @param {string} s - e.g. ID=DP,Number=1,Type=Integer,Description="Total Depth"
 * @param {object} hash - receives the parsed object under the key given by its ID value
 * @returns {string|undefined} error message(s) joined by linebreak, or undefined if okay.
 *   'no ID' is returned when there is no ID value, in which case hash is not modified;
 *   for other errors the entry is still registered.
 */
function tohash(s, hash) {
	const h = {}
	const err = []
	let prev = 0 // start of the key or unquoted value being read
	let prevdoublequote = false // the value before the coming comma was quoted and is already stored
	let k = null // key waiting for its value

	for (let i = 0; i < s.length; i++) {
		const c = s[i]

		if (c == '"') {
			// quoted value: take everything up to the closing quote
			const start = i + 1
			let end = s.indexOf('"', start)
			if (end == -1) {
				// no closing quote: use the rest of the string instead of scanning past its end
				err.push('unterminated double quote')
				end = s.length
			}
			if (k) {
				h[k] = s.substring(start, end)
				k = null
			} else {
				err.push('k undefined before double quotes')
			}
			prevdoublequote = true
			i = end
			continue
		}

		if (c == '=') {
			k = s.substring(prev, i)
			prev = i + 1
			continue
		}

		if (c == ',') {
			if (prevdoublequote) {
				prevdoublequote = false
			} else if (k) {
				h[k] = s.substring(prev, i)
				k = null
			} else {
				err.push('k undefined')
			}
			prev = i + 1
		}
	}

	if (k) {
		// last value is not quoted and runs to the end of the string
		h[k] = s.substring(prev)
	}

	if (!h.ID) return 'no ID'
	hash[h.ID] = h
	if (err.length) return err.join('\n')
}
|
|
527
|
+
|
|
528
|
+
/**
 * Apply the INFO declarations from the vcf header to the INFO values of one line.
 * This function fills in both m.info{} and m.alleles[].info{}
 *
 * the m.alleles[] will later be converted to [m], each carrying one alt allele
 * each m will have .info{} for locus info, and .altinfo{} for alt allele info
 *
 * @param {object} tmp - key -> raw value of the INFO column of this line
 * @param {object} m - variant being built; m.alleles[] is indexed per alt allele here (Number=A values
 *   are assigned to m.alleles[0], [1], ... in order)
 * @param {object} vcf - vcf.info holds the ##INFO declarations (.Number, .Type; .csqheader / .annheader)
 * @returns {string[]} keys found in tmp that are not declared in vcf.info
 */
function parse_INFO(tmp, m, vcf) {
	const badinfokeys = []

	for (const key in tmp) {
		const desc = vcf.info[key]
		if (desc == undefined) {
			badinfokeys.push(key)
			continue
		}

		const value = tmp[key]

		////////////////// hard-coded fields

		if (key == 'CSQ') {
			// keep the raw value when it cannot be parsed
			if (!parse_CSQ(value, vcf.info.CSQ.csqheader, m)) m.info[key] = value
			continue
		}
		if (key == 'ANN') {
			if (!parse_ANN(value, vcf.info.ANN.annheader, m)) m.info[key] = value
			continue
		}

		////////////////// end of hardcoded fields

		if (desc.Type == 'Flag' || desc.Number == '0') {
			// flag has no value; Number=0 should be a Flag as well
			m.info[key] = key
			continue
		}

		// explicit one-argument converters: passing parseInt straight to map()
		// would receive the array index as radix
		const convert =
			desc.Type == 'Integer'
				? v => Number.parseInt(v, 10)
				: desc.Type == 'Float'
				? v => Number.parseFloat(v)
				: v => v

		if (desc.Number == 'A') {
			// per alt allele
			const tt = value.split(',')
			for (let j = 0; j < tt.length; j++) {
				if (m.alleles[j]) m.alleles[j].info[key] = convert(tt[j])
			}
			continue
		}

		/*
		FIXME "R" is not considered, m.alleles only contain alt, which .info{} for each
		the current datastructure does not support info for ref allele!
		such a field falls through and is stored on the locus as a list of values
		*/

		if (desc.Number == '1') {
			// single value
			m.info[key] = convert(value)
			continue
		}

		if (!value.split) {
			// not a string, unknown error
			continue
		}

		// number of values unknown, "commas are permitted only as delimiters for lists of values"
		// value is always array!!
		m.info[key] = value.split(',').map(convert)
	}

	return badinfokeys
}
|
package/src/vcf.type.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { mclassdeletion, mclasssnv, mclassmnv, mclassinsertion, mclassnonstandard } from './common.js'
|
|
2
|
+
|
|
3
|
+
/**
 * Tell the mclass of a variant by comparing the lengths of its ref and alt alleles.
 *
 * @param {string} ref - reference allele
 * @param {string} alt - alternative allele
 * @returns one of mclasssnv, mclassmnv, mclassinsertion, mclassdeletion, mclassnonstandard
 */
export function getVariantType(ref, alt) {
	const reflen = ref.length
	const altlen = alt.length

	if (reflen == 1 && altlen == 1) {
		// both alleles length of 1: a "." alt means alt is missing, otherwise it is a snv
		return alt == '.' ? mclassdeletion : mclasssnv
	}

	if (reflen == altlen) return mclassmnv
	// FIXME only when empty length of one allele
	if (reflen < altlen) return mclassinsertion
	if (reflen > altlen) return mclassdeletion
	return mclassnonstandard
}
|