@gmod/bam 1.1.13 → 1.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/record.ts ADDED
@@ -0,0 +1,610 @@
1
+ /* eslint-disable @typescript-eslint/no-empty-function */
2
+ import Constants from './constants'
3
+
4
/** Lookup table: 4-bit packed base code (array index) -> base letter; used by `seq()`. */
const SEQRET_DECODER = '=ACMGRSVTWYHKDBN'.split('')

/** Lookup table: low 4 bits of a packed CIGAR op (array index) -> op letter. */
const CIGAR_DECODER = 'MIDNSHP=X???????'.split('')

/** Offset of the read name relative to the record start (fixed fields sit below it). */
const READ_NAME_OFFSET = 36

/**
 * Element readers for `B` (array) tags, keyed by the one-character element
 * subtype. `size` is the number of bytes consumed per element.
 */
const ARRAY_ELEMENT_READERS: {
  [subtype: string]:
    | { size: number; read: (buf: Buffer, offset: number) => number }
    | undefined
} = {
  c: { size: 1, read: (buf, offset) => buf.readInt8(offset) },
  C: { size: 1, read: (buf, offset) => buf.readUInt8(offset) },
  s: { size: 2, read: (buf, offset) => buf.readInt16LE(offset) },
  S: { size: 2, read: (buf, offset) => buf.readUInt16LE(offset) },
  i: { size: 4, read: (buf, offset) => buf.readInt32LE(offset) },
  I: { size: 4, read: (buf, offset) => buf.readUInt32LE(offset) },
  f: { size: 4, read: (buf, offset) => buf.readFloatLE(offset) },
}

/**
 * Decode the payload of a `B` (array) tag.
 *
 * @param buf - buffer holding the record
 * @param offset - position of the element-subtype byte (just after the `B`)
 * @param tag - two-character tag name; an integer `CG` array is rendered as a
 *   CIGAR string instead of a comma-separated list
 * @returns the rendered value and the offset of the first byte after the array
 */
function readArrayTag(
  buf: Buffer,
  offset: number,
  tag: string,
): { value: string; next: number } {
  const subtype = String.fromCharCode(buf[offset])
  const count = buf.readInt32LE(offset + 1)
  let next = offset + 5

  const reader = ARRAY_ELEMENT_READERS[subtype]
  if (!reader) {
    // Unrecognised element subtype: yield an empty value and leave the cursor
    // just past the element count.
    return { value: '', next }
  }

  const asCigar = tag === 'CG' && (subtype === 'i' || subtype === 'I')
  const parts: string[] = []
  for (let k = 0; k < count; k++) {
    const n = reader.read(buf, next)
    // `>>> 4` keeps op lengths non-negative even when bit 31 of the packed
    // op is set.
    parts.push(asCigar ? `${n >>> 4}${CIGAR_DECODER[n & 0xf]}` : String(n))
    next += reader.size
  }
  return { value: parts.join(asCigar ? '' : ','), next }
}

/**
 * Class of each BAM record returned by this API.
 *
 * A record is a lazily decoded view over `bytes.byteArray`: only the
 * reference id, start and flags are read in the constructor; every other
 * field is decoded on first access through `get()` and cached in `data`.
 */
export default class BamRecord {
  private data = {} as { [key: string]: any }
  private bytes: { start: number; end: number; byteArray: Buffer }
  private _id: number
  private _tagOffset: number | undefined
  private _tagList: string[] = []
  private _allTagsParsed = false

  public flags: any
  public _refID: number

  /**
   * @param args - object with `bytes` (`{ byteArray, start, end }` locating
   *   the record inside a buffer) and `fileOffset` (used as the record id)
   */
  constructor(args: any) {
    const { bytes, fileOffset } = args
    const { byteArray, start } = bytes
    this.data = {}
    this.bytes = bytes
    this._id = fileOffset
    this._refID = byteArray.readInt32LE(start + 4)
    this.data.start = byteArray.readInt32LE(start + 8)
    // Flags live in the upper 16 bits of the word at +16; the unsigned shift
    // keeps the value non-negative when the top bit is set.
    this.flags = (byteArray.readInt32LE(start + 16) & 0xffff0000) >>> 16
  }

  /**
   * Get a field or tag by name.
   *
   * If a method with exactly this name exists it is invoked and its result
   * cached (truthy results only); `'flags'` returns the flags value;
   * anything else is looked up case-insensitively among cached values and
   * tags.
   *
   * @param field - method name, `'flags'`, or tag name
   * @returns the value, or `undefined` if there is no such field or tag
   */
  get(field: string): any {
    const member = (this as any)[field]
    // Only invoke real methods: plain properties such as `flags` are truthy
    // too, and calling them would throw a TypeError.
    if (typeof member === 'function') {
      if (this.data[field]) {
        return this.data[field]
      }
      this.data[field] = member.call(this)
      return this.data[field]
    }
    if (field === 'flags') {
      return this.flags
    }
    return this._get(field.toLowerCase())
  }

  /** End coordinate: `start` plus `length_on_ref`. */
  end() {
    return this.get('start') + this.get('length_on_ref')
  }

  /** Reference sequence id read from the record. */
  seq_id() {
    return this._refID
  }

  // same as get(), except requires lower-case arguments. used
  // internally to save lots of calls to field.toLowerCase()
  _get(field: string) {
    if (!(field in this.data)) {
      this.data[field] = this._parseTag(field)
    }
    return this.data[field]
  }

  /**
   * List the field and tag names available on this record: a fixed set
   * (depending on mapped/paired state), every tag found in the record, and
   * every non-underscore key already cached. Names whose cached value is
   * `undefined`, and `CG`/`cg`, are dropped; duplicates are removed
   * case-insensitively, keeping the first spelling.
   */
  _tags() {
    this._parseAllTags()

    let tags = ['seq']

    if (!this.isSegmentUnmapped()) {
      tags.push(
        'start',
        'end',
        'strand',
        'score',
        'qual',
        'MQ',
        'CIGAR',
        'length_on_ref',
        'template_length',
      )
    }
    if (this.isPaired()) {
      tags.push('next_segment_position', 'pair_orientation')
    }
    tags = tags.concat(this._tagList || [])

    for (const key of Object.keys(this.data)) {
      if (key[0] !== '_' && key !== 'next_seq_id') {
        tags.push(key)
      }
    }

    const seen: { [key: string]: boolean } = {}
    return tags.filter(t => {
      const knownUndefined = t in this.data && this.data[t] === undefined
      if (knownUndefined || t === 'CG' || t === 'cg') {
        return false
      }
      const lower = t.toLowerCase()
      const duplicate = seen[lower]
      seen[lower] = true
      return !duplicate
    })
  }

  /** Always `undefined`. */
  parent() {
    return undefined
  }

  /** Value of the `subfeatures` field, if any. */
  children() {
    return this.get('subfeatures')
  }

  /** Record id (the `fileOffset` passed to the constructor). */
  id() {
    return this._id
  }

  // special parsers
  /**
   * Mapping quality score.
   *
   * @returns bits 8-15 of `_bin_mq_nl`, or `undefined` when that value is 255
   */
  mq() {
    const mq = (this.get('_bin_mq_nl') & 0xff00) >> 8
    return mq === 255 ? undefined : mq
  }

  /** Alias for the `mq` field. */
  score() {
    return this.get('mq')
  }

  /** Quality values joined with single spaces, or `undefined` if unmapped. */
  qual() {
    return this.qualRaw()?.join(' ')
  }

  /**
   * Raw per-base quality bytes.
   *
   * @returns a view (no copy) of `seq_length` bytes following the packed
   *   sequence, or `undefined` if the segment is unmapped
   */
  qualRaw() {
    if (this.isSegmentUnmapped()) {
      return undefined
    }

    const { start, byteArray } = this.bytes
    const p =
      start +
      READ_NAME_OFFSET +
      this.get('_l_read_name') +
      this.get('_n_cigar_op') * 4 +
      this.get('_seq_bytes')
    const lseq = this.get('seq_length')
    return byteArray.subarray(p, p + lseq)
  }

  /** @returns -1 if the read is reverse complemented, otherwise 1 */
  strand() {
    return this.isReverseComplemented() ? -1 : 1
  }

  /** @returns mate strand (-1 or 1), or `undefined` if the mate is unmapped */
  multi_segment_next_segment_strand() {
    if (this.isMateUnmapped()) {
      return undefined
    }
    return this.isMateReverseComplemented() ? -1 : 1
  }

  /** Read name. */
  name() {
    return this.get('_read_name')
  }

  /** Decode the read name as ASCII, excluding its final byte. */
  _read_name() {
    const nl = this.get('_l_read_name')
    const { byteArray, start } = this.bytes
    const from = start + READ_NAME_OFFSET
    return byteArray.toString('ascii', from, from + nl - 1)
  }

  /**
   * Get the value of a tag, parsing the tags as far as necessary.
   * Only called if we have not already parsed that field.
   *
   * Every tag passed over on the way is cached in `data` under its
   * lower-cased name, and parsing resumes from `_tagOffset` on the next call.
   *
   * @param tagName - lower-cased tag name to stop at
   * @returns the tag's value, or `undefined` if the record has no such tag
   */
  _parseTag(tagName?: string) {
    // if all of the tags have been parsed and we're still being
    // called, we already know that we have no such tag, because
    // it would already have been cached.
    if (this._allTagsParsed) {
      return undefined
    }

    const { byteArray, start } = this.bytes
    let p =
      this._tagOffset ||
      start +
        READ_NAME_OFFSET +
        this.get('_l_read_name') +
        this.get('_n_cigar_op') * 4 +
        this.get('_seq_bytes') +
        this.get('seq_length')

    const blockEnd = this.bytes.end
    let lcTag: string | undefined
    while (p < blockEnd && lcTag !== tagName) {
      const tag = String.fromCharCode(byteArray[p], byteArray[p + 1])
      lcTag = tag.toLowerCase()
      const type = String.fromCharCode(byteArray[p + 2])
      p += 3

      let value: string | number | undefined
      switch (type) {
        case 'A':
          value = String.fromCharCode(byteArray[p])
          p += 1
          break
        case 'i':
          value = byteArray.readInt32LE(p)
          p += 4
          break
        case 'I':
          value = byteArray.readUInt32LE(p)
          p += 4
          break
        case 'c':
          value = byteArray.readInt8(p)
          p += 1
          break
        case 'C':
          value = byteArray.readUInt8(p)
          p += 1
          break
        case 's':
          value = byteArray.readInt16LE(p)
          p += 2
          break
        case 'S':
          value = byteArray.readUInt16LE(p)
          p += 2
          break
        case 'f':
          value = byteArray.readFloatLE(p)
          p += 4
          break
        case 'Z':
        case 'H':
          // NUL-terminated string; the terminator is consumed but not kept
          value = ''
          while (p <= blockEnd) {
            const cc = byteArray[p++]
            if (cc === 0) {
              break
            }
            value += String.fromCharCode(cc)
          }
          break
        case 'B': {
          const parsed = readArrayTag(byteArray, p, tag)
          value = parsed.value
          p = parsed.next
          break
        }
        default:
          console.warn(`Unknown BAM tag type '${type}', tags may be incomplete`)
          value = undefined
          p = blockEnd // stop parsing tags
      }

      this._tagOffset = p

      this._tagList.push(tag)
      if (lcTag === tagName) {
        return value
      }

      this.data[lcTag] = value
    }
    this._allTagsParsed = true
    return undefined
  }

  /** Parse and cache every remaining tag (no tag name is ever the empty string). */
  _parseAllTags() {
    this._parseTag('')
  }

  /**
   * Split a CIGAR string into `[op, length]` pairs, upper-casing the op.
   *
   * @param cigar - CIGAR text such as `'3M2I'`
   * @returns e.g. `[['M', 3], ['I', 2]]`; an empty array when the string
   *   contains no `<digits><op>` group
   */
  _parseCigar(cigar: string) {
    // `match` returns null when nothing matches; treat that as "no ops".
    const ops = cigar.match(/\d+\D/g) || []
    return ops.map(op => [
      op.charAt(op.length - 1).toUpperCase(),
      parseInt(op, 10),
    ])
  }

  /**
   * @returns {boolean} true if the read is paired, regardless of whether both segments are mapped
   */
  isPaired() {
    return !!(this.flags & Constants.BAM_FPAIRED)
  }

  /** @returns {boolean} true if the read is paired, and both segments are mapped */
  isProperlyPaired() {
    return !!(this.flags & Constants.BAM_FPROPER_PAIR)
  }

  /** @returns {boolean} true if the read itself is unmapped; conflictive with isProperlyPaired */
  isSegmentUnmapped() {
    return !!(this.flags & Constants.BAM_FUNMAP)
  }

  /** @returns {boolean} true if the mate is unmapped; conflictive with isProperlyPaired */
  isMateUnmapped() {
    return !!(this.flags & Constants.BAM_FMUNMAP)
  }

  /** @returns {boolean} true if the read is mapped to the reverse strand */
  isReverseComplemented() {
    return !!(this.flags & Constants.BAM_FREVERSE)
  }

  /** @returns {boolean} true if the mate is mapped to the reverse strand */
  isMateReverseComplemented() {
    return !!(this.flags & Constants.BAM_FMREVERSE)
  }

  /** @returns {boolean} true if this is read number 1 in a pair */
  isRead1() {
    return !!(this.flags & Constants.BAM_FREAD1)
  }

  /** @returns {boolean} true if this is read number 2 in a pair */
  isRead2() {
    return !!(this.flags & Constants.BAM_FREAD2)
  }

  /** @returns {boolean} true if this is a secondary alignment */
  isSecondary() {
    return !!(this.flags & Constants.BAM_FSECONDARY)
  }

  /** @returns {boolean} true if this read has failed QC checks */
  isFailedQc() {
    return !!(this.flags & Constants.BAM_FQCFAIL)
  }

  /** @returns {boolean} true if the read is an optical or PCR duplicate */
  isDuplicate() {
    return !!(this.flags & Constants.BAM_FDUP)
  }

  /** @returns {boolean} true if this is a supplementary alignment */
  isSupplementary() {
    return !!(this.flags & Constants.BAM_FSUPPLEMENTARY)
  }

  /**
   * CIGAR string of the record. As a side effect stores the number of
   * reference bases covered in `data.length_on_ref`.
   *
   * @returns the CIGAR text (the `CG` tag when the inline CIGAR is the
   *   soft-clip placeholder), `''` when the record has no CIGAR ops, or
   *   `undefined` if the segment is unmapped
   */
  cigar() {
    if (this.isSegmentUnmapped()) {
      return undefined
    }

    const { byteArray, start } = this.bytes
    const numCigarOps = this.get('_n_cigar_op')
    let p = start + READ_NAME_OFFSET + this.get('_l_read_name')
    const seqLen = this.get('seq_length')

    // With zero ops there is nothing to decode; reading at `p` would
    // interpret sequence bytes as a CIGAR op or run off the buffer.
    if (numCigarOps === 0) {
      this.data.length_on_ref = 0
      return ''
    }

    // check for CG tag by inspecting whether the CIGAR field
    // contains a clip that consumes entire seqLen
    // (ops are read unsigned so lengths stay non-negative)
    let cigop = byteArray.readUInt32LE(p)
    let lop = cigop >>> 4
    let op = CIGAR_DECODER[cigop & 0xf]
    if (op === 'S' && lop === seqLen) {
      // if there is a CG the second CIGAR field will
      // be a N tag the represents the length on ref
      p += 4
      cigop = byteArray.readUInt32LE(p)
      lop = cigop >>> 4
      op = CIGAR_DECODER[cigop & 0xf]
      if (op !== 'N') {
        console.warn('CG tag with no N tag')
      }
      this.data.length_on_ref = lop
      return this.get('CG')
    }

    let cigar = ''
    let lref = 0
    for (let c = 0; c < numCigarOps; ++c) {
      cigop = byteArray.readUInt32LE(p)
      lop = cigop >>> 4
      op = CIGAR_DECODER[cigop & 0xf]
      cigar += lop + op

      // soft clip, hard clip, and insertion don't count toward
      // the length on the reference
      if (op !== 'H' && op !== 'S' && op !== 'I') {
        lref += lop
      }

      p += 4
    }

    this.data.length_on_ref = lref
    return cigar
  }

  _flags() {}

  /** Number of reference bases covered; computed by decoding the CIGAR. */
  length_on_ref() {
    if (!this.data.length_on_ref) {
      this.get('cigar') // the length_on_ref is set as a side effect
    }
    return this.data.length_on_ref
  }

  /** Number of CIGAR ops: low 16 bits of `_flag_nc`. */
  _n_cigar_op() {
    return this.get('_flag_nc') & 0xffff
  }

  /** Read-name length in bytes: low 8 bits of `_bin_mq_nl`. */
  _l_read_name() {
    return this.get('_bin_mq_nl') & 0xff
  }

  /**
   * number of bytes in the sequence field
   */
  _seq_bytes() {
    return (this.get('seq_length') + 1) >> 1
  }

  /** Same as `seq()`. */
  getReadBases() {
    return this.seq()
  }

  /**
   * Decode the packed sequence: two bases per byte, high nibble first, with
   * the trailing nibble ignored when `seq_length` is odd.
   */
  seq() {
    const { byteArray, start } = this.bytes
    const p =
      start +
      READ_NAME_OFFSET +
      this.get('_l_read_name') +
      this.get('_n_cigar_op') * 4
    const seqBytes = this.get('_seq_bytes')
    const len = this.get('seq_length')
    let bases = ''
    let decoded = 0
    for (let j = 0; j < seqBytes; ++j) {
      const packed = byteArray[p + j]
      bases += SEQRET_DECODER[(packed & 0xf0) >> 4]
      decoded++
      if (decoded < len) {
        bases += SEQRET_DECODER[packed & 0x0f]
        decoded++
      }
    }
    return bases
  }

  // adapted from igv.js
  /**
   * Orientation of a pair as a 4-character string such as `'F1R2'`: strand
   * (`F`/`R`) and read number (`1`/`2`, or a space if neither read flag is
   * set) for each segment; this segment comes first when the template length
   * is positive, otherwise the mate comes first.
   *
   * @returns the orientation, or `null` unless both segments are mapped to
   *   the same reference
   */
  getPairOrientation() {
    if (
      this.isSegmentUnmapped() ||
      this.isMateUnmapped() ||
      this._refID !== this._next_refid()
    ) {
      return null
    }

    const s1 = this.isReverseComplemented() ? 'R' : 'F'
    const s2 = this.isMateReverseComplemented() ? 'R' : 'F'
    let o1 = ' '
    let o2 = ' '
    if (this.isRead1()) {
      o1 = '1'
      o2 = '2'
    } else if (this.isRead2()) {
      o1 = '2'
      o2 = '1'
    }

    const self = s1 + o1
    const mate = s2 + o2
    return this.template_length() > 0 ? self + mate : mate + self
  }

  /** 32-bit word at record offset 12 (bits 8-15: mq, bits 0-7: read-name length). */
  _bin_mq_nl() {
    return this.bytes.byteArray.readInt32LE(this.bytes.start + 12)
  }

  /** 32-bit word at record offset 16 (upper 16 bits: flags, lower 16: CIGAR op count). */
  _flag_nc() {
    return this.bytes.byteArray.readInt32LE(this.bytes.start + 16)
  }

  /** Number of bases in the read (32-bit value at record offset 20). */
  seq_length() {
    return this.bytes.byteArray.readInt32LE(this.bytes.start + 20)
  }

  /** 32-bit value at record offset 24; compared with `_refID` in `getPairOrientation()`. */
  _next_refid() {
    return this.bytes.byteArray.readInt32LE(this.bytes.start + 24)
  }

  /** 32-bit value at record offset 28. */
  _next_pos() {
    return this.bytes.byteArray.readInt32LE(this.bytes.start + 28)
  }

  /** Signed template length (32-bit value at record offset 32). */
  template_length() {
    return this.bytes.byteArray.readInt32LE(this.bytes.start + 32)
  }

  /**
   * Plain-object copy of this record's own enumerable properties, skipping
   * `bytes` and anything whose name starts with an underscore.
   */
  toJSON() {
    const data: { [key: string]: any } = {}
    for (const k of Object.keys(this)) {
      if (k.charAt(0) === '_' || k === 'bytes') {
        continue
      }
      data[k] = (this as any)[k]
    }
    return data
  }
}
package/src/sam.ts ADDED
@@ -0,0 +1,15 @@
1
/**
 * Parse SAM header text into a list of header lines.
 *
 * Lines are split on `\n` or `\r\n`, and each line on tabs. The first column
 * minus its leading character becomes the line's `tag`; every following
 * column is split at its FIRST colon into `{ tag, value }`, so values that
 * contain colons themselves (URLs, timestamps, command lines) are kept whole.
 * A column with no colon yields the whole column as `tag` and an undefined
 * `value`. Lines whose first column is empty are skipped.
 *
 * @param text - raw header text
 * @returns one entry per non-empty header line, in input order
 */
export function parseHeaderText(text: string) {
  const data: { tag: string; data: { tag: string; value: string }[] }[] = []
  for (const line of text.split(/\r?\n/)) {
    const [tag, ...fields] = line.split(/\t/)
    if (!tag) {
      continue
    }
    const parsedFields = fields.map(f => {
      // Split on the first colon only: `split(':', 2)` would silently drop
      // everything after a second colon.
      const colon = f.indexOf(':')
      const [fieldTag, value] =
        colon === -1 ? [f] : [f.slice(0, colon), f.slice(colon + 1)]
      return { tag: fieldTag, value }
    })
    data.push({ tag: tag.slice(1), data: parsedFields })
  }
  return data
}