@gmod/bam 7.1.4 → 7.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/record.ts CHANGED
@@ -1,3 +1,9 @@
1
+ import {
2
+ CIGAR_HARD_CLIP,
3
+ CIGAR_INS,
4
+ CIGAR_REF_SKIP,
5
+ CIGAR_SOFT_CLIP,
6
+ } from './cigar.ts'
1
7
  import Constants from './constants.ts'
2
8
 
3
9
  const SEQRET_DECODER = '=ACMGRSVTWYHKDBN'.split('')
@@ -5,31 +11,31 @@ const ASCII_CIGAR_CODES = [
5
11
  77, 73, 68, 78, 83, 72, 80, 61, 88, 63, 63, 63, 63, 63, 63, 63,
6
12
  ]
7
13
 
8
- // const CIGAR_MATCH = 0
9
- const CIGAR_INS = 1
10
- // const CIGAR_DEL = 2
11
- const CIGAR_REF_SKIP = 3
12
- const CIGAR_SOFT_CLIP = 4
13
- const CIGAR_HARD_CLIP = 5
14
- // const CIGAR_PAD = 6
15
-
16
14
  // ops that don't consume reference: INS, SOFT_CLIP, HARD_CLIP
17
15
  const CIGAR_SKIP_MASK =
18
16
  (1 << CIGAR_INS) | (1 << CIGAR_SOFT_CLIP) | (1 << CIGAR_HARD_CLIP)
19
- // const CIGAR_EQUAL = 7
20
- // const CIGAR_DIFF = 8
21
17
 
22
- interface Bytes {
18
+ export interface Bytes {
23
19
  start: number
24
20
  end: number
25
21
  byteArray: Uint8Array
26
22
  }
27
23
 
24
+ interface CIGAR_AND_LENGTH {
25
+ length_on_ref: number
26
+ NUMERIC_CIGAR: Uint32Array
27
+ }
28
+
28
29
  export default class BamRecord {
29
30
  public fileOffset: number
30
31
  private bytes: Bytes
31
32
  private _dataView: DataView
32
33
 
34
+ private _cachedFlags?: number
35
+ private _cachedTags?: Record<string, unknown>
36
+ private _cachedCigarAndLength?: CIGAR_AND_LENGTH
37
+ private _cachedNUMERIC_MD?: Uint8Array | null
38
+
33
39
  constructor(args: { bytes: Bytes; fileOffset: number }) {
34
40
  this.bytes = args.bytes
35
41
  this.fileOffset = args.fileOffset
@@ -41,9 +47,12 @@ export default class BamRecord {
41
47
  }
42
48
 
43
49
  get flags() {
44
- return (
45
- (this._dataView.getInt32(this.bytes.start + 16, true) & 0xffff0000) >> 16
46
- )
50
+ if (this._cachedFlags === undefined) {
51
+ this._cachedFlags =
52
+ (this._dataView.getInt32(this.bytes.start + 16, true) & 0xffff0000) >>
53
+ 16
54
+ }
55
+ return this._cachedFlags
47
56
  }
48
57
  get ref_id() {
49
58
  return this._dataView.getInt32(this.bytes.start + 4, true)
@@ -72,15 +81,15 @@ export default class BamRecord {
72
81
 
73
82
  get qual() {
74
83
  if (this.isSegmentUnmapped()) {
75
- return
84
+ return null
85
+ } else {
86
+ const p =
87
+ this.b0 +
88
+ this.read_name_length +
89
+ this.num_cigar_bytes +
90
+ this.num_seq_bytes
91
+ return this.byteArray.subarray(p, p + this.seq_length)
76
92
  }
77
-
78
- const p =
79
- this.b0 +
80
- this.read_name_length +
81
- this.num_cigar_bytes +
82
- this.num_seq_bytes
83
- return this.byteArray.subarray(p, p + this.seq_length)
84
93
  }
85
94
 
86
95
  get strand() {
@@ -99,69 +108,80 @@ export default class BamRecord {
99
108
  }
100
109
 
101
110
  get NUMERIC_MD() {
102
- let p =
103
- this.b0 +
104
- this.read_name_length +
105
- this.num_cigar_bytes +
106
- this.num_seq_bytes +
107
- this.seq_length
108
-
109
- const blockEnd = this.bytes.end
110
- const ba = this.byteArray
111
- while (p < blockEnd) {
112
- const tag1 = ba[p]!
113
- const tag2 = ba[p + 1]!
114
- const type = ba[p + 2]!
115
- p += 3
116
-
117
- // 'M' = 0x4D, 'D' = 0x44, 'Z' = 0x5A
118
- if (tag1 === 0x4d && tag2 === 0x44 && type === 0x5a) {
119
- const start = p
120
- while (p < blockEnd && ba[p] !== 0) {
121
- p++
111
+ if (this._cachedNUMERIC_MD === undefined) {
112
+ let p =
113
+ this.b0 +
114
+ this.read_name_length +
115
+ this.num_cigar_bytes +
116
+ this.num_seq_bytes +
117
+ this.seq_length
118
+
119
+ const blockEnd = this.bytes.end
120
+ const ba = this.byteArray
121
+ while (p < blockEnd) {
122
+ const tag1 = ba[p]!
123
+ const tag2 = ba[p + 1]!
124
+ const type = ba[p + 2]!
125
+ p += 3
126
+
127
+ // 'M' = 0x4D, 'D' = 0x44, 'Z' = 0x5A
128
+ if (tag1 === 0x4d && tag2 === 0x44 && type === 0x5a) {
129
+ const start = p
130
+ while (p < blockEnd && ba[p] !== 0) {
131
+ p++
132
+ }
133
+ this._cachedNUMERIC_MD = ba.subarray(start, p)
122
134
  }
123
- return ba.subarray(start, p)
124
- }
125
135
 
126
- switch (type) {
127
- case 0x41: // 'A'
128
- p += 1
129
- break
130
- case 0x69: // 'i'
131
- case 0x49: // 'I'
132
- case 0x66: // 'f'
133
- p += 4
134
- break
135
- case 0x63: // 'c'
136
- case 0x43: // 'C'
137
- p += 1
138
- break
139
- case 0x73: // 's'
140
- case 0x53: // 'S'
141
- p += 2
142
- break
143
- case 0x5a: // 'Z'
144
- case 0x48: // 'H'
145
- while (p <= blockEnd && ba[p++] !== 0) {}
146
- break
147
- case 0x42: { // 'B'
148
- const Btype = ba[p++]!
149
- const limit = this._dataView.getInt32(p, true)
150
- p += 4
151
- if (Btype === 0x69 || Btype === 0x49 || Btype === 0x66) {
152
- p += limit << 2
153
- } else if (Btype === 0x73 || Btype === 0x53) {
154
- p += limit << 1
155
- } else if (Btype === 0x63 || Btype === 0x43) {
156
- p += limit
136
+ switch (type) {
137
+ case 0x41: // 'A'
138
+ p += 1
139
+ break
140
+ case 0x69: // 'i'
141
+ case 0x49: // 'I'
142
+ case 0x66: // 'f'
143
+ p += 4
144
+ break
145
+ case 0x63: // 'c'
146
+ case 0x43: // 'C'
147
+ p += 1
148
+ break
149
+ case 0x73: // 's'
150
+ case 0x53: // 'S'
151
+ p += 2
152
+ break
153
+ case 0x5a: // 'Z'
154
+ case 0x48: // 'H'
155
+ while (p <= blockEnd && ba[p++] !== 0) {}
156
+ break
157
+ case 0x42: {
158
+ // 'B'
159
+ const Btype = ba[p++]!
160
+ const limit = this._dataView.getInt32(p, true)
161
+ p += 4
162
+ if (Btype === 0x69 || Btype === 0x49 || Btype === 0x66) {
163
+ p += limit << 2
164
+ } else if (Btype === 0x73 || Btype === 0x53) {
165
+ p += limit << 1
166
+ } else if (Btype === 0x63 || Btype === 0x43) {
167
+ p += limit
168
+ }
169
+ break
157
170
  }
158
- break
159
171
  }
160
172
  }
161
173
  }
162
- return undefined
174
+ return this._cachedNUMERIC_MD === null ? undefined : this._cachedNUMERIC_MD
163
175
  }
176
+
164
177
  get tags() {
178
+ if (this._cachedTags === undefined) {
179
+ this._cachedTags = this._computeTags()
180
+ }
181
+ return this._cachedTags
182
+ }
183
+
184
+ private _computeTags() {
165
185
  let p =
166
186
  this.b0 +
167
187
  this.read_name_length +
@@ -170,52 +190,52 @@ export default class BamRecord {
170
190
  this.seq_length
171
191
 
172
192
  const blockEnd = this.bytes.end
193
+ const ba = this.byteArray
173
194
  const tags = {} as Record<string, unknown>
174
195
  while (p < blockEnd) {
175
- const tag =
176
- String.fromCharCode(this.byteArray[p]!) +
177
- String.fromCharCode(this.byteArray[p + 1]!)
178
- const type = String.fromCharCode(this.byteArray[p + 2]!)
196
+ const tag = String.fromCharCode(ba[p]!, ba[p + 1]!)
197
+ const type = ba[p + 2]!
179
198
  p += 3
180
199
 
181
200
  switch (type) {
182
- case 'A':
183
- tags[tag] = String.fromCharCode(this.byteArray[p]!)
201
+ case 0x41: // 'A'
202
+ tags[tag] = String.fromCharCode(ba[p]!)
184
203
  p += 1
185
204
  break
186
- case 'i':
205
+ case 0x69: // 'i'
187
206
  tags[tag] = this._dataView.getInt32(p, true)
188
207
  p += 4
189
208
  break
190
- case 'I':
209
+ case 0x49: // 'I'
191
210
  tags[tag] = this._dataView.getUint32(p, true)
192
211
  p += 4
193
212
  break
194
- case 'c':
213
+ case 0x63: // 'c'
195
214
  tags[tag] = this._dataView.getInt8(p)
196
215
  p += 1
197
216
  break
198
- case 'C':
217
+ case 0x43: // 'C'
199
218
  tags[tag] = this._dataView.getUint8(p)
200
219
  p += 1
201
220
  break
202
- case 's':
221
+ case 0x73: // 's'
203
222
  tags[tag] = this._dataView.getInt16(p, true)
204
223
  p += 2
205
224
  break
206
- case 'S':
225
+ case 0x53: // 'S'
207
226
  tags[tag] = this._dataView.getUint16(p, true)
208
227
  p += 2
209
228
  break
210
- case 'f':
229
+ case 0x66: // 'f'
211
230
  tags[tag] = this._dataView.getFloat32(p, true)
212
231
  p += 4
213
232
  break
214
- case 'Z':
215
- case 'H': {
233
+ case 0x5a: // 'Z'
234
+ case 0x48: {
235
+ // 'H'
216
236
  const value = []
217
237
  while (p <= blockEnd) {
218
- const cc = this.byteArray[p++]!
238
+ const cc = ba[p++]!
219
239
  if (cc !== 0) {
220
240
  value.push(String.fromCharCode(cc))
221
241
  } else {
@@ -225,75 +245,62 @@ export default class BamRecord {
225
245
  tags[tag] = value.join('')
226
246
  break
227
247
  }
228
- case 'B': {
229
- const cc = this.byteArray[p++]!
230
- const Btype = String.fromCharCode(cc)
248
+ case 0x42: {
249
+ // 'B'
250
+ const Btype = ba[p++]!
231
251
  const limit = this._dataView.getInt32(p, true)
232
252
  p += 4
233
- const absOffset = this.byteArray.byteOffset + p
234
- if (Btype === 'i') {
253
+ const absOffset = ba.byteOffset + p
254
+ if (Btype === 0x69) {
255
+ // 'i'
235
256
  if (absOffset % 4 === 0) {
236
- tags[tag] = new Int32Array(
237
- this.byteArray.buffer,
238
- absOffset,
239
- limit,
240
- )
257
+ tags[tag] = new Int32Array(ba.buffer, absOffset, limit)
241
258
  } else {
242
- const bytes = this.byteArray.slice(p, p + (limit << 2))
259
+ const bytes = ba.slice(p, p + (limit << 2))
243
260
  tags[tag] = new Int32Array(bytes.buffer, bytes.byteOffset, limit)
244
261
  }
245
262
  p += limit << 2
246
- } else if (Btype === 'I') {
263
+ } else if (Btype === 0x49) {
264
+ // 'I'
247
265
  if (absOffset % 4 === 0) {
248
- tags[tag] = new Uint32Array(
249
- this.byteArray.buffer,
250
- absOffset,
251
- limit,
252
- )
266
+ tags[tag] = new Uint32Array(ba.buffer, absOffset, limit)
253
267
  } else {
254
- const bytes = this.byteArray.slice(p, p + (limit << 2))
268
+ const bytes = ba.slice(p, p + (limit << 2))
255
269
  tags[tag] = new Uint32Array(bytes.buffer, bytes.byteOffset, limit)
256
270
  }
257
271
  p += limit << 2
258
- } else if (Btype === 's') {
272
+ } else if (Btype === 0x73) {
273
+ // 's'
259
274
  if (absOffset % 2 === 0) {
260
- tags[tag] = new Int16Array(
261
- this.byteArray.buffer,
262
- absOffset,
263
- limit,
264
- )
275
+ tags[tag] = new Int16Array(ba.buffer, absOffset, limit)
265
276
  } else {
266
- const bytes = this.byteArray.slice(p, p + (limit << 1))
277
+ const bytes = ba.slice(p, p + (limit << 1))
267
278
  tags[tag] = new Int16Array(bytes.buffer, bytes.byteOffset, limit)
268
279
  }
269
280
  p += limit << 1
270
- } else if (Btype === 'S') {
281
+ } else if (Btype === 0x53) {
282
+ // 'S'
271
283
  if (absOffset % 2 === 0) {
272
- tags[tag] = new Uint16Array(
273
- this.byteArray.buffer,
274
- absOffset,
275
- limit,
276
- )
284
+ tags[tag] = new Uint16Array(ba.buffer, absOffset, limit)
277
285
  } else {
278
- const bytes = this.byteArray.slice(p, p + (limit << 1))
286
+ const bytes = ba.slice(p, p + (limit << 1))
279
287
  tags[tag] = new Uint16Array(bytes.buffer, bytes.byteOffset, limit)
280
288
  }
281
289
  p += limit << 1
282
- } else if (Btype === 'c') {
283
- tags[tag] = new Int8Array(this.byteArray.buffer, absOffset, limit)
290
+ } else if (Btype === 0x63) {
291
+ // 'c'
292
+ tags[tag] = new Int8Array(ba.buffer, absOffset, limit)
284
293
  p += limit
285
- } else if (Btype === 'C') {
286
- tags[tag] = new Uint8Array(this.byteArray.buffer, absOffset, limit)
294
+ } else if (Btype === 0x43) {
295
+ // 'C'
296
+ tags[tag] = new Uint8Array(ba.buffer, absOffset, limit)
287
297
  p += limit
288
- } else if (Btype === 'f') {
298
+ } else if (Btype === 0x66) {
299
+ // 'f'
289
300
  if (absOffset % 4 === 0) {
290
- tags[tag] = new Float32Array(
291
- this.byteArray.buffer,
292
- absOffset,
293
- limit,
294
- )
301
+ tags[tag] = new Float32Array(ba.buffer, absOffset, limit)
295
302
  } else {
296
- const bytes = this.byteArray.slice(p, p + (limit << 2))
303
+ const bytes = ba.slice(p, p + (limit << 2))
297
304
  tags[tag] = new Float32Array(
298
305
  bytes.buffer,
299
306
  bytes.byteOffset,
@@ -312,70 +319,62 @@ export default class BamRecord {
312
319
  return tags
313
320
  }
314
321
 
315
- /**
316
- * @returns {boolean} true if the read is paired, regardless of whether both
317
- * segments are mapped
318
- */
319
322
  isPaired() {
320
323
  return !!(this.flags & Constants.BAM_FPAIRED)
321
324
  }
322
325
 
323
- /** @returns {boolean} true if the read is paired, and both segments are mapped */
324
326
  isProperlyPaired() {
325
327
  return !!(this.flags & Constants.BAM_FPROPER_PAIR)
326
328
  }
327
329
 
328
- /** @returns {boolean} true if the read itself is unmapped; conflictive with isProperlyPaired */
329
330
  isSegmentUnmapped() {
330
331
  return !!(this.flags & Constants.BAM_FUNMAP)
331
332
  }
332
333
 
333
- /** @returns {boolean} true if the read itself is unmapped; conflictive with isProperlyPaired */
334
334
  isMateUnmapped() {
335
335
  return !!(this.flags & Constants.BAM_FMUNMAP)
336
336
  }
337
337
 
338
- /** @returns {boolean} true if the read is mapped to the reverse strand */
339
338
  isReverseComplemented() {
340
339
  return !!(this.flags & Constants.BAM_FREVERSE)
341
340
  }
342
341
 
343
- /** @returns {boolean} true if the mate is mapped to the reverse strand */
344
342
  isMateReverseComplemented() {
345
343
  return !!(this.flags & Constants.BAM_FMREVERSE)
346
344
  }
347
345
 
348
- /** @returns {boolean} true if this is read number 1 in a pair */
349
346
  isRead1() {
350
347
  return !!(this.flags & Constants.BAM_FREAD1)
351
348
  }
352
349
 
353
- /** @returns {boolean} true if this is read number 2 in a pair */
354
350
  isRead2() {
355
351
  return !!(this.flags & Constants.BAM_FREAD2)
356
352
  }
357
353
 
358
- /** @returns {boolean} true if this is a secondary alignment */
359
354
  isSecondary() {
360
355
  return !!(this.flags & Constants.BAM_FSECONDARY)
361
356
  }
362
357
 
363
- /** @returns {boolean} true if this read has failed QC checks */
364
358
  isFailedQc() {
365
359
  return !!(this.flags & Constants.BAM_FQCFAIL)
366
360
  }
367
361
 
368
- /** @returns {boolean} true if the read is an optical or PCR duplicate */
369
362
  isDuplicate() {
370
363
  return !!(this.flags & Constants.BAM_FDUP)
371
364
  }
372
365
 
373
- /** @returns {boolean} true if this is a supplementary alignment */
374
366
  isSupplementary() {
375
367
  return !!(this.flags & Constants.BAM_FSUPPLEMENTARY)
376
368
  }
377
369
 
378
370
  get cigarAndLength() {
371
+ if (this._cachedCigarAndLength === undefined) {
372
+ this._cachedCigarAndLength = this._computeCigarAndLength()
373
+ }
374
+ return this._cachedCigarAndLength
375
+ }
376
+
377
+ private _computeCigarAndLength() {
379
378
  if (this.isSegmentUnmapped()) {
380
379
  return {
381
380
  length_on_ref: 0,
@@ -406,29 +405,28 @@ export default class BamRecord {
406
405
  NUMERIC_CIGAR: cgArray,
407
406
  length_on_ref: lop,
408
407
  }
409
- } else {
410
- const absOffset = this.byteArray.byteOffset + p
411
- const cigarView =
412
- absOffset % 4 === 0
413
- ? new Uint32Array(this.byteArray.buffer, absOffset, numCigarOps)
414
- : new Uint32Array(
415
- this.byteArray.slice(p, p + (numCigarOps << 2)).buffer,
416
- 0,
417
- numCigarOps,
418
- )
419
- let lref = 0
420
- for (let c = 0; c < numCigarOps; ++c) {
421
- const cigop = cigarView[c]!
422
- const op = cigop & 0xf
423
- if (!((1 << op) & CIGAR_SKIP_MASK)) {
424
- lref += cigop >> 4
425
- }
408
+ }
409
+ const absOffset = this.byteArray.byteOffset + p
410
+ const cigarView =
411
+ absOffset % 4 === 0
412
+ ? new Uint32Array(this.byteArray.buffer, absOffset, numCigarOps)
413
+ : new Uint32Array(
414
+ this.byteArray.slice(p, p + (numCigarOps << 2)).buffer,
415
+ 0,
416
+ numCigarOps,
417
+ )
418
+ let lref = 0
419
+ for (let c = 0; c < numCigarOps; ++c) {
420
+ const cigop = cigarView[c]!
421
+ const op = cigop & 0xf
422
+ if (!((1 << op) & CIGAR_SKIP_MASK)) {
423
+ lref += cigop >> 4
426
424
  }
425
+ }
427
426
 
428
- return {
429
- NUMERIC_CIGAR: cigarView,
430
- length_on_ref: lref,
431
- }
427
+ return {
428
+ NUMERIC_CIGAR: cigarView,
429
+ length_on_ref: lref,
432
430
  }
433
431
  }
434
432
 
@@ -470,12 +468,7 @@ export default class BamRecord {
470
468
 
471
469
  get NUMERIC_SEQ() {
472
470
  const p = this.b0 + this.read_name_length + this.num_cigar_bytes
473
- const seqBytes = this.byteArray.subarray(p, p + this.num_seq_bytes)
474
- return new Uint8Array(
475
- seqBytes.buffer,
476
- seqBytes.byteOffset,
477
- this.num_seq_bytes,
478
- )
471
+ return this.byteArray.subarray(p, p + this.num_seq_bytes)
479
472
  }
480
473
 
481
474
  get seq() {
@@ -589,29 +582,3 @@ export default class BamRecord {
589
582
  return data
590
583
  }
591
584
  }
592
-
593
- function cacheGetter<T>(ctor: { prototype: T }, prop: keyof T): void {
594
- const desc = Object.getOwnPropertyDescriptor(ctor.prototype, prop)
595
- if (!desc) {
596
- throw new Error('OH NO, NO PROPERTY DESCRIPTOR')
597
- }
598
- // eslint-disable-next-line @typescript-eslint/unbound-method
599
- const getter = desc.get
600
- if (!getter) {
601
- throw new Error('OH NO, NOT A GETTER')
602
- }
603
- Object.defineProperty(ctor.prototype, prop, {
604
- get() {
605
- const ret = getter.call(this)
606
- Object.defineProperty(this, prop, { value: ret })
607
- return ret
608
- },
609
- })
610
- }
611
-
612
- cacheGetter(BamRecord, 'tags')
613
- cacheGetter(BamRecord, 'cigarAndLength')
614
- cacheGetter(BamRecord, 'seq')
615
- cacheGetter(BamRecord, 'qual')
616
- cacheGetter(BamRecord, 'end')
617
- cacheGetter(BamRecord, 'NUMERIC_MD')