@gmod/bam 7.1.3 → 7.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/record.ts CHANGED
@@ -1,3 +1,9 @@
1
+ import {
2
+ CIGAR_HARD_CLIP,
3
+ CIGAR_INS,
4
+ CIGAR_REF_SKIP,
5
+ CIGAR_SOFT_CLIP,
6
+ } from './cigar.ts'
1
7
  import Constants from './constants.ts'
2
8
 
3
9
  const SEQRET_DECODER = '=ACMGRSVTWYHKDBN'.split('')
@@ -5,19 +11,9 @@ const ASCII_CIGAR_CODES = [
5
11
  77, 73, 68, 78, 83, 72, 80, 61, 88, 63, 63, 63, 63, 63, 63, 63,
6
12
  ]
7
13
 
8
- // const CIGAR_MATCH = 0
9
- const CIGAR_INS = 1
10
- // const CIGAR_DEL = 2
11
- const CIGAR_REF_SKIP = 3
12
- const CIGAR_SOFT_CLIP = 4
13
- const CIGAR_HARD_CLIP = 5
14
- // const CIGAR_PAD = 6
15
-
16
14
  // ops that don't consume reference: INS, SOFT_CLIP, HARD_CLIP
17
15
  const CIGAR_SKIP_MASK =
18
16
  (1 << CIGAR_INS) | (1 << CIGAR_SOFT_CLIP) | (1 << CIGAR_HARD_CLIP)
19
- // const CIGAR_EQUAL = 7
20
- // const CIGAR_DIFF = 8
21
17
 
22
18
  interface Bytes {
23
19
  start: number
@@ -25,11 +21,21 @@ interface Bytes {
25
21
  byteArray: Uint8Array
26
22
  }
27
23
 
24
+ interface CIGAR_AND_LENGTH {
25
+ length_on_ref: number
26
+ NUMERIC_CIGAR: Uint32Array
27
+ }
28
+
28
29
  export default class BamRecord {
29
30
  public fileOffset: number
30
31
  private bytes: Bytes
31
32
  private _dataView: DataView
32
33
 
34
+ private _cachedFlags?: number
35
+ private _cachedTags?: Record<string, unknown>
36
+ private _cachedCigarAndLength?: CIGAR_AND_LENGTH
37
+ private _cachedNUMERIC_MD?: Uint8Array | null
38
+
33
39
  constructor(args: { bytes: Bytes; fileOffset: number }) {
34
40
  this.bytes = args.bytes
35
41
  this.fileOffset = args.fileOffset
@@ -41,9 +47,12 @@ export default class BamRecord {
41
47
  }
42
48
 
43
49
  get flags() {
44
- return (
45
- (this._dataView.getInt32(this.bytes.start + 16, true) & 0xffff0000) >> 16
46
- )
50
+ if (this._cachedFlags === undefined) {
51
+ this._cachedFlags =
52
+ (this._dataView.getInt32(this.bytes.start + 16, true) & 0xffff0000) >>
53
+ 16
54
+ }
55
+ return this._cachedFlags
47
56
  }
48
57
  get ref_id() {
49
58
  return this._dataView.getInt32(this.bytes.start + 4, true)
@@ -72,15 +81,15 @@ export default class BamRecord {
72
81
 
73
82
  get qual() {
74
83
  if (this.isSegmentUnmapped()) {
75
- return
84
+ return null
85
+ } else {
86
+ const p =
87
+ this.b0 +
88
+ this.read_name_length +
89
+ this.num_cigar_bytes +
90
+ this.num_seq_bytes
91
+ return this.byteArray.subarray(p, p + this.seq_length)
76
92
  }
77
-
78
- const p =
79
- this.b0 +
80
- this.read_name_length +
81
- this.num_cigar_bytes +
82
- this.num_seq_bytes
83
- return this.byteArray.subarray(p, p + this.seq_length)
84
93
  }
85
94
 
86
95
  get strand() {
@@ -99,68 +108,80 @@ export default class BamRecord {
99
108
  }
100
109
 
101
110
  get NUMERIC_MD() {
102
- let p =
103
- this.b0 +
104
- this.read_name_length +
105
- this.num_cigar_bytes +
106
- this.num_seq_bytes +
107
- this.seq_length
108
-
109
- const blockEnd = this.bytes.end
110
- while (p < blockEnd) {
111
- const tag =
112
- String.fromCharCode(this.byteArray[p]!) +
113
- String.fromCharCode(this.byteArray[p + 1]!)
114
- const type = String.fromCharCode(this.byteArray[p + 2]!)
115
- p += 3
116
-
117
- if (tag === 'MD' && type === 'Z') {
118
- const start = p
119
- while (p < blockEnd && this.byteArray[p] !== 0) {
120
- p++
111
+ if (this._cachedNUMERIC_MD === undefined) {
112
+ let p =
113
+ this.b0 +
114
+ this.read_name_length +
115
+ this.num_cigar_bytes +
116
+ this.num_seq_bytes +
117
+ this.seq_length
118
+
119
+ const blockEnd = this.bytes.end
120
+ const ba = this.byteArray
121
+ while (p < blockEnd) {
122
+ const tag1 = ba[p]!
123
+ const tag2 = ba[p + 1]!
124
+ const type = ba[p + 2]!
125
+ p += 3
126
+
127
+ // 'M' = 0x4D, 'D' = 0x44, 'Z' = 0x5A
128
+ if (tag1 === 0x4d && tag2 === 0x44 && type === 0x5a) {
129
+ const start = p
130
+ while (p < blockEnd && ba[p] !== 0) {
131
+ p++
132
+ }
133
+ this._cachedNUMERIC_MD = ba.subarray(start, p)
121
134
  }
122
- return this.byteArray.subarray(start, p)
123
- }
124
135
 
125
- switch (type) {
126
- case 'A':
127
- p += 1
128
- break
129
- case 'i':
130
- case 'I':
131
- case 'f':
132
- p += 4
133
- break
134
- case 'c':
135
- case 'C':
136
- p += 1
137
- break
138
- case 's':
139
- case 'S':
140
- p += 2
141
- break
142
- case 'Z':
143
- case 'H':
144
- while (p <= blockEnd && this.byteArray[p++] !== 0) {}
145
- break
146
- case 'B': {
147
- const Btype = String.fromCharCode(this.byteArray[p++]!)
148
- const limit = this._dataView.getInt32(p, true)
149
- p += 4
150
- if (Btype === 'i' || Btype === 'I' || Btype === 'f') {
151
- p += limit << 2
152
- } else if (Btype === 's' || Btype === 'S') {
153
- p += limit << 1
154
- } else if (Btype === 'c' || Btype === 'C') {
155
- p += limit
136
+ switch (type) {
137
+ case 0x41: // 'A'
138
+ p += 1
139
+ break
140
+ case 0x69: // 'i'
141
+ case 0x49: // 'I'
142
+ case 0x66: // 'f'
143
+ p += 4
144
+ break
145
+ case 0x63: // 'c'
146
+ case 0x43: // 'C'
147
+ p += 1
148
+ break
149
+ case 0x73: // 's'
150
+ case 0x53: // 'S'
151
+ p += 2
152
+ break
153
+ case 0x5a: // 'Z'
154
+ case 0x48: // 'H'
155
+ while (p <= blockEnd && ba[p++] !== 0) {}
156
+ break
157
+ case 0x42: {
158
+ // 'B'
159
+ const Btype = ba[p++]!
160
+ const limit = this._dataView.getInt32(p, true)
161
+ p += 4
162
+ if (Btype === 0x69 || Btype === 0x49 || Btype === 0x66) {
163
+ p += limit << 2
164
+ } else if (Btype === 0x73 || Btype === 0x53) {
165
+ p += limit << 1
166
+ } else if (Btype === 0x63 || Btype === 0x43) {
167
+ p += limit
168
+ }
169
+ break
156
170
  }
157
- break
158
171
  }
159
172
  }
160
173
  }
161
- return undefined
174
+ return this._cachedNUMERIC_MD === null ? undefined : this._cachedNUMERIC_MD
162
175
  }
176
+
163
177
  get tags() {
178
+ if (this._cachedTags === undefined) {
179
+ this._cachedTags = this._computeTags()
180
+ }
181
+ return this._cachedTags
182
+ }
183
+
184
+ private _computeTags() {
164
185
  let p =
165
186
  this.b0 +
166
187
  this.read_name_length +
@@ -169,52 +190,52 @@ export default class BamRecord {
169
190
  this.seq_length
170
191
 
171
192
  const blockEnd = this.bytes.end
193
+ const ba = this.byteArray
172
194
  const tags = {} as Record<string, unknown>
173
195
  while (p < blockEnd) {
174
- const tag =
175
- String.fromCharCode(this.byteArray[p]!) +
176
- String.fromCharCode(this.byteArray[p + 1]!)
177
- const type = String.fromCharCode(this.byteArray[p + 2]!)
196
+ const tag = String.fromCharCode(ba[p]!, ba[p + 1]!)
197
+ const type = ba[p + 2]!
178
198
  p += 3
179
199
 
180
200
  switch (type) {
181
- case 'A':
182
- tags[tag] = String.fromCharCode(this.byteArray[p]!)
201
+ case 0x41: // 'A'
202
+ tags[tag] = String.fromCharCode(ba[p]!)
183
203
  p += 1
184
204
  break
185
- case 'i':
205
+ case 0x69: // 'i'
186
206
  tags[tag] = this._dataView.getInt32(p, true)
187
207
  p += 4
188
208
  break
189
- case 'I':
209
+ case 0x49: // 'I'
190
210
  tags[tag] = this._dataView.getUint32(p, true)
191
211
  p += 4
192
212
  break
193
- case 'c':
213
+ case 0x63: // 'c'
194
214
  tags[tag] = this._dataView.getInt8(p)
195
215
  p += 1
196
216
  break
197
- case 'C':
217
+ case 0x43: // 'C'
198
218
  tags[tag] = this._dataView.getUint8(p)
199
219
  p += 1
200
220
  break
201
- case 's':
221
+ case 0x73: // 's'
202
222
  tags[tag] = this._dataView.getInt16(p, true)
203
223
  p += 2
204
224
  break
205
- case 'S':
225
+ case 0x53: // 'S'
206
226
  tags[tag] = this._dataView.getUint16(p, true)
207
227
  p += 2
208
228
  break
209
- case 'f':
229
+ case 0x66: // 'f'
210
230
  tags[tag] = this._dataView.getFloat32(p, true)
211
231
  p += 4
212
232
  break
213
- case 'Z':
214
- case 'H': {
233
+ case 0x5a: // 'Z'
234
+ case 0x48: {
235
+ // 'H'
215
236
  const value = []
216
237
  while (p <= blockEnd) {
217
- const cc = this.byteArray[p++]!
238
+ const cc = ba[p++]!
218
239
  if (cc !== 0) {
219
240
  value.push(String.fromCharCode(cc))
220
241
  } else {
@@ -224,75 +245,62 @@ export default class BamRecord {
224
245
  tags[tag] = value.join('')
225
246
  break
226
247
  }
227
- case 'B': {
228
- const cc = this.byteArray[p++]!
229
- const Btype = String.fromCharCode(cc)
248
+ case 0x42: {
249
+ // 'B'
250
+ const Btype = ba[p++]!
230
251
  const limit = this._dataView.getInt32(p, true)
231
252
  p += 4
232
- const absOffset = this.byteArray.byteOffset + p
233
- if (Btype === 'i') {
253
+ const absOffset = ba.byteOffset + p
254
+ if (Btype === 0x69) {
255
+ // 'i'
234
256
  if (absOffset % 4 === 0) {
235
- tags[tag] = new Int32Array(
236
- this.byteArray.buffer,
237
- absOffset,
238
- limit,
239
- )
257
+ tags[tag] = new Int32Array(ba.buffer, absOffset, limit)
240
258
  } else {
241
- const bytes = this.byteArray.slice(p, p + (limit << 2))
259
+ const bytes = ba.slice(p, p + (limit << 2))
242
260
  tags[tag] = new Int32Array(bytes.buffer, bytes.byteOffset, limit)
243
261
  }
244
262
  p += limit << 2
245
- } else if (Btype === 'I') {
263
+ } else if (Btype === 0x49) {
264
+ // 'I'
246
265
  if (absOffset % 4 === 0) {
247
- tags[tag] = new Uint32Array(
248
- this.byteArray.buffer,
249
- absOffset,
250
- limit,
251
- )
266
+ tags[tag] = new Uint32Array(ba.buffer, absOffset, limit)
252
267
  } else {
253
- const bytes = this.byteArray.slice(p, p + (limit << 2))
268
+ const bytes = ba.slice(p, p + (limit << 2))
254
269
  tags[tag] = new Uint32Array(bytes.buffer, bytes.byteOffset, limit)
255
270
  }
256
271
  p += limit << 2
257
- } else if (Btype === 's') {
272
+ } else if (Btype === 0x73) {
273
+ // 's'
258
274
  if (absOffset % 2 === 0) {
259
- tags[tag] = new Int16Array(
260
- this.byteArray.buffer,
261
- absOffset,
262
- limit,
263
- )
275
+ tags[tag] = new Int16Array(ba.buffer, absOffset, limit)
264
276
  } else {
265
- const bytes = this.byteArray.slice(p, p + (limit << 1))
277
+ const bytes = ba.slice(p, p + (limit << 1))
266
278
  tags[tag] = new Int16Array(bytes.buffer, bytes.byteOffset, limit)
267
279
  }
268
280
  p += limit << 1
269
- } else if (Btype === 'S') {
281
+ } else if (Btype === 0x53) {
282
+ // 'S'
270
283
  if (absOffset % 2 === 0) {
271
- tags[tag] = new Uint16Array(
272
- this.byteArray.buffer,
273
- absOffset,
274
- limit,
275
- )
284
+ tags[tag] = new Uint16Array(ba.buffer, absOffset, limit)
276
285
  } else {
277
- const bytes = this.byteArray.slice(p, p + (limit << 1))
286
+ const bytes = ba.slice(p, p + (limit << 1))
278
287
  tags[tag] = new Uint16Array(bytes.buffer, bytes.byteOffset, limit)
279
288
  }
280
289
  p += limit << 1
281
- } else if (Btype === 'c') {
282
- tags[tag] = new Int8Array(this.byteArray.buffer, absOffset, limit)
290
+ } else if (Btype === 0x63) {
291
+ // 'c'
292
+ tags[tag] = new Int8Array(ba.buffer, absOffset, limit)
283
293
  p += limit
284
- } else if (Btype === 'C') {
285
- tags[tag] = new Uint8Array(this.byteArray.buffer, absOffset, limit)
294
+ } else if (Btype === 0x43) {
295
+ // 'C'
296
+ tags[tag] = new Uint8Array(ba.buffer, absOffset, limit)
286
297
  p += limit
287
- } else if (Btype === 'f') {
298
+ } else if (Btype === 0x66) {
299
+ // 'f'
288
300
  if (absOffset % 4 === 0) {
289
- tags[tag] = new Float32Array(
290
- this.byteArray.buffer,
291
- absOffset,
292
- limit,
293
- )
301
+ tags[tag] = new Float32Array(ba.buffer, absOffset, limit)
294
302
  } else {
295
- const bytes = this.byteArray.slice(p, p + (limit << 2))
303
+ const bytes = ba.slice(p, p + (limit << 2))
296
304
  tags[tag] = new Float32Array(
297
305
  bytes.buffer,
298
306
  bytes.byteOffset,
@@ -311,70 +319,62 @@ export default class BamRecord {
311
319
  return tags
312
320
  }
313
321
 
314
- /**
315
- * @returns {boolean} true if the read is paired, regardless of whether both
316
- * segments are mapped
317
- */
318
322
  isPaired() {
319
323
  return !!(this.flags & Constants.BAM_FPAIRED)
320
324
  }
321
325
 
322
- /** @returns {boolean} true if the read is paired, and both segments are mapped */
323
326
  isProperlyPaired() {
324
327
  return !!(this.flags & Constants.BAM_FPROPER_PAIR)
325
328
  }
326
329
 
327
- /** @returns {boolean} true if the read itself is unmapped; conflictive with isProperlyPaired */
328
330
  isSegmentUnmapped() {
329
331
  return !!(this.flags & Constants.BAM_FUNMAP)
330
332
  }
331
333
 
332
- /** @returns {boolean} true if the read itself is unmapped; conflictive with isProperlyPaired */
333
334
  isMateUnmapped() {
334
335
  return !!(this.flags & Constants.BAM_FMUNMAP)
335
336
  }
336
337
 
337
- /** @returns {boolean} true if the read is mapped to the reverse strand */
338
338
  isReverseComplemented() {
339
339
  return !!(this.flags & Constants.BAM_FREVERSE)
340
340
  }
341
341
 
342
- /** @returns {boolean} true if the mate is mapped to the reverse strand */
343
342
  isMateReverseComplemented() {
344
343
  return !!(this.flags & Constants.BAM_FMREVERSE)
345
344
  }
346
345
 
347
- /** @returns {boolean} true if this is read number 1 in a pair */
348
346
  isRead1() {
349
347
  return !!(this.flags & Constants.BAM_FREAD1)
350
348
  }
351
349
 
352
- /** @returns {boolean} true if this is read number 2 in a pair */
353
350
  isRead2() {
354
351
  return !!(this.flags & Constants.BAM_FREAD2)
355
352
  }
356
353
 
357
- /** @returns {boolean} true if this is a secondary alignment */
358
354
  isSecondary() {
359
355
  return !!(this.flags & Constants.BAM_FSECONDARY)
360
356
  }
361
357
 
362
- /** @returns {boolean} true if this read has failed QC checks */
363
358
  isFailedQc() {
364
359
  return !!(this.flags & Constants.BAM_FQCFAIL)
365
360
  }
366
361
 
367
- /** @returns {boolean} true if the read is an optical or PCR duplicate */
368
362
  isDuplicate() {
369
363
  return !!(this.flags & Constants.BAM_FDUP)
370
364
  }
371
365
 
372
- /** @returns {boolean} true if this is a supplementary alignment */
373
366
  isSupplementary() {
374
367
  return !!(this.flags & Constants.BAM_FSUPPLEMENTARY)
375
368
  }
376
369
 
377
370
  get cigarAndLength() {
371
+ if (this._cachedCigarAndLength === undefined) {
372
+ this._cachedCigarAndLength = this._computeCigarAndLength()
373
+ }
374
+ return this._cachedCigarAndLength
375
+ }
376
+
377
+ private _computeCigarAndLength() {
378
378
  if (this.isSegmentUnmapped()) {
379
379
  return {
380
380
  length_on_ref: 0,
@@ -405,29 +405,28 @@ export default class BamRecord {
405
405
  NUMERIC_CIGAR: cgArray,
406
406
  length_on_ref: lop,
407
407
  }
408
- } else {
409
- const absOffset = this.byteArray.byteOffset + p
410
- const cigarView =
411
- absOffset % 4 === 0
412
- ? new Uint32Array(this.byteArray.buffer, absOffset, numCigarOps)
413
- : new Uint32Array(
414
- this.byteArray.slice(p, p + (numCigarOps << 2)).buffer,
415
- 0,
416
- numCigarOps,
417
- )
418
- let lref = 0
419
- for (let c = 0; c < numCigarOps; ++c) {
420
- const cigop = cigarView[c]!
421
- const op = cigop & 0xf
422
- if (!((1 << op) & CIGAR_SKIP_MASK)) {
423
- lref += cigop >> 4
424
- }
408
+ }
409
+ const absOffset = this.byteArray.byteOffset + p
410
+ const cigarView =
411
+ absOffset % 4 === 0
412
+ ? new Uint32Array(this.byteArray.buffer, absOffset, numCigarOps)
413
+ : new Uint32Array(
414
+ this.byteArray.slice(p, p + (numCigarOps << 2)).buffer,
415
+ 0,
416
+ numCigarOps,
417
+ )
418
+ let lref = 0
419
+ for (let c = 0; c < numCigarOps; ++c) {
420
+ const cigop = cigarView[c]!
421
+ const op = cigop & 0xf
422
+ if (!((1 << op) & CIGAR_SKIP_MASK)) {
423
+ lref += cigop >> 4
425
424
  }
425
+ }
426
426
 
427
- return {
428
- NUMERIC_CIGAR: cigarView,
429
- length_on_ref: lref,
430
- }
427
+ return {
428
+ NUMERIC_CIGAR: cigarView,
429
+ length_on_ref: lref,
431
430
  }
432
431
  }
433
432
 
@@ -469,12 +468,7 @@ export default class BamRecord {
469
468
 
470
469
  get NUMERIC_SEQ() {
471
470
  const p = this.b0 + this.read_name_length + this.num_cigar_bytes
472
- const seqBytes = this.byteArray.subarray(p, p + this.num_seq_bytes)
473
- return new Uint8Array(
474
- seqBytes.buffer,
475
- seqBytes.byteOffset,
476
- this.num_seq_bytes,
477
- )
471
+ return this.byteArray.subarray(p, p + this.num_seq_bytes)
478
472
  }
479
473
 
480
474
  get seq() {
@@ -588,28 +582,3 @@ export default class BamRecord {
588
582
  return data
589
583
  }
590
584
  }
591
-
592
- function cacheGetter<T>(ctor: { prototype: T }, prop: keyof T): void {
593
- const desc = Object.getOwnPropertyDescriptor(ctor.prototype, prop)
594
- if (!desc) {
595
- throw new Error('OH NO, NO PROPERTY DESCRIPTOR')
596
- }
597
- // eslint-disable-next-line @typescript-eslint/unbound-method
598
- const getter = desc.get
599
- if (!getter) {
600
- throw new Error('OH NO, NOT A GETTER')
601
- }
602
- Object.defineProperty(ctor.prototype, prop, {
603
- get() {
604
- const ret = getter.call(this)
605
- Object.defineProperty(this, prop, { value: ret })
606
- return ret
607
- },
608
- })
609
- }
610
-
611
- cacheGetter(BamRecord, 'tags')
612
- cacheGetter(BamRecord, 'cigarAndLength')
613
- cacheGetter(BamRecord, 'seq')
614
- cacheGetter(BamRecord, 'qual')
615
- cacheGetter(BamRecord, 'end')