@gmod/bam 7.1.11 → 7.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/record.ts CHANGED
@@ -1,9 +1,4 @@
1
- import {
2
- CIGAR_HARD_CLIP,
3
- CIGAR_INS,
4
- CIGAR_REF_SKIP,
5
- CIGAR_SOFT_CLIP,
6
- } from './cigar.ts'
1
+ import { CIGAR_REF_SKIP, CIGAR_SOFT_CLIP } from './cigar.ts'
7
2
  import Constants from './constants.ts'
8
3
 
9
4
  const SEQRET_DECODER = '=ACMGRSVTWYHKDBN'.split('')
@@ -11,9 +6,9 @@ const ASCII_CIGAR_CODES = [
11
6
  77, 73, 68, 78, 83, 72, 80, 61, 88, 63, 63, 63, 63, 63, 63, 63,
12
7
  ]
13
8
 
14
- // ops that don't consume reference: INS, SOFT_CLIP, HARD_CLIP
15
- const CIGAR_SKIP_MASK =
16
- (1 << CIGAR_INS) | (1 << CIGAR_SOFT_CLIP) | (1 << CIGAR_HARD_CLIP)
9
// Bitmask of CIGAR op codes that consume the reference, per the SAM spec:
// M=0, D=2, N=3, ==7, X=8.
// I=1, S=4, H=5 and P=6 (padding) do not advance the reference position, so
// their bits are clear; op codes 9-15 are undefined and are also clear.
// Binary: 0b110001101 = 0x18D
const CIGAR_CONSUMES_REF_MASK = 0x18d
17
12
 
18
13
  export interface Bytes {
19
14
  start: number
@@ -38,6 +33,7 @@ export default class BamRecord {
38
33
  private _cachedTags?: Record<string, unknown>
39
34
  private _cachedCigarAndLength?: CIGAR_AND_LENGTH
40
35
  private _cachedNUMERIC_MD?: Uint8Array | null
36
+ private _cachedTagsStart?: number
41
37
 
42
38
  constructor(args: { bytes: Bytes; fileOffset: number }) {
43
39
  this.bytes = args.bytes
@@ -107,6 +103,18 @@ export default class BamRecord {
107
103
  get b0() {
108
104
  return this.bytes.start + 36
109
105
  }
106
+
107
+ get tagsStart() {
108
+ if (this._cachedTagsStart === undefined) {
109
+ this._cachedTagsStart =
110
+ this.b0 +
111
+ this.read_name_length +
112
+ this.num_cigar_bytes +
113
+ this.num_seq_bytes +
114
+ this.seq_length
115
+ }
116
+ return this._cachedTagsStart
117
+ }
110
118
  // batch fromCharCode: fastest for typical name lengths (see benchmarks/string-building.bench.ts)
111
119
  get name() {
112
120
  const len = this.read_name_length - 1
@@ -121,67 +129,8 @@ export default class BamRecord {
121
129
 
122
130
  get NUMERIC_MD() {
123
131
  if (this._cachedNUMERIC_MD === undefined) {
124
- let p =
125
- this.b0 +
126
- this.read_name_length +
127
- this.num_cigar_bytes +
128
- this.num_seq_bytes +
129
- this.seq_length
130
-
131
- const blockEnd = this.bytes.end
132
- const ba = this.byteArray
133
- while (p < blockEnd) {
134
- const tag1 = ba[p]!
135
- const tag2 = ba[p + 1]!
136
- const type = ba[p + 2]!
137
- p += 3
138
-
139
- // 'M' = 0x4D, 'D' = 0x44, 'Z' = 0x5A
140
- if (tag1 === 0x4d && tag2 === 0x44 && type === 0x5a) {
141
- const start = p
142
- while (p < blockEnd && ba[p] !== 0) {
143
- p++
144
- }
145
- this._cachedNUMERIC_MD = ba.subarray(start, p)
146
- }
147
-
148
- switch (type) {
149
- case 0x41: // 'A'
150
- p += 1
151
- break
152
- case 0x69: // 'i'
153
- case 0x49: // 'I'
154
- case 0x66: // 'f'
155
- p += 4
156
- break
157
- case 0x63: // 'c'
158
- case 0x43: // 'C'
159
- p += 1
160
- break
161
- case 0x73: // 's'
162
- case 0x53: // 'S'
163
- p += 2
164
- break
165
- case 0x5a: // 'Z'
166
- case 0x48: // 'H'
167
- while (p <= blockEnd && ba[p++] !== 0) {}
168
- break
169
- case 0x42: {
170
- // 'B'
171
- const Btype = ba[p++]!
172
- const limit = this._dataView.getInt32(p, true)
173
- p += 4
174
- if (Btype === 0x69 || Btype === 0x49 || Btype === 0x66) {
175
- p += limit << 2
176
- } else if (Btype === 0x73 || Btype === 0x53) {
177
- p += limit << 1
178
- } else if (Btype === 0x63 || Btype === 0x43) {
179
- p += limit
180
- }
181
- break
182
- }
183
- }
184
- }
132
+ const result = this.getTagRaw('MD')
133
+ this._cachedNUMERIC_MD = result instanceof Uint8Array ? result : null
185
134
  }
186
135
  return this._cachedNUMERIC_MD === null ? undefined : this._cachedNUMERIC_MD
187
136
  }
@@ -193,13 +142,183 @@ export default class BamRecord {
193
142
  return this._cachedTags
194
143
  }
195
144
 
145
+ getTag(tagName: string) {
146
+ if (this._cachedTags !== undefined) {
147
+ return this._cachedTags[tagName]
148
+ }
149
+ return this._findTag(tagName, false)
150
+ }
151
+
152
+ getTagRaw(tagName: string) {
153
+ return this._findTag(tagName, true)
154
+ }
155
+
156
+ private _findTag(tagName: string, raw: boolean) {
157
+ const tag1 = tagName.charCodeAt(0)
158
+ const tag2 = tagName.charCodeAt(1)
159
+
160
+ let p = this.tagsStart
161
+
162
+ const blockEnd = this.bytes.end
163
+ const ba = this.byteArray
164
+ while (p < blockEnd) {
165
+ const currentTag1 = ba[p]!
166
+ const currentTag2 = ba[p + 1]!
167
+ const type = ba[p + 2]!
168
+ p += 3
169
+
170
+ const isMatch = currentTag1 === tag1 && currentTag2 === tag2
171
+
172
+ switch (type) {
173
+ case 0x41: // 'A'
174
+ if (isMatch) {
175
+ return String.fromCharCode(ba[p]!)
176
+ }
177
+ p += 1
178
+ break
179
+ case 0x69: // 'i'
180
+ if (isMatch) {
181
+ return this._dataView.getInt32(p, true)
182
+ }
183
+ p += 4
184
+ break
185
+ case 0x49: // 'I'
186
+ if (isMatch) {
187
+ return this._dataView.getUint32(p, true)
188
+ }
189
+ p += 4
190
+ break
191
+ case 0x63: // 'c'
192
+ if (isMatch) {
193
+ return this._dataView.getInt8(p)
194
+ }
195
+ p += 1
196
+ break
197
+ case 0x43: // 'C'
198
+ if (isMatch) {
199
+ return this._dataView.getUint8(p)
200
+ }
201
+ p += 1
202
+ break
203
+ case 0x73: // 's'
204
+ if (isMatch) {
205
+ return this._dataView.getInt16(p, true)
206
+ }
207
+ p += 2
208
+ break
209
+ case 0x53: // 'S'
210
+ if (isMatch) {
211
+ return this._dataView.getUint16(p, true)
212
+ }
213
+ p += 2
214
+ break
215
+ case 0x66: // 'f'
216
+ if (isMatch) {
217
+ return this._dataView.getFloat32(p, true)
218
+ }
219
+ p += 4
220
+ break
221
+ case 0x5a: // 'Z'
222
+ case 0x48: {
223
+ // 'H'
224
+ if (isMatch) {
225
+ const start = p
226
+ while (p < blockEnd && ba[p] !== 0) {
227
+ p++
228
+ }
229
+ if (raw) {
230
+ return ba.subarray(start, p)
231
+ }
232
+ const value = []
233
+ for (let i = start; i < p; i++) {
234
+ value.push(String.fromCharCode(ba[i]!))
235
+ }
236
+ return value.join('')
237
+ }
238
+ while (p <= blockEnd && ba[p++] !== 0) {}
239
+ break
240
+ }
241
+ case 0x42: {
242
+ // 'B'
243
+ const Btype = ba[p++]!
244
+ const limit = this._dataView.getInt32(p, true)
245
+ p += 4
246
+ const absOffset = ba.byteOffset + p
247
+ if (isMatch) {
248
+ if (Btype === 0x69) {
249
+ // 'i'
250
+ if (absOffset % 4 === 0) {
251
+ return new Int32Array(ba.buffer, absOffset, limit)
252
+ }
253
+ const arr: number[] = new Array(limit)
254
+ for (let i = 0; i < limit; i++) {
255
+ arr[i] = this._dataView.getInt32(p + i * 4, true)
256
+ }
257
+ return arr
258
+ } else if (Btype === 0x49) {
259
+ // 'I'
260
+ if (absOffset % 4 === 0) {
261
+ return new Uint32Array(ba.buffer, absOffset, limit)
262
+ }
263
+ const arr: number[] = new Array(limit)
264
+ for (let i = 0; i < limit; i++) {
265
+ arr[i] = this._dataView.getUint32(p + i * 4, true)
266
+ }
267
+ return arr
268
+ } else if (Btype === 0x73) {
269
+ // 's'
270
+ if (absOffset % 2 === 0) {
271
+ return new Int16Array(ba.buffer, absOffset, limit)
272
+ }
273
+ const arr: number[] = new Array(limit)
274
+ for (let i = 0; i < limit; i++) {
275
+ arr[i] = this._dataView.getInt16(p + i * 2, true)
276
+ }
277
+ return arr
278
+ } else if (Btype === 0x53) {
279
+ // 'S'
280
+ if (absOffset % 2 === 0) {
281
+ return new Uint16Array(ba.buffer, absOffset, limit)
282
+ }
283
+ const arr: number[] = new Array(limit)
284
+ for (let i = 0; i < limit; i++) {
285
+ arr[i] = this._dataView.getUint16(p + i * 2, true)
286
+ }
287
+ return arr
288
+ } else if (Btype === 0x63) {
289
+ // 'c'
290
+ return new Int8Array(ba.buffer, absOffset, limit)
291
+ } else if (Btype === 0x43) {
292
+ // 'C'
293
+ return new Uint8Array(ba.buffer, absOffset, limit)
294
+ } else if (Btype === 0x66) {
295
+ // 'f'
296
+ if (absOffset % 4 === 0) {
297
+ return new Float32Array(ba.buffer, absOffset, limit)
298
+ }
299
+ const arr: number[] = new Array(limit)
300
+ for (let i = 0; i < limit; i++) {
301
+ arr[i] = this._dataView.getFloat32(p + i * 4, true)
302
+ }
303
+ return arr
304
+ }
305
+ }
306
+ if (Btype === 0x69 || Btype === 0x49 || Btype === 0x66) {
307
+ p += limit << 2
308
+ } else if (Btype === 0x73 || Btype === 0x53) {
309
+ p += limit << 1
310
+ } else if (Btype === 0x63 || Btype === 0x43) {
311
+ p += limit
312
+ }
313
+ break
314
+ }
315
+ }
316
+ }
317
+ return undefined
318
+ }
319
+
196
320
  private _computeTags() {
197
- let p =
198
- this.b0 +
199
- this.read_name_length +
200
- this.num_cigar_bytes +
201
- this.num_seq_bytes +
202
- this.seq_length
321
+ let p = this.tagsStart
203
322
 
204
323
  const blockEnd = this.bytes.end
205
324
  const ba = this.byteArray
@@ -268,8 +387,11 @@ export default class BamRecord {
268
387
  if (absOffset % 4 === 0) {
269
388
  tags[tag] = new Int32Array(ba.buffer, absOffset, limit)
270
389
  } else {
271
- const bytes = ba.slice(p, p + (limit << 2))
272
- tags[tag] = new Int32Array(bytes.buffer, bytes.byteOffset, limit)
390
+ const arr: number[] = new Array(limit)
391
+ for (let i = 0; i < limit; i++) {
392
+ arr[i] = this._dataView.getInt32(p + i * 4, true)
393
+ }
394
+ tags[tag] = arr
273
395
  }
274
396
  p += limit << 2
275
397
  } else if (Btype === 0x49) {
@@ -277,8 +399,11 @@ export default class BamRecord {
277
399
  if (absOffset % 4 === 0) {
278
400
  tags[tag] = new Uint32Array(ba.buffer, absOffset, limit)
279
401
  } else {
280
- const bytes = ba.slice(p, p + (limit << 2))
281
- tags[tag] = new Uint32Array(bytes.buffer, bytes.byteOffset, limit)
402
+ const arr: number[] = new Array(limit)
403
+ for (let i = 0; i < limit; i++) {
404
+ arr[i] = this._dataView.getUint32(p + i * 4, true)
405
+ }
406
+ tags[tag] = arr
282
407
  }
283
408
  p += limit << 2
284
409
  } else if (Btype === 0x73) {
@@ -286,8 +411,11 @@ export default class BamRecord {
286
411
  if (absOffset % 2 === 0) {
287
412
  tags[tag] = new Int16Array(ba.buffer, absOffset, limit)
288
413
  } else {
289
- const bytes = ba.slice(p, p + (limit << 1))
290
- tags[tag] = new Int16Array(bytes.buffer, bytes.byteOffset, limit)
414
+ const arr: number[] = new Array(limit)
415
+ for (let i = 0; i < limit; i++) {
416
+ arr[i] = this._dataView.getInt16(p + i * 2, true)
417
+ }
418
+ tags[tag] = arr
291
419
  }
292
420
  p += limit << 1
293
421
  } else if (Btype === 0x53) {
@@ -295,8 +423,11 @@ export default class BamRecord {
295
423
  if (absOffset % 2 === 0) {
296
424
  tags[tag] = new Uint16Array(ba.buffer, absOffset, limit)
297
425
  } else {
298
- const bytes = ba.slice(p, p + (limit << 1))
299
- tags[tag] = new Uint16Array(bytes.buffer, bytes.byteOffset, limit)
426
+ const arr: number[] = new Array(limit)
427
+ for (let i = 0; i < limit; i++) {
428
+ arr[i] = this._dataView.getUint16(p + i * 2, true)
429
+ }
430
+ tags[tag] = arr
300
431
  }
301
432
  p += limit << 1
302
433
  } else if (Btype === 0x63) {
@@ -312,12 +443,11 @@ export default class BamRecord {
312
443
  if (absOffset % 4 === 0) {
313
444
  tags[tag] = new Float32Array(ba.buffer, absOffset, limit)
314
445
  } else {
315
- const bytes = ba.slice(p, p + (limit << 2))
316
- tags[tag] = new Float32Array(
317
- bytes.buffer,
318
- bytes.byteOffset,
319
- limit,
320
- )
446
+ const arr: number[] = new Array(limit)
447
+ for (let i = 0; i < limit; i++) {
448
+ arr[i] = this._dataView.getFloat32(p + i * 4, true)
449
+ }
450
+ tags[tag] = arr
321
451
  }
322
452
  p += limit << 2
323
453
  }
@@ -449,10 +579,7 @@ export default class BamRecord {
449
579
  let lref = 0
450
580
  for (let c = 0; c < numCigarOps; ++c) {
451
581
  const cigop = cigarView[c]!
452
- const op = cigop & 0xf
453
- if (!((1 << op) & CIGAR_SKIP_MASK)) {
454
- lref += cigop >> 4
455
- }
582
+ lref += (cigop >> 4) * ((CIGAR_CONSUMES_REF_MASK >> (cigop & 0xf)) & 1)
456
583
  }
457
584
  return {
458
585
  NUMERIC_CIGAR: cigarView,
@@ -465,10 +592,7 @@ export default class BamRecord {
465
592
  for (let c = 0; c < numCigarOps; ++c) {
466
593
  const cigop = this._dataView.getInt32(p + c * 4, true) | 0
467
594
  cigarArray[c] = cigop
468
- const op = (cigop & 0xf) | 0
469
- if (!((1 << op) & CIGAR_SKIP_MASK)) {
470
- lref = (lref + (cigop >> 4)) | 0
471
- }
595
+ lref += (cigop >> 4) * ((CIGAR_CONSUMES_REF_MASK >> (cigop & 0xf)) & 1)
472
596
  }
473
597
  return {
474
598
  NUMERIC_CIGAR: cigarArray,