@gmod/bam 7.1.11 → 7.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -1
- package/README.md +2 -0
- package/dist/record.d.ts +5 -0
- package/dist/record.js +219 -83
- package/dist/record.js.map +1 -1
- package/esm/record.d.ts +5 -0
- package/esm/record.js +220 -84
- package/esm/record.js.map +1 -1
- package/package.json +1 -1
- package/src/record.ts +222 -98
package/src/record.ts
CHANGED
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
import {
|
|
2
|
-
CIGAR_HARD_CLIP,
|
|
3
|
-
CIGAR_INS,
|
|
4
|
-
CIGAR_REF_SKIP,
|
|
5
|
-
CIGAR_SOFT_CLIP,
|
|
6
|
-
} from './cigar.ts'
|
|
1
|
+
import { CIGAR_REF_SKIP, CIGAR_SOFT_CLIP } from './cigar.ts'
|
|
7
2
|
import Constants from './constants.ts'
|
|
8
3
|
|
|
9
4
|
// 16-entry lookup table of single-character strings, index 0-15.
// NOTE(review): presumably indexed by the 4-bit packed base codes of a BAM
// sequence; the code that reads this table is outside this view - confirm.
const SEQRET_DECODER = Array.from('=ACMGRSVTWYHKDBN')
|
|
@@ -11,9 +6,9 @@ const ASCII_CIGAR_CODES = [
|
|
|
11
6
|
77, 73, 68, 78, 83, 72, 80, 61, 88, 63, 63, 63, 63, 63, 63, 63,
|
|
12
7
|
]
|
|
13
8
|
|
|
14
|
-
// ops that
|
|
15
|
-
|
|
16
|
-
|
|
9
|
+
// Bitmask of CIGAR op codes that consume the reference: bit `op` is set when
// an operation of that code advances the reference position, so
// `(CIGAR_CONSUMES_REF_MASK >> op) & 1` is 1 for those ops and 0 otherwise.
//   M=0, D=2, N=3, ==7, X=8  ->  0b110001101 = 0x18d
// P=6 (padding) is intentionally NOT set: the SAM specification lists it as
// consuming neither query nor reference, so counting it would inflate the
// reference length of reads from padded alignments.
const CIGAR_CONSUMES_REF_MASK = 0x18d
|
|
17
12
|
|
|
18
13
|
export interface Bytes {
|
|
19
14
|
start: number
|
|
@@ -38,6 +33,7 @@ export default class BamRecord {
|
|
|
38
33
|
private _cachedTags?: Record<string, unknown>
|
|
39
34
|
private _cachedCigarAndLength?: CIGAR_AND_LENGTH
|
|
40
35
|
private _cachedNUMERIC_MD?: Uint8Array | null
|
|
36
|
+
private _cachedTagsStart?: number
|
|
41
37
|
|
|
42
38
|
constructor(args: { bytes: Bytes; fileOffset: number }) {
|
|
43
39
|
this.bytes = args.bytes
|
|
@@ -107,6 +103,18 @@ export default class BamRecord {
|
|
|
107
103
|
/**
 * Byte offset, within the backing byte array, that lies 36 bytes past the
 * start of this record (`bytes.start`).
 *
 * NOTE(review): 36 matches the size of the fixed-length fields at the head of
 * a BAM alignment record (including the leading block_size), which would make
 * this the offset of the read name - confirm against the BAM specification.
 */
get b0() {
  const fixedFieldsLength = 36
  return this.bytes.start + fixedFieldsLength
}
|
|
106
|
+
|
|
107
|
+
/**
 * Byte offset at which scanning of this record's tags begins.
 *
 * Computed on first access as `b0` plus `read_name_length`,
 * `num_cigar_bytes`, `num_seq_bytes` and `seq_length`, then memoized in
 * `_cachedTagsStart` so repeated tag lookups do not redo the sum.
 *
 * NOTE(review): the final `seq_length` term presumably accounts for the
 * per-base quality bytes (one per base) - confirm against the BAM layout.
 */
get tagsStart() {
  const memoized = this._cachedTagsStart
  if (memoized !== undefined) {
    return memoized
  }
  const offset =
    this.b0 +
    this.read_name_length +
    this.num_cigar_bytes +
    this.num_seq_bytes +
    this.seq_length
  this._cachedTagsStart = offset
  return offset
}
|
|
110
118
|
// batch fromCharCode: fastest for typical name lengths (see benchmarks/string-building.bench.ts)
|
|
111
119
|
get name() {
|
|
112
120
|
const len = this.read_name_length - 1
|
|
@@ -121,67 +129,8 @@ export default class BamRecord {
|
|
|
121
129
|
|
|
122
130
|
get NUMERIC_MD() {
|
|
123
131
|
if (this._cachedNUMERIC_MD === undefined) {
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
this.read_name_length +
|
|
127
|
-
this.num_cigar_bytes +
|
|
128
|
-
this.num_seq_bytes +
|
|
129
|
-
this.seq_length
|
|
130
|
-
|
|
131
|
-
const blockEnd = this.bytes.end
|
|
132
|
-
const ba = this.byteArray
|
|
133
|
-
while (p < blockEnd) {
|
|
134
|
-
const tag1 = ba[p]!
|
|
135
|
-
const tag2 = ba[p + 1]!
|
|
136
|
-
const type = ba[p + 2]!
|
|
137
|
-
p += 3
|
|
138
|
-
|
|
139
|
-
// 'M' = 0x4D, 'D' = 0x44, 'Z' = 0x5A
|
|
140
|
-
if (tag1 === 0x4d && tag2 === 0x44 && type === 0x5a) {
|
|
141
|
-
const start = p
|
|
142
|
-
while (p < blockEnd && ba[p] !== 0) {
|
|
143
|
-
p++
|
|
144
|
-
}
|
|
145
|
-
this._cachedNUMERIC_MD = ba.subarray(start, p)
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
switch (type) {
|
|
149
|
-
case 0x41: // 'A'
|
|
150
|
-
p += 1
|
|
151
|
-
break
|
|
152
|
-
case 0x69: // 'i'
|
|
153
|
-
case 0x49: // 'I'
|
|
154
|
-
case 0x66: // 'f'
|
|
155
|
-
p += 4
|
|
156
|
-
break
|
|
157
|
-
case 0x63: // 'c'
|
|
158
|
-
case 0x43: // 'C'
|
|
159
|
-
p += 1
|
|
160
|
-
break
|
|
161
|
-
case 0x73: // 's'
|
|
162
|
-
case 0x53: // 'S'
|
|
163
|
-
p += 2
|
|
164
|
-
break
|
|
165
|
-
case 0x5a: // 'Z'
|
|
166
|
-
case 0x48: // 'H'
|
|
167
|
-
while (p <= blockEnd && ba[p++] !== 0) {}
|
|
168
|
-
break
|
|
169
|
-
case 0x42: {
|
|
170
|
-
// 'B'
|
|
171
|
-
const Btype = ba[p++]!
|
|
172
|
-
const limit = this._dataView.getInt32(p, true)
|
|
173
|
-
p += 4
|
|
174
|
-
if (Btype === 0x69 || Btype === 0x49 || Btype === 0x66) {
|
|
175
|
-
p += limit << 2
|
|
176
|
-
} else if (Btype === 0x73 || Btype === 0x53) {
|
|
177
|
-
p += limit << 1
|
|
178
|
-
} else if (Btype === 0x63 || Btype === 0x43) {
|
|
179
|
-
p += limit
|
|
180
|
-
}
|
|
181
|
-
break
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
}
|
|
132
|
+
const result = this.getTagRaw('MD')
|
|
133
|
+
this._cachedNUMERIC_MD = result instanceof Uint8Array ? result : null
|
|
185
134
|
}
|
|
186
135
|
return this._cachedNUMERIC_MD === null ? undefined : this._cachedNUMERIC_MD
|
|
187
136
|
}
|
|
@@ -193,13 +142,183 @@ export default class BamRecord {
|
|
|
193
142
|
return this._cachedTags
|
|
194
143
|
}
|
|
195
144
|
|
|
145
|
+
/**
 * Returns the value of a single tag.
 *
 * When the full tag map has already been built (`_cachedTags` is set) the
 * value is read from that map; otherwise only the requested tag is located
 * and decoded via `_findTag`, leaving the map unbuilt.
 *
 * @param tagName - name of the tag to look up, e.g. 'MD'
 * @returns the tag's value, or `undefined` when it is not present
 */
getTag(tagName: string) {
  const cache = this._cachedTags
  return cache === undefined ? this._findTag(tagName, false) : cache[tagName]
}
|
|
151
|
+
|
|
152
|
+
/**
 * Looks up a single tag via `_findTag` in raw mode, in which a matching
 * 'Z'/'H' tag is returned as a `Uint8Array` view of its bytes instead of a
 * decoded string. Always scans the tag bytes; the cached tag map is not
 * consulted.
 *
 * @param tagName - name of the tag to look up, e.g. 'MD'
 * @returns the tag's value, or `undefined` when it is not present
 */
getTagRaw(tagName: string) {
  const wantRawBytes = true
  return this._findTag(tagName, wantRawBytes)
}
|
|
155
|
+
|
|
156
|
+
/**
 * Scans this record's tag bytes for `tagName` and decodes only that tag,
 * without building the full tag map.
 *
 * The scan starts at `tagsStart` and walks entries laid out as two name
 * bytes, one type byte, then a payload whose size depends on the type. The
 * first entry whose name matches is decoded and returned.
 *
 * @param tagName - two-character tag name, e.g. 'MD'; any other length never
 *   matches
 * @param raw - when true, a matching 'Z'/'H' tag is returned as a
 *   `Uint8Array` view of its bytes (terminating NUL excluded) instead of a
 *   decoded string; it has no effect on other value types
 * @returns a one-character string for 'A'; a number for the scalar numeric
 *   types; a string or byte view for 'Z'/'H'; for 'B' a typed-array view, or
 *   a plain `number[]` copy when the payload is not aligned for a view;
 *   `undefined` when the tag is absent or the tag data cannot be parsed
 */
private _findTag(tagName: string, raw: boolean) {
  // Only two character codes are compared below. Without this guard a longer
  // name such as 'MDX' would match the 'MD' tag here, while the exact-key
  // lookup getTag performs on the cached tag map would not.
  if (tagName.length !== 2) {
    return undefined
  }
  const tag1 = tagName.charCodeAt(0)
  const tag2 = tagName.charCodeAt(1)

  let p = this.tagsStart

  const blockEnd = this.bytes.end
  const ba = this.byteArray
  const dv = this._dataView
  while (p < blockEnd) {
    const currentTag1 = ba[p]!
    const currentTag2 = ba[p + 1]!
    const type = ba[p + 2]!
    p += 3

    const isMatch = currentTag1 === tag1 && currentTag2 === tag2

    switch (type) {
      case 0x41: // 'A'
        if (isMatch) {
          return String.fromCharCode(ba[p]!)
        }
        p += 1
        break
      case 0x69: // 'i'
        if (isMatch) {
          return dv.getInt32(p, true)
        }
        p += 4
        break
      case 0x49: // 'I'
        if (isMatch) {
          return dv.getUint32(p, true)
        }
        p += 4
        break
      case 0x63: // 'c'
        if (isMatch) {
          return dv.getInt8(p)
        }
        p += 1
        break
      case 0x43: // 'C'
        if (isMatch) {
          return dv.getUint8(p)
        }
        p += 1
        break
      case 0x73: // 's'
        if (isMatch) {
          return dv.getInt16(p, true)
        }
        p += 2
        break
      case 0x53: // 'S'
        if (isMatch) {
          return dv.getUint16(p, true)
        }
        p += 2
        break
      case 0x66: // 'f'
        if (isMatch) {
          return dv.getFloat32(p, true)
        }
        p += 4
        break
      case 0x5a: // 'Z'
      case 0x48: {
        // 'H'
        if (isMatch) {
          const start = p
          while (p < blockEnd && ba[p] !== 0) {
            p++
          }
          if (raw) {
            return ba.subarray(start, p)
          }
          const chars: string[] = []
          for (let i = start; i < p; i++) {
            chars.push(String.fromCharCode(ba[i]!))
          }
          return chars.join('')
        }
        // not the requested tag: step past the string and its NUL terminator
        while (p <= blockEnd && ba[p++] !== 0) {}
        break
      }
      case 0x42: {
        // 'B': one subtype byte, a 32-bit element count, then the elements
        const Btype = ba[p++]!
        const limit = dv.getInt32(p, true)
        p += 4
        if (limit < 0) {
          // A negative count would move the cursor backwards (risking an
          // endless scan) or make the array constructors throw; treat the
          // tag data as unparseable.
          return undefined
        }
        if (isMatch) {
          return this._readTagArray(Btype, p, limit)
        }
        if (Btype === 0x69 || Btype === 0x49 || Btype === 0x66) {
          p += limit * 4
        } else if (Btype === 0x73 || Btype === 0x53) {
          p += limit * 2
        } else if (Btype === 0x63 || Btype === 0x43) {
          p += limit
        } else {
          // unknown element type: the payload size is unknowable, so the
          // following entries cannot be located reliably
          return undefined
        }
        break
      }
      default:
        // Unknown value type: its payload size is unknowable, and scanning on
        // would interpret payload bytes as tag headers and could report a
        // bogus match.
        return undefined
    }
  }
  return undefined
}

/**
 * Decodes the elements of a 'B' array tag.
 *
 * Returns a typed-array view over the record's underlying buffer when the
 * payload offset is suitably aligned for the element size, and otherwise a
 * plain `number[]` filled by little-endian reads through the DataView.
 *
 * @param Btype - element type byte ('c', 'C', 's', 'S', 'i', 'I' or 'f')
 * @param p - offset of the first element, in the same coordinates used for
 *   `byteArray` indexing and `_dataView` reads elsewhere in this class
 * @param limit - number of elements (non-negative)
 * @returns the decoded array, or `undefined` for an unknown element type
 */
private _readTagArray(Btype: number, p: number, limit: number) {
  const ba = this.byteArray
  const dv = this._dataView
  // typed-array views are addressed relative to the underlying ArrayBuffer
  const absOffset = ba.byteOffset + p
  switch (Btype) {
    case 0x69: // 'i'
      return absOffset % 4 === 0
        ? new Int32Array(ba.buffer, absOffset, limit)
        : Array.from({ length: limit }, (_, i) => dv.getInt32(p + i * 4, true))
    case 0x49: // 'I'
      return absOffset % 4 === 0
        ? new Uint32Array(ba.buffer, absOffset, limit)
        : Array.from({ length: limit }, (_, i) =>
            dv.getUint32(p + i * 4, true),
          )
    case 0x73: // 's'
      return absOffset % 2 === 0
        ? new Int16Array(ba.buffer, absOffset, limit)
        : Array.from({ length: limit }, (_, i) => dv.getInt16(p + i * 2, true))
    case 0x53: // 'S'
      return absOffset % 2 === 0
        ? new Uint16Array(ba.buffer, absOffset, limit)
        : Array.from({ length: limit }, (_, i) =>
            dv.getUint16(p + i * 2, true),
          )
    case 0x63: // 'c'
      return new Int8Array(ba.buffer, absOffset, limit)
    case 0x43: // 'C'
      return new Uint8Array(ba.buffer, absOffset, limit)
    case 0x66: // 'f'
      return absOffset % 4 === 0
        ? new Float32Array(ba.buffer, absOffset, limit)
        : Array.from({ length: limit }, (_, i) =>
            dv.getFloat32(p + i * 4, true),
          )
    default:
      return undefined
  }
}
|
|
319
|
+
|
|
196
320
|
private _computeTags() {
|
|
197
|
-
let p =
|
|
198
|
-
this.b0 +
|
|
199
|
-
this.read_name_length +
|
|
200
|
-
this.num_cigar_bytes +
|
|
201
|
-
this.num_seq_bytes +
|
|
202
|
-
this.seq_length
|
|
321
|
+
let p = this.tagsStart
|
|
203
322
|
|
|
204
323
|
const blockEnd = this.bytes.end
|
|
205
324
|
const ba = this.byteArray
|
|
@@ -268,8 +387,11 @@ export default class BamRecord {
|
|
|
268
387
|
if (absOffset % 4 === 0) {
|
|
269
388
|
tags[tag] = new Int32Array(ba.buffer, absOffset, limit)
|
|
270
389
|
} else {
|
|
271
|
-
const
|
|
272
|
-
|
|
390
|
+
const arr: number[] = new Array(limit)
|
|
391
|
+
for (let i = 0; i < limit; i++) {
|
|
392
|
+
arr[i] = this._dataView.getInt32(p + i * 4, true)
|
|
393
|
+
}
|
|
394
|
+
tags[tag] = arr
|
|
273
395
|
}
|
|
274
396
|
p += limit << 2
|
|
275
397
|
} else if (Btype === 0x49) {
|
|
@@ -277,8 +399,11 @@ export default class BamRecord {
|
|
|
277
399
|
if (absOffset % 4 === 0) {
|
|
278
400
|
tags[tag] = new Uint32Array(ba.buffer, absOffset, limit)
|
|
279
401
|
} else {
|
|
280
|
-
const
|
|
281
|
-
|
|
402
|
+
const arr: number[] = new Array(limit)
|
|
403
|
+
for (let i = 0; i < limit; i++) {
|
|
404
|
+
arr[i] = this._dataView.getUint32(p + i * 4, true)
|
|
405
|
+
}
|
|
406
|
+
tags[tag] = arr
|
|
282
407
|
}
|
|
283
408
|
p += limit << 2
|
|
284
409
|
} else if (Btype === 0x73) {
|
|
@@ -286,8 +411,11 @@ export default class BamRecord {
|
|
|
286
411
|
if (absOffset % 2 === 0) {
|
|
287
412
|
tags[tag] = new Int16Array(ba.buffer, absOffset, limit)
|
|
288
413
|
} else {
|
|
289
|
-
const
|
|
290
|
-
|
|
414
|
+
const arr: number[] = new Array(limit)
|
|
415
|
+
for (let i = 0; i < limit; i++) {
|
|
416
|
+
arr[i] = this._dataView.getInt16(p + i * 2, true)
|
|
417
|
+
}
|
|
418
|
+
tags[tag] = arr
|
|
291
419
|
}
|
|
292
420
|
p += limit << 1
|
|
293
421
|
} else if (Btype === 0x53) {
|
|
@@ -295,8 +423,11 @@ export default class BamRecord {
|
|
|
295
423
|
if (absOffset % 2 === 0) {
|
|
296
424
|
tags[tag] = new Uint16Array(ba.buffer, absOffset, limit)
|
|
297
425
|
} else {
|
|
298
|
-
const
|
|
299
|
-
|
|
426
|
+
const arr: number[] = new Array(limit)
|
|
427
|
+
for (let i = 0; i < limit; i++) {
|
|
428
|
+
arr[i] = this._dataView.getUint16(p + i * 2, true)
|
|
429
|
+
}
|
|
430
|
+
tags[tag] = arr
|
|
300
431
|
}
|
|
301
432
|
p += limit << 1
|
|
302
433
|
} else if (Btype === 0x63) {
|
|
@@ -312,12 +443,11 @@ export default class BamRecord {
|
|
|
312
443
|
if (absOffset % 4 === 0) {
|
|
313
444
|
tags[tag] = new Float32Array(ba.buffer, absOffset, limit)
|
|
314
445
|
} else {
|
|
315
|
-
const
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
)
|
|
446
|
+
const arr: number[] = new Array(limit)
|
|
447
|
+
for (let i = 0; i < limit; i++) {
|
|
448
|
+
arr[i] = this._dataView.getFloat32(p + i * 4, true)
|
|
449
|
+
}
|
|
450
|
+
tags[tag] = arr
|
|
321
451
|
}
|
|
322
452
|
p += limit << 2
|
|
323
453
|
}
|
|
@@ -449,10 +579,7 @@ export default class BamRecord {
|
|
|
449
579
|
let lref = 0
|
|
450
580
|
for (let c = 0; c < numCigarOps; ++c) {
|
|
451
581
|
const cigop = cigarView[c]!
|
|
452
|
-
|
|
453
|
-
if (!((1 << op) & CIGAR_SKIP_MASK)) {
|
|
454
|
-
lref += cigop >> 4
|
|
455
|
-
}
|
|
582
|
+
lref += (cigop >> 4) * ((CIGAR_CONSUMES_REF_MASK >> (cigop & 0xf)) & 1)
|
|
456
583
|
}
|
|
457
584
|
return {
|
|
458
585
|
NUMERIC_CIGAR: cigarView,
|
|
@@ -465,10 +592,7 @@ export default class BamRecord {
|
|
|
465
592
|
for (let c = 0; c < numCigarOps; ++c) {
|
|
466
593
|
const cigop = this._dataView.getInt32(p + c * 4, true) | 0
|
|
467
594
|
cigarArray[c] = cigop
|
|
468
|
-
|
|
469
|
-
if (!((1 << op) & CIGAR_SKIP_MASK)) {
|
|
470
|
-
lref = (lref + (cigop >> 4)) | 0
|
|
471
|
-
}
|
|
595
|
+
lref += (cigop >> 4) * ((CIGAR_CONSUMES_REF_MASK >> (cigop & 0xf)) & 1)
|
|
472
596
|
}
|
|
473
597
|
return {
|
|
474
598
|
NUMERIC_CIGAR: cigarArray,
|