@gmod/bam 6.1.1 → 7.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/record.ts CHANGED
@@ -1,7 +1,19 @@
1
1
  import Constants from './constants.ts'
2
2
 
3
3
  const SEQRET_DECODER = '=ACMGRSVTWYHKDBN'.split('')
4
- const CIGAR_DECODER = 'MIDNSHP=X???????'.split('')
4
+ const ASCII_CIGAR_CODES = [
5
+ 77, 73, 68, 78, 83, 72, 80, 61, 88, 63, 63, 63, 63, 63, 63, 63,
6
+ ]
7
+
8
+ // const CIGAR_MATCH = 0
9
+ const CIGAR_INS = 1
10
+ // const CIGAR_DEL = 2
11
+ const CIGAR_REF_SKIP = 3
12
+ const CIGAR_SOFT_CLIP = 4
13
+ const CIGAR_HARD_CLIP = 5
14
+ // const CIGAR_PAD = 6
15
+ // const CIGAR_EQUAL = 7
16
+ // const CIGAR_DIFF = 8
5
17
 
6
18
  interface Bytes {
7
19
  start: number
@@ -12,12 +24,12 @@ interface Bytes {
12
24
  export default class BamRecord {
13
25
  public fileOffset: number
14
26
  private bytes: Bytes
15
- #dataView: DataView
27
+ private _dataView: DataView
16
28
 
17
29
  constructor(args: { bytes: Bytes; fileOffset: number }) {
18
30
  this.bytes = args.bytes
19
31
  this.fileOffset = args.fileOffset
20
- this.#dataView = new DataView(this.bytes.byteArray.buffer)
32
+ this._dataView = new DataView(this.bytes.byteArray.buffer)
21
33
  }
22
34
 
23
35
  get byteArray() {
@@ -26,15 +38,15 @@ export default class BamRecord {
26
38
 
27
39
  get flags() {
28
40
  return (
29
- (this.#dataView.getInt32(this.bytes.start + 16, true) & 0xffff0000) >> 16
41
+ (this._dataView.getInt32(this.bytes.start + 16, true) & 0xffff0000) >> 16
30
42
  )
31
43
  }
32
44
  get ref_id() {
33
- return this.#dataView.getInt32(this.bytes.start + 4, true)
45
+ return this._dataView.getInt32(this.bytes.start + 4, true)
34
46
  }
35
47
 
36
48
  get start() {
37
- return this.#dataView.getInt32(this.bytes.start + 8, true)
49
+ return this._dataView.getInt32(this.bytes.start + 8, true)
38
50
  }
39
51
 
40
52
  get end() {
@@ -93,130 +105,98 @@ export default class BamRecord {
93
105
  const blockEnd = this.bytes.end
94
106
  const tags = {} as Record<string, unknown>
95
107
  while (p < blockEnd) {
96
- const tag = String.fromCharCode(
97
- this.byteArray[p]!,
98
- this.byteArray[p + 1]!,
99
- )
108
+ const tag =
109
+ String.fromCharCode(this.byteArray[p]!) +
110
+ String.fromCharCode(this.byteArray[p + 1]!)
100
111
  const type = String.fromCharCode(this.byteArray[p + 2]!)
101
112
  p += 3
102
113
 
103
- if (type === 'A') {
104
- tags[tag] = String.fromCharCode(this.byteArray[p]!)
105
- p += 1
106
- } else if (type === 'i') {
107
- tags[tag] = this.#dataView.getInt32(p, true)
108
- p += 4
109
- } else if (type === 'I') {
110
- tags[tag] = this.#dataView.getUint32(p, true)
111
- p += 4
112
- } else if (type === 'c') {
113
- tags[tag] = this.#dataView.getInt8(p)
114
- p += 1
115
- } else if (type === 'C') {
116
- tags[tag] = this.#dataView.getUint8(p)
117
- p += 1
118
- } else if (type === 's') {
119
- tags[tag] = this.#dataView.getInt16(p, true)
120
- p += 2
121
- } else if (type === 'S') {
122
- tags[tag] = this.#dataView.getUint16(p, true)
123
- p += 2
124
- } else if (type === 'f') {
125
- tags[tag] = this.#dataView.getFloat32(p, true)
126
- p += 4
127
- } else if (type === 'Z' || type === 'H') {
128
- const value = []
129
- while (p <= blockEnd) {
130
- const cc = this.byteArray[p++]!
131
- if (cc !== 0) {
132
- value.push(String.fromCharCode(cc))
133
- } else {
134
- break
135
- }
136
- }
137
- tags[tag] = value.join('')
138
- } else if (type === 'B') {
139
- const cc = this.byteArray[p++]!
140
- const Btype = String.fromCharCode(cc)
141
- const limit = this.#dataView.getInt32(p, true)
142
- p += 4
143
- if (Btype === 'i') {
144
- if (tag === 'CG') {
145
- const value = []
146
- for (let k = 0; k < limit; k++) {
147
- const cigop = this.#dataView.getInt32(p, true)
148
- const lop = cigop >> 4
149
- const op = CIGAR_DECODER[cigop & 0xf]!
150
- value.push(lop + op)
151
- p += 4
152
- }
153
- tags[tag] = value.join('')
154
- } else {
155
- const value = []
156
- for (let k = 0; k < limit; k++) {
157
- value.push(this.#dataView.getInt32(p, true))
158
- p += 4
159
- }
160
- tags[tag] = value
161
- }
162
- } else if (Btype === 'I') {
163
- if (tag === 'CG') {
164
- const value = []
165
- for (let k = 0; k < limit; k++) {
166
- const cigop = this.#dataView.getUint32(p, true)
167
- const lop = cigop >> 4
168
- const op = CIGAR_DECODER[cigop & 0xf]!
169
- value.push(lop + op)
170
- p += 4
171
- }
172
- tags[tag] = value.join('')
173
- } else {
174
- const value = []
175
- for (let k = 0; k < limit; k++) {
176
- value.push(this.#dataView.getUint32(p, true))
177
- p += 4
178
- }
179
- tags[tag] = value
180
- }
181
- } else if (Btype === 's') {
182
- const value = []
183
- for (let k = 0; k < limit; k++) {
184
- value.push(this.#dataView.getInt16(p, true))
185
- p += 2
186
- }
187
- tags[tag] = value
188
- } else if (Btype === 'S') {
189
- const value = []
190
- for (let k = 0; k < limit; k++) {
191
- value.push(this.#dataView.getUint16(p, true))
192
- p += 2
193
- }
194
- tags[tag] = value
195
- } else if (Btype === 'c') {
114
+ switch (type) {
115
+ case 'A':
116
+ tags[tag] = String.fromCharCode(this.byteArray[p]!)
117
+ p += 1
118
+ break
119
+ case 'i':
120
+ tags[tag] = this._dataView.getInt32(p, true)
121
+ p += 4
122
+ break
123
+ case 'I':
124
+ tags[tag] = this._dataView.getUint32(p, true)
125
+ p += 4
126
+ break
127
+ case 'c':
128
+ tags[tag] = this._dataView.getInt8(p)
129
+ p += 1
130
+ break
131
+ case 'C':
132
+ tags[tag] = this._dataView.getUint8(p)
133
+ p += 1
134
+ break
135
+ case 's':
136
+ tags[tag] = this._dataView.getInt16(p, true)
137
+ p += 2
138
+ break
139
+ case 'S':
140
+ tags[tag] = this._dataView.getUint16(p, true)
141
+ p += 2
142
+ break
143
+ case 'f':
144
+ tags[tag] = this._dataView.getFloat32(p, true)
145
+ p += 4
146
+ break
147
+ case 'Z':
148
+ case 'H': {
196
149
  const value = []
197
- for (let k = 0; k < limit; k++) {
198
- value.push(this.#dataView.getInt8(p))
199
- p += 1
200
- }
201
- tags[tag] = value
202
- } else if (Btype === 'C') {
203
- const value = []
204
- for (let k = 0; k < limit; k++) {
205
- value.push(this.#dataView.getUint8(p))
206
- p += 1
150
+ while (p <= blockEnd) {
151
+ const cc = this.byteArray[p++]!
152
+ if (cc !== 0) {
153
+ value.push(String.fromCharCode(cc))
154
+ } else {
155
+ break
156
+ }
207
157
  }
208
- tags[tag] = value
209
- } else if (Btype === 'f') {
210
- const value = []
211
- for (let k = 0; k < limit; k++) {
212
- value.push(this.#dataView.getFloat32(p, true))
213
- p += 4
158
+ tags[tag] = value.join('')
159
+ break
160
+ }
161
+ case 'B': {
162
+ const cc = this.byteArray[p++]!
163
+ const Btype = String.fromCharCode(cc)
164
+ const limit = this._dataView.getInt32(p, true)
165
+ p += 4
166
+ if (Btype === 'i') {
167
+ const bytes = this.byteArray.slice(p, p + limit * 4)
168
+ tags[tag] = new Int32Array(bytes.buffer, bytes.byteOffset, limit)
169
+ p += limit * 4
170
+ } else if (Btype === 'I') {
171
+ const bytes = this.byteArray.slice(p, p + limit * 4)
172
+ tags[tag] = new Uint32Array(bytes.buffer, bytes.byteOffset, limit)
173
+ p += limit * 4
174
+ } else if (Btype === 's') {
175
+ const bytes = this.byteArray.slice(p, p + limit * 2)
176
+ tags[tag] = new Int16Array(bytes.buffer, bytes.byteOffset, limit)
177
+ p += limit * 2
178
+ } else if (Btype === 'S') {
179
+ const bytes = this.byteArray.slice(p, p + limit * 2)
180
+ tags[tag] = new Uint16Array(bytes.buffer, bytes.byteOffset, limit)
181
+ p += limit * 2
182
+ } else if (Btype === 'c') {
183
+ const bytes = this.byteArray.slice(p, p + limit)
184
+ tags[tag] = new Int8Array(bytes.buffer, bytes.byteOffset, limit)
185
+ p += limit
186
+ } else if (Btype === 'C') {
187
+ const bytes = this.byteArray.slice(p, p + limit)
188
+ tags[tag] = new Uint8Array(bytes.buffer, bytes.byteOffset, limit)
189
+ p += limit
190
+ } else if (Btype === 'f') {
191
+ const bytes = this.byteArray.slice(p, p + limit * 4)
192
+ tags[tag] = new Float32Array(bytes.buffer, bytes.byteOffset, limit)
193
+ p += limit * 4
214
194
  }
215
- tags[tag] = value
195
+ break
216
196
  }
217
- } else {
218
- console.error('Unknown BAM tag type', type)
219
- break
197
+ default:
198
+ console.error('Unknown BAM tag type', type)
199
+ break
220
200
  }
221
201
  }
222
202
  return tags
@@ -289,51 +269,57 @@ export default class BamRecord {
289
269
  if (this.isSegmentUnmapped()) {
290
270
  return {
291
271
  length_on_ref: 0,
292
- CIGAR: '',
272
+ NUMERIC_CIGAR: new Uint32Array(0),
293
273
  }
294
274
  }
295
275
 
296
276
  const numCigarOps = this.num_cigar_ops
297
277
  let p = this.b0 + this.read_name_length
298
- const CIGAR = []
299
278
 
300
279
  // check for CG tag by inspecting whether the CIGAR field contains a clip
301
280
  // that consumes entire seqLen
302
- let cigop = this.#dataView.getInt32(p, true)
303
- let lop = cigop >> 4
304
- let op = CIGAR_DECODER[cigop & 0xf]
305
- if (op === 'S' && lop === this.seq_length) {
281
+ const cigop = this._dataView.getInt32(p, true)
282
+ const lop = cigop >> 4
283
+ const op = cigop & 0xf
284
+ if (op === CIGAR_SOFT_CLIP && lop === this.seq_length) {
306
285
  // if there is a CG the second CIGAR field will be an N tag that represents
307
286
  // the length on ref
308
287
  p += 4
309
- cigop = this.#dataView.getInt32(p, true)
310
- lop = cigop >> 4
311
- op = CIGAR_DECODER[cigop & 0xf]
312
- if (op !== 'N') {
288
+ const cigop = this._dataView.getInt32(p, true)
289
+ const lop = cigop >> 4
290
+ const op = cigop & 0xf
291
+ if (op !== CIGAR_REF_SKIP) {
313
292
  console.warn('CG tag with no N tag')
314
293
  }
294
+ const cgArray = this.tags.CG as Uint32Array
315
295
  return {
316
- CIGAR: this.tags.CG as string,
296
+ NUMERIC_CIGAR: cgArray,
317
297
  length_on_ref: lop,
318
298
  }
319
299
  } else {
300
+ const cigarBytes = this.byteArray.slice(p, p + numCigarOps * 4)
301
+ const cigarView = new Uint32Array(
302
+ cigarBytes.buffer,
303
+ cigarBytes.byteOffset,
304
+ numCigarOps,
305
+ )
320
306
  let lref = 0
321
307
  for (let c = 0; c < numCigarOps; ++c) {
322
- cigop = this.#dataView.getInt32(p, true)
323
- lop = cigop >> 4
324
- op = CIGAR_DECODER[cigop & 0xf]!
325
- CIGAR.push(lop + op)
308
+ const cigop = cigarView[c]!
309
+ const op = cigop & 0xf
326
310
  // soft clip, hard clip, and insertion don't count toward the length on
327
311
  // the reference
328
- if (op !== 'H' && op !== 'S' && op !== 'I') {
329
- lref += lop
312
+ if (
313
+ op !== CIGAR_HARD_CLIP &&
314
+ op !== CIGAR_SOFT_CLIP &&
315
+ op !== CIGAR_INS
316
+ ) {
317
+ lref += cigop >> 4
330
318
  }
331
-
332
- p += 4
333
319
  }
334
320
 
335
321
  return {
336
- CIGAR: CIGAR.join(''),
322
+ NUMERIC_CIGAR: cigarView,
337
323
  length_on_ref: lref,
338
324
  }
339
325
  }
@@ -343,8 +329,20 @@ export default class BamRecord {
343
329
  return this.cigarAndLength.length_on_ref
344
330
  }
345
331
 
332
+ get NUMERIC_CIGAR() {
333
+ return this.cigarAndLength.NUMERIC_CIGAR
334
+ }
335
+
346
336
  get CIGAR() {
347
- return this.cigarAndLength.CIGAR
337
+ const numeric = this.NUMERIC_CIGAR
338
+ let result = ''
339
+ for (let i = 0, l = numeric.length; i < l; i++) {
340
+ const packed = numeric[i]!
341
+ const length = packed >> 4
342
+ const opCode = ASCII_CIGAR_CODES[packed & 0xf]!
343
+ result += length + String.fromCharCode(opCode)
344
+ }
345
+ return result
348
346
  }
349
347
 
350
348
  get num_cigar_ops() {
@@ -359,21 +357,30 @@ export default class BamRecord {
359
357
  return (this.seq_length + 1) >> 1
360
358
  }
361
359
 
362
- get seq() {
360
+ get NUMERIC_SEQ() {
363
361
  const p = this.b0 + this.read_name_length + this.num_cigar_ops * 4
364
- const seqBytes = this.num_seq_bytes
362
+ const seqBytes = this.byteArray.slice(p, p + this.num_seq_bytes)
363
+ return new Uint8Array(seqBytes.buffer, seqBytes.byteOffset, this.num_seq_bytes)
364
+ }
365
+
366
+ get seq() {
367
+ const numeric = this.NUMERIC_SEQ
365
368
  const len = this.seq_length
366
- const buf = []
369
+ const buf = new Array(len)
367
370
  let i = 0
368
- for (let j = 0; j < seqBytes; ++j) {
369
- const sb = this.byteArray[p + j]!
370
- buf.push(SEQRET_DECODER[(sb & 0xf0) >> 4])
371
- i++
372
- if (i < len) {
373
- buf.push(SEQRET_DECODER[sb & 0x0f])
374
- i++
375
- }
371
+ const fullBytes = len >> 1
372
+
373
+ for (let j = 0; j < fullBytes; ++j) {
374
+ const sb = numeric[j]!
375
+ buf[i++] = SEQRET_DECODER[(sb & 0xf0) >> 4]
376
+ buf[i++] = SEQRET_DECODER[sb & 0x0f]
377
+ }
378
+
379
+ if (i < len) {
380
+ const sb = numeric[fullBytes]!
381
+ buf[i] = SEQRET_DECODER[(sb & 0xf0) >> 4]
376
382
  }
383
+
377
384
  return buf.join('')
378
385
  }
379
386
 
@@ -415,27 +422,27 @@ export default class BamRecord {
415
422
  }
416
423
 
417
424
  get bin_mq_nl() {
418
- return this.#dataView.getInt32(this.bytes.start + 12, true)
425
+ return this._dataView.getInt32(this.bytes.start + 12, true)
419
426
  }
420
427
 
421
428
  get flag_nc() {
422
- return this.#dataView.getInt32(this.bytes.start + 16, true)
429
+ return this._dataView.getInt32(this.bytes.start + 16, true)
423
430
  }
424
431
 
425
432
  get seq_length() {
426
- return this.#dataView.getInt32(this.bytes.start + 20, true)
433
+ return this._dataView.getInt32(this.bytes.start + 20, true)
427
434
  }
428
435
 
429
436
  get next_refid() {
430
- return this.#dataView.getInt32(this.bytes.start + 24, true)
437
+ return this._dataView.getInt32(this.bytes.start + 24, true)
431
438
  }
432
439
 
433
440
  get next_pos() {
434
- return this.#dataView.getInt32(this.bytes.start + 28, true)
441
+ return this._dataView.getInt32(this.bytes.start + 28, true)
435
442
  }
436
443
 
437
444
  get template_length() {
438
- return this.#dataView.getInt32(this.bytes.start + 32, true)
445
+ return this._dataView.getInt32(this.bytes.start + 32, true)
439
446
  }
440
447
 
441
448
  seqAt(idx: number): string | undefined {
@@ -491,3 +498,4 @@ cacheGetter(BamRecord, 'tags')
491
498
  cacheGetter(BamRecord, 'cigarAndLength')
492
499
  cacheGetter(BamRecord, 'seq')
493
500
  cacheGetter(BamRecord, 'qual')
501
+ cacheGetter(BamRecord, 'end')
package/src/util.ts CHANGED
@@ -2,51 +2,6 @@ import Chunk from './chunk.ts'
2
2
  import { longFromBytesToUnsigned } from './long.ts'
3
3
  import { Offset, VirtualOffset } from './virtualOffset.ts'
4
4
 
5
- export function timeout(ms: number) {
6
- return new Promise(resolve => setTimeout(resolve, ms))
7
- }
8
-
9
- /**
10
- * Properly check if the given AbortSignal is aborted.
11
- *
12
- * Per the standard, if the signal reads as aborted, this function throws
13
- * either a DOMException AbortError, or a regular error with a `code` attribute
14
- * set to `ERR_ABORTED`.
15
- *
16
- * For convenience, passing `undefined` is a no-op
17
- *
18
- * @param {AbortSignal} [signal] an AbortSignal, or anything with an `aborted` attribute
19
- * @returns nothing
20
- */
21
- export function checkAbortSignal(signal?: AbortSignal) {
22
- if (!signal) {
23
- return
24
- }
25
-
26
- if (signal.aborted) {
27
- // console.log('bam aborted!')
28
- if (typeof DOMException === 'undefined') {
29
- const e = new Error('aborted')
30
- // @ts-ignore
31
- e.code = 'ERR_ABORTED'
32
- throw e
33
- } else {
34
- throw new DOMException('aborted', 'AbortError')
35
- }
36
- }
37
- }
38
-
39
- /**
40
- * Skips to the next tick, then runs `checkAbortSignal`.
41
- * Await this to inside an otherwise synchronous loop to
42
- * provide a place to break when an abort signal is received.
43
- * @param {AbortSignal} signal
44
- */
45
- export async function abortBreakPoint(signal?: AbortSignal) {
46
- await Promise.resolve()
47
- checkAbortSignal(signal)
48
- }
49
-
50
5
  export function canMergeBlocks(chunk1: Chunk, chunk2: Chunk) {
51
6
  return (
52
7
  chunk2.minv.blockPosition - chunk1.maxv.blockPosition < 65000 &&
@@ -146,15 +101,12 @@ export function parseNameBytes(
146
101
  return { refNameToId, refIdToName }
147
102
  }
148
103
 
149
- export function sum(array: Uint8Array[]) {
150
- let sum = 0
151
- for (const entry of array) {
152
- sum += entry.length
153
- }
154
- return sum
155
- }
156
104
  export function concatUint8Array(args: Uint8Array[]) {
157
- const mergedArray = new Uint8Array(sum(args))
105
+ let totalLength = 0
106
+ for (const entry of args) {
107
+ totalLength += entry.length
108
+ }
109
+ const mergedArray = new Uint8Array(totalLength)
158
110
  let offset = 0
159
111
  for (const entry of args) {
160
112
  mergedArray.set(entry, offset)
@@ -164,9 +116,11 @@ export function concatUint8Array(args: Uint8Array[]) {
164
116
  }
165
117
 
166
118
  export async function gen2array<T>(gen: AsyncIterable<T[]>): Promise<T[]> {
167
- let out: T[] = []
119
+ const out: T[] = []
168
120
  for await (const x of gen) {
169
- out = out.concat(x)
121
+ for (const item of x) {
122
+ out.push(item)
123
+ }
170
124
  }
171
125
  return out
172
126
  }