@gmod/bam 7.1.10 → 7.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -7
- package/README.md +2 -18
- package/dist/bai.js +1 -1
- package/dist/bai.js.map +1 -1
- package/dist/bamFile.d.ts +4 -3
- package/dist/bamFile.js +73 -75
- package/dist/bamFile.js.map +1 -1
- package/dist/htsget.d.ts +1 -11
- package/dist/htsget.js +23 -49
- package/dist/htsget.js.map +1 -1
- package/dist/index.d.ts +5 -2
- package/dist/index.js.map +1 -1
- package/dist/record.d.ts +5 -0
- package/dist/record.js +214 -73
- package/dist/record.js.map +1 -1
- package/dist/util.d.ts +13 -0
- package/dist/util.js +25 -0
- package/dist/util.js.map +1 -1
- package/esm/bai.js +1 -1
- package/esm/bai.js.map +1 -1
- package/esm/bamFile.d.ts +4 -3
- package/esm/bamFile.js +74 -76
- package/esm/bamFile.js.map +1 -1
- package/esm/htsget.d.ts +1 -11
- package/esm/htsget.js +23 -49
- package/esm/htsget.js.map +1 -1
- package/esm/index.d.ts +5 -2
- package/esm/index.js +1 -1
- package/esm/index.js.map +1 -1
- package/esm/record.d.ts +5 -0
- package/esm/record.js +214 -73
- package/esm/record.js.map +1 -1
- package/esm/util.d.ts +13 -0
- package/esm/util.js +22 -0
- package/esm/util.js.map +1 -1
- package/package.json +1 -1
- package/src/bai.ts +1 -1
- package/src/bamFile.ts +111 -100
- package/src/htsget.ts +26 -58
- package/src/index.ts +6 -6
- package/src/record.ts +216 -81
- package/src/util.ts +41 -0
package/src/bamFile.ts
CHANGED
|
@@ -9,10 +9,15 @@ import CSI from './csi.ts'
|
|
|
9
9
|
import NullFilehandle from './nullFilehandle.ts'
|
|
10
10
|
import BAMFeature from './record.ts'
|
|
11
11
|
import { parseHeaderText } from './sam.ts'
|
|
12
|
-
import {
|
|
12
|
+
import {
|
|
13
|
+
filterCacheKey,
|
|
14
|
+
filterReadFlag,
|
|
15
|
+
filterTagValue,
|
|
16
|
+
makeOpts,
|
|
17
|
+
} from './util.ts'
|
|
13
18
|
|
|
14
19
|
import type { Bytes } from './record.ts'
|
|
15
|
-
import type { BamOpts, BaseOpts } from './util.ts'
|
|
20
|
+
import type { BamOpts, BaseOpts, FilterBy } from './util.ts'
|
|
16
21
|
import type { GenericFilehandle } from 'generic-filehandle2'
|
|
17
22
|
|
|
18
23
|
export interface BamRecordLike {
|
|
@@ -23,6 +28,8 @@ export interface BamRecordLike {
|
|
|
23
28
|
fileOffset: number
|
|
24
29
|
next_pos: number
|
|
25
30
|
next_refid: number
|
|
31
|
+
flags: number
|
|
32
|
+
tags: Record<string, unknown>
|
|
26
33
|
}
|
|
27
34
|
|
|
28
35
|
export type BamRecordClass<T extends BamRecordLike = BAMFeature> = new (args: {
|
|
@@ -204,7 +211,10 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
|
|
|
204
211
|
const lRef = dataView.getInt32(p + lName + 4, true)
|
|
205
212
|
|
|
206
213
|
chrToIndex[refName] = i
|
|
207
|
-
indexToChr.push({
|
|
214
|
+
indexToChr.push({
|
|
215
|
+
refName,
|
|
216
|
+
length: lRef,
|
|
217
|
+
})
|
|
208
218
|
|
|
209
219
|
p = p + 8 + lName
|
|
210
220
|
}
|
|
@@ -217,28 +227,19 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
|
|
|
217
227
|
min: number,
|
|
218
228
|
max: number,
|
|
219
229
|
opts?: BamOpts,
|
|
220
|
-
) {
|
|
221
|
-
return gen2array(this.streamRecordsForRange(chr, min, max, opts))
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
async *streamRecordsForRange(
|
|
225
|
-
chr: string,
|
|
226
|
-
min: number,
|
|
227
|
-
max: number,
|
|
228
|
-
opts?: BamOpts,
|
|
229
230
|
) {
|
|
230
231
|
await this.getHeader(opts)
|
|
231
232
|
const chrId = this.chrToIndex?.[chr]
|
|
232
233
|
if (chrId === undefined || !this.index) {
|
|
233
|
-
return
|
|
234
|
+
return []
|
|
234
235
|
}
|
|
235
236
|
const chunks = await this.index.blocksForRange(chrId, min - 1, max, opts)
|
|
236
|
-
|
|
237
|
+
return this._fetchChunkFeaturesDirect(chunks, chrId, min, max, opts)
|
|
237
238
|
}
|
|
238
239
|
|
|
239
|
-
private chunkCacheKey(chunk: Chunk) {
|
|
240
|
+
private chunkCacheKey(chunk: Chunk, filterBy?: FilterBy) {
|
|
240
241
|
const { minv, maxv } = chunk
|
|
241
|
-
return `${minv.blockPosition}:${minv.dataPosition}-${maxv.blockPosition}:${maxv.dataPosition}`
|
|
242
|
+
return `${minv.blockPosition}:${minv.dataPosition}-${maxv.blockPosition}:${maxv.dataPosition}${filterCacheKey(filterBy)}`
|
|
242
243
|
}
|
|
243
244
|
|
|
244
245
|
private blocksOverlap(
|
|
@@ -256,29 +257,25 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
|
|
|
256
257
|
if (
|
|
257
258
|
this.blocksOverlap(minBlock, maxBlock, entry.minBlock, entry.maxBlock)
|
|
258
259
|
) {
|
|
259
|
-
// console.log(
|
|
260
|
-
// `[BAM Cache] Evicting overlapping chunk: ${key} (${entry.features.length} features, blocks ${entry.minBlock}-${entry.maxBlock})`,
|
|
261
|
-
// )
|
|
262
260
|
this.chunkFeatureCache.delete(key)
|
|
263
261
|
}
|
|
264
262
|
}
|
|
265
263
|
}
|
|
266
264
|
|
|
267
|
-
async
|
|
265
|
+
private async _fetchChunkFeaturesDirect(
|
|
268
266
|
chunks: Chunk[],
|
|
269
267
|
chrId: number,
|
|
270
268
|
min: number,
|
|
271
269
|
max: number,
|
|
272
270
|
opts: BamOpts = {},
|
|
273
271
|
) {
|
|
274
|
-
const { viewAsPairs } = opts
|
|
275
|
-
const
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
const cacheKey = this.chunkCacheKey(chunk)
|
|
272
|
+
const { viewAsPairs, filterBy } = opts
|
|
273
|
+
const { flagInclude = 0, flagExclude = 0, tagFilter } = filterBy || {}
|
|
274
|
+
const result: T[] = []
|
|
275
|
+
|
|
276
|
+
for (let ci = 0, cl = chunks.length; ci < cl; ci++) {
|
|
277
|
+
const chunk = chunks[ci]!
|
|
278
|
+
const cacheKey = this.chunkCacheKey(chunk, filterBy)
|
|
282
279
|
const minBlock = chunk.minv.blockPosition
|
|
283
280
|
const maxBlock = chunk.maxv.blockPosition
|
|
284
281
|
|
|
@@ -286,107 +283,114 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
|
|
|
286
283
|
const cached = this.chunkFeatureCache.get(cacheKey)
|
|
287
284
|
if (cached) {
|
|
288
285
|
records = cached.features
|
|
289
|
-
// cacheHits++
|
|
290
286
|
} else {
|
|
291
287
|
this.evictOverlappingChunks(minBlock, maxBlock)
|
|
292
288
|
const { data, cpositions, dpositions } = await this._readChunk({
|
|
293
289
|
chunk,
|
|
294
290
|
opts,
|
|
295
291
|
})
|
|
296
|
-
|
|
292
|
+
const allRecords = await this.readBamFeatures(
|
|
297
293
|
data,
|
|
298
294
|
cpositions,
|
|
299
295
|
dpositions,
|
|
300
296
|
chunk,
|
|
301
297
|
)
|
|
298
|
+
if (filterBy) {
|
|
299
|
+
records = []
|
|
300
|
+
for (let i = 0, l = allRecords.length; i < l; i++) {
|
|
301
|
+
const record = allRecords[i]!
|
|
302
|
+
if (filterReadFlag(record.flags, flagInclude, flagExclude)) {
|
|
303
|
+
continue
|
|
304
|
+
}
|
|
305
|
+
if (
|
|
306
|
+
tagFilter &&
|
|
307
|
+
filterTagValue(record.tags[tagFilter.tag], tagFilter.value)
|
|
308
|
+
) {
|
|
309
|
+
continue
|
|
310
|
+
}
|
|
311
|
+
records.push(record)
|
|
312
|
+
}
|
|
313
|
+
} else {
|
|
314
|
+
records = allRecords
|
|
315
|
+
}
|
|
302
316
|
this.chunkFeatureCache.set(cacheKey, {
|
|
303
317
|
minBlock,
|
|
304
318
|
maxBlock,
|
|
305
319
|
features: records,
|
|
306
320
|
})
|
|
307
|
-
// cacheMisses++
|
|
308
321
|
}
|
|
309
322
|
|
|
310
|
-
|
|
311
|
-
for (
|
|
323
|
+
let done = false
|
|
324
|
+
for (let i = 0, l = records.length; i < l; i++) {
|
|
325
|
+
const feature = records[i]!
|
|
312
326
|
if (feature.ref_id === chrId) {
|
|
313
327
|
if (feature.start >= max) {
|
|
314
328
|
done = true
|
|
315
329
|
break
|
|
316
330
|
} else if (feature.end >= min) {
|
|
317
|
-
|
|
331
|
+
result.push(feature)
|
|
318
332
|
}
|
|
319
333
|
}
|
|
320
334
|
}
|
|
321
|
-
feats.push(recs)
|
|
322
|
-
yield recs
|
|
323
335
|
if (done) {
|
|
324
336
|
break
|
|
325
337
|
}
|
|
326
338
|
}
|
|
327
339
|
|
|
328
|
-
// const total = cacheHits + cacheMisses
|
|
329
|
-
// if (total > 0) {
|
|
330
|
-
// const hitRate = (cacheHits / total) * 100
|
|
331
|
-
// console.log(
|
|
332
|
-
// `[BAM Cache] chunks: ${total}, hits: ${cacheHits}, misses: ${cacheMisses}, rate: ${hitRate.toFixed(1)}%, cacheSize: ${this.chunkFeatureCache.size}`,
|
|
333
|
-
// )
|
|
334
|
-
// }
|
|
335
|
-
|
|
336
340
|
if (viewAsPairs) {
|
|
337
|
-
|
|
341
|
+
const pairs = await this.fetchPairs(chrId, result, opts)
|
|
342
|
+
for (let i = 0, l = pairs.length; i < l; i++) {
|
|
343
|
+
result.push(pairs[i]!)
|
|
344
|
+
}
|
|
338
345
|
}
|
|
346
|
+
|
|
347
|
+
return result
|
|
339
348
|
}
|
|
340
349
|
|
|
341
|
-
async fetchPairs(chrId: number,
|
|
350
|
+
async fetchPairs(chrId: number, records: T[], opts: BamOpts) {
|
|
342
351
|
const { pairAcrossChr, maxInsertSize = 200000 } = opts
|
|
343
|
-
const
|
|
344
|
-
const readIds: Record<
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
readNames[name] = 0
|
|
352
|
-
}
|
|
353
|
-
readNames[name]++
|
|
354
|
-
readIds[id] = 1
|
|
355
|
-
}
|
|
356
|
-
for (const [k, v] of Object.entries(readNames)) {
|
|
357
|
-
if (v === 1) {
|
|
358
|
-
unmatedPairs[k] = true
|
|
359
|
-
}
|
|
360
|
-
}
|
|
352
|
+
const readNameCounts: Record<string, number> = {}
|
|
353
|
+
const readIds: Record<number, number> = {}
|
|
354
|
+
|
|
355
|
+
for (let i = 0, l = records.length; i < l; i++) {
|
|
356
|
+
const r = records[i]!
|
|
357
|
+
const name = r.name
|
|
358
|
+
readNameCounts[name] = (readNameCounts[name] || 0) + 1
|
|
359
|
+
readIds[r.fileOffset] = 1
|
|
361
360
|
}
|
|
362
361
|
|
|
363
362
|
const matePromises: Promise<Chunk[]>[] = []
|
|
364
|
-
for (
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
363
|
+
for (let i = 0, l = records.length; i < l; i++) {
|
|
364
|
+
const f = records[i]!
|
|
365
|
+
const name = f.name
|
|
366
|
+
if (
|
|
367
|
+
this.index &&
|
|
368
|
+
readNameCounts[name] === 1 &&
|
|
369
|
+
(pairAcrossChr ||
|
|
370
|
+
(f.next_refid === chrId &&
|
|
371
|
+
Math.abs(f.start - f.next_pos) < maxInsertSize))
|
|
372
|
+
) {
|
|
373
|
+
matePromises.push(
|
|
374
|
+
this.index.blocksForRange(
|
|
375
|
+
f.next_refid,
|
|
376
|
+
f.next_pos,
|
|
377
|
+
f.next_pos + 1,
|
|
378
|
+
opts,
|
|
379
|
+
),
|
|
380
|
+
)
|
|
380
381
|
}
|
|
381
382
|
}
|
|
382
383
|
|
|
383
|
-
// filter out duplicate chunks (the blocks are lists of chunks, blocks are
|
|
384
|
-
// concatenated, then filter dup chunks)
|
|
385
384
|
const map = new Map<string, Chunk>()
|
|
386
385
|
const res = await Promise.all(matePromises)
|
|
387
|
-
for (
|
|
388
|
-
|
|
389
|
-
|
|
386
|
+
for (let i = 0, l = res.length; i < l; i++) {
|
|
387
|
+
const chunks = res[i]!
|
|
388
|
+
for (let j = 0, jl = chunks.length; j < jl; j++) {
|
|
389
|
+
const m = chunks[j]!
|
|
390
|
+
const key = m.toString()
|
|
391
|
+
if (!map.has(key)) {
|
|
392
|
+
map.set(key, m)
|
|
393
|
+
}
|
|
390
394
|
}
|
|
391
395
|
}
|
|
392
396
|
|
|
@@ -397,13 +401,18 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
|
|
|
397
401
|
opts,
|
|
398
402
|
})
|
|
399
403
|
const mateRecs = [] as T[]
|
|
400
|
-
|
|
404
|
+
const features = await this.readBamFeatures(
|
|
401
405
|
data,
|
|
402
406
|
cpositions,
|
|
403
407
|
dpositions,
|
|
404
408
|
chunk,
|
|
405
|
-
)
|
|
406
|
-
|
|
409
|
+
)
|
|
410
|
+
for (let i = 0, l = features.length; i < l; i++) {
|
|
411
|
+
const feature = features[i]!
|
|
412
|
+
if (
|
|
413
|
+
readNameCounts[feature.name] === 1 &&
|
|
414
|
+
!readIds[feature.fileOffset]
|
|
415
|
+
) {
|
|
407
416
|
mateRecs.push(feature)
|
|
408
417
|
}
|
|
409
418
|
}
|
|
@@ -524,17 +533,19 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
|
|
|
524
533
|
if (!this.chrToIndex) {
|
|
525
534
|
throw new Error('Header not yet parsed')
|
|
526
535
|
}
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
536
|
+
return this.index.estimatedBytesForRegions(
|
|
537
|
+
regions.map(r => {
|
|
538
|
+
const refId = this.chrToIndex![r.refName]
|
|
539
|
+
if (refId === undefined) {
|
|
540
|
+
throw new Error(`Unknown reference name: ${r.refName}`)
|
|
541
|
+
}
|
|
542
|
+
return {
|
|
543
|
+
refId,
|
|
544
|
+
start: r.start,
|
|
545
|
+
end: r.end,
|
|
546
|
+
}
|
|
547
|
+
}),
|
|
548
|
+
opts,
|
|
549
|
+
)
|
|
539
550
|
}
|
|
540
551
|
}
|
package/src/htsget.ts
CHANGED
|
@@ -66,7 +66,7 @@ export default class HtsgetFile<
|
|
|
66
66
|
this.trackId = args.trackId
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
-
async
|
|
69
|
+
async getRecordsForRange(
|
|
70
70
|
chr: string,
|
|
71
71
|
min: number,
|
|
72
72
|
max: number,
|
|
@@ -76,66 +76,34 @@ export default class HtsgetFile<
|
|
|
76
76
|
const url = `${base}?referenceName=${chr}&start=${min}&end=${max}&format=BAM`
|
|
77
77
|
const chrId = this.chrToIndex?.[chr]
|
|
78
78
|
if (chrId === undefined) {
|
|
79
|
-
|
|
80
|
-
} else {
|
|
81
|
-
const result = await fetch(url, { ...opts })
|
|
82
|
-
if (!result.ok) {
|
|
83
|
-
throw new Error(
|
|
84
|
-
`HTTP ${result.status} fetching ${url}: ${await result.text()}`,
|
|
85
|
-
)
|
|
86
|
-
}
|
|
87
|
-
const data = await result.json()
|
|
88
|
-
const uncba = await concat(data.htsget.urls.slice(1), opts)
|
|
89
|
-
|
|
90
|
-
yield* this._fetchChunkFeatures(
|
|
91
|
-
[
|
|
92
|
-
// fake stuff to pretend to be a Chunk
|
|
93
|
-
{
|
|
94
|
-
buffer: uncba,
|
|
95
|
-
_fetchedSize: undefined,
|
|
96
|
-
bin: 0,
|
|
97
|
-
compareTo() {
|
|
98
|
-
return 0
|
|
99
|
-
},
|
|
100
|
-
toUniqueString() {
|
|
101
|
-
return `${chr}_${min}_${max}`
|
|
102
|
-
},
|
|
103
|
-
fetchedSize() {
|
|
104
|
-
return 0
|
|
105
|
-
},
|
|
106
|
-
minv: {
|
|
107
|
-
dataPosition: 0,
|
|
108
|
-
blockPosition: 0,
|
|
109
|
-
compareTo: () => 0,
|
|
110
|
-
},
|
|
111
|
-
maxv: {
|
|
112
|
-
dataPosition: Number.MAX_SAFE_INTEGER,
|
|
113
|
-
blockPosition: 0,
|
|
114
|
-
compareTo: () => 0,
|
|
115
|
-
},
|
|
116
|
-
toString() {
|
|
117
|
-
return `${chr}_${min}_${max}`
|
|
118
|
-
},
|
|
119
|
-
},
|
|
120
|
-
],
|
|
121
|
-
chrId,
|
|
122
|
-
min,
|
|
123
|
-
max,
|
|
124
|
-
opts,
|
|
125
|
-
)
|
|
79
|
+
return []
|
|
126
80
|
}
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
81
|
+
const result = await fetch(url, { ...opts })
|
|
82
|
+
if (!result.ok) {
|
|
83
|
+
throw new Error(
|
|
84
|
+
`HTTP ${result.status} fetching ${url}: ${await result.text()}`,
|
|
85
|
+
)
|
|
132
86
|
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
87
|
+
const data = await result.json()
|
|
88
|
+
const uncba = await concat(data.htsget.urls.slice(1), opts)
|
|
89
|
+
|
|
90
|
+
const allRecords = await this.readBamFeatures(uncba, [], [], {
|
|
91
|
+
minv: { dataPosition: 0, blockPosition: 0 },
|
|
92
|
+
maxv: { dataPosition: 0, blockPosition: 0 },
|
|
93
|
+
} as Chunk)
|
|
94
|
+
|
|
95
|
+
const records: T[] = []
|
|
96
|
+
for (let i = 0, l = allRecords.length; i < l; i++) {
|
|
97
|
+
const feature = allRecords[i]!
|
|
98
|
+
if (feature.ref_id === chrId) {
|
|
99
|
+
if (feature.start >= max) {
|
|
100
|
+
break
|
|
101
|
+
} else if (feature.end >= min) {
|
|
102
|
+
records.push(feature)
|
|
103
|
+
}
|
|
104
|
+
}
|
|
138
105
|
}
|
|
106
|
+
return records
|
|
139
107
|
}
|
|
140
108
|
|
|
141
109
|
async getHeader(opts: BaseOpts = {}) {
|
package/src/index.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
export { default as BAI } from './bai.ts'
|
|
2
|
-
export {
|
|
3
|
-
type BamRecordClass,
|
|
4
|
-
type BamRecordLike,
|
|
5
|
-
default as BamFile,
|
|
6
|
-
} from './bamFile.ts'
|
|
2
|
+
export { default as BamFile } from './bamFile.ts'
|
|
7
3
|
export { default as CSI } from './csi.ts'
|
|
8
|
-
export {
|
|
4
|
+
export { default as BamRecord } from './record.ts'
|
|
9
5
|
export { default as HtsgetFile } from './htsget.ts'
|
|
6
|
+
|
|
7
|
+
export type { Bytes } from './record.ts'
|
|
8
|
+
export type { FilterBy, TagFilter } from './util.ts'
|
|
9
|
+
export type { BamRecordClass, BamRecordLike } from './bamFile.ts'
|