@gmod/bam 7.1.10 → 7.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/bamFile.ts CHANGED
@@ -9,10 +9,15 @@ import CSI from './csi.ts'
9
9
  import NullFilehandle from './nullFilehandle.ts'
10
10
  import BAMFeature from './record.ts'
11
11
  import { parseHeaderText } from './sam.ts'
12
- import { gen2array, makeOpts } from './util.ts'
12
+ import {
13
+ filterCacheKey,
14
+ filterReadFlag,
15
+ filterTagValue,
16
+ makeOpts,
17
+ } from './util.ts'
13
18
 
14
19
  import type { Bytes } from './record.ts'
15
- import type { BamOpts, BaseOpts } from './util.ts'
20
+ import type { BamOpts, BaseOpts, FilterBy } from './util.ts'
16
21
  import type { GenericFilehandle } from 'generic-filehandle2'
17
22
 
18
23
  export interface BamRecordLike {
@@ -23,6 +28,8 @@ export interface BamRecordLike {
23
28
  fileOffset: number
24
29
  next_pos: number
25
30
  next_refid: number
31
+ flags: number
32
+ tags: Record<string, unknown>
26
33
  }
27
34
 
28
35
  export type BamRecordClass<T extends BamRecordLike = BAMFeature> = new (args: {
@@ -204,7 +211,10 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
204
211
  const lRef = dataView.getInt32(p + lName + 4, true)
205
212
 
206
213
  chrToIndex[refName] = i
207
- indexToChr.push({ refName, length: lRef })
214
+ indexToChr.push({
215
+ refName,
216
+ length: lRef,
217
+ })
208
218
 
209
219
  p = p + 8 + lName
210
220
  }
@@ -217,28 +227,19 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
217
227
  min: number,
218
228
  max: number,
219
229
  opts?: BamOpts,
220
- ) {
221
- return gen2array(this.streamRecordsForRange(chr, min, max, opts))
222
- }
223
-
224
- async *streamRecordsForRange(
225
- chr: string,
226
- min: number,
227
- max: number,
228
- opts?: BamOpts,
229
230
  ) {
230
231
  await this.getHeader(opts)
231
232
  const chrId = this.chrToIndex?.[chr]
232
233
  if (chrId === undefined || !this.index) {
233
- return
234
+ return []
234
235
  }
235
236
  const chunks = await this.index.blocksForRange(chrId, min - 1, max, opts)
236
- yield* this._fetchChunkFeatures(chunks, chrId, min, max, opts)
237
+ return this._fetchChunkFeaturesDirect(chunks, chrId, min, max, opts)
237
238
  }
238
239
 
239
- private chunkCacheKey(chunk: Chunk) {
240
+ private chunkCacheKey(chunk: Chunk, filterBy?: FilterBy) {
240
241
  const { minv, maxv } = chunk
241
- return `${minv.blockPosition}:${minv.dataPosition}-${maxv.blockPosition}:${maxv.dataPosition}`
242
+ return `${minv.blockPosition}:${minv.dataPosition}-${maxv.blockPosition}:${maxv.dataPosition}${filterCacheKey(filterBy)}`
242
243
  }
243
244
 
244
245
  private blocksOverlap(
@@ -256,29 +257,25 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
256
257
  if (
257
258
  this.blocksOverlap(minBlock, maxBlock, entry.minBlock, entry.maxBlock)
258
259
  ) {
259
- // console.log(
260
- // `[BAM Cache] Evicting overlapping chunk: ${key} (${entry.features.length} features, blocks ${entry.minBlock}-${entry.maxBlock})`,
261
- // )
262
260
  this.chunkFeatureCache.delete(key)
263
261
  }
264
262
  }
265
263
  }
266
264
 
267
- async *_fetchChunkFeatures(
265
+ private async _fetchChunkFeaturesDirect(
268
266
  chunks: Chunk[],
269
267
  chrId: number,
270
268
  min: number,
271
269
  max: number,
272
270
  opts: BamOpts = {},
273
271
  ) {
274
- const { viewAsPairs } = opts
275
- const feats = [] as T[][]
276
- let done = false
277
- // let cacheHits = 0
278
- // let cacheMisses = 0
279
-
280
- for (const chunk of chunks) {
281
- const cacheKey = this.chunkCacheKey(chunk)
272
+ const { viewAsPairs, filterBy } = opts
273
+ const { flagInclude = 0, flagExclude = 0, tagFilter } = filterBy || {}
274
+ const result: T[] = []
275
+
276
+ for (let ci = 0, cl = chunks.length; ci < cl; ci++) {
277
+ const chunk = chunks[ci]!
278
+ const cacheKey = this.chunkCacheKey(chunk, filterBy)
282
279
  const minBlock = chunk.minv.blockPosition
283
280
  const maxBlock = chunk.maxv.blockPosition
284
281
 
@@ -286,107 +283,114 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
286
283
  const cached = this.chunkFeatureCache.get(cacheKey)
287
284
  if (cached) {
288
285
  records = cached.features
289
- // cacheHits++
290
286
  } else {
291
287
  this.evictOverlappingChunks(minBlock, maxBlock)
292
288
  const { data, cpositions, dpositions } = await this._readChunk({
293
289
  chunk,
294
290
  opts,
295
291
  })
296
- records = await this.readBamFeatures(
292
+ const allRecords = await this.readBamFeatures(
297
293
  data,
298
294
  cpositions,
299
295
  dpositions,
300
296
  chunk,
301
297
  )
298
+ if (filterBy) {
299
+ records = []
300
+ for (let i = 0, l = allRecords.length; i < l; i++) {
301
+ const record = allRecords[i]!
302
+ if (filterReadFlag(record.flags, flagInclude, flagExclude)) {
303
+ continue
304
+ }
305
+ if (
306
+ tagFilter &&
307
+ filterTagValue(record.tags[tagFilter.tag], tagFilter.value)
308
+ ) {
309
+ continue
310
+ }
311
+ records.push(record)
312
+ }
313
+ } else {
314
+ records = allRecords
315
+ }
302
316
  this.chunkFeatureCache.set(cacheKey, {
303
317
  minBlock,
304
318
  maxBlock,
305
319
  features: records,
306
320
  })
307
- // cacheMisses++
308
321
  }
309
322
 
310
- const recs = [] as T[]
311
- for (const feature of records) {
323
+ let done = false
324
+ for (let i = 0, l = records.length; i < l; i++) {
325
+ const feature = records[i]!
312
326
  if (feature.ref_id === chrId) {
313
327
  if (feature.start >= max) {
314
328
  done = true
315
329
  break
316
330
  } else if (feature.end >= min) {
317
- recs.push(feature)
331
+ result.push(feature)
318
332
  }
319
333
  }
320
334
  }
321
- feats.push(recs)
322
- yield recs
323
335
  if (done) {
324
336
  break
325
337
  }
326
338
  }
327
339
 
328
- // const total = cacheHits + cacheMisses
329
- // if (total > 0) {
330
- // const hitRate = (cacheHits / total) * 100
331
- // console.log(
332
- // `[BAM Cache] chunks: ${total}, hits: ${cacheHits}, misses: ${cacheMisses}, rate: ${hitRate.toFixed(1)}%, cacheSize: ${this.chunkFeatureCache.size}`,
333
- // )
334
- // }
335
-
336
340
  if (viewAsPairs) {
337
- yield this.fetchPairs(chrId, feats, opts)
341
+ const pairs = await this.fetchPairs(chrId, result, opts)
342
+ for (let i = 0, l = pairs.length; i < l; i++) {
343
+ result.push(pairs[i]!)
344
+ }
338
345
  }
346
+
347
+ return result
339
348
  }
340
349
 
341
- async fetchPairs(chrId: number, feats: T[][], opts: BamOpts) {
350
+ async fetchPairs(chrId: number, records: T[], opts: BamOpts) {
342
351
  const { pairAcrossChr, maxInsertSize = 200000 } = opts
343
- const unmatedPairs: Record<string, boolean> = {}
344
- const readIds: Record<string, number> = {}
345
- for (const ret of feats) {
346
- const readNames: Record<string, number> = {}
347
- for (const element of ret) {
348
- const name = element.name
349
- const id = element.fileOffset
350
- if (!readNames[name]) {
351
- readNames[name] = 0
352
- }
353
- readNames[name]++
354
- readIds[id] = 1
355
- }
356
- for (const [k, v] of Object.entries(readNames)) {
357
- if (v === 1) {
358
- unmatedPairs[k] = true
359
- }
360
- }
352
+ const readNameCounts: Record<string, number> = {}
353
+ const readIds: Record<number, number> = {}
354
+
355
+ for (let i = 0, l = records.length; i < l; i++) {
356
+ const r = records[i]!
357
+ const name = r.name
358
+ readNameCounts[name] = (readNameCounts[name] || 0) + 1
359
+ readIds[r.fileOffset] = 1
361
360
  }
362
361
 
363
362
  const matePromises: Promise<Chunk[]>[] = []
364
- for (const ret of feats) {
365
- for (const f of ret) {
366
- const name = f.name
367
- const start = f.start
368
- const pnext = f.next_pos
369
- const rnext = f.next_refid
370
- if (
371
- this.index &&
372
- unmatedPairs[name] &&
373
- (pairAcrossChr ||
374
- (rnext === chrId && Math.abs(start - pnext) < maxInsertSize))
375
- ) {
376
- matePromises.push(
377
- this.index.blocksForRange(rnext, pnext, pnext + 1, opts),
378
- )
379
- }
363
+ for (let i = 0, l = records.length; i < l; i++) {
364
+ const f = records[i]!
365
+ const name = f.name
366
+ if (
367
+ this.index &&
368
+ readNameCounts[name] === 1 &&
369
+ (pairAcrossChr ||
370
+ (f.next_refid === chrId &&
371
+ Math.abs(f.start - f.next_pos) < maxInsertSize))
372
+ ) {
373
+ matePromises.push(
374
+ this.index.blocksForRange(
375
+ f.next_refid,
376
+ f.next_pos,
377
+ f.next_pos + 1,
378
+ opts,
379
+ ),
380
+ )
380
381
  }
381
382
  }
382
383
 
383
- // filter out duplicate chunks (the blocks are lists of chunks, blocks are
384
- // concatenated, then filter dup chunks)
385
384
  const map = new Map<string, Chunk>()
386
385
  const res = await Promise.all(matePromises)
387
- for (const m of res.flat()) {
388
- if (!map.has(m.toString())) {
389
- map.set(m.toString(), m)
386
+ for (let i = 0, l = res.length; i < l; i++) {
387
+ const chunks = res[i]!
388
+ for (let j = 0, jl = chunks.length; j < jl; j++) {
389
+ const m = chunks[j]!
390
+ const key = m.toString()
391
+ if (!map.has(key)) {
392
+ map.set(key, m)
393
+ }
390
394
  }
391
395
  }
392
396
 
@@ -397,13 +401,18 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
397
401
  opts,
398
402
  })
399
403
  const mateRecs = [] as T[]
400
- for (const feature of await this.readBamFeatures(
404
+ const features = await this.readBamFeatures(
401
405
  data,
402
406
  cpositions,
403
407
  dpositions,
404
408
  chunk,
405
- )) {
406
- if (unmatedPairs[feature.name] && !readIds[feature.fileOffset]) {
409
+ )
410
+ for (let i = 0, l = features.length; i < l; i++) {
411
+ const feature = features[i]!
412
+ if (
413
+ readNameCounts[feature.name] === 1 &&
414
+ !readIds[feature.fileOffset]
415
+ ) {
407
416
  mateRecs.push(feature)
408
417
  }
409
418
  }
@@ -524,17 +533,19 @@ export default class BamFile<T extends BamRecordLike = BAMFeature> {
524
533
  if (!this.chrToIndex) {
525
534
  throw new Error('Header not yet parsed')
526
535
  }
527
- const regionsWithIds = regions.map(r => {
528
- const refId = this.chrToIndex![r.refName]
529
- if (refId === undefined) {
530
- throw new Error(`Unknown reference name: ${r.refName}`)
531
- }
532
- return {
533
- refId,
534
- start: r.start,
535
- end: r.end,
536
- }
537
- })
538
- return this.index.estimatedBytesForRegions(regionsWithIds, opts)
536
+ return this.index.estimatedBytesForRegions(
537
+ regions.map(r => {
538
+ const refId = this.chrToIndex![r.refName]
539
+ if (refId === undefined) {
540
+ throw new Error(`Unknown reference name: ${r.refName}`)
541
+ }
542
+ return {
543
+ refId,
544
+ start: r.start,
545
+ end: r.end,
546
+ }
547
+ }),
548
+ opts,
549
+ )
539
550
  }
540
551
  }
package/src/htsget.ts CHANGED
@@ -66,7 +66,7 @@ export default class HtsgetFile<
66
66
  this.trackId = args.trackId
67
67
  }
68
68
 
69
- async *streamRecordsForRange(
69
+ async getRecordsForRange(
70
70
  chr: string,
71
71
  min: number,
72
72
  max: number,
@@ -76,66 +76,34 @@ export default class HtsgetFile<
76
76
  const url = `${base}?referenceName=${chr}&start=${min}&end=${max}&format=BAM`
77
77
  const chrId = this.chrToIndex?.[chr]
78
78
  if (chrId === undefined) {
79
- yield []
80
- } else {
81
- const result = await fetch(url, { ...opts })
82
- if (!result.ok) {
83
- throw new Error(
84
- `HTTP ${result.status} fetching ${url}: ${await result.text()}`,
85
- )
86
- }
87
- const data = await result.json()
88
- const uncba = await concat(data.htsget.urls.slice(1), opts)
89
-
90
- yield* this._fetchChunkFeatures(
91
- [
92
- // fake stuff to pretend to be a Chunk
93
- {
94
- buffer: uncba,
95
- _fetchedSize: undefined,
96
- bin: 0,
97
- compareTo() {
98
- return 0
99
- },
100
- toUniqueString() {
101
- return `${chr}_${min}_${max}`
102
- },
103
- fetchedSize() {
104
- return 0
105
- },
106
- minv: {
107
- dataPosition: 0,
108
- blockPosition: 0,
109
- compareTo: () => 0,
110
- },
111
- maxv: {
112
- dataPosition: Number.MAX_SAFE_INTEGER,
113
- blockPosition: 0,
114
- compareTo: () => 0,
115
- },
116
- toString() {
117
- return `${chr}_${min}_${max}`
118
- },
119
- },
120
- ],
121
- chrId,
122
- min,
123
- max,
124
- opts,
125
- )
79
+ return []
126
80
  }
127
- }
128
-
129
- async _readChunk({ chunk }: { chunk: Chunk; opts: BaseOpts }) {
130
- if (!chunk.buffer) {
131
- throw new Error('expected chunk.buffer in htsget')
81
+ const result = await fetch(url, { ...opts })
82
+ if (!result.ok) {
83
+ throw new Error(
84
+ `HTTP ${result.status} fetching ${url}: ${await result.text()}`,
85
+ )
132
86
  }
133
- return {
134
- data: chunk.buffer,
135
- cpositions: [],
136
- dpositions: [],
137
- chunk,
87
+ const data = await result.json()
88
+ const uncba = await concat(data.htsget.urls.slice(1), opts)
89
+
90
+ const allRecords = await this.readBamFeatures(uncba, [], [], {
91
+ minv: { dataPosition: 0, blockPosition: 0 },
92
+ maxv: { dataPosition: 0, blockPosition: 0 },
93
+ } as Chunk)
94
+
95
+ const records: T[] = []
96
+ for (let i = 0, l = allRecords.length; i < l; i++) {
97
+ const feature = allRecords[i]!
98
+ if (feature.ref_id === chrId) {
99
+ if (feature.start >= max) {
100
+ break
101
+ } else if (feature.end >= min) {
102
+ records.push(feature)
103
+ }
104
+ }
138
105
  }
106
+ return records
139
107
  }
140
108
 
141
109
  async getHeader(opts: BaseOpts = {}) {
package/src/index.ts CHANGED
@@ -1,9 +1,9 @@
1
1
  export { default as BAI } from './bai.ts'
2
- export {
3
- type BamRecordClass,
4
- type BamRecordLike,
5
- default as BamFile,
6
- } from './bamFile.ts'
2
+ export { default as BamFile } from './bamFile.ts'
7
3
  export { default as CSI } from './csi.ts'
8
- export { type Bytes, default as BamRecord } from './record.ts'
4
+ export { default as BamRecord } from './record.ts'
9
5
  export { default as HtsgetFile } from './htsget.ts'
6
+
7
+ export type { Bytes } from './record.ts'
8
+ export type { FilterBy, TagFilter } from './util.ts'
9
+ export type { BamRecordClass, BamRecordLike } from './bamFile.ts'