npm - @gmod/bam - Versions diffs - 1.1.17 → 2.0.0 - Mend

@gmod/bam 1.1.17 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

package/src/csi.ts CHANGED Viewed

@@ -1,8 +1,13 @@
-import Long from 'long'
 import { unzip } from '@gmod/bgzf-filehandle'
 import VirtualOffset, { fromBytes } from './virtualOffset'
 import Chunk from './chunk'
-import { longToNumber, abortBreakPoint, optimizeChunks, BaseOpts } from './util'
+import {
+  optimizeChunks,
+  findFirstData,
+  parsePseudoBin,
+  parseNameBytes,
+  BaseOpts,
+} from './util'
 import IndexFile from './indexFile'
@@ -17,102 +22,69 @@ function rshift(num: number, bits: number) {
 }
 export default class CSI extends IndexFile {
-  private maxBinNumber: number
-  private depth: number
-  private minShift: number
-  constructor(args: any) {
-    super(args)
-    this.maxBinNumber = 0
-    this.depth = 0
-    this.minShift = 0
-  }
-  async lineCount(refId: number): Promise<number> {
-    const indexData = await this.parse()
-    if (!indexData) {
-      return -1
-    }
-    const idx = indexData.indices[refId]
-    if (!idx) {
-      return -1
-    }
-    const { stats } = indexData.indices[refId]
-    if (stats) {
-      return stats.lineCount
-    }
-    return -1
+  private maxBinNumber = 0
+  private depth = 0
+  private minShift = 0
+  public setupP?: ReturnType<CSI['_parse']>
+  async lineCount(refId: number, opts?: BaseOpts) {
+    const indexData = await this.parse(opts)
+    return indexData.indices[refId]?.stats?.lineCount || 0
   }
   async indexCov() {
     return []
   }
-  parseAuxData(bytes: Buffer, offset: number, auxLength: number) {
-    if (auxLength < 30) {
-      return {}
-    }
-    const data: { [key: string]: any } = {}
-    data.formatFlags = bytes.readInt32LE(offset)
-    data.coordinateType =
-      data.formatFlags & 0x10000 ? 'zero-based-half-open' : '1-based-closed'
-    data.format = (
+  parseAuxData(bytes: Buffer, offset: number) {
+    const formatFlags = bytes.readInt32LE(offset)
+    const coordinateType =
+      formatFlags & 0x10000 ? 'zero-based-half-open' : '1-based-closed'
+    const format = (
       { 0: 'generic', 1: 'SAM', 2: 'VCF' } as {
         [key: number]: string
       }
-    )[data.formatFlags & 0xf]
-    if (!data.format) {
-      throw new Error(`invalid Tabix preset format flags ${data.formatFlags}`)
+    )[formatFlags & 0xf]
+    if (!format) {
+      throw new Error(`invalid Tabix preset format flags ${formatFlags}`)
     }
-    data.columnNumbers = {
+    const columnNumbers = {
       ref: bytes.readInt32LE(offset + 4),
       start: bytes.readInt32LE(offset + 8),
       end: bytes.readInt32LE(offset + 12),
     }
-    data.metaValue = bytes.readInt32LE(offset + 16)
-    data.metaChar = data.metaValue ? String.fromCharCode(data.metaValue) : ''
-    data.skipLines = bytes.readInt32LE(offset + 20)
+    const metaValue = bytes.readInt32LE(offset + 16)
+    const metaChar = metaValue ? String.fromCharCode(metaValue) : ''
+    const skipLines = bytes.readInt32LE(offset + 20)
     const nameSectionLength = bytes.readInt32LE(offset + 24)
-    Object.assign(
-      data,
-      this._parseNameBytes(
+    return {
+      columnNumbers,
+      coordinateType,
+      metaValue,
+      metaChar,
+      skipLines,
+      format,
+      formatFlags,
+      ...parseNameBytes(
         bytes.subarray(offset + 28, offset + 28 + nameSectionLength),
+        this.renameRefSeq,
       ),
-    )
-    return data
-  }
-  _parseNameBytes(namesBytes: Buffer) {
-    let currRefId = 0
-    let currNameStart = 0
-    const refIdToName = []
-    const refNameToId: { [key: string]: number } = {}
-    for (let i = 0; i < namesBytes.length; i += 1) {
-      if (!namesBytes[i]) {
-        if (currNameStart < i) {
-          let refName = namesBytes.toString('utf8', currNameStart, i)
-          refName = this.renameRefSeq(refName)
-          refIdToName[currRefId] = refName
-          refNameToId[refName] = currRefId
-        }
-        currNameStart = i + 1
-        currRefId += 1
-      }
     }
-    return { refNameToId, refIdToName }
   }
   // fetch and parse the index
   async _parse(opts: { signal?: AbortSignal }) {
-    const data: { [key: string]: any } = { csi: true, maxBlockSize: 1 << 16 }
-    const buffer = (await this.filehandle.readFile(opts)) as Buffer
+    const buffer = await this.filehandle.readFile(opts)
     const bytes = await unzip(buffer)
+    let csiVersion
     // check TBI magic numbers
     if (bytes.readUInt32LE(0) === CSI1_MAGIC) {
-      data.csiVersion = 1
+      csiVersion = 1
     } else if (bytes.readUInt32LE(0) === CSI2_MAGIC) {
-      data.csiVersion = 2
+      csiVersion = 2
     } else {
       throw new Error('Not a CSI file')
       // TODO: do we need to support big-endian CSI files?
@@ -122,59 +94,60 @@ export default class CSI extends IndexFile {
     this.depth = bytes.readInt32LE(8)
     this.maxBinNumber = ((1 << ((this.depth + 1) * 3)) - 1) / 7
     const auxLength = bytes.readInt32LE(12)
-    if (auxLength) {
-      Object.assign(data, this.parseAuxData(bytes, 16, auxLength))
-    }
-    data.refCount = bytes.readInt32LE(16 + auxLength)
+    const aux = auxLength >= 30 ? this.parseAuxData(bytes, 16) : undefined
+    const refCount = bytes.readInt32LE(16 + auxLength)
+    type BinIndex = { [key: string]: Chunk[] }
     // read the indexes for each reference sequence
-    data.indices = new Array(data.refCount)
-    let currOffset = 16 + auxLength + 4
-    for (let i = 0; i < data.refCount; i += 1) {
-      await abortBreakPoint(opts.signal)
+    let curr = 16 + auxLength + 4
+    let firstDataLine: VirtualOffset | undefined
+    const indices = new Array<{
+      binIndex: BinIndex
+      stats?: { lineCount: number }
+    }>(refCount)
+    for (let i = 0; i < refCount; i++) {
       // the binning index
-      const binCount = bytes.readInt32LE(currOffset)
-      currOffset += 4
+      const binCount = bytes.readInt32LE(curr)
+      curr += 4
       const binIndex: { [key: string]: Chunk[] } = {}
       let stats // < provided by parsing a pseudo-bin, if present
-      for (let j = 0; j < binCount; j += 1) {
-        const bin = bytes.readUInt32LE(currOffset)
+      for (let j = 0; j < binCount; j++) {
+        const bin = bytes.readUInt32LE(curr)
+        curr += 4
         if (bin > this.maxBinNumber) {
-          // this is a fake bin that actually has stats information
-          // about the reference sequence in it
-          stats = this.parsePseudoBin(bytes, currOffset + 4)
-          currOffset += 4 + 8 + 4 + 16 + 16
+          stats = parsePseudoBin(bytes, curr + 28)
+          curr += 28 + 16
         } else {
-          const loffset = fromBytes(bytes, currOffset + 4)
-          this._findFirstData(data, loffset)
-          const chunkCount = bytes.readInt32LE(currOffset + 12)
-          currOffset += 16
-          const chunks = new Array(chunkCount)
+          firstDataLine = findFirstData(firstDataLine, fromBytes(bytes, curr))
+          curr += 8
+          const chunkCount = bytes.readInt32LE(curr)
+          curr += 4
+          const chunks = new Array<Chunk>(chunkCount)
           for (let k = 0; k < chunkCount; k += 1) {
-            const u = fromBytes(bytes, currOffset)
-            const v = fromBytes(bytes, currOffset + 8)
-            currOffset += 16
-            // this._findFirstData(data, u)
+            const u = fromBytes(bytes, curr)
+            curr += 8
+            const v = fromBytes(bytes, curr)
+            curr += 8
+            firstDataLine = findFirstData(firstDataLine, u)
             chunks[k] = new Chunk(u, v, bin)
           }
           binIndex[bin] = chunks
         }
       }
-      data.indices[i] = { binIndex, stats }
+      indices[i] = { binIndex, stats }
     }
-    return data
-  }
-  parsePseudoBin(bytes: Buffer, offset: number) {
-    const lineCount = longToNumber(
-      Long.fromBytesLE(
-        Array.prototype.slice.call(bytes, offset + 28, offset + 36),
-        true,
-      ),
-    )
-    return { lineCount }
+    return {
+      csiVersion,
+      firstDataLine,
+      indices,
+      refCount,
+      csi: true,
+      maxBlockSize: 1 << 16,
+      ...aux,
+    }
   }
   async blocksForRange(
@@ -192,17 +165,20 @@ export default class CSI extends IndexFile {
     if (!ba) {
       return []
     }
+    const overlappingBins = this.reg2bins(min, max)
-    const overlappingBins = this.reg2bins(min, max) // List of bin #s that overlap min, max
-    const chunks = []
+    if (overlappingBins.length === 0) {
+      return []
+    }
+    const chunks = []
     // Find chunks in overlapping bins.  Leaf bins (< 4681) are not pruned
     for (const [start, end] of overlappingBins) {
       for (let bin = start; bin <= end; bin++) {
         if (ba.binIndex[bin]) {
           const binChunks = ba.binIndex[bin]
-          for (let c = 0; c < binChunks.length; ++c) {
-            chunks.push(new Chunk(binChunks[c].minv, binChunks[c].maxv, bin))
+          for (const c of binChunks) {
+            chunks.push(c)
           }
         }
       }
@@ -240,4 +216,19 @@ export default class CSI extends IndexFile {
     }
     return bins
   }
+  async parse(opts: BaseOpts = {}) {
+    if (!this.setupP) {
+      this.setupP = this._parse(opts).catch(e => {
+        this.setupP = undefined
+        throw e
+      })
+    }
+    return this.setupP
+  }
+  async hasRefSeq(seqId: number, opts: BaseOpts = {}) {
+    const header = await this.parse(opts)
+    return !!header.indices[seqId]?.binIndex
+  }
 }

package/src/htsget.ts CHANGED Viewed

@@ -1,36 +1,33 @@
+import { unzip } from '@gmod/bgzf-filehandle'
+import { Buffer } from 'buffer'
 import { BaseOpts, BamOpts } from './util'
 import BamFile, { BAM_MAGIC } from './bamFile'
-import 'cross-fetch/polyfill'
 import Chunk from './chunk'
-import { unzip } from '@gmod/bgzf-filehandle'
 import { parseHeaderText } from './sam'
-interface HeaderLine {
-  tag: string
-  value: string
-}
 interface HtsgetChunk {
   url: string
   headers?: Record<string, string>
 }
-async function concat(arr: { url: string }[], opts: Record<string, any>) {
+async function concat(arr: HtsgetChunk[], opts?: Record<string, any>) {
   const res = await Promise.all(
-    arr.map(async (chunk: HtsgetChunk) => {
+    arr.map(async chunk => {
       const { url, headers } = chunk
       if (url.startsWith('data:')) {
         return Buffer.from(url.split(',')[1], 'base64')
       } else {
         //remove referer header, it is not even allowed to be specified
-        //@ts-ignore
+        // @ts-expect-error
         //eslint-disable-next-line @typescript-eslint/no-unused-vars
         const { referer, ...rest } = headers
         const res = await fetch(url, {
           ...opts,
-          headers: { ...opts.headers, ...rest },
+          headers: { ...opts?.headers, ...rest },
         })
         if (!res.ok) {
-          throw new Error(`Failed to fetch ${res.statusText}`)
+          throw new Error(
+            `HTTP ${res.status} fetching ${url}: ${await res.text()}`,
+          )
         }
         return Buffer.from(await res.arrayBuffer())
       }
@@ -46,8 +43,7 @@ export default class HtsgetFile extends BamFile {
   private trackId: string
   constructor(args: { trackId: string; baseUrl: string }) {
-    // @ts-ignore override bam defaults
-    super({ bamFilehandle: '?', baiFilehandle: '?' })
+    super({ htsget: true })
     this.baseUrl = args.baseUrl
     this.trackId = args.trackId
   }
@@ -56,54 +52,76 @@ export default class HtsgetFile extends BamFile {
     chr: string,
     min: number,
     max: number,
-    opts: BamOpts = {
-      viewAsPairs: false,
-      pairAcrossChr: false,
-      maxInsertSize: 200000,
-    },
+    opts?: BamOpts,
   ) {
     const base = `${this.baseUrl}/${this.trackId}`
     const url = `${base}?referenceName=${chr}&start=${min}&end=${max}&format=BAM`
-    const chrId = this.chrToIndex && this.chrToIndex[chr]
-    const result = await fetch(url, { ...opts })
-    if (!result.ok) {
-      throw new Error(result.statusText)
-    }
-    const data = await result.json()
-    const uncba = await concat(data.htsget.urls.slice(1), opts)
-    const chunk = {
-      buffer: uncba,
-      chunk: { minv: { dataPosition: 0 } },
-      toString() {
-        return `${chr}_${min}_${max}`
-      },
-    }
+    const chrId = this.chrToIndex?.[chr]
+    if (chrId === undefined) {
+      yield []
+    } else {
+      const result = await fetch(url, { ...opts })
+      if (!result.ok) {
+        throw new Error(
+          `HTTP ${result.status} fetching ${url}: ${await result.text()}`,
+        )
+      }
+      const data = await result.json()
+      const uncba = await concat(data.htsget.urls.slice(1), opts)
-    yield* this._fetchChunkFeatures(
-      // @ts-ignore
-      [chunk],
-      chrId,
-      min,
-      max,
-      opts,
-    )
+      yield* this._fetchChunkFeatures(
+        [
+          // fake stuff to pretend to be a Chunk
+          {
+            buffer: uncba,
+            _fetchedSize: undefined,
+            bin: 0,
+            compareTo() {
+              return 0
+            },
+            toUniqueString() {
+              return `${chr}_${min}_${max}`
+            },
+            fetchedSize() {
+              return 0
+            },
+            minv: {
+              dataPosition: 0,
+              blockPosition: 0,
+              compareTo: () => 0,
+            },
+            maxv: {
+              dataPosition: Number.MAX_SAFE_INTEGER,
+              blockPosition: 0,
+              compareTo: () => 0,
+            },
+            toString() {
+              return `${chr}_${min}_${max}`
+            },
+          },
+        ],
+        chrId,
+        min,
+        max,
+        opts,
+      )
+    }
   }
-  //@ts-ignore
-  async _readChunk(params: {
-    chunk: { buffer: Buffer; chunk: Chunk }
-    opts: BaseOpts
-  }) {
-    const { chunk } = params
-    const { buffer, chunk: c2 } = chunk
-    return { data: buffer, cpositions: null, dpositions: null, chunk: c2 }
+  async _readChunk({ chunk }: { chunk: Chunk; opts: BaseOpts }) {
+    if (!chunk.buffer) {
+      throw new Error('expected chunk.buffer in htsget')
+    }
+    return { data: chunk.buffer, cpositions: [], dpositions: [], chunk }
   }
   async getHeader(opts: BaseOpts = {}) {
     const url = `${this.baseUrl}/${this.trackId}?referenceName=na&class=header`
     const result = await fetch(url, opts)
     if (!result.ok) {
-      throw new Error(`Failed to fetch ${result.statusText}`)
+      throw new Error(
+        `HTTP ${result.status} fetching ${url}: ${await result.text()}`,
+      )
     }
     const data = await result.json()
     const uncba = await concat(data.htsget.urls, opts)
@@ -112,25 +130,27 @@ export default class HtsgetFile extends BamFile {
       throw new Error('Not a BAM file')
     }
     const headLen = uncba.readInt32LE(4)
     const headerText = uncba.toString('utf8', 8, 8 + headLen)
     const samHeader = parseHeaderText(headerText)
     // use the @SQ lines in the header to figure out the
     // mapping between ref ref ID numbers and names
-    const idToName: string[] = []
+    const idToName: { refName: string; length: number }[] = []
     const nameToId: Record<string, number> = {}
-    const sqLines = samHeader.filter((l: { tag: string }) => l.tag === 'SQ')
-    sqLines.forEach((sqLine: { data: HeaderLine[] }, refId: number) => {
-      sqLine.data.forEach((item: HeaderLine) => {
+    const sqLines = samHeader.filter(l => l.tag === 'SQ')
+    for (const [refId, sqLine] of sqLines.entries()) {
+      let refName = ''
+      let length = 0
+      for (const item of sqLine.data) {
         if (item.tag === 'SN') {
-          // this is the ref name
-          const refName = item.value
-          nameToId[refName] = refId
-          idToName[refId] = refName
+          refName = item.value
+        } else if (item.tag === 'LN') {
+          length = +item.value
         }
-      })
-    })
+      }
+      nameToId[refName] = refId
+      idToName[refId] = { refName, length }
+    }
     this.chrToIndex = nameToId
     this.indexToChr = idToName
     return samHeader

package/src/index.ts CHANGED Viewed

@@ -1,7 +1,5 @@
-import BAI from './bai'
-import CSI from './csi'
-import BamFile from './bamFile'
-import HtsgetFile from './htsget'
-import BamRecord from './record'
-export { BAI, CSI, BamFile, BamRecord, HtsgetFile }
+export { default as BAI } from './bai'
+export { default as BamFile } from './bamFile'
+export { default as CSI } from './csi'
+export { default as BamRecord } from './record'
+export { default as HtsgetFile } from './htsget'

package/src/indexFile.ts CHANGED Viewed

@@ -1,12 +1,10 @@
 import { GenericFilehandle } from 'generic-filehandle'
-import VirtualOffset from './virtualOffset'
 import Chunk from './chunk'
 import { BaseOpts } from './util'
 export default abstract class IndexFile {
   public filehandle: GenericFilehandle
   public renameRefSeq: (s: string) => string
-  public setupP?: Promise<any>
   /**
    * @param {filehandle} filehandle
@@ -23,7 +21,6 @@ export default abstract class IndexFile {
     this.renameRefSeq = renameRefSeq
   }
   public abstract lineCount(refId: number): Promise<number>
-  protected abstract _parse(opts?: BaseOpts): Promise<any>
   public abstract indexCov(
     refId: number,
     start?: number,
@@ -36,28 +33,4 @@ export default abstract class IndexFile {
     end: number,
     opts?: BaseOpts,
   ): Promise<Chunk[]>
-  _findFirstData(data: any, virtualOffset: VirtualOffset) {
-    const currentFdl = data.firstDataLine
-    if (currentFdl) {
-      data.firstDataLine =
-        currentFdl.compareTo(virtualOffset) > 0 ? virtualOffset : currentFdl
-    } else {
-      data.firstDataLine = virtualOffset
-    }
-  }
-  async parse(opts: BaseOpts = {}) {
-    if (!this.setupP) {
-      this.setupP = this._parse(opts).catch(e => {
-        this.setupP = undefined
-        throw e
-      })
-    }
-    return this.setupP
-  }
-  async hasRefSeq(seqId: number, opts: BaseOpts = {}) {
-    return !!((await this.parse(opts)).indices[seqId] || {}).binIndex
-  }
 }

package/src/nullIndex.ts ADDED Viewed

@@ -0,0 +1,18 @@
+import IndexFile from './indexFile'
+export default class NullIndex extends IndexFile {
+  public lineCount(): Promise<any> {
+    throw new Error('never called')
+  }
+  protected _parse(): Promise<any> {
+    throw new Error('never called')
+  }
+  public async indexCov(): Promise<any> {
+    throw new Error('never called')
+  }
+  public blocksForRange(): Promise<any> {
+    throw new Error('never called')
+  }
+}