npm - @gmod/bam - Versions diffs - 4.0.0 → 5.0.0 - Mend

@gmod/bam 4.0.0 → 5.0.0

This diff shows the differences between publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

package/src/bamFile.ts CHANGED Viewed

@@ -1,7 +1,6 @@
-import { Buffer } from 'buffer'
 import crc32 from 'crc/crc32'
 import { unzip, unzipChunkSlice } from '@gmod/bgzf-filehandle'
-import { LocalFile, RemoteFile, GenericFilehandle } from 'generic-filehandle'
+import { LocalFile, RemoteFile, GenericFilehandle } from 'generic-filehandle2'
 import AbortablePromiseCache from '@gmod/abortable-promise-cache'
 import QuickLRU from 'quick-lru'
@@ -148,23 +147,21 @@ export default class BamFile {
     let buffer
     if (ret) {
       const s = ret + blockLen
-      const res = await this.bam.read(Buffer.alloc(s), 0, s, 0, opts)
-      if (!res.bytesRead) {
-        throw new Error('Error reading header')
-      }
-      buffer = res.buffer.subarray(0, Math.min(res.bytesRead, ret))
+      buffer = await this.bam.read(s, 0)
     } else {
       buffer = await this.bam.readFile(opts)
     }
     const uncba = await unzip(buffer)
+    const dataView = new DataView(uncba.buffer)
-    if (uncba.readInt32LE(0) !== BAM_MAGIC) {
+    if (dataView.getInt32(0, true) !== BAM_MAGIC) {
       throw new Error('Not a BAM file')
     }
-    const headLen = uncba.readInt32LE(4)
+    const headLen = dataView.getInt32(4, true)
-    this.header = uncba.toString('utf8', 8, 8 + headLen)
+    const decoder = new TextDecoder('utf8')
+    this.header = decoder.decode(uncba.subarray(8, 8 + headLen))
     const { chrToIndex, indexToChr } = await this._readRefSeqs(
       headLen + 8,
       65535,
@@ -204,30 +201,21 @@ export default class BamFile {
     if (start > refSeqBytes) {
       return this._readRefSeqs(start, refSeqBytes * 2, opts)
     }
-    const size = refSeqBytes + blockLen
-    const { bytesRead, buffer } = await this.bam.read(
-      Buffer.alloc(size),
-      0,
-      refSeqBytes,
-      0,
-      opts,
-    )
-    if (!bytesRead) {
-      throw new Error('Error reading refseqs from header')
-    }
-    const uncba = await unzip(
-      buffer.subarray(0, Math.min(bytesRead, refSeqBytes)),
-    )
-    const nRef = uncba.readInt32LE(start)
+    // const size = refSeqBytes + blockLen <-- use this?
+    const buffer = await this.bam.read(refSeqBytes, 0, opts)
+    const uncba = await unzip(buffer)
+    const dataView = new DataView(uncba.buffer)
+    const nRef = dataView.getInt32(start, true)
     let p = start + 4
     const chrToIndex: Record<string, number> = {}
     const indexToChr: { refName: string; length: number }[] = []
+    const decoder = new TextDecoder('utf8')
     for (let i = 0; i < nRef; i += 1) {
-      const lName = uncba.readInt32LE(p)
+      const lName = dataView.getInt32(p, true)
       const refName = this.renameRefSeq(
-        uncba.toString('utf8', p + 4, p + 4 + lName - 1),
+        decoder.decode(uncba.subarray(p + 4, p + 4 + lName - 1)),
       )
-      const lRef = uncba.readInt32LE(p + lName + 4)
+      const lRef = dataView.getInt32(p + lName + 4, true)
       chrToIndex[refName] = i
       indexToChr.push({ refName, length: lRef })
@@ -388,15 +376,7 @@ export default class BamFile {
   }
   async _readRegion(position: number, size: number, opts: BaseOpts = {}) {
-    const { bytesRead, buffer } = await this.bam.read(
-      Buffer.alloc(size),
-      0,
-      size,
-      position,
-      opts,
-    )
-    return buffer.subarray(0, Math.min(bytesRead, size))
+    return this.bam.read(size, position, opts)
   }
   async _readChunk({ chunk, opts }: { chunk: Chunk; opts: BaseOpts }) {
@@ -415,7 +395,7 @@ export default class BamFile {
   }
   async readBamFeatures(
-    ba: Buffer,
+    ba: Uint8Array,
     cpositions: number[],
     dpositions: number[],
     chunk: Chunk,
@@ -425,8 +405,9 @@ export default class BamFile {
     let pos = 0
     let last = +Date.now()
+    const dataView = new DataView(ba.buffer)
     while (blockStart + 4 < ba.length) {
-      const blockSize = ba.readInt32LE(blockStart)
+      const blockSize = dataView.getInt32(blockStart, true)
       const blockEnd = blockStart + 4 + blockSize - 1
       // increment position to the current decompressed status
@@ -471,8 +452,8 @@ export default class BamFile {
                 chunk.minv.dataPosition +
                 1
               : // must be slice, not subarray for buffer polyfill on web
-                // eslint-disable-next-line @typescript-eslint/no-deprecated
-                crc32.signed(ba.slice(blockStart, blockEnd)),
+                // @ts-expect-error
+                crc32.signed(ba.subarray(blockStart, blockEnd)),
         })
         sink.push(feature)

package/src/chunk.ts CHANGED Viewed

@@ -2,7 +2,7 @@ import VirtualOffset from './virtualOffset'
 // little class representing a chunk in the index
 export default class Chunk {
-  public buffer?: Buffer
+  public buffer?: Uint8Array
   constructor(
     public minv: VirtualOffset,

package/src/csi.ts CHANGED Viewed

@@ -37,8 +37,9 @@ export default class CSI extends IndexFile {
     return []
   }
-  parseAuxData(bytes: Buffer, offset: number) {
-    const formatFlags = bytes.readInt32LE(offset)
+  parseAuxData(bytes: Uint8Array, offset: number) {
+    const dataView = new DataView(bytes.buffer)
+    const formatFlags = dataView.getUint32(offset, true)
     const coordinateType =
       formatFlags & 0x10000 ? 'zero-based-half-open' : '1-based-closed'
     const format = (
@@ -48,14 +49,14 @@ export default class CSI extends IndexFile {
       throw new Error(`invalid Tabix preset format flags ${formatFlags}`)
     }
     const columnNumbers = {
-      ref: bytes.readInt32LE(offset + 4),
-      start: bytes.readInt32LE(offset + 8),
-      end: bytes.readInt32LE(offset + 12),
+      ref: dataView.getInt32(offset + 4, true),
+      start: dataView.getInt32(offset + 8, true),
+      end: dataView.getInt32(offset + 12, true),
     }
-    const metaValue = bytes.readInt32LE(offset + 16)
+    const metaValue = dataView.getInt32(offset + 16, true)
     const metaChar = metaValue ? String.fromCharCode(metaValue) : ''
-    const skipLines = bytes.readInt32LE(offset + 20)
-    const nameSectionLength = bytes.readInt32LE(offset + 24)
+    const skipLines = dataView.getInt32(offset + 20, true)
+    const nameSectionLength = dataView.getInt32(offset + 24, true)
     return {
       columnNumbers,
@@ -77,23 +78,25 @@ export default class CSI extends IndexFile {
     const buffer = await this.filehandle.readFile(opts)
     const bytes = await unzip(buffer)
+    const dataView = new DataView(bytes.buffer)
     let csiVersion
-    // check TBI magic numbers
-    if (bytes.readUInt32LE(0) === CSI1_MAGIC) {
+    const magic = dataView.getUint32(0, true)
+    if (magic === CSI1_MAGIC) {
       csiVersion = 1
-    } else if (bytes.readUInt32LE(0) === CSI2_MAGIC) {
+    } else if (magic === CSI2_MAGIC) {
       csiVersion = 2
     } else {
-      throw new Error('Not a CSI file')
+      throw new Error(`Not a CSI file ${magic}`)
       // TODO: do we need to support big-endian CSI files?
     }
-    this.minShift = bytes.readInt32LE(4)
-    this.depth = bytes.readInt32LE(8)
+    this.minShift = dataView.getInt32(4, true)
+    this.depth = dataView.getInt32(8, true)
     this.maxBinNumber = ((1 << ((this.depth + 1) * 3)) - 1) / 7
-    const auxLength = bytes.readInt32LE(12)
+    const auxLength = dataView.getInt32(12, true)
     const aux = auxLength >= 30 ? this.parseAuxData(bytes, 16) : undefined
-    const refCount = bytes.readInt32LE(16 + auxLength)
+    const refCount = dataView.getInt32(16 + auxLength, true)
     type BinIndex = Record<string, Chunk[]>
@@ -106,12 +109,12 @@ export default class CSI extends IndexFile {
     }>(refCount)
     for (let i = 0; i < refCount; i++) {
       // the binning index
-      const binCount = bytes.readInt32LE(curr)
+      const binCount = dataView.getInt32(curr, true)
       curr += 4
       const binIndex: Record<string, Chunk[]> = {}
       let stats // < provided by parsing a pseudo-bin, if present
       for (let j = 0; j < binCount; j++) {
-        const bin = bytes.readUInt32LE(curr)
+        const bin = dataView.getUint32(curr, true)
         curr += 4
         if (bin > this.maxBinNumber) {
           stats = parsePseudoBin(bytes, curr + 28)
@@ -119,7 +122,7 @@ export default class CSI extends IndexFile {
         } else {
           firstDataLine = findFirstData(firstDataLine, fromBytes(bytes, curr))
           curr += 8
-          const chunkCount = bytes.readInt32LE(curr)
+          const chunkCount = dataView.getInt32(curr, true)
           curr += 4
           const chunks = new Array<Chunk>(chunkCount)
           for (let k = 0; k < chunkCount; k += 1) {

package/src/htsget.ts CHANGED Viewed

@@ -1,6 +1,5 @@
 import { unzip } from '@gmod/bgzf-filehandle'
-import { Buffer } from 'buffer'
-import { BaseOpts, BamOpts } from './util'
+import { BaseOpts, BamOpts, concatUint8Array } from './util'
 import BamFile, { BAM_MAGIC } from './bamFile'
 import Chunk from './chunk'
 import { parseHeaderText } from './sam'
@@ -14,7 +13,8 @@ async function concat(arr: HtsgetChunk[], opts?: Record<string, any>) {
     arr.map(async chunk => {
       const { url, headers } = chunk
       if (url.startsWith('data:')) {
-        return Buffer.from(url.split(',')[1], 'base64')
+        // @ts-expect-error
+        return Uint8Array.fromBase64(url.split(',')[1], 'base64') as Uint8Array
       } else {
         //remove referer header, it is not even allowed to be specified
         // @ts-expect-error
@@ -29,12 +29,12 @@ async function concat(arr: HtsgetChunk[], opts?: Record<string, any>) {
             `HTTP ${res.status} fetching ${url}: ${await res.text()}`,
           )
         }
-        return Buffer.from(await res.arrayBuffer())
+        return new Uint8Array(await res.arrayBuffer())
       }
     }),
   )
-  return Buffer.concat(await Promise.all(res.map(elt => unzip(elt))))
+  return concatUint8Array(await Promise.all(res.map(elt => unzip(elt))))
 }
 export default class HtsgetFile extends BamFile {
@@ -108,11 +108,17 @@ export default class HtsgetFile extends BamFile {
     }
   }
+  // @ts-expect-error
   async _readChunk({ chunk }: { chunk: Chunk; opts: BaseOpts }) {
     if (!chunk.buffer) {
       throw new Error('expected chunk.buffer in htsget')
     }
-    return { data: chunk.buffer, cpositions: [], dpositions: [], chunk }
+    return {
+      data: chunk.buffer,
+      cpositions: [],
+      dpositions: [],
+      chunk,
+    }
   }
   async getHeader(opts: BaseOpts = {}) {
@@ -125,12 +131,15 @@ export default class HtsgetFile extends BamFile {
     }
     const data = await result.json()
     const uncba = await concat(data.htsget.urls, opts)
+    const dataView = new DataView(uncba.buffer)
-    if (uncba.readInt32LE(0) !== BAM_MAGIC) {
+    if (dataView.getInt32(0, true) !== BAM_MAGIC) {
       throw new Error('Not a BAM file')
     }
-    const headLen = uncba.readInt32LE(4)
-    const headerText = uncba.toString('utf8', 8, 8 + headLen)
+    const headLen = dataView.getInt32(4, true)
+    const decoder = new TextDecoder('utf8')
+    const headerText = decoder.decode(uncba.subarray(8, 8 + headLen))
     const samHeader = parseHeaderText(headerText)
     // use the @SQ lines in the header to figure out the

package/src/indexFile.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { GenericFilehandle } from 'generic-filehandle'
+import { GenericFilehandle } from 'generic-filehandle2'
 import Chunk from './chunk'
 import { BaseOpts } from './util'

package/src/record.ts CHANGED Viewed

@@ -1,5 +1,4 @@
 import Constants from './constants'
-import type { Buffer } from 'buffer'
 const SEQRET_DECODER = '=ACMGRSVTWYHKDBN'.split('')
 const CIGAR_DECODER = 'MIDNSHP=X???????'.split('')
@@ -7,15 +6,18 @@ const CIGAR_DECODER = 'MIDNSHP=X???????'.split('')
 interface Bytes {
   start: number
   end: number
-  byteArray: Buffer
+  byteArray: Uint8Array
 }
 export default class BamRecord {
   public fileOffset: number
   private bytes: Bytes
+  #dataView: DataView
   constructor(args: { bytes: Bytes; fileOffset: number }) {
     this.bytes = args.bytes
     this.fileOffset = args.fileOffset
+    this.#dataView = new DataView(this.bytes.byteArray.buffer)
   }
   get byteArray() {
@@ -24,15 +26,15 @@ export default class BamRecord {
   get flags() {
     return (
-      (this.byteArray.readInt32LE(this.bytes.start + 16) & 0xffff0000) >> 16
+      (this.#dataView.getInt32(this.bytes.start + 16, true) & 0xffff0000) >> 16
     )
   }
   get ref_id() {
-    return this.byteArray.readInt32LE(this.bytes.start + 4)
+    return this.#dataView.getInt32(this.bytes.start + 4, true)
   }
   get start() {
-    return this.byteArray.readInt32LE(this.bytes.start + 8)
+    return this.#dataView.getInt32(this.bytes.start + 8, true)
   }
   get end() {
@@ -73,15 +75,14 @@ export default class BamRecord {
     return this.bytes.start + 36
   }
   get name() {
-    return this.byteArray.toString(
-      'ascii',
-      this.b0,
-      this.b0 + this.read_name_length - 1,
-    )
+    let str = ''
+    for (let i = 0; i < this.read_name_length - 1; i++) {
+      str += String.fromCharCode(this.byteArray[this.b0 + i])
+    }
+    return str
   }
   get tags() {
-    const { byteArray } = this.bytes
     let p =
       this.b0 +
       this.read_name_length +
@@ -92,38 +93,38 @@ export default class BamRecord {
     const blockEnd = this.bytes.end
     const tags = {} as Record<string, unknown>
     while (p < blockEnd) {
-      const tag = String.fromCharCode(byteArray[p], byteArray[p + 1])
-      const type = String.fromCharCode(byteArray[p + 2])
+      const tag = String.fromCharCode(this.byteArray[p], this.byteArray[p + 1])
+      const type = String.fromCharCode(this.byteArray[p + 2])
       p += 3
       if (type === 'A') {
-        tags[tag] = String.fromCharCode(byteArray[p])
+        tags[tag] = String.fromCharCode(this.byteArray[p])
         p += 1
       } else if (type === 'i') {
-        tags[tag] = byteArray.readInt32LE(p)
+        tags[tag] = this.#dataView.getInt32(p, true)
         p += 4
       } else if (type === 'I') {
-        tags[tag] = byteArray.readUInt32LE(p)
+        tags[tag] = this.#dataView.getUint32(p, true)
         p += 4
       } else if (type === 'c') {
-        tags[tag] = byteArray.readInt8(p)
+        tags[tag] = this.#dataView.getInt8(p)
         p += 1
       } else if (type === 'C') {
-        tags[tag] = byteArray.readUInt8(p)
+        tags[tag] = this.#dataView.getUint8(p)
         p += 1
       } else if (type === 's') {
-        tags[tag] = byteArray.readInt16LE(p)
+        tags[tag] = this.#dataView.getInt16(p, true)
         p += 2
       } else if (type === 'S') {
-        tags[tag] = byteArray.readUInt16LE(p)
+        tags[tag] = this.#dataView.getUint16(p, true)
         p += 2
       } else if (type === 'f') {
-        tags[tag] = byteArray.readFloatLE(p)
+        tags[tag] = this.#dataView.getFloat32(p, true)
         p += 4
       } else if (type === 'Z' || type === 'H') {
         const value = []
         while (p <= blockEnd) {
-          const cc = byteArray[p++]
+          const cc = this.byteArray[p++]
           if (cc !== 0) {
             value.push(String.fromCharCode(cc))
           } else {
@@ -132,15 +133,15 @@ export default class BamRecord {
         }
         tags[tag] = value.join('')
       } else if (type === 'B') {
-        const cc = byteArray[p++]
+        const cc = this.byteArray[p++]
         const Btype = String.fromCharCode(cc)
-        const limit = byteArray.readInt32LE(p)
+        const limit = this.#dataView.getInt32(p, true)
         p += 4
         if (Btype === 'i') {
           if (tag === 'CG') {
             const value = []
             for (let k = 0; k < limit; k++) {
-              const cigop = byteArray.readInt32LE(p)
+              const cigop = this.#dataView.getInt32(p, true)
               const lop = cigop >> 4
               const op = CIGAR_DECODER[cigop & 0xf]
               value.push(lop + op)
@@ -150,7 +151,7 @@ export default class BamRecord {
           } else {
             const value = []
             for (let k = 0; k < limit; k++) {
-              value.push(byteArray.readInt32LE(p))
+              value.push(this.#dataView.getInt32(p, true))
               p += 4
             }
             tags[tag] = value
@@ -159,7 +160,7 @@ export default class BamRecord {
           if (tag === 'CG') {
             const value = []
             for (let k = 0; k < limit; k++) {
-              const cigop = byteArray.readUInt32LE(p)
+              const cigop = this.#dataView.getUint32(p, true)
               const lop = cigop >> 4
               const op = CIGAR_DECODER[cigop & 0xf]
               value.push(lop + op)
@@ -169,7 +170,7 @@ export default class BamRecord {
           } else {
             const value = []
             for (let k = 0; k < limit; k++) {
-              value.push(byteArray.readUInt32LE(p))
+              value.push(this.#dataView.getUint32(p, true))
               p += 4
             }
             tags[tag] = value
@@ -177,35 +178,35 @@ export default class BamRecord {
         } else if (Btype === 's') {
           const value = []
           for (let k = 0; k < limit; k++) {
-            value.push(byteArray.readInt16LE(p))
+            value.push(this.#dataView.getInt16(p, true))
             p += 2
           }
           tags[tag] = value
         } else if (Btype === 'S') {
           const value = []
           for (let k = 0; k < limit; k++) {
-            value.push(byteArray.readUInt16LE(p))
+            value.push(this.#dataView.getUint16(p, true))
             p += 2
           }
           tags[tag] = value
         } else if (Btype === 'c') {
           const value = []
           for (let k = 0; k < limit; k++) {
-            value.push(byteArray.readInt8(p))
+            value.push(this.#dataView.getInt8(p))
             p += 1
           }
           tags[tag] = value
         } else if (Btype === 'C') {
           const value = []
           for (let k = 0; k < limit; k++) {
-            value.push(byteArray.readUInt8(p))
+            value.push(this.#dataView.getUint8(p))
             p += 1
           }
           tags[tag] = value
         } else if (Btype === 'f') {
           const value = []
           for (let k = 0; k < limit; k++) {
-            value.push(byteArray.readFloatLE(p))
+            value.push(this.#dataView.getFloat32(p, true))
             p += 4
           }
           tags[tag] = value
@@ -295,14 +296,14 @@ export default class BamRecord {
     // check for CG tag by inspecting whether the CIGAR field contains a clip
     // that consumes entire seqLen
-    let cigop = this.byteArray.readInt32LE(p)
+    let cigop = this.#dataView.getInt32(p, true)
     let lop = cigop >> 4
     let op = CIGAR_DECODER[cigop & 0xf]
     if (op === 'S' && lop === this.seq_length) {
       // if there is a CG the second CIGAR field will be a N tag the represents
       // the length on ref
       p += 4
-      cigop = this.byteArray.readInt32LE(p)
+      cigop = this.#dataView.getInt32(p, true)
       lop = cigop >> 4
       op = CIGAR_DECODER[cigop & 0xf]
       if (op !== 'N') {
@@ -315,7 +316,7 @@ export default class BamRecord {
     } else {
       let lref = 0
       for (let c = 0; c < numCigarOps; ++c) {
-        cigop = this.byteArray.readInt32LE(p)
+        cigop = this.#dataView.getInt32(p, true)
         lop = cigop >> 4
         op = CIGAR_DECODER[cigop & 0xf]
         CIGAR.push(lop + op)
@@ -408,31 +409,31 @@ export default class BamRecord {
       }
       return tmp.join('')
     }
-    return ''
+    return undefined
   }
   get bin_mq_nl() {
-    return this.byteArray.readInt32LE(this.bytes.start + 12)
+    return this.#dataView.getInt32(this.bytes.start + 12, true)
   }
   get flag_nc() {
-    return this.byteArray.readInt32LE(this.bytes.start + 16)
+    return this.#dataView.getInt32(this.bytes.start + 16, true)
   }
   get seq_length() {
-    return this.byteArray.readInt32LE(this.bytes.start + 20)
+    return this.#dataView.getInt32(this.bytes.start + 20, true)
   }
   get next_refid() {
-    return this.byteArray.readInt32LE(this.bytes.start + 24)
+    return this.#dataView.getInt32(this.bytes.start + 24, true)
   }
   get next_pos() {
-    return this.byteArray.readInt32LE(this.bytes.start + 28)
+    return this.#dataView.getInt32(this.bytes.start + 28, true)
   }
   get template_length() {
-    return this.byteArray.readInt32LE(this.bytes.start + 32)
+    return this.#dataView.getInt32(this.bytes.start + 32, true)
   }
   toJSON() {

package/src/util.ts CHANGED Viewed

@@ -102,7 +102,7 @@ export function optimizeChunks(chunks: Chunk[], lowest?: VirtualOffset) {
   return mergedChunks
 }
-export function parsePseudoBin(bytes: Buffer, offset: number) {
+export function parsePseudoBin(bytes: Uint8Array, offset: number) {
   return {
     lineCount: Long.fromBytesLE(
       Array.prototype.slice.call(bytes, offset, offset + 8),
@@ -123,7 +123,7 @@ export function findFirstData(
 }
 export function parseNameBytes(
-  namesBytes: Buffer,
+  namesBytes: Uint8Array,
   renameRefSeq: (arg: string) => string = s => s,
 ) {
   let currRefId = 0
@@ -133,7 +133,10 @@ export function parseNameBytes(
   for (let i = 0; i < namesBytes.length; i += 1) {
     if (!namesBytes[i]) {
       if (currNameStart < i) {
-        let refName = namesBytes.toString('utf8', currNameStart, i)
+        let refName = ''
+        for (let j = currNameStart; j < i; j++) {
+          refName += String.fromCharCode(namesBytes[j])
+        }
         refName = renameRefSeq(refName)
         refIdToName[currRefId] = refName
         refNameToId[refName] = currRefId
@@ -144,3 +147,20 @@ export function parseNameBytes(
   }
   return { refNameToId, refIdToName }
 }
+export function sum(array: Uint8Array[]) {
+  let sum = 0
+  for (const entry of array) {
+    sum += entry.length
+  }
+  return sum
+}
+export function concatUint8Array(args: Uint8Array[]) {
+  const mergedArray = new Uint8Array(sum(args))
+  let offset = 0
+  for (const entry of args) {
+    mergedArray.set(entry, offset)
+    offset += entry.length
+  }
+  return mergedArray
+}

package/src/virtualOffset.ts CHANGED Viewed

@@ -30,7 +30,7 @@ export default class VirtualOffset {
     return min
   }
 }
-export function fromBytes(bytes: Buffer, offset = 0, bigendian = false) {
+export function fromBytes(bytes: Uint8Array, offset = 0, bigendian = false) {
   if (bigendian) {
     throw new Error('big-endian virtual file offsets not implemented')
   }