npm - @gmod/cram - Versions diffs - 8.0.3 → 8.0.4 - Mend

@gmod/cram 8.0.3 → 8.0.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (61) hide show

package/dist/cram-bundle.js +1 -1
package/dist/cramFile/codecs/_base.d.ts +1 -0
package/dist/cramFile/codecs/_base.js +3 -0
package/dist/cramFile/codecs/_base.js.map +1 -1
package/dist/cramFile/codecs/byteArrayLength.d.ts +1 -1
package/dist/cramFile/codecs/byteArrayLength.js +14 -7
package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
package/dist/cramFile/codecs/external.d.ts +1 -1
package/dist/cramFile/codecs/external.js +32 -4
package/dist/cramFile/codecs/external.js.map +1 -1
package/dist/cramFile/codecs/getBits.d.ts +1 -0
package/dist/cramFile/codecs/getBits.js +4 -0
package/dist/cramFile/codecs/getBits.js.map +1 -1
package/dist/cramFile/record.d.ts +39 -9
package/dist/cramFile/record.js +23 -19
package/dist/cramFile/record.js.map +1 -1
package/dist/cramFile/slice/decodeRecord.d.ts +4 -3
package/dist/cramFile/slice/decodeRecord.js +62 -77
package/dist/cramFile/slice/decodeRecord.js.map +1 -1
package/dist/cramFile/slice/index.js +17 -27
package/dist/cramFile/slice/index.js.map +1 -1
package/dist/cramFile/util.d.ts +2 -0
package/dist/cramFile/util.js +13 -0
package/dist/cramFile/util.js.map +1 -1
package/dist/indexedCramFile.js +0 -3
package/dist/indexedCramFile.js.map +1 -1
package/esm/cramFile/codecs/_base.d.ts +1 -0
package/esm/cramFile/codecs/_base.js +3 -0
package/esm/cramFile/codecs/_base.js.map +1 -1
package/esm/cramFile/codecs/byteArrayLength.d.ts +1 -1
package/esm/cramFile/codecs/byteArrayLength.js +14 -7
package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
package/esm/cramFile/codecs/external.d.ts +1 -1
package/esm/cramFile/codecs/external.js +32 -4
package/esm/cramFile/codecs/external.js.map +1 -1
package/esm/cramFile/codecs/getBits.d.ts +1 -0
package/esm/cramFile/codecs/getBits.js +4 -0
package/esm/cramFile/codecs/getBits.js.map +1 -1
package/esm/cramFile/record.d.ts +39 -9
package/esm/cramFile/record.js +23 -19
package/esm/cramFile/record.js.map +1 -1
package/esm/cramFile/slice/decodeRecord.d.ts +4 -3
package/esm/cramFile/slice/decodeRecord.js +62 -77
package/esm/cramFile/slice/decodeRecord.js.map +1 -1
package/esm/cramFile/slice/index.js +17 -27
package/esm/cramFile/slice/index.js.map +1 -1
package/esm/cramFile/util.d.ts +2 -0
package/esm/cramFile/util.js +11 -0
package/esm/cramFile/util.js.map +1 -1
package/esm/indexedCramFile.js +0 -3
package/esm/indexedCramFile.js.map +1 -1
package/package.json +1 -1
package/src/cramFile/codecs/_base.ts +8 -0
package/src/cramFile/codecs/byteArrayLength.ts +21 -8
package/src/cramFile/codecs/external.ts +41 -9
package/src/cramFile/codecs/getBits.ts +3 -1
package/src/cramFile/record.ts +64 -36
package/src/cramFile/slice/decodeRecord.ts +77 -96
package/src/cramFile/slice/index.ts +31 -47
package/src/cramFile/util.ts +14 -0
package/src/indexedCramFile.ts +0 -4

package/src/cramFile/codecs/_base.ts CHANGED Viewed

@@ -42,4 +42,12 @@ export default abstract class CramCodec<
     blocksByContentId: Record<number, CramFileBlock>,
     cursors: Cursors,
   ): DataTypeMapping[TResult] | undefined
+  getBytesSubarray(
+    _blocksByContentId: Record<number, CramFileBlock>,
+    _cursors: Cursors,
+    _length: number,
+  ): Uint8Array | undefined {
+    return undefined
+  }
 }

package/src/cramFile/codecs/byteArrayLength.ts CHANGED Viewed

@@ -35,18 +35,31 @@ export default class ByteArrayStopCodec extends CramCodec<
     const arrayLength =
       lengthCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0
-    const data = new Uint8Array(arrayLength)
     if (arrayLength > 0) {
       const dataCodec = this._getDataCodec()
-      // Call decode directly on codec to avoid repeated lookups
-      for (let i = 0; i < arrayLength; i += 1) {
-        data[i] =
-          dataCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) ||
-          0
+      const subarray = dataCodec.getBytesSubarray(
+        blocksByContentId,
+        cursors,
+        arrayLength,
+      )
+      if (subarray) {
+        return subarray
+      } else {
+        const data = new Uint8Array(arrayLength)
+        for (let i = 0; i < arrayLength; i += 1) {
+          data[i] =
+            dataCodec.decode(
+              slice,
+              coreDataBlock,
+              blocksByContentId,
+              cursors,
+            ) || 0
+        }
+        return data
       }
+    } else {
+      return new Uint8Array(0)
     }
-    return data
   }
   // memoize

package/src/cramFile/codecs/external.ts CHANGED Viewed

@@ -1,10 +1,47 @@
 import CramCodec, { Cursors } from './_base.ts'
 import { CramUnimplementedError } from '../../errors.ts'
+import { ExternalCramEncoding } from '../encoding.ts'
 import { CramFileBlock } from '../file.ts'
-import CramSlice from '../slice/index.ts'
-import { parseItf8 } from '../util.ts'
 import { CramBufferOverrunError } from './getBits.ts'
-import { ExternalCramEncoding } from '../encoding.ts'
+import CramSlice from '../slice/index.ts'
+function parseItf8Inline(buffer: Uint8Array, cursor: { bytePosition: number }) {
+  const offset = cursor.bytePosition
+  const countFlags = buffer[offset]!
+  if (countFlags < 0x80) {
+    cursor.bytePosition = offset + 1
+    return countFlags
+  }
+  if (countFlags < 0xc0) {
+    cursor.bytePosition = offset + 2
+    return ((countFlags & 0x3f) << 8) | buffer[offset + 1]!
+  }
+  if (countFlags < 0xe0) {
+    cursor.bytePosition = offset + 3
+    return (
+      ((countFlags & 0x1f) << 16) |
+      (buffer[offset + 1]! << 8) |
+      buffer[offset + 2]!
+    )
+  }
+  if (countFlags < 0xf0) {
+    cursor.bytePosition = offset + 4
+    return (
+      ((countFlags & 0x0f) << 24) |
+      (buffer[offset + 1]! << 16) |
+      (buffer[offset + 2]! << 8) |
+      buffer[offset + 3]!
+    )
+  }
+  cursor.bytePosition = offset + 5
+  return (
+    ((countFlags & 0x0f) << 28) |
+    (buffer[offset + 1]! << 20) |
+    (buffer[offset + 2]! << 12) |
+    (buffer[offset + 3]! << 4) |
+    (buffer[offset + 4]! & 0x0f)
+  )
+}
 export default class ExternalCodec extends CramCodec<
   'int' | 'byte',
@@ -37,12 +74,7 @@ export default class ExternalCodec extends CramCodec<
     const cursor = cursors.externalBlocks.getCursor(blockContentId)
     if (this.dataType === 'int') {
-      const [result, bytesRead] = parseItf8(
-        contentBlock.content,
-        cursor.bytePosition,
-      )
-      cursor.bytePosition += bytesRead
-      return result
+      return parseItf8Inline(contentBlock.content, cursor)
     } else {
       if (cursor.bytePosition >= contentBlock.content.length) {
         throw new CramBufferOverrunError(

package/src/cramFile/codecs/getBits.ts CHANGED Viewed

@@ -1,4 +1,6 @@
-export class CramBufferOverrunError extends Error {}
+export class CramBufferOverrunError extends Error {
+  readonly code = 'CRAM_BUFFER_OVERRUN' as const
+}
 export function getBits(
   data: Uint8Array,

package/src/cramFile/record.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import Constants from './constants.ts'
 import CramContainerCompressionScheme from './container/compressionScheme.ts'
+import { readNullTerminatedStringFromBuffer } from './util.ts'
 import type decodeRecord from './slice/decodeRecord.ts'
@@ -20,15 +21,33 @@ export interface RefRegion {
   seq: string
 }
-export interface ReadFeature {
-  code: string
+interface ReadFeatureBase {
   pos: number
   refPos: number
-  data: any
-  ref?: string
-  sub?: string
 }
+/**
+ * Read features describe differences between a read and the reference sequence.
+ * Each feature has a code indicating the type of difference, a position in the
+ * read (pos), and a position on the reference (refPos).
+ */
+export type ReadFeature =
+  /** I=insertion, S=soft clip, b=bases, i=single-base insertion — all carry a sequence string */
+  | (ReadFeatureBase & { code: 'I' | 'S' | 'b' | 'i'; data: string })
+  /** B=base and quality pair — [substituted base, quality score] */
+  | (ReadFeatureBase & { code: 'B'; data: [string, number] })
+  /** X=base substitution — data is the substitution matrix index, ref/sub filled in by addReferenceSequence */
+  | (ReadFeatureBase & {
+      code: 'X'
+      data: number
+      ref?: string
+      sub?: string
+    })
+  /** D=deletion, N=reference skip, H=hard clip, P=padding, Q=single quality score */
+  | (ReadFeatureBase & { code: 'D' | 'N' | 'H' | 'P' | 'Q'; data: number })
+  /** q=quality scores for a stretch of bases */
+  | (ReadFeatureBase & { code: 'q'; data: number[] })
 export interface DecodeOptions {
   /** Whether to parse tags. If false, raw tag data is stored for lazy parsing. Default true. */
   decodeTags?: boolean
@@ -70,40 +89,30 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
         currentReadFeature += 1
         if (feature.code === 'b') {
-          // specify a base pair for some reason
           const added = feature.data
           bases += added
           regionPos += added.length
         } else if (feature.code === 'B') {
-          // base pair and associated quality
-          // TODO: do we need to set the quality in the qual scores?
           bases += feature.data[0]
           regionPos += 1
         } else if (feature.code === 'X') {
-          // base substitution
           bases += feature.sub
           regionPos += 1
         } else if (feature.code === 'I') {
-          // insertion
           bases += feature.data
         } else if (feature.code === 'D') {
-          // deletion
           regionPos += feature.data
         } else if (feature.code === 'i') {
-          // insert single base
           bases += feature.data
         } else if (feature.code === 'N') {
-          // reference skip. delete some bases
-          // do nothing
-          // seqBases.splice(feature.pos - 1, feature.data)
           regionPos += feature.data
         } else if (feature.code === 'S') {
-          // soft clipped bases that should be present in the read seq
-          // seqBases.splice(feature.pos - 1, 0, ...feature.data.split(''))
           bases += feature.data
         } else if (feature.code === 'P') {
           // padding, do nothing
-        } else if (feature.code === 'H') {
+        }
+        // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
+        else if (feature.code === 'H') {
           // hard clip, do nothing
         }
       } else if (currentReadFeature < cramRecord.readFeatures.length) {
@@ -129,7 +138,7 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
   return bases.toUpperCase()
 }
-const baseNumbers = {
+const baseNumbers: Record<string, number | undefined> = {
   a: 0,
   A: 0,
   c: 1,
@@ -146,7 +155,12 @@ function decodeBaseSubstitution(
   cramRecord: CramRecord,
   refRegion: RefRegion,
   compressionScheme: CramContainerCompressionScheme,
-  readFeature: ReadFeature,
+  readFeature: ReadFeatureBase & {
+    code: 'X'
+    data: number
+    ref?: string
+    sub?: string
+  },
 ) {
   // decode base substitution code using the substitution matrix
   const refCoord = readFeature.refPos - refRegion.start
@@ -154,7 +168,7 @@ function decodeBaseSubstitution(
   if (refBase) {
     readFeature.ref = refBase
   }
-  let baseNumber = (baseNumbers as any)[refBase]
+  let baseNumber = baseNumbers[refBase]
   if (baseNumber === undefined) {
     baseNumber = 4
   }
@@ -240,7 +254,7 @@ export const MateFlagsDecoder = makeFlagsHelper(MateFlags)
  * Class of each CRAM record returned by this API.
  */
 export default class CramRecord {
-  public tags: Record<string, string>
+  public tags: Record<string, string | number | number[] | undefined>
   public flags: number
   public cramFlags: number
   public readBases?: string | null
@@ -249,9 +263,13 @@ export default class CramRecord {
   public alignmentStart: number
   public lengthOnRef: number | undefined
   public readLength: number
+  // templateLength is computed post-hoc for intra-slice mate pairs,
+  // templateSize is the raw CRAM-encoded TS data series value
   public templateLength?: number
   public templateSize?: number
-  public readName?: string
+  private _readName?: string
+  private _readNameRaw?: Uint8Array
+  public _syntheticReadName?: string
   public mateRecordNumber?: number
   public mate?: MateRecord
   public uniqueId: number
@@ -260,6 +278,18 @@ export default class CramRecord {
   public mappingQuality: number | undefined
   public qualityScores: Uint8Array | null | undefined
+  get readName() {
+    if (this._readName === undefined) {
+      if (this._readNameRaw) {
+        this._readName = readNullTerminatedStringFromBuffer(this._readNameRaw)
+        this._readNameRaw = undefined
+      } else {
+        return this._syntheticReadName
+      }
+    }
+    return this._readName
+  }
   constructor({
     flags,
     cramFlags,
@@ -272,32 +302,31 @@ export default class CramRecord {
     readFeatures,
     mateToUse,
     readGroupId,
-    readName,
+    readNameRaw,
     sequenceId,
     uniqueId,
     templateSize,
     alignmentStart,
     tags,
-  }: ReturnType<typeof decodeRecord> & { uniqueId: number }) {
+  }: ReturnType<typeof decodeRecord>) {
     this.flags = flags
     this.cramFlags = cramFlags
     this.readLength = readLength
     this.mappingQuality = mappingQuality
     this.lengthOnRef = lengthOnRef
     this.qualityScores = qualityScores
-    if (readBases) {
-      this.readBases = readBases
-    }
     this.readGroupId = readGroupId
-    this.readName = readName
     this.sequenceId = sequenceId!
     this.uniqueId = uniqueId
-    this.templateSize = templateSize
     this.alignmentStart = alignmentStart
     this.tags = tags
-    // backwards compatibility
+    if (readNameRaw) {
+      this._readNameRaw = readNameRaw
+    }
+    if (readBases) {
+      this.readBases = readBases
+    }
+    this.templateSize = templateSize
     if (readFeatures) {
       this.readFeatures = readFeatures
     }
@@ -430,9 +459,7 @@ export default class CramRecord {
       return undefined
     }
     const isize = this.templateLength || this.templateSize || 0
-    return PAIR_ORIENTATION_TABLE[
-      ((f >> 4) & 0xf) | (isize > 0 ? 16 : 0)
-    ]
+    return PAIR_ORIENTATION_TABLE[((f >> 4) & 0xf) | (isize > 0 ? 16 : 0)]
   }
   /**
@@ -488,6 +515,7 @@ export default class CramRecord {
       data[k] = (this as any)[k]
     })
+    data.readName = this.readName
     data.readBases = this.getReadBases()
     data.qualityScores = this.qualityScores
       ? Array.from(this.qualityScores)