@gmod/cram 8.0.2 → 8.0.4

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
Files changed (61)
  1. package/dist/cram-bundle.js +1 -1
  2. package/dist/cramFile/codecs/_base.d.ts +1 -0
  3. package/dist/cramFile/codecs/_base.js +3 -0
  4. package/dist/cramFile/codecs/_base.js.map +1 -1
  5. package/dist/cramFile/codecs/byteArrayLength.d.ts +1 -1
  6. package/dist/cramFile/codecs/byteArrayLength.js +14 -7
  7. package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
  8. package/dist/cramFile/codecs/external.d.ts +1 -1
  9. package/dist/cramFile/codecs/external.js +32 -4
  10. package/dist/cramFile/codecs/external.js.map +1 -1
  11. package/dist/cramFile/codecs/getBits.d.ts +1 -0
  12. package/dist/cramFile/codecs/getBits.js +4 -0
  13. package/dist/cramFile/codecs/getBits.js.map +1 -1
  14. package/dist/cramFile/record.d.ts +39 -9
  15. package/dist/cramFile/record.js +35 -35
  16. package/dist/cramFile/record.js.map +1 -1
  17. package/dist/cramFile/slice/decodeRecord.d.ts +4 -3
  18. package/dist/cramFile/slice/decodeRecord.js +62 -77
  19. package/dist/cramFile/slice/decodeRecord.js.map +1 -1
  20. package/dist/cramFile/slice/index.js +17 -27
  21. package/dist/cramFile/slice/index.js.map +1 -1
  22. package/dist/cramFile/util.d.ts +2 -0
  23. package/dist/cramFile/util.js +13 -0
  24. package/dist/cramFile/util.js.map +1 -1
  25. package/dist/indexedCramFile.js +0 -3
  26. package/dist/indexedCramFile.js.map +1 -1
  27. package/esm/cramFile/codecs/_base.d.ts +1 -0
  28. package/esm/cramFile/codecs/_base.js +3 -0
  29. package/esm/cramFile/codecs/_base.js.map +1 -1
  30. package/esm/cramFile/codecs/byteArrayLength.d.ts +1 -1
  31. package/esm/cramFile/codecs/byteArrayLength.js +14 -7
  32. package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
  33. package/esm/cramFile/codecs/external.d.ts +1 -1
  34. package/esm/cramFile/codecs/external.js +32 -4
  35. package/esm/cramFile/codecs/external.js.map +1 -1
  36. package/esm/cramFile/codecs/getBits.d.ts +1 -0
  37. package/esm/cramFile/codecs/getBits.js +4 -0
  38. package/esm/cramFile/codecs/getBits.js.map +1 -1
  39. package/esm/cramFile/record.d.ts +39 -9
  40. package/esm/cramFile/record.js +35 -35
  41. package/esm/cramFile/record.js.map +1 -1
  42. package/esm/cramFile/slice/decodeRecord.d.ts +4 -3
  43. package/esm/cramFile/slice/decodeRecord.js +62 -77
  44. package/esm/cramFile/slice/decodeRecord.js.map +1 -1
  45. package/esm/cramFile/slice/index.js +17 -27
  46. package/esm/cramFile/slice/index.js.map +1 -1
  47. package/esm/cramFile/util.d.ts +2 -0
  48. package/esm/cramFile/util.js +11 -0
  49. package/esm/cramFile/util.js.map +1 -1
  50. package/esm/indexedCramFile.js +0 -3
  51. package/esm/indexedCramFile.js.map +1 -1
  52. package/package.json +1 -1
  53. package/src/cramFile/codecs/_base.ts +8 -0
  54. package/src/cramFile/codecs/byteArrayLength.ts +21 -8
  55. package/src/cramFile/codecs/external.ts +41 -9
  56. package/src/cramFile/codecs/getBits.ts +3 -1
  57. package/src/cramFile/record.ts +76 -49
  58. package/src/cramFile/slice/decodeRecord.ts +77 -96
  59. package/src/cramFile/slice/index.ts +31 -47
  60. package/src/cramFile/util.ts +14 -0
  61. package/src/indexedCramFile.ts +0 -4
@@ -42,4 +42,12 @@ export default abstract class CramCodec<
42
42
  blocksByContentId: Record<number, CramFileBlock>,
43
43
  cursors: Cursors,
44
44
  ): DataTypeMapping[TResult] | undefined
45
+
46
+ getBytesSubarray(
47
+ _blocksByContentId: Record<number, CramFileBlock>,
48
+ _cursors: Cursors,
49
+ _length: number,
50
+ ): Uint8Array | undefined {
51
+ return undefined
52
+ }
45
53
  }
@@ -35,18 +35,31 @@ export default class ByteArrayStopCodec extends CramCodec<
35
35
  const arrayLength =
36
36
  lengthCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0
37
37
 
38
- const data = new Uint8Array(arrayLength)
39
38
  if (arrayLength > 0) {
40
39
  const dataCodec = this._getDataCodec()
41
- // Call decode directly on codec to avoid repeated lookups
42
- for (let i = 0; i < arrayLength; i += 1) {
43
- data[i] =
44
- dataCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) ||
45
- 0
40
+ const subarray = dataCodec.getBytesSubarray(
41
+ blocksByContentId,
42
+ cursors,
43
+ arrayLength,
44
+ )
45
+ if (subarray) {
46
+ return subarray
47
+ } else {
48
+ const data = new Uint8Array(arrayLength)
49
+ for (let i = 0; i < arrayLength; i += 1) {
50
+ data[i] =
51
+ dataCodec.decode(
52
+ slice,
53
+ coreDataBlock,
54
+ blocksByContentId,
55
+ cursors,
56
+ ) || 0
57
+ }
58
+ return data
46
59
  }
60
+ } else {
61
+ return new Uint8Array(0)
47
62
  }
48
-
49
- return data
50
63
  }
51
64
 
52
65
  // memoize
@@ -1,10 +1,47 @@
1
1
  import CramCodec, { Cursors } from './_base.ts'
2
2
  import { CramUnimplementedError } from '../../errors.ts'
3
+ import { ExternalCramEncoding } from '../encoding.ts'
3
4
  import { CramFileBlock } from '../file.ts'
4
- import CramSlice from '../slice/index.ts'
5
- import { parseItf8 } from '../util.ts'
6
5
  import { CramBufferOverrunError } from './getBits.ts'
7
- import { ExternalCramEncoding } from '../encoding.ts'
6
+ import CramSlice from '../slice/index.ts'
7
+
8
+ function parseItf8Inline(buffer: Uint8Array, cursor: { bytePosition: number }) {
9
+ const offset = cursor.bytePosition
10
+ const countFlags = buffer[offset]!
11
+ if (countFlags < 0x80) {
12
+ cursor.bytePosition = offset + 1
13
+ return countFlags
14
+ }
15
+ if (countFlags < 0xc0) {
16
+ cursor.bytePosition = offset + 2
17
+ return ((countFlags & 0x3f) << 8) | buffer[offset + 1]!
18
+ }
19
+ if (countFlags < 0xe0) {
20
+ cursor.bytePosition = offset + 3
21
+ return (
22
+ ((countFlags & 0x1f) << 16) |
23
+ (buffer[offset + 1]! << 8) |
24
+ buffer[offset + 2]!
25
+ )
26
+ }
27
+ if (countFlags < 0xf0) {
28
+ cursor.bytePosition = offset + 4
29
+ return (
30
+ ((countFlags & 0x0f) << 24) |
31
+ (buffer[offset + 1]! << 16) |
32
+ (buffer[offset + 2]! << 8) |
33
+ buffer[offset + 3]!
34
+ )
35
+ }
36
+ cursor.bytePosition = offset + 5
37
+ return (
38
+ ((countFlags & 0x0f) << 28) |
39
+ (buffer[offset + 1]! << 20) |
40
+ (buffer[offset + 2]! << 12) |
41
+ (buffer[offset + 3]! << 4) |
42
+ (buffer[offset + 4]! & 0x0f)
43
+ )
44
+ }
8
45
 
9
46
  export default class ExternalCodec extends CramCodec<
10
47
  'int' | 'byte',
@@ -37,12 +74,7 @@ export default class ExternalCodec extends CramCodec<
37
74
  const cursor = cursors.externalBlocks.getCursor(blockContentId)
38
75
 
39
76
  if (this.dataType === 'int') {
40
- const [result, bytesRead] = parseItf8(
41
- contentBlock.content,
42
- cursor.bytePosition,
43
- )
44
- cursor.bytePosition += bytesRead
45
- return result
77
+ return parseItf8Inline(contentBlock.content, cursor)
46
78
  } else {
47
79
  if (cursor.bytePosition >= contentBlock.content.length) {
48
80
  throw new CramBufferOverrunError(
@@ -1,4 +1,6 @@
1
- export class CramBufferOverrunError extends Error {}
1
+ export class CramBufferOverrunError extends Error {
2
+ readonly code = 'CRAM_BUFFER_OVERRUN' as const
3
+ }
2
4
 
3
5
  export function getBits(
4
6
  data: Uint8Array,
@@ -1,23 +1,53 @@
1
1
  import Constants from './constants.ts'
2
2
  import CramContainerCompressionScheme from './container/compressionScheme.ts'
3
+ import { readNullTerminatedStringFromBuffer } from './util.ts'
3
4
 
4
5
  import type decodeRecord from './slice/decodeRecord.ts'
5
6
 
7
+ // precomputed pair orientation strings indexed by ((flags >> 4) & 0xF) | (isize > 0 ? 16 : 0)
8
+ // bits 0-3 encode flag bits 0x10(reverse),0x20(mate reverse),0x40(read1),0x80(read2)
9
+ // bit 4 encodes whether isize > 0
10
+ // prettier-ignore
11
+ const PAIR_ORIENTATION_TABLE = [
12
+ 'F F ','F R ','R F ','R R ','F2F1','F2R1','R2F1','R2R1',
13
+ 'F1F2','F1R2','R1F2','R1R2','F2F1','F2R1','R2F1','R2R1',
14
+ 'F F ','R F ','F R ','R R ','F1F2','R1F2','F1R2','R1R2',
15
+ 'F2F1','R2F1','F2R1','R2R1','F1F2','R1F2','F1R2','R1R2',
16
+ ]
17
+
6
18
  export interface RefRegion {
7
19
  start: number
8
20
  end: number
9
21
  seq: string
10
22
  }
11
23
 
12
- export interface ReadFeature {
13
- code: string
24
+ interface ReadFeatureBase {
14
25
  pos: number
15
26
  refPos: number
16
- data: any
17
- ref?: string
18
- sub?: string
19
27
  }
20
28
 
29
+ /**
30
+ * Read features describe differences between a read and the reference sequence.
31
+ * Each feature has a code indicating the type of difference, a position in the
32
+ * read (pos), and a position on the reference (refPos).
33
+ */
34
+ export type ReadFeature =
35
+ /** I=insertion, S=soft clip, b=bases, i=single-base insertion — all carry a sequence string */
36
+ | (ReadFeatureBase & { code: 'I' | 'S' | 'b' | 'i'; data: string })
37
+ /** B=base and quality pair — [substituted base, quality score] */
38
+ | (ReadFeatureBase & { code: 'B'; data: [string, number] })
39
+ /** X=base substitution — data is the substitution matrix index, ref/sub filled in by addReferenceSequence */
40
+ | (ReadFeatureBase & {
41
+ code: 'X'
42
+ data: number
43
+ ref?: string
44
+ sub?: string
45
+ })
46
+ /** D=deletion, N=reference skip, H=hard clip, P=padding, Q=single quality score */
47
+ | (ReadFeatureBase & { code: 'D' | 'N' | 'H' | 'P' | 'Q'; data: number })
48
+ /** q=quality scores for a stretch of bases */
49
+ | (ReadFeatureBase & { code: 'q'; data: number[] })
50
+
21
51
  export interface DecodeOptions {
22
52
  /** Whether to parse tags. If false, raw tag data is stored for lazy parsing. Default true. */
23
53
  decodeTags?: boolean
@@ -59,40 +89,30 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
59
89
  currentReadFeature += 1
60
90
 
61
91
  if (feature.code === 'b') {
62
- // specify a base pair for some reason
63
92
  const added = feature.data
64
93
  bases += added
65
94
  regionPos += added.length
66
95
  } else if (feature.code === 'B') {
67
- // base pair and associated quality
68
- // TODO: do we need to set the quality in the qual scores?
69
96
  bases += feature.data[0]
70
97
  regionPos += 1
71
98
  } else if (feature.code === 'X') {
72
- // base substitution
73
99
  bases += feature.sub
74
100
  regionPos += 1
75
101
  } else if (feature.code === 'I') {
76
- // insertion
77
102
  bases += feature.data
78
103
  } else if (feature.code === 'D') {
79
- // deletion
80
104
  regionPos += feature.data
81
105
  } else if (feature.code === 'i') {
82
- // insert single base
83
106
  bases += feature.data
84
107
  } else if (feature.code === 'N') {
85
- // reference skip. delete some bases
86
- // do nothing
87
- // seqBases.splice(feature.pos - 1, feature.data)
88
108
  regionPos += feature.data
89
109
  } else if (feature.code === 'S') {
90
- // soft clipped bases that should be present in the read seq
91
- // seqBases.splice(feature.pos - 1, 0, ...feature.data.split(''))
92
110
  bases += feature.data
93
111
  } else if (feature.code === 'P') {
94
112
  // padding, do nothing
95
- } else if (feature.code === 'H') {
113
+ }
114
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
115
+ else if (feature.code === 'H') {
96
116
  // hard clip, do nothing
97
117
  }
98
118
  } else if (currentReadFeature < cramRecord.readFeatures.length) {
@@ -118,7 +138,7 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
118
138
  return bases.toUpperCase()
119
139
  }
120
140
 
121
- const baseNumbers = {
141
+ const baseNumbers: Record<string, number | undefined> = {
122
142
  a: 0,
123
143
  A: 0,
124
144
  c: 1,
@@ -135,7 +155,12 @@ function decodeBaseSubstitution(
135
155
  cramRecord: CramRecord,
136
156
  refRegion: RefRegion,
137
157
  compressionScheme: CramContainerCompressionScheme,
138
- readFeature: ReadFeature,
158
+ readFeature: ReadFeatureBase & {
159
+ code: 'X'
160
+ data: number
161
+ ref?: string
162
+ sub?: string
163
+ },
139
164
  ) {
140
165
  // decode base substitution code using the substitution matrix
141
166
  const refCoord = readFeature.refPos - refRegion.start
@@ -143,7 +168,7 @@ function decodeBaseSubstitution(
143
168
  if (refBase) {
144
169
  readFeature.ref = refBase
145
170
  }
146
- let baseNumber = (baseNumbers as any)[refBase]
171
+ let baseNumber = baseNumbers[refBase]
147
172
  if (baseNumber === undefined) {
148
173
  baseNumber = 4
149
174
  }
@@ -229,7 +254,7 @@ export const MateFlagsDecoder = makeFlagsHelper(MateFlags)
229
254
  * Class of each CRAM record returned by this API.
230
255
  */
231
256
  export default class CramRecord {
232
- public tags: Record<string, string>
257
+ public tags: Record<string, string | number | number[] | undefined>
233
258
  public flags: number
234
259
  public cramFlags: number
235
260
  public readBases?: string | null
@@ -238,9 +263,13 @@ export default class CramRecord {
238
263
  public alignmentStart: number
239
264
  public lengthOnRef: number | undefined
240
265
  public readLength: number
266
+ // templateLength is computed post-hoc for intra-slice mate pairs,
267
+ // templateSize is the raw CRAM-encoded TS data series value
241
268
  public templateLength?: number
242
269
  public templateSize?: number
243
- public readName?: string
270
+ private _readName?: string
271
+ private _readNameRaw?: Uint8Array
272
+ public _syntheticReadName?: string
244
273
  public mateRecordNumber?: number
245
274
  public mate?: MateRecord
246
275
  public uniqueId: number
@@ -249,6 +278,18 @@ export default class CramRecord {
249
278
  public mappingQuality: number | undefined
250
279
  public qualityScores: Uint8Array | null | undefined
251
280
 
281
+ get readName() {
282
+ if (this._readName === undefined) {
283
+ if (this._readNameRaw) {
284
+ this._readName = readNullTerminatedStringFromBuffer(this._readNameRaw)
285
+ this._readNameRaw = undefined
286
+ } else {
287
+ return this._syntheticReadName
288
+ }
289
+ }
290
+ return this._readName
291
+ }
292
+
252
293
  constructor({
253
294
  flags,
254
295
  cramFlags,
@@ -261,32 +302,31 @@ export default class CramRecord {
261
302
  readFeatures,
262
303
  mateToUse,
263
304
  readGroupId,
264
- readName,
305
+ readNameRaw,
265
306
  sequenceId,
266
307
  uniqueId,
267
308
  templateSize,
268
309
  alignmentStart,
269
310
  tags,
270
- }: ReturnType<typeof decodeRecord> & { uniqueId: number }) {
311
+ }: ReturnType<typeof decodeRecord>) {
271
312
  this.flags = flags
272
313
  this.cramFlags = cramFlags
273
314
  this.readLength = readLength
274
315
  this.mappingQuality = mappingQuality
275
316
  this.lengthOnRef = lengthOnRef
276
317
  this.qualityScores = qualityScores
277
- if (readBases) {
278
- this.readBases = readBases
279
- }
280
-
281
318
  this.readGroupId = readGroupId
282
- this.readName = readName
283
319
  this.sequenceId = sequenceId!
284
320
  this.uniqueId = uniqueId
285
- this.templateSize = templateSize
286
321
  this.alignmentStart = alignmentStart
287
322
  this.tags = tags
288
-
289
- // backwards compatibility
323
+ if (readNameRaw) {
324
+ this._readNameRaw = readNameRaw
325
+ }
326
+ if (readBases) {
327
+ this.readBases = readBases
328
+ }
329
+ this.templateSize = templateSize
290
330
  if (readFeatures) {
291
331
  this.readFeatures = readFeatures
292
332
  }
@@ -411,29 +451,15 @@ export default class CramRecord {
411
451
  }
412
452
 
413
453
  // adapted from igv.js
414
- // inlines flag checks and uses template literal instead of array+join
454
+ // uses precomputed lookup table indexed by flag bits + isize sign
415
455
  getPairOrientation() {
416
456
  const f = this.flags
417
457
  // combined check: paired (0x1) set, unmapped (0x4) clear, mate unmapped (0x8) clear
418
458
  if ((f & 0xd) !== 0x1 || this.sequenceId !== this.mate?.sequenceId) {
419
459
  return undefined
420
460
  }
421
- const s1 = f & 0x10 ? 'R' : 'F'
422
- const s2 = f & 0x20 ? 'R' : 'F'
423
- let o1 = ' '
424
- let o2 = ' '
425
- if (f & 0x40) {
426
- o1 = '1'
427
- o2 = '2'
428
- } else if (f & 0x80) {
429
- o1 = '2'
430
- o2 = '1'
431
- }
432
-
433
461
  const isize = this.templateLength || this.templateSize || 0
434
- return isize > 0
435
- ? `${s1}${o1}${s2}${o2}`
436
- : `${s2}${o2}${s1}${o1}`
462
+ return PAIR_ORIENTATION_TABLE[((f >> 4) & 0xf) | (isize > 0 ? 16 : 0)]
437
463
  }
438
464
 
439
465
  /**
@@ -489,6 +515,7 @@ export default class CramRecord {
489
515
  data[k] = (this as any)[k]
490
516
  })
491
517
 
518
+ data.readName = this.readName
492
519
  data.readBases = this.getReadBases()
493
520
  data.qualityScores = this.qualityScores
494
521
  ? Array.from(this.qualityScores)