@gmod/cram 8.0.3 → 8.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/dist/cram-bundle.js +1 -1
  2. package/dist/cramFile/codecs/_base.d.ts +1 -0
  3. package/dist/cramFile/codecs/_base.js +3 -0
  4. package/dist/cramFile/codecs/_base.js.map +1 -1
  5. package/dist/cramFile/codecs/byteArrayLength.d.ts +1 -1
  6. package/dist/cramFile/codecs/byteArrayLength.js +14 -7
  7. package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
  8. package/dist/cramFile/codecs/external.d.ts +1 -1
  9. package/dist/cramFile/codecs/external.js +32 -4
  10. package/dist/cramFile/codecs/external.js.map +1 -1
  11. package/dist/cramFile/codecs/getBits.d.ts +1 -0
  12. package/dist/cramFile/codecs/getBits.js +4 -0
  13. package/dist/cramFile/codecs/getBits.js.map +1 -1
  14. package/dist/cramFile/record.d.ts +39 -9
  15. package/dist/cramFile/record.js +23 -19
  16. package/dist/cramFile/record.js.map +1 -1
  17. package/dist/cramFile/slice/decodeRecord.d.ts +4 -3
  18. package/dist/cramFile/slice/decodeRecord.js +62 -77
  19. package/dist/cramFile/slice/decodeRecord.js.map +1 -1
  20. package/dist/cramFile/slice/index.js +17 -27
  21. package/dist/cramFile/slice/index.js.map +1 -1
  22. package/dist/cramFile/util.d.ts +2 -0
  23. package/dist/cramFile/util.js +13 -0
  24. package/dist/cramFile/util.js.map +1 -1
  25. package/dist/indexedCramFile.js +0 -3
  26. package/dist/indexedCramFile.js.map +1 -1
  27. package/esm/cramFile/codecs/_base.d.ts +1 -0
  28. package/esm/cramFile/codecs/_base.js +3 -0
  29. package/esm/cramFile/codecs/_base.js.map +1 -1
  30. package/esm/cramFile/codecs/byteArrayLength.d.ts +1 -1
  31. package/esm/cramFile/codecs/byteArrayLength.js +14 -7
  32. package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
  33. package/esm/cramFile/codecs/external.d.ts +1 -1
  34. package/esm/cramFile/codecs/external.js +32 -4
  35. package/esm/cramFile/codecs/external.js.map +1 -1
  36. package/esm/cramFile/codecs/getBits.d.ts +1 -0
  37. package/esm/cramFile/codecs/getBits.js +4 -0
  38. package/esm/cramFile/codecs/getBits.js.map +1 -1
  39. package/esm/cramFile/record.d.ts +39 -9
  40. package/esm/cramFile/record.js +23 -19
  41. package/esm/cramFile/record.js.map +1 -1
  42. package/esm/cramFile/slice/decodeRecord.d.ts +4 -3
  43. package/esm/cramFile/slice/decodeRecord.js +62 -77
  44. package/esm/cramFile/slice/decodeRecord.js.map +1 -1
  45. package/esm/cramFile/slice/index.js +17 -27
  46. package/esm/cramFile/slice/index.js.map +1 -1
  47. package/esm/cramFile/util.d.ts +2 -0
  48. package/esm/cramFile/util.js +11 -0
  49. package/esm/cramFile/util.js.map +1 -1
  50. package/esm/indexedCramFile.js +0 -3
  51. package/esm/indexedCramFile.js.map +1 -1
  52. package/package.json +1 -1
  53. package/src/cramFile/codecs/_base.ts +8 -0
  54. package/src/cramFile/codecs/byteArrayLength.ts +21 -8
  55. package/src/cramFile/codecs/external.ts +41 -9
  56. package/src/cramFile/codecs/getBits.ts +3 -1
  57. package/src/cramFile/record.ts +64 -36
  58. package/src/cramFile/slice/decodeRecord.ts +77 -96
  59. package/src/cramFile/slice/index.ts +31 -47
  60. package/src/cramFile/util.ts +14 -0
  61. package/src/indexedCramFile.ts +0 -4
@@ -42,4 +42,12 @@ export default abstract class CramCodec<
42
42
  blocksByContentId: Record<number, CramFileBlock>,
43
43
  cursors: Cursors,
44
44
  ): DataTypeMapping[TResult] | undefined
45
+
46
+ getBytesSubarray(
47
+ _blocksByContentId: Record<number, CramFileBlock>,
48
+ _cursors: Cursors,
49
+ _length: number,
50
+ ): Uint8Array | undefined {
51
+ return undefined
52
+ }
45
53
  }
@@ -35,18 +35,31 @@ export default class ByteArrayStopCodec extends CramCodec<
35
35
  const arrayLength =
36
36
  lengthCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0
37
37
 
38
- const data = new Uint8Array(arrayLength)
39
38
  if (arrayLength > 0) {
40
39
  const dataCodec = this._getDataCodec()
41
- // Call decode directly on codec to avoid repeated lookups
42
- for (let i = 0; i < arrayLength; i += 1) {
43
- data[i] =
44
- dataCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) ||
45
- 0
40
+ const subarray = dataCodec.getBytesSubarray(
41
+ blocksByContentId,
42
+ cursors,
43
+ arrayLength,
44
+ )
45
+ if (subarray) {
46
+ return subarray
47
+ } else {
48
+ const data = new Uint8Array(arrayLength)
49
+ for (let i = 0; i < arrayLength; i += 1) {
50
+ data[i] =
51
+ dataCodec.decode(
52
+ slice,
53
+ coreDataBlock,
54
+ blocksByContentId,
55
+ cursors,
56
+ ) || 0
57
+ }
58
+ return data
46
59
  }
60
+ } else {
61
+ return new Uint8Array(0)
47
62
  }
48
-
49
- return data
50
63
  }
51
64
 
52
65
  // memoize
@@ -1,10 +1,47 @@
1
1
  import CramCodec, { Cursors } from './_base.ts'
2
2
  import { CramUnimplementedError } from '../../errors.ts'
3
+ import { ExternalCramEncoding } from '../encoding.ts'
3
4
  import { CramFileBlock } from '../file.ts'
4
- import CramSlice from '../slice/index.ts'
5
- import { parseItf8 } from '../util.ts'
6
5
  import { CramBufferOverrunError } from './getBits.ts'
7
- import { ExternalCramEncoding } from '../encoding.ts'
6
+ import CramSlice from '../slice/index.ts'
7
+
8
+ function parseItf8Inline(buffer: Uint8Array, cursor: { bytePosition: number }) {
9
+ const offset = cursor.bytePosition
10
+ const countFlags = buffer[offset]!
11
+ if (countFlags < 0x80) {
12
+ cursor.bytePosition = offset + 1
13
+ return countFlags
14
+ }
15
+ if (countFlags < 0xc0) {
16
+ cursor.bytePosition = offset + 2
17
+ return ((countFlags & 0x3f) << 8) | buffer[offset + 1]!
18
+ }
19
+ if (countFlags < 0xe0) {
20
+ cursor.bytePosition = offset + 3
21
+ return (
22
+ ((countFlags & 0x1f) << 16) |
23
+ (buffer[offset + 1]! << 8) |
24
+ buffer[offset + 2]!
25
+ )
26
+ }
27
+ if (countFlags < 0xf0) {
28
+ cursor.bytePosition = offset + 4
29
+ return (
30
+ ((countFlags & 0x0f) << 24) |
31
+ (buffer[offset + 1]! << 16) |
32
+ (buffer[offset + 2]! << 8) |
33
+ buffer[offset + 3]!
34
+ )
35
+ }
36
+ cursor.bytePosition = offset + 5
37
+ return (
38
+ ((countFlags & 0x0f) << 28) |
39
+ (buffer[offset + 1]! << 20) |
40
+ (buffer[offset + 2]! << 12) |
41
+ (buffer[offset + 3]! << 4) |
42
+ (buffer[offset + 4]! & 0x0f)
43
+ )
44
+ }
8
45
 
9
46
  export default class ExternalCodec extends CramCodec<
10
47
  'int' | 'byte',
@@ -37,12 +74,7 @@ export default class ExternalCodec extends CramCodec<
37
74
  const cursor = cursors.externalBlocks.getCursor(blockContentId)
38
75
 
39
76
  if (this.dataType === 'int') {
40
- const [result, bytesRead] = parseItf8(
41
- contentBlock.content,
42
- cursor.bytePosition,
43
- )
44
- cursor.bytePosition += bytesRead
45
- return result
77
+ return parseItf8Inline(contentBlock.content, cursor)
46
78
  } else {
47
79
  if (cursor.bytePosition >= contentBlock.content.length) {
48
80
  throw new CramBufferOverrunError(
@@ -1,4 +1,6 @@
1
- export class CramBufferOverrunError extends Error {}
1
+ export class CramBufferOverrunError extends Error {
2
+ readonly code = 'CRAM_BUFFER_OVERRUN' as const
3
+ }
2
4
 
3
5
  export function getBits(
4
6
  data: Uint8Array,
@@ -1,5 +1,6 @@
1
1
  import Constants from './constants.ts'
2
2
  import CramContainerCompressionScheme from './container/compressionScheme.ts'
3
+ import { readNullTerminatedStringFromBuffer } from './util.ts'
3
4
 
4
5
  import type decodeRecord from './slice/decodeRecord.ts'
5
6
 
@@ -20,15 +21,33 @@ export interface RefRegion {
20
21
  seq: string
21
22
  }
22
23
 
23
- export interface ReadFeature {
24
- code: string
24
+ interface ReadFeatureBase {
25
25
  pos: number
26
26
  refPos: number
27
- data: any
28
- ref?: string
29
- sub?: string
30
27
  }
31
28
 
29
+ /**
30
+ * Read features describe differences between a read and the reference sequence.
31
+ * Each feature has a code indicating the type of difference, a position in the
32
+ * read (pos), and a position on the reference (refPos).
33
+ */
34
+ export type ReadFeature =
35
+ /** I=insertion, S=soft clip, b=bases, i=single-base insertion — all carry a sequence string */
36
+ | (ReadFeatureBase & { code: 'I' | 'S' | 'b' | 'i'; data: string })
37
+ /** B=base and quality pair — [substituted base, quality score] */
38
+ | (ReadFeatureBase & { code: 'B'; data: [string, number] })
39
+ /** X=base substitution — data is the substitution matrix index, ref/sub filled in by addReferenceSequence */
40
+ | (ReadFeatureBase & {
41
+ code: 'X'
42
+ data: number
43
+ ref?: string
44
+ sub?: string
45
+ })
46
+ /** D=deletion, N=reference skip, H=hard clip, P=padding, Q=single quality score */
47
+ | (ReadFeatureBase & { code: 'D' | 'N' | 'H' | 'P' | 'Q'; data: number })
48
+ /** q=quality scores for a stretch of bases */
49
+ | (ReadFeatureBase & { code: 'q'; data: number[] })
50
+
32
51
  export interface DecodeOptions {
33
52
  /** Whether to parse tags. If false, raw tag data is stored for lazy parsing. Default true. */
34
53
  decodeTags?: boolean
@@ -70,40 +89,30 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
70
89
  currentReadFeature += 1
71
90
 
72
91
  if (feature.code === 'b') {
73
- // specify a base pair for some reason
74
92
  const added = feature.data
75
93
  bases += added
76
94
  regionPos += added.length
77
95
  } else if (feature.code === 'B') {
78
- // base pair and associated quality
79
- // TODO: do we need to set the quality in the qual scores?
80
96
  bases += feature.data[0]
81
97
  regionPos += 1
82
98
  } else if (feature.code === 'X') {
83
- // base substitution
84
99
  bases += feature.sub
85
100
  regionPos += 1
86
101
  } else if (feature.code === 'I') {
87
- // insertion
88
102
  bases += feature.data
89
103
  } else if (feature.code === 'D') {
90
- // deletion
91
104
  regionPos += feature.data
92
105
  } else if (feature.code === 'i') {
93
- // insert single base
94
106
  bases += feature.data
95
107
  } else if (feature.code === 'N') {
96
- // reference skip. delete some bases
97
- // do nothing
98
- // seqBases.splice(feature.pos - 1, feature.data)
99
108
  regionPos += feature.data
100
109
  } else if (feature.code === 'S') {
101
- // soft clipped bases that should be present in the read seq
102
- // seqBases.splice(feature.pos - 1, 0, ...feature.data.split(''))
103
110
  bases += feature.data
104
111
  } else if (feature.code === 'P') {
105
112
  // padding, do nothing
106
- } else if (feature.code === 'H') {
113
+ }
114
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
115
+ else if (feature.code === 'H') {
107
116
  // hard clip, do nothing
108
117
  }
109
118
  } else if (currentReadFeature < cramRecord.readFeatures.length) {
@@ -129,7 +138,7 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
129
138
  return bases.toUpperCase()
130
139
  }
131
140
 
132
- const baseNumbers = {
141
+ const baseNumbers: Record<string, number | undefined> = {
133
142
  a: 0,
134
143
  A: 0,
135
144
  c: 1,
@@ -146,7 +155,12 @@ function decodeBaseSubstitution(
146
155
  cramRecord: CramRecord,
147
156
  refRegion: RefRegion,
148
157
  compressionScheme: CramContainerCompressionScheme,
149
- readFeature: ReadFeature,
158
+ readFeature: ReadFeatureBase & {
159
+ code: 'X'
160
+ data: number
161
+ ref?: string
162
+ sub?: string
163
+ },
150
164
  ) {
151
165
  // decode base substitution code using the substitution matrix
152
166
  const refCoord = readFeature.refPos - refRegion.start
@@ -154,7 +168,7 @@ function decodeBaseSubstitution(
154
168
  if (refBase) {
155
169
  readFeature.ref = refBase
156
170
  }
157
- let baseNumber = (baseNumbers as any)[refBase]
171
+ let baseNumber = baseNumbers[refBase]
158
172
  if (baseNumber === undefined) {
159
173
  baseNumber = 4
160
174
  }
@@ -240,7 +254,7 @@ export const MateFlagsDecoder = makeFlagsHelper(MateFlags)
240
254
  * Class of each CRAM record returned by this API.
241
255
  */
242
256
  export default class CramRecord {
243
- public tags: Record<string, string>
257
+ public tags: Record<string, string | number | number[] | undefined>
244
258
  public flags: number
245
259
  public cramFlags: number
246
260
  public readBases?: string | null
@@ -249,9 +263,13 @@ export default class CramRecord {
249
263
  public alignmentStart: number
250
264
  public lengthOnRef: number | undefined
251
265
  public readLength: number
266
+ // templateLength is computed post-hoc for intra-slice mate pairs,
267
+ // templateSize is the raw CRAM-encoded TS data series value
252
268
  public templateLength?: number
253
269
  public templateSize?: number
254
- public readName?: string
270
+ private _readName?: string
271
+ private _readNameRaw?: Uint8Array
272
+ public _syntheticReadName?: string
255
273
  public mateRecordNumber?: number
256
274
  public mate?: MateRecord
257
275
  public uniqueId: number
@@ -260,6 +278,18 @@ export default class CramRecord {
260
278
  public mappingQuality: number | undefined
261
279
  public qualityScores: Uint8Array | null | undefined
262
280
 
281
+ get readName() {
282
+ if (this._readName === undefined) {
283
+ if (this._readNameRaw) {
284
+ this._readName = readNullTerminatedStringFromBuffer(this._readNameRaw)
285
+ this._readNameRaw = undefined
286
+ } else {
287
+ return this._syntheticReadName
288
+ }
289
+ }
290
+ return this._readName
291
+ }
292
+
263
293
  constructor({
264
294
  flags,
265
295
  cramFlags,
@@ -272,32 +302,31 @@ export default class CramRecord {
272
302
  readFeatures,
273
303
  mateToUse,
274
304
  readGroupId,
275
- readName,
305
+ readNameRaw,
276
306
  sequenceId,
277
307
  uniqueId,
278
308
  templateSize,
279
309
  alignmentStart,
280
310
  tags,
281
- }: ReturnType<typeof decodeRecord> & { uniqueId: number }) {
311
+ }: ReturnType<typeof decodeRecord>) {
282
312
  this.flags = flags
283
313
  this.cramFlags = cramFlags
284
314
  this.readLength = readLength
285
315
  this.mappingQuality = mappingQuality
286
316
  this.lengthOnRef = lengthOnRef
287
317
  this.qualityScores = qualityScores
288
- if (readBases) {
289
- this.readBases = readBases
290
- }
291
-
292
318
  this.readGroupId = readGroupId
293
- this.readName = readName
294
319
  this.sequenceId = sequenceId!
295
320
  this.uniqueId = uniqueId
296
- this.templateSize = templateSize
297
321
  this.alignmentStart = alignmentStart
298
322
  this.tags = tags
299
-
300
- // backwards compatibility
323
+ if (readNameRaw) {
324
+ this._readNameRaw = readNameRaw
325
+ }
326
+ if (readBases) {
327
+ this.readBases = readBases
328
+ }
329
+ this.templateSize = templateSize
301
330
  if (readFeatures) {
302
331
  this.readFeatures = readFeatures
303
332
  }
@@ -430,9 +459,7 @@ export default class CramRecord {
430
459
  return undefined
431
460
  }
432
461
  const isize = this.templateLength || this.templateSize || 0
433
- return PAIR_ORIENTATION_TABLE[
434
- ((f >> 4) & 0xf) | (isize > 0 ? 16 : 0)
435
- ]
462
+ return PAIR_ORIENTATION_TABLE[((f >> 4) & 0xf) | (isize > 0 ? 16 : 0)]
436
463
  }
437
464
 
438
465
  /**
@@ -488,6 +515,7 @@ export default class CramRecord {
488
515
  data[k] = (this as any)[k]
489
516
  })
490
517
 
518
+ data.readName = this.readName
491
519
  data.readBases = this.getReadBases()
492
520
  data.qualityScores = this.qualityScores
493
521
  ? Array.from(this.qualityScores)