@gmod/cram 2.0.4 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/CHANGELOG.md +10 -0
  2. package/dist/cram-bundle.js +1 -1
  3. package/dist/cramFile/codecs/byteArrayLength.js +1 -1
  4. package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
  5. package/dist/cramFile/codecs/byteArrayStop.js +1 -1
  6. package/dist/cramFile/codecs/external.js +1 -1
  7. package/dist/cramFile/codecs/external.js.map +1 -1
  8. package/dist/cramFile/codecs/huffman.js +3 -2
  9. package/dist/cramFile/codecs/huffman.js.map +1 -1
  10. package/dist/cramFile/codecs/subexp.js.map +1 -1
  11. package/dist/cramFile/container/compressionScheme.d.ts +0 -3
  12. package/dist/cramFile/container/compressionScheme.js +0 -4
  13. package/dist/cramFile/container/compressionScheme.js.map +1 -1
  14. package/dist/cramFile/container/index.d.ts +57 -3
  15. package/dist/cramFile/container/index.js +21 -12
  16. package/dist/cramFile/container/index.js.map +1 -1
  17. package/dist/cramFile/file.d.ts +25 -59
  18. package/dist/cramFile/file.js +33 -37
  19. package/dist/cramFile/file.js.map +1 -1
  20. package/dist/cramFile/record.d.ts +1 -1
  21. package/dist/cramFile/record.js +2 -2
  22. package/dist/cramFile/record.js.map +1 -1
  23. package/dist/cramFile/sectionParsers.d.ts +195 -48
  24. package/dist/cramFile/sectionParsers.js +621 -303
  25. package/dist/cramFile/sectionParsers.js.map +1 -1
  26. package/dist/cramFile/slice/decodeRecord.js +5 -4
  27. package/dist/cramFile/slice/decodeRecord.js.map +1 -1
  28. package/dist/cramFile/slice/index.d.ts +23 -1
  29. package/dist/cramFile/slice/index.js +11 -8
  30. package/dist/cramFile/slice/index.js.map +1 -1
  31. package/dist/cramFile/util.d.ts +6 -4
  32. package/dist/cramFile/util.js +88 -6
  33. package/dist/cramFile/util.js.map +1 -1
  34. package/dist/rans/d04.js.map +1 -1
  35. package/dist/rans/decoding.d.ts +4 -4
  36. package/dist/rans/decoding.js +5 -6
  37. package/dist/rans/decoding.js.map +1 -1
  38. package/dist/rans/index.js +4 -3
  39. package/dist/rans/index.js.map +1 -1
  40. package/esm/cramFile/codecs/byteArrayLength.js +1 -1
  41. package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
  42. package/esm/cramFile/codecs/byteArrayStop.js +1 -1
  43. package/esm/cramFile/codecs/external.js +1 -1
  44. package/esm/cramFile/codecs/external.js.map +1 -1
  45. package/esm/cramFile/codecs/huffman.js +3 -2
  46. package/esm/cramFile/codecs/huffman.js.map +1 -1
  47. package/esm/cramFile/codecs/subexp.js.map +1 -1
  48. package/esm/cramFile/container/compressionScheme.d.ts +0 -3
  49. package/esm/cramFile/container/compressionScheme.js +0 -4
  50. package/esm/cramFile/container/compressionScheme.js.map +1 -1
  51. package/esm/cramFile/container/index.d.ts +57 -3
  52. package/esm/cramFile/container/index.js +19 -10
  53. package/esm/cramFile/container/index.js.map +1 -1
  54. package/esm/cramFile/file.d.ts +25 -59
  55. package/esm/cramFile/file.js +27 -29
  56. package/esm/cramFile/file.js.map +1 -1
  57. package/esm/cramFile/record.d.ts +1 -1
  58. package/esm/cramFile/record.js +2 -2
  59. package/esm/cramFile/record.js.map +1 -1
  60. package/esm/cramFile/sectionParsers.d.ts +195 -48
  61. package/esm/cramFile/sectionParsers.js +620 -303
  62. package/esm/cramFile/sectionParsers.js.map +1 -1
  63. package/esm/cramFile/slice/decodeRecord.js +5 -4
  64. package/esm/cramFile/slice/decodeRecord.js.map +1 -1
  65. package/esm/cramFile/slice/index.d.ts +23 -1
  66. package/esm/cramFile/slice/index.js +12 -9
  67. package/esm/cramFile/slice/index.js.map +1 -1
  68. package/esm/cramFile/util.d.ts +6 -4
  69. package/esm/cramFile/util.js +87 -6
  70. package/esm/cramFile/util.js.map +1 -1
  71. package/esm/rans/d04.js.map +1 -1
  72. package/esm/rans/decoding.d.ts +4 -4
  73. package/esm/rans/decoding.js +5 -6
  74. package/esm/rans/decoding.js.map +1 -1
  75. package/esm/rans/index.js +3 -2
  76. package/esm/rans/index.js.map +1 -1
  77. package/package.json +7 -8
  78. package/src/cramFile/codecs/byteArrayLength.ts +1 -2
  79. package/src/cramFile/codecs/byteArrayStop.ts +1 -1
  80. package/src/cramFile/codecs/external.ts +1 -1
  81. package/src/cramFile/codecs/huffman.ts +3 -2
  82. package/src/cramFile/codecs/subexp.ts +2 -2
  83. package/src/cramFile/container/compressionScheme.ts +1 -8
  84. package/src/cramFile/container/index.ts +23 -12
  85. package/src/cramFile/declare.d.ts +1 -0
  86. package/src/cramFile/file.ts +37 -53
  87. package/src/cramFile/record.ts +4 -7
  88. package/src/cramFile/sectionParsers.ts +668 -390
  89. package/src/cramFile/slice/decodeRecord.ts +20 -12
  90. package/src/cramFile/slice/index.ts +13 -7
  91. package/src/cramFile/util.ts +91 -92
  92. package/src/rans/d04.ts +1 -1
  93. package/src/rans/decoding.ts +5 -7
  94. package/src/rans/index.ts +3 -2
  95. package/src/typings/binary-parser.d.ts +0 -44
@@ -1,117 +1,160 @@
1
- import { Parser } from '@gmod/binary-parser'
2
1
  import { TupleOf } from '../typescript'
3
- import { ParsedItem } from './util'
2
+ import { parseItf8, parseLtf8 } from './util'
4
3
  import { DataSeriesEncodingMap } from './codecs/dataSeriesTypes'
5
4
  import { CramEncoding } from './encoding'
6
5
 
7
- const singleItf8 = new Parser().itf8()
8
-
9
- const cramFileDefinition = {
10
- parser: new Parser()
11
- .string('magic', { length: 4 })
12
- .uint8('majorVersion')
13
- .uint8('minorVersion')
14
- .string('fileId', { length: 20, stripNull: true }),
15
- maxLength: 26,
6
+ export function cramFileDefinition() {
7
+ return {
8
+ parser: (buffer: Buffer, _startOffset = 0) => {
9
+ const b = buffer
10
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length)
11
+ let offset = 0
12
+ const magic = buffer.subarray(offset, offset + 4).toString()
13
+ offset += 4
14
+ const majorVersion = dataView.getUint8(offset)
15
+ offset += 1
16
+ const minorVersion = dataView.getUint8(offset)
17
+ offset += 1
18
+ const fileId = b
19
+ .subarray(offset, offset + 20)
20
+ .toString()
21
+ .replaceAll('\0', '')
22
+ offset += 20
23
+ return {
24
+ value: {
25
+ magic,
26
+ majorVersion,
27
+ minorVersion,
28
+ fileId,
29
+ },
30
+ offset,
31
+ }
32
+ },
33
+ maxLength: 26,
34
+ }
16
35
  }
17
-
18
- const cramBlockHeader = {
19
- parser: new Parser()
20
- .uint8('compressionMethod', {
21
- formatter: /* istanbul ignore next */ b => {
22
- const method = [
23
- 'raw',
24
- 'gzip',
25
- 'bzip2',
26
- 'lzma',
27
- 'rans',
28
- 'rans4x16',
29
- 'arith',
30
- 'fqzcomp',
31
- 'tok3',
32
- ][b]
33
- if (!method) {
34
- throw new Error(`compression method number ${b} not implemented`)
35
- }
36
- return method
37
- },
38
- })
39
- .uint8('contentType', {
40
- formatter: /* istanbul ignore next */ b => {
41
- const type = [
42
- 'FILE_HEADER',
43
- 'COMPRESSION_HEADER',
44
- 'MAPPED_SLICE_HEADER',
45
- 'UNMAPPED_SLICE_HEADER', // < only used in cram v1
46
- 'EXTERNAL_DATA',
47
- 'CORE_DATA',
48
- ][b]
49
- if (!type) {
50
- throw new Error(`invalid block content type id ${b}`)
51
- }
52
- return type
36
+ export function cramBlockHeader() {
37
+ const parser = (buffer: Buffer, _startOffset = 0) => {
38
+ const b = buffer
39
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length)
40
+ let offset = 0
41
+ const d = dataView.getUint8(offset)
42
+ const compressionMethod = [
43
+ 'raw',
44
+ 'gzip',
45
+ 'bzip2',
46
+ 'lzma',
47
+ 'rans',
48
+ 'rans4x16',
49
+ 'arith',
50
+ 'fqzcomp',
51
+ 'tok3',
52
+ ][d]
53
+ if (!compressionMethod) {
54
+ throw new Error(`compression method number ${d} not implemented`)
55
+ }
56
+ offset += 1
57
+
58
+ const c = dataView.getUint8(offset)
59
+ const contentType = [
60
+ 'FILE_HEADER',
61
+ 'COMPRESSION_HEADER',
62
+ 'MAPPED_SLICE_HEADER',
63
+ 'UNMAPPED_SLICE_HEADER', // < only used in cram v1
64
+ 'EXTERNAL_DATA',
65
+ 'CORE_DATA',
66
+ ][c]
67
+ if (!contentType) {
68
+ throw new Error(`invalid block content type id ${c}`)
69
+ }
70
+ offset += 1
71
+
72
+ const [contentId, newOffset1] = parseItf8(buffer, offset)
73
+ offset += newOffset1
74
+ const [compressedSize, newOffset2] = parseItf8(buffer, offset)
75
+ offset += newOffset2
76
+ const [uncompressedSize, newOffset3] = parseItf8(buffer, offset)
77
+ offset += newOffset3
78
+ return {
79
+ offset,
80
+ value: {
81
+ uncompressedSize,
82
+ compressedSize,
83
+ contentId,
84
+ contentType: contentType as
85
+ | 'FILE_HEADER'
86
+ | 'COMPRESSION_HEADER'
87
+ | 'MAPPED_SLICE_HEADER'
88
+ | 'UNMAPPED_SLICE_HEADER' // < only used in cram v1
89
+ | 'EXTERNAL_DATA'
90
+ | 'CORE_DATA',
91
+ compressionMethod: compressionMethod as CompressionMethod,
53
92
  },
54
- })
55
- .itf8('contentId')
56
- .itf8('compressedSize')
57
- .itf8('uncompressedSize'),
58
- maxLength: 17,
93
+ }
94
+ }
95
+ return { parser, maxLength: 17 }
59
96
  }
60
97
 
61
- const cramBlockCrc32 = {
62
- parser: new Parser().uint32('crc32'),
63
- maxLength: 4,
98
+ export function cramBlockCrc32() {
99
+ return {
100
+ parser: (buffer: Buffer, offset: number) => {
101
+ const b = buffer
102
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length)
103
+ const crc32 = dataView.getUint32(offset, true)
104
+ offset += 4
105
+ return {
106
+ offset,
107
+ value: {
108
+ crc32,
109
+ },
110
+ }
111
+ },
112
+ maxLength: 4,
113
+ }
64
114
  }
65
115
 
66
- // const ENCODING_NAMES = [
67
- // 'NULL', // 0
68
- // 'EXTERNAL', // 1
69
- // 'GOLOMB', // 2
70
- // 'HUFFMAN_INT', // 3
71
- // 'BYTE_ARRAY_LEN', // 4
72
- // 'BYTE_ARRAY_STOP', // 5
73
- // 'BETA', // 6
74
- // 'SUBEXP', // 7
75
- // 'GOLOMB_RICE', // 8
76
- // 'GAMMA', // 9
77
- // ]
78
-
79
116
  export type CramTagDictionary = string[][]
80
117
 
81
- const cramTagDictionary = new Parser().itf8('size').buffer('ents', {
82
- length: 'size',
83
- formatter: /* istanbul ignore next */ buffer => {
84
- function makeTagSet(stringStart: number, stringEnd: number) {
85
- const str = buffer.toString('utf8', stringStart, stringEnd)
86
- const tags = []
87
- for (let i = 0; i < str.length; i += 3) {
88
- tags.push(str.slice(i, i + 3))
89
- }
90
- return tags
91
- }
118
+ function makeTagSet(buffer: Buffer, stringStart: number, stringEnd: number) {
119
+ const str = buffer.toString('utf8', stringStart, stringEnd)
120
+ const tags = []
121
+ for (let i = 0; i < str.length; i += 3) {
122
+ tags.push(str.slice(i, i + 3))
123
+ }
124
+ return tags
125
+ }
92
126
 
93
- /* eslint-disable */
94
- var tagSets = []
95
- var stringStart = 0
96
- var i
97
- /* eslint-enable */
98
- for (i = 0; i < buffer.length; i += 1) {
99
- if (!buffer[i]) {
100
- tagSets.push(makeTagSet(stringStart, i))
101
- stringStart = i + 1
127
+ export function cramTagDictionary() {
128
+ return {
129
+ parser: (buffer: Buffer, offset: number) => {
130
+ const [size, newOffset1] = parseItf8(buffer, offset)
131
+ offset += newOffset1
132
+ const subbuf = buffer.subarray(offset, offset + size)
133
+ offset += size
134
+
135
+ const tagSets = []
136
+ let stringStart = 0
137
+ let i = 0
138
+ for (; i < subbuf.length; i++) {
139
+ if (!subbuf[i]) {
140
+ tagSets.push(makeTagSet(subbuf, stringStart, i))
141
+ stringStart = i + 1
142
+ }
143
+ }
144
+ if (i > stringStart) {
145
+ tagSets.push(makeTagSet(subbuf, stringStart, i))
102
146
  }
103
- }
104
- if (i > stringStart) {
105
- tagSets.push(makeTagSet(stringStart, i))
106
- }
107
- return tagSets
108
- },
109
- })
110
147
 
111
- // const cramPreservationMapKeys = 'XX RN AP RR SM TD'.split(' ')
112
- const parseByteAsBool = new Parser().uint8(null, {
113
- formatter: /* istanbul ignore next */ val => !!val,
114
- })
148
+ return {
149
+ value: {
150
+ size,
151
+ ents: tagSets,
152
+ },
153
+ offset,
154
+ }
155
+ },
156
+ }
157
+ }
115
158
 
116
159
  export interface CramPreservationMap {
117
160
  MI: boolean
@@ -124,38 +167,72 @@ export interface CramPreservationMap {
124
167
  TD: CramTagDictionary
125
168
  }
126
169
 
127
- const cramPreservationMap = new Parser()
128
- .itf8('mapSize')
129
- .itf8('mapCount')
130
- .array('ents', {
131
- length: 'mapCount',
132
- type: new Parser()
133
- .string('key', {
134
- length: 2,
135
- stripNull: false,
136
- // formatter: val => cramPreservationMapKeys[val] || 0,
137
- })
138
- .choice('value', {
139
- tag: 'key',
140
- choices: {
141
- MI: parseByteAsBool,
142
- UI: parseByteAsBool,
143
- PI: parseByteAsBool,
144
- RN: parseByteAsBool,
145
- AP: parseByteAsBool,
146
- RR: parseByteAsBool,
147
- SM: new Parser().array(null, { type: 'uint8', length: 5 }),
148
- TD: new Parser().nest(null, {
149
- type: cramTagDictionary,
150
- formatter: /* istanbul ignore next */ data => data.ents,
151
- }),
170
+ export function cramPreservationMap() {
171
+ return {
172
+ parser: (buffer: Buffer, offset: number) => {
173
+ const b = buffer
174
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length)
175
+ const [mapSize, newOffset1] = parseItf8(buffer, offset)
176
+ offset += newOffset1
177
+ const [mapCount, newOffset2] = parseItf8(buffer, offset)
178
+ offset += newOffset2
179
+ const ents = []
180
+ for (let i = 0; i < mapCount; i++) {
181
+ const key =
182
+ String.fromCharCode(buffer[offset]) +
183
+ String.fromCharCode(buffer[offset + 1])
184
+ offset += 2
185
+
186
+ if (
187
+ key === 'MI' ||
188
+ key === 'UI' ||
189
+ key === 'PI' ||
190
+ key === 'RN' ||
191
+ key === 'AP' ||
192
+ key === 'RR'
193
+ ) {
194
+ ents.push({
195
+ key,
196
+ value: !!dataView.getUint8(offset),
197
+ })
198
+ offset += 1
199
+ } else if (key === 'SM') {
200
+ ents.push({
201
+ key,
202
+ value: [
203
+ dataView.getUint8(offset),
204
+ dataView.getUint8(offset + 1),
205
+ dataView.getUint8(offset + 2),
206
+ dataView.getUint8(offset + 3),
207
+ dataView.getUint8(offset + 4),
208
+ ],
209
+ })
210
+ offset += 5
211
+ } else if (key === 'TD') {
212
+ const { offset: offsetRet, value } = cramTagDictionary().parser(
213
+ buffer,
214
+ offset,
215
+ )
216
+ ents.push({ key, value: value.ents })
217
+ offset = offsetRet
218
+ } else {
219
+ throw new Error(`unknown key ${key}`)
220
+ }
221
+ }
222
+ return {
223
+ value: {
224
+ mapSize,
225
+ mapCount,
226
+ ents,
152
227
  },
153
- }),
154
- })
228
+ offset,
229
+ }
230
+ },
231
+ }
232
+ }
155
233
 
156
- /* istanbul ignore next */
157
- function formatMap<T>(data: { ents: { key: string; value: T }[] }) {
158
- const map: Record<string, T> = {}
234
+ function formatMap(data: { ents: { key: string; value: unknown }[] }) {
235
+ const map: Record<string, unknown> = {}
159
236
  for (const { key, value } of data.ents) {
160
237
  if (map[key]) {
161
238
  console.warn(`duplicate key ${key} in map`)
@@ -165,12 +242,6 @@ function formatMap<T>(data: { ents: { key: string; value: T }[] }) {
165
242
  return map
166
243
  }
167
244
 
168
- const unversionedParsers = {
169
- cramFileDefinition,
170
- cramBlockHeader,
171
- cramBlockCrc32,
172
- }
173
-
174
245
  export interface MappedSliceHeader {
175
246
  refSeqId: number
176
247
  refSeqStart: number
@@ -181,7 +252,7 @@ export interface MappedSliceHeader {
181
252
  numContentIds: number
182
253
  contentIds: number[]
183
254
  refBaseBlockId: number
184
- md5: TupleOf<number, 16>
255
+ md5?: TupleOf<number, 16>
185
256
  }
186
257
 
187
258
  export interface UnmappedSliceHeader {
@@ -190,227 +261,477 @@ export interface UnmappedSliceHeader {
190
261
  numBlocks: number
191
262
  numContentIds: number
192
263
  contentIds: number[]
193
- md5: TupleOf<number, 16>
264
+ md5?: TupleOf<number, 16>
194
265
  }
195
266
 
196
267
  export function isMappedSliceHeader(
197
- header: MappedSliceHeader | UnmappedSliceHeader,
268
+ header: unknown,
198
269
  ): header is MappedSliceHeader {
199
270
  return typeof (header as any).refSeqId === 'number'
200
271
  }
201
272
 
202
- // each of these is a function of the major and minor version
203
- const versionedParsers = {
204
- // assemble a section parser for the unmapped slice header, with slight
205
- // variations depending on the major version of the cram file
206
- cramUnmappedSliceHeader(majorVersion: number) {
207
- let maxLength = 0
208
- let parser = new Parser().itf8('numRecords')
209
- maxLength += 5
273
+ interface Value {
274
+ codecId: number
275
+ parametersBytes: number
276
+ parameters: Record<string, unknown>
277
+ }
278
+ // assemble a section parser for the unmapped slice header, with slight
279
+ // variations depending on the major version of the cram file
280
+ function cramUnmappedSliceHeader(majorVersion: number) {
281
+ let maxLength = 0
282
+ maxLength += 5
283
+ maxLength += 9
284
+ maxLength += 5 * 2
285
+ maxLength += 16
286
+
287
+ const parser = (buffer: Buffer, offset: number) => {
288
+ const [numRecords, newOffset1] = parseItf8(buffer, offset)
289
+ offset += newOffset1
290
+ let recordCounter = 0
210
291
 
211
292
  // recordCounter is ltf8 in CRAM v3+, itf8 in a CRAM v2 file, absent in CRAM v1
212
293
  if (majorVersion >= 3) {
213
- parser = parser.ltf8('recordCounter')
214
- maxLength += 9
294
+ const [rc, newOffset2] = parseLtf8(buffer, offset)
295
+ offset += newOffset2
296
+ recordCounter = rc
215
297
  } else if (majorVersion === 2) {
216
- parser = parser.itf8('recordCounter')
217
- maxLength += 5
298
+ const [rc, newOffset2] = parseItf8(buffer, offset)
299
+ offset += newOffset2
300
+ recordCounter = rc
301
+ } else {
302
+ console.warn('recordCounter=0')
218
303
  }
219
304
 
220
- parser = parser
221
- .itf8('numBlocks')
222
- .itf8('numContentIds')
223
- .array('contentIds', {
224
- type: singleItf8,
225
- length: 'numContentIds',
226
- })
227
- maxLength += 5 * 2 // + numContentIds*5
305
+ const [numBlocks, newOffset3] = parseItf8(buffer, offset)
306
+ offset += newOffset3
307
+ const [numContentIds, newOffset4] = parseItf8(buffer, offset)
308
+ offset += newOffset4
309
+ const contentIds = []
310
+ for (let i = 0; i < numContentIds; i++) {
311
+ const [id, newOffset5] = parseItf8(buffer, offset)
312
+ offset += newOffset5
313
+ contentIds.push(id)
314
+ }
228
315
 
229
316
  // the md5 sum is missing in cram v1
317
+ let md5: TupleOf<number, 16> | undefined
230
318
  if (majorVersion >= 2) {
231
- parser = parser.array('md5', { type: 'uint8', length: 16 })
232
- maxLength += 16
319
+ md5 = [...buffer.subarray(offset, offset + 16)] as TupleOf<number, 16>
320
+ offset += 16
233
321
  }
234
322
 
235
- const maxLengthFunc = (numContentIds: number) =>
236
- maxLength + numContentIds * 5
323
+ return {
324
+ value: {
325
+ recordCounter,
326
+ md5,
327
+ contentIds,
328
+ numContentIds,
329
+ numBlocks,
330
+ numRecords,
331
+ },
332
+ offset,
333
+ }
334
+ }
335
+ return {
336
+ parser,
337
+ maxLength: (numContentIds: number) => maxLength + numContentIds * 5,
338
+ }
339
+ }
237
340
 
238
- return { parser, maxLength: maxLengthFunc } // : p, maxLength: numContentIds => 5 + 9 + 5 * 2 + 5 * numContentIds + 16 }
239
- },
341
+ // assembles a section parser for the mapped slice header, with slight
342
+ // variations depending on the major version of the cram file
343
+ function cramMappedSliceHeader(majorVersion: number) {
344
+ let maxLength = 0
345
+ maxLength += 5 * 4 // EL0
346
+ maxLength += 9 // EL1
347
+ maxLength += 5 * 3 // EL2 ITF8s
348
+ maxLength += 16 // MD5
349
+
350
+ return {
351
+ parser: (buffer: Buffer, offset: number) => {
352
+ // L0
353
+ const [refSeqId, newOffset1] = parseItf8(buffer, offset)
354
+ offset += newOffset1
355
+ const [refSeqStart, newOffset2] = parseItf8(buffer, offset)
356
+ offset += newOffset2
357
+ const [refSeqSpan, newOffset3] = parseItf8(buffer, offset)
358
+ offset += newOffset3
359
+ const [numRecords, newOffset4] = parseItf8(buffer, offset)
360
+ offset += newOffset4
361
+ // EL0
362
+
363
+ // L1
364
+ let recordCounter = 0
365
+ if (majorVersion >= 3) {
366
+ const [rc, newOffset5] = parseLtf8(buffer, offset)
367
+ offset += newOffset5
368
+ recordCounter = rc
369
+ } else if (majorVersion === 2) {
370
+ const [rc, newOffset5] = parseItf8(buffer, offset)
371
+ offset += newOffset5
372
+ recordCounter = rc
373
+ } else {
374
+ console.warn('majorVersion is <2, recordCounter set to 0')
375
+ }
376
+ // EL1
377
+
378
+ // L2
379
+ const [numBlocks, newOffset6] = parseItf8(buffer, offset)
380
+ offset += newOffset6
381
+ const [numContentIds, newOffset7] = parseItf8(buffer, offset)
382
+ offset += newOffset7
383
+ const contentIds = []
384
+ for (let i = 0; i < numContentIds; i++) {
385
+ const [id, newOffset5] = parseItf8(buffer, offset)
386
+ offset += newOffset5
387
+ contentIds.push(id)
388
+ }
389
+ const [refBaseBlockId, newOffset8] = parseItf8(buffer, offset)
390
+ offset += newOffset8
391
+ // EL2
392
+
393
+ // the md5 sum is missing in cram v1
394
+ let md5: TupleOf<number, 16> | undefined
395
+ if (majorVersion >= 2) {
396
+ md5 = [...buffer.subarray(offset, offset + 16)] as TupleOf<number, 16>
397
+ offset += 16
398
+ }
240
399
 
241
- // assembles a section parser for the unmapped slice header, with slight
242
- // variations depending on the major version of the cram file
243
- cramMappedSliceHeader(majorVersion: number) {
244
- let parser = new Parser()
245
- .itf8('refSeqId')
246
- .itf8('refSeqStart')
247
- .itf8('refSeqSpan')
248
- .itf8('numRecords')
249
- let maxLength = 5 * 4
400
+ return {
401
+ value: {
402
+ md5,
403
+ numBlocks,
404
+ numRecords,
405
+ numContentIds,
406
+ refSeqSpan,
407
+ refSeqId,
408
+ refSeqStart,
409
+ recordCounter,
410
+ refBaseBlockId,
411
+ contentIds,
412
+ },
413
+ offset,
414
+ }
415
+ },
416
+ maxLength: (numContentIds: number) => maxLength + numContentIds * 5,
417
+ }
418
+ }
250
419
 
251
- if (majorVersion >= 3) {
252
- parser = parser.ltf8('recordCounter')
253
- maxLength += 9
254
- } else if (majorVersion === 2) {
255
- parser = parser.itf8('recordCounter')
256
- maxLength += 5
420
+ function cramEncoding() {
421
+ return {
422
+ parser: (buffer: Buffer, offset: number) => cramEncodingSub(buffer, offset),
423
+ }
424
+ }
425
+
426
+ function cramEncodingSub(
427
+ buffer: Buffer,
428
+ offset: number,
429
+ ): { value: Value; offset: number } {
430
+ const b = buffer
431
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length)
432
+ const [codecId, newOffset1] = parseItf8(buffer, offset)
433
+ offset += newOffset1
434
+ const [parametersBytes, newOffset2] = parseItf8(buffer, offset)
435
+ offset += newOffset2
436
+
437
+ const parameters = {} as Record<string, unknown>
438
+
439
+ if (codecId === 0) {
440
+ // NULL
441
+ } else if (codecId === 1) {
442
+ // EXTERNAL
443
+ const [bc, newOffset3] = parseItf8(buffer, offset)
444
+ parameters.blockContentId = bc
445
+ offset += newOffset3
446
+ } else if (codecId === 2) {
447
+ // GOLOMB
448
+ const [off, newOffset3] = parseItf8(buffer, offset)
449
+ parameters.offset = off
450
+ offset += newOffset3
451
+ const [M2, newOffset4] = parseItf8(buffer, offset)
452
+ parameters.M = M2
453
+ offset += newOffset4
454
+ } else if (codecId === 3) {
455
+ // HUFFMAN_INT
456
+ const val = parseItf8(buffer, offset)
457
+ const numCodes = val[0]
458
+ offset += val[1]
459
+ const symbols = [] as number[]
460
+ for (let i = 0; i < numCodes; i++) {
461
+ const code = parseItf8(buffer, offset)
462
+ symbols.push(code[0])
463
+ offset += code[1]
257
464
  }
465
+ parameters.symbols = symbols
466
+ const val2 = parseItf8(buffer, offset)
467
+ const numLengths = val[0]
468
+ parameters.numLengths = numLengths
469
+ parameters.numCodes = numCodes
470
+ parameters.numLengths = numLengths
471
+ offset += val2[1]
472
+ const bitLengths = [] as number[]
473
+ for (let i = 0; i < numLengths; i++) {
474
+ const len = parseItf8(buffer, offset)
475
+ offset += len[1]
476
+ bitLengths.push(len[0])
477
+ }
478
+ parameters.bitLengths = bitLengths
479
+ } else if (codecId === 4) {
480
+ // BYTE_ARRAY_LEN
481
+ const { value: lengthsEncoding, offset: newOffset1 } = cramEncodingSub(
482
+ buffer,
483
+ offset,
484
+ )
485
+ parameters.lengthsEncoding = lengthsEncoding
486
+ offset = newOffset1
487
+ const { value: valuesEncoding, offset: newOffset2 } = cramEncodingSub(
488
+ buffer,
489
+ offset,
490
+ )
491
+ parameters.valuesEncoding = valuesEncoding
492
+ offset = newOffset2
493
+ } else if (codecId === 5) {
494
+ // BYTE_ARRAY_STOP
495
+ parameters.stopByte = dataView.getUint8(offset)
496
+ offset += 1
497
+ const [blockContentId, newOffset1] = parseItf8(buffer, offset)
498
+ parameters.blockContentId = blockContentId
499
+ offset += newOffset1
500
+ } else if (codecId === 6) {
501
+ // BETA
502
+ const [off, newOffset1] = parseItf8(buffer, offset)
503
+ parameters.offset = off
504
+ offset += newOffset1
505
+ const [len, newOffset2] = parseItf8(buffer, offset)
506
+ parameters.length = len
507
+ offset += newOffset2
508
+ } else if (codecId === 7) {
509
+ // SUBEXP
510
+ const [off, newOffset1] = parseItf8(buffer, offset)
511
+ parameters.offset = off
512
+ offset += newOffset1
513
+ const [K, newOffset2] = parseItf8(buffer, offset)
514
+ parameters.K = K
515
+ offset += newOffset2
516
+ } else if (codecId === 8) {
517
+ // GOLOMB_RICE
518
+ const [off, newOffset1] = parseItf8(buffer, offset)
519
+ parameters.offset = off
520
+ offset += newOffset1
521
+ const [l2m, newOffset2] = parseItf8(buffer, offset)
522
+ parameters.log2m = l2m
523
+ offset += newOffset2
524
+ } else if (codecId === 9) {
525
+ // GAMMA
526
+ const [off, newOffset1] = parseItf8(buffer, offset)
527
+ parameters.offset = off
528
+ offset += newOffset1
529
+ } else {
530
+ throw new Error(`unknown codecId ${codecId}`)
531
+ }
258
532
 
259
- parser = parser
260
- .itf8('numBlocks')
261
- .itf8('numContentIds')
262
- .array('contentIds', {
263
- type: singleItf8,
264
- length: 'numContentIds',
265
- })
266
- .itf8('refBaseBlockId')
267
- maxLength += 5 * 3
533
+ return {
534
+ value: {
535
+ codecId,
536
+ parametersBytes,
537
+ parameters,
538
+ },
539
+ offset,
540
+ }
541
+ }
268
542
 
269
- // the md5 sum is missing in cram v1
270
- if (majorVersion >= 2) {
271
- parser = parser.array('md5', { type: 'uint8', length: 16 })
272
- maxLength += 16
273
- }
543
+ function cramDataSeriesEncodingMap() {
544
+ return {
545
+ parser: (buffer: Buffer, offset: number) => {
546
+ const [mapSize, newOffset1] = parseItf8(buffer, offset)
547
+ offset += newOffset1
548
+ const [mapCount, newOffset2] = parseItf8(buffer, offset)
549
+ offset += newOffset2
550
+ const ents = []
551
+ for (let i = 0; i < mapCount; i++) {
552
+ const key =
553
+ String.fromCharCode(buffer[offset]) +
554
+ String.fromCharCode(buffer[offset + 1])
555
+ offset += 2
556
+
557
+ const { value, offset: newOffset4 } = cramEncodingSub(buffer, offset)
558
+ offset = newOffset4
559
+ ents.push({ key, value })
560
+ }
561
+ return {
562
+ value: {
563
+ mapSize,
564
+ ents,
565
+ mapCount,
566
+ },
567
+ offset,
568
+ }
569
+ },
570
+ }
571
+ }
274
572
 
275
- const maxLengthFunc = (numContentIds: number) =>
276
- maxLength + numContentIds * 5
277
-
278
- return { parser, maxLength: maxLengthFunc }
279
- },
280
-
281
- cramEncoding(_majorVersion: number) {
282
- const parser = new Parser()
283
- .namely('cramEncoding')
284
- .itf8('codecId')
285
- .itf8('parametersBytes')
286
- .choice('parameters', {
287
- tag: 'codecId',
288
- choices: {
289
- 0: new Parser(), // NULL
290
- 1: new Parser().itf8('blockContentId'), // EXTERNAL
291
- 2: new Parser().itf8('offset').itf8('M'), // GOLOMB,
292
- // HUFFMAN_INT
293
- 3: Parser.start()
294
- .itf8('numCodes')
295
- .array('symbols', { length: 'numCodes', type: singleItf8 })
296
- .itf8('numLengths')
297
- .array('bitLengths', { length: 'numLengths', type: singleItf8 }),
298
- 4: Parser.start() // BYTE_ARRAY_LEN
299
- .nest('lengthsEncoding', { type: 'cramEncoding' })
300
- .nest('valuesEncoding', { type: 'cramEncoding' }),
301
- // BYTE_ARRAY_STOP is a little different for CRAM v1
302
- 5: new Parser().uint8('stopByte').itf8('blockContentId'),
303
- 6: new Parser().itf8('offset').itf8('length'), // BETA
304
- 7: new Parser().itf8('offset').itf8('K'), // SUBEXP
305
- 8: new Parser().itf8('offset').itf8('log2m'), // GOLOMB_RICE
306
- 9: new Parser().itf8('offset'), // GAMMA
573
+ function cramTagEncodingMap() {
574
+ return {
575
+ parser: (buffer: Buffer, offset: number) => {
576
+ const [mapSize, newOffset1] = parseItf8(buffer, offset)
577
+ offset += newOffset1
578
+ const [mapCount, newOffset2] = parseItf8(buffer, offset)
579
+ offset += newOffset2
580
+ const ents = []
581
+ for (let i = 0; i < mapCount; i++) {
582
+ const [k0, newOffset3] = parseItf8(buffer, offset)
583
+ offset += newOffset3
584
+ const key =
585
+ String.fromCharCode((k0 >> 16) & 0xff) +
586
+ String.fromCharCode((k0 >> 8) & 0xff) +
587
+ String.fromCharCode(k0 & 0xff)
588
+
589
+ const { value, offset: newOffset4 } = cramEncodingSub(buffer, offset)
590
+ offset = newOffset4
591
+ ents.push({ key, value })
592
+ }
593
+ return {
594
+ value: {
595
+ mapSize,
596
+ ents,
597
+ mapCount,
307
598
  },
308
- })
309
-
310
- return { parser }
311
- },
312
-
313
- cramDataSeriesEncodingMap(majorVersion: number) {
314
- return new Parser()
315
- .itf8('mapSize')
316
- .itf8('mapCount')
317
- .array('ents', {
318
- length: 'mapCount',
319
- type: new Parser()
320
- .string('key', { length: 2, stripNull: false })
321
- .nest('value', { type: this.cramEncoding(majorVersion).parser }),
322
- })
323
- },
324
-
325
- cramTagEncodingMap(majorVersion: number) {
326
- return new Parser()
327
- .itf8('mapSize')
328
- .itf8('mapCount')
329
- .array('ents', {
330
- length: 'mapCount',
331
- type: new Parser()
332
- .itf8('key', {
333
- formatter: /* istanbul ignore next */ integerRepresentation =>
334
- /* istanbul ignore next */
335
- String.fromCharCode((integerRepresentation >> 16) & 0xff) +
336
- String.fromCharCode((integerRepresentation >> 8) & 0xff) +
337
- String.fromCharCode(integerRepresentation & 0xff),
338
- })
339
- .nest('value', { type: this.cramEncoding(majorVersion).parser }),
340
- })
341
- },
342
-
343
- cramCompressionHeader(majorVersion: number) {
344
- let parser = new Parser()
345
- // TODO: if we want to support CRAM v1, we will need to refactor
346
- // compression header into 2 parts to parse the landmarks,
347
- // like the container header
348
- parser = parser
349
- .nest('preservation', {
350
- type: cramPreservationMap,
351
- formatter: formatMap,
352
- })
353
- .nest('dataSeriesEncoding', {
354
- type: this.cramDataSeriesEncodingMap(majorVersion),
355
- formatter: formatMap,
356
- })
357
- .nest('tagEncoding', {
358
- type: this.cramTagEncodingMap(majorVersion),
359
- formatter: formatMap,
360
- })
361
- return { parser }
362
- },
363
-
364
- cramContainerHeader1(majorVersion: number) {
365
- let parser = new Parser()
366
- .int32('length') // byte size of the container data (blocks)
367
- .itf8('refSeqId') // reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences
368
- .itf8('refSeqStart') // the alignment start position or 0 for unmapped reads
369
- .itf8('alignmentSpan') // the length of the alignment or 0 for unmapped reads
370
- .itf8('numRecords') // number of records in the container
371
- let maxLength = 4 + 5 * 4
599
+ offset,
600
+ }
601
+ },
602
+ }
603
+ }
372
604
 
373
- if (majorVersion >= 3) {
374
- parser = parser.ltf8('recordCounter') // 1-based sequential index of records in the file/stream.
375
- maxLength += 9
376
- } else if (majorVersion === 2) {
377
- parser = parser.itf8('recordCounter')
378
- maxLength += 5
379
- }
605
+ function cramCompressionHeader() {
606
+ return {
607
+ parser: (buffer: Buffer, offset: number) => {
608
+ // TODO: if we want to support CRAM v1, we will need to refactor
609
+ // compression header into 2 parts to parse the landmarks, like the
610
+ // container header
611
+ const { value: preservation, offset: newOffset1 } =
612
+ cramPreservationMap().parser(buffer, offset)
613
+ offset = newOffset1
614
+
615
+ const { value: dataSeriesEncoding, offset: newOffset2 } =
616
+ cramDataSeriesEncodingMap().parser(buffer, offset)
617
+ offset = newOffset2
618
+
619
+ const { value: tagEncoding, offset: newOffset3 } =
620
+ cramTagEncodingMap().parser(buffer, offset)
621
+ offset = newOffset3
622
+
623
+ return {
624
+ value: {
625
+ dataSeriesEncoding: formatMap(
626
+ dataSeriesEncoding,
627
+ ) as DataSeriesEncodingMap,
628
+ preservation: formatMap(
629
+ preservation,
630
+ ) as unknown as CramPreservationMap,
631
+ tagEncoding: formatMap(tagEncoding) as Record<string, CramEncoding>,
632
+ },
633
+ offset,
634
+ }
635
+ },
636
+ }
637
+ }
380
638
 
381
- if (majorVersion > 1) {
382
- parser = parser.ltf8('numBases') // number of read bases
383
- maxLength += 9
384
- }
385
- parser = parser
386
- .itf8('numBlocks') // the number of blocks
387
- .itf8('numLandmarks') // the number of landmarks
388
- maxLength += 5 + 5
389
-
390
- return { parser, maxLength }
391
- },
392
-
393
- cramContainerHeader2(majorVersion: number) {
394
- let parser = new Parser()
395
- .itf8('numLandmarks') // the number of blocks
396
- // Each integer value of this array is a byte offset
397
- // into the blocks byte array. Landmarks are used for
398
- // random access indexing.
399
- .array('landmarks', {
400
- type: new Parser().itf8(),
401
- length: 'numLandmarks',
402
- })
403
-
404
- let crcLength = 0
405
- if (majorVersion >= 3) {
406
- parser = parser.uint32('crc32')
407
- crcLength = 4
408
- }
409
- return {
410
- parser,
411
- maxLength: (numLandmarks: number) => 5 + numLandmarks * 5 + crcLength,
412
- }
413
- },
639
+ function cramContainerHeader1(majorVersion: number) {
640
+ let maxLength = 4
641
+ maxLength += 5 * 4
642
+ maxLength += 9
643
+ maxLength += 9
644
+ maxLength += 5 + 5
645
+ return {
646
+ maxLength,
647
+ parser: (buffer: Buffer, offset: number) => {
648
+ const b = buffer
649
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length)
650
+ // byte size of the container data (blocks)
651
+ const length = dataView.getInt32(offset, true)
652
+ offset += 4
653
+ // reference sequence identifier, -1 for unmapped reads, -2 for multiple
654
+ // reference sequences
655
+ const [refSeqId, newOffset1] = parseItf8(buffer, offset)
656
+ offset += newOffset1
657
+ const [refSeqStart, newOffset2] = parseItf8(buffer, offset)
658
+ offset += newOffset2
659
+ const [alignmentSpan, newOffset3] = parseItf8(buffer, offset)
660
+ offset += newOffset3
661
+ const [numRecords, newOffset4] = parseItf8(buffer, offset)
662
+ offset += newOffset4
663
+
664
+ let recordCounter = 0
665
+ if (majorVersion >= 3) {
666
+ const [rc, newOffset5] = parseLtf8(buffer, offset)
667
+ recordCounter = rc
668
+ offset += newOffset5
669
+ } else if (majorVersion === 2) {
670
+ const [rc, newOffset5] = parseItf8(buffer, offset)
671
+ recordCounter = rc
672
+ offset += newOffset5
673
+ } else {
674
+ console.warn('setting recordCounter=0')
675
+ }
676
+
677
+ let numBases: number | undefined
678
+ if (majorVersion > 1) {
679
+ const [n, newOffset5] = parseLtf8(buffer, offset)
680
+ numBases = n
681
+ offset += newOffset5
682
+ }
683
+ const [numBlocks, newOffset6] = parseItf8(buffer, offset)
684
+ offset += newOffset6
685
+ const [numLandmarks, newOffset7] = parseItf8(buffer, offset)
686
+ offset += newOffset7
687
+ return {
688
+ value: {
689
+ length,
690
+ refSeqId,
691
+ refSeqStart,
692
+ alignmentSpan,
693
+ numBlocks,
694
+ numLandmarks,
695
+ numBases,
696
+ recordCounter,
697
+ numRecords,
698
+ },
699
+ offset,
700
+ }
701
+ },
702
+ }
703
+ }
704
+
705
+ function cramContainerHeader2(majorVersion: number) {
706
+ return {
707
+ parser: (buffer: Buffer, offset: number) => {
708
+ const b = buffer
709
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length)
710
+ const [numLandmarks, newOffset1] = parseItf8(buffer, offset)
711
+ offset += newOffset1
712
+ const landmarks = []
713
+ for (let i = 0; i < numLandmarks; i++) {
714
+ const [landmark, newOffset2] = parseItf8(buffer, offset)
715
+ offset += newOffset2
716
+ landmarks.push(landmark)
717
+ }
718
+
719
+ let crc32: number | undefined
720
+ if (majorVersion >= 3) {
721
+ crc32 = dataView.getUint32(offset, true)
722
+ offset += 4
723
+ }
724
+ return {
725
+ value: {
726
+ ...(crc32 === undefined ? {} : { crc32 }),
727
+ numLandmarks,
728
+ landmarks,
729
+ },
730
+ offset,
731
+ }
732
+ },
733
+ maxLength: (numLandmarks: number) => 5 + 5 * numLandmarks + 4,
734
+ }
414
735
  }
415
736
 
416
737
  export type CompressionMethod =
@@ -438,69 +759,26 @@ export interface BlockHeader {
438
759
  uncompressedSize: number
439
760
  }
440
761
 
441
- export type CramCompressionHeader = ParsedItem<{
762
+ export interface CramCompressionHeader {
442
763
  preservation: CramPreservationMap
443
764
  dataSeriesEncoding: DataSeriesEncodingMap
444
765
  tagEncoding: Record<string, CramEncoding>
445
- }>
446
-
447
- function getSectionParsers(majorVersion: number): {
448
- cramFileDefinition: {
449
- parser: Parser<{
450
- magic: string
451
- majorVersion: number
452
- minorVersion: number
453
- fileId: string
454
- }>
455
- maxLength: number
456
- }
457
- cramContainerHeader1: {
458
- parser: Parser<{
459
- length: number
460
- refSeqId: number
461
- refSeqStart: number
462
- alignmentSpan: number
463
- numRecords: number
464
- recordCounter: number
465
- numBases: number
466
- numBlocks: number
467
- numLandmarks: number
468
- }>
469
- maxLength: number
470
- }
471
- cramContainerHeader2: {
472
- parser: Parser<{
473
- numLandmarks: number
474
- landmarks: number[]
475
- crc32: number
476
- }>
477
- maxLength: (x: number) => number
478
- }
479
- cramBlockHeader: {
480
- parser: Parser<BlockHeader>
481
- maxLength: number
482
- }
483
- cramBlockCrc32: {
484
- parser: Parser<{ crc32: number }>
485
- maxLength: number
486
- }
487
- cramCompressionHeader: {
488
- parser: Parser<CramCompressionHeader>
489
- }
490
- cramMappedSliceHeader: {
491
- parser: Parser<MappedSliceHeader>
492
- maxLength: (numContentIds: number) => number
493
- }
494
- cramUnmappedSliceHeader: {
495
- parser: Parser<UnmappedSliceHeader>
496
- maxLength: (numContentIds: number) => number
497
- }
498
- } {
499
- const parsers: any = Object.assign({}, unversionedParsers)
500
- Object.keys(versionedParsers).forEach(parserName => {
501
- parsers[parserName] = (versionedParsers as any)[parserName](majorVersion)
502
- })
503
- return parsers
766
+ _size: number
767
+ _endPosition: number
504
768
  }
505
769
 
506
- export { cramFileDefinition, getSectionParsers }
770
+ export function getSectionParsers(majorVersion: number) {
771
+ return {
772
+ cramFileDefinition: cramFileDefinition(),
773
+ cramBlockHeader: cramBlockHeader(),
774
+ cramBlockCrc32: cramBlockCrc32(),
775
+ cramDataSeriesEncodingMap: cramDataSeriesEncodingMap(),
776
+ cramTagEncodingMap: cramTagEncodingMap(),
777
+ cramCompressionHeader: cramCompressionHeader(),
778
+ cramEncoding: cramEncoding(),
779
+ cramUnmappedSliceHeader: cramUnmappedSliceHeader(majorVersion),
780
+ cramMappedSliceHeader: cramMappedSliceHeader(majorVersion),
781
+ cramContainerHeader1: cramContainerHeader1(majorVersion),
782
+ cramContainerHeader2: cramContainerHeader2(majorVersion),
783
+ }
784
+ }