@gmod/cram 2.0.4 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/CHANGELOG.md +10 -0
  2. package/dist/cram-bundle.js +1 -1
  3. package/dist/cramFile/codecs/byteArrayLength.js +1 -1
  4. package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
  5. package/dist/cramFile/codecs/external.js +1 -1
  6. package/dist/cramFile/codecs/external.js.map +1 -1
  7. package/dist/cramFile/codecs/huffman.js +2 -1
  8. package/dist/cramFile/codecs/huffman.js.map +1 -1
  9. package/dist/cramFile/container/compressionScheme.d.ts +0 -3
  10. package/dist/cramFile/container/compressionScheme.js +0 -4
  11. package/dist/cramFile/container/compressionScheme.js.map +1 -1
  12. package/dist/cramFile/container/index.d.ts +56 -3
  13. package/dist/cramFile/container/index.js +15 -9
  14. package/dist/cramFile/container/index.js.map +1 -1
  15. package/dist/cramFile/file.d.ts +24 -59
  16. package/dist/cramFile/file.js +21 -26
  17. package/dist/cramFile/file.js.map +1 -1
  18. package/dist/cramFile/record.d.ts +1 -1
  19. package/dist/cramFile/sectionParsers.d.ts +195 -48
  20. package/dist/cramFile/sectionParsers.js +621 -303
  21. package/dist/cramFile/sectionParsers.js.map +1 -1
  22. package/dist/cramFile/slice/index.d.ts +23 -1
  23. package/dist/cramFile/slice/index.js +9 -6
  24. package/dist/cramFile/slice/index.js.map +1 -1
  25. package/dist/cramFile/util.d.ts +6 -4
  26. package/dist/cramFile/util.js +88 -6
  27. package/dist/cramFile/util.js.map +1 -1
  28. package/esm/cramFile/codecs/byteArrayLength.js +1 -1
  29. package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
  30. package/esm/cramFile/codecs/external.js +1 -1
  31. package/esm/cramFile/codecs/external.js.map +1 -1
  32. package/esm/cramFile/codecs/huffman.js +2 -1
  33. package/esm/cramFile/codecs/huffman.js.map +1 -1
  34. package/esm/cramFile/container/compressionScheme.d.ts +0 -3
  35. package/esm/cramFile/container/compressionScheme.js +0 -4
  36. package/esm/cramFile/container/compressionScheme.js.map +1 -1
  37. package/esm/cramFile/container/index.d.ts +56 -3
  38. package/esm/cramFile/container/index.js +15 -9
  39. package/esm/cramFile/container/index.js.map +1 -1
  40. package/esm/cramFile/file.d.ts +24 -59
  41. package/esm/cramFile/file.js +22 -25
  42. package/esm/cramFile/file.js.map +1 -1
  43. package/esm/cramFile/record.d.ts +1 -1
  44. package/esm/cramFile/sectionParsers.d.ts +195 -48
  45. package/esm/cramFile/sectionParsers.js +620 -303
  46. package/esm/cramFile/sectionParsers.js.map +1 -1
  47. package/esm/cramFile/slice/index.d.ts +23 -1
  48. package/esm/cramFile/slice/index.js +10 -7
  49. package/esm/cramFile/slice/index.js.map +1 -1
  50. package/esm/cramFile/util.d.ts +6 -4
  51. package/esm/cramFile/util.js +87 -6
  52. package/esm/cramFile/util.js.map +1 -1
  53. package/package.json +7 -8
  54. package/src/cramFile/codecs/byteArrayLength.ts +1 -2
  55. package/src/cramFile/codecs/external.ts +1 -1
  56. package/src/cramFile/codecs/huffman.ts +2 -1
  57. package/src/cramFile/container/compressionScheme.ts +1 -8
  58. package/src/cramFile/container/index.ts +21 -10
  59. package/src/cramFile/file.ts +28 -43
  60. package/src/cramFile/record.ts +1 -1
  61. package/src/cramFile/sectionParsers.ts +668 -390
  62. package/src/cramFile/slice/index.ts +11 -5
  63. package/src/cramFile/util.ts +90 -91
  64. package/src/typings/binary-parser.d.ts +0 -44
@@ -1,126 +1,209 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.cramFileDefinition = void 0;
3
+ exports.cramFileDefinition = cramFileDefinition;
4
+ exports.cramBlockHeader = cramBlockHeader;
5
+ exports.cramBlockCrc32 = cramBlockCrc32;
6
+ exports.cramTagDictionary = cramTagDictionary;
7
+ exports.cramPreservationMap = cramPreservationMap;
4
8
  exports.isMappedSliceHeader = isMappedSliceHeader;
5
9
  exports.getSectionParsers = getSectionParsers;
6
- const binary_parser_1 = require("@gmod/binary-parser");
7
- const singleItf8 = new binary_parser_1.Parser().itf8();
8
- const cramFileDefinition = {
9
- parser: new binary_parser_1.Parser()
10
- .string('magic', { length: 4 })
11
- .uint8('majorVersion')
12
- .uint8('minorVersion')
13
- .string('fileId', { length: 20, stripNull: true }),
14
- maxLength: 26,
15
- };
16
- exports.cramFileDefinition = cramFileDefinition;
17
- const cramBlockHeader = {
18
- parser: new binary_parser_1.Parser()
19
- .uint8('compressionMethod', {
20
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ b => {
21
- const method = [
22
- 'raw',
23
- 'gzip',
24
- 'bzip2',
25
- 'lzma',
26
- 'rans',
27
- 'rans4x16',
28
- 'arith',
29
- 'fqzcomp',
30
- 'tok3',
31
- ][b];
32
- if (!method) {
33
- throw new Error(`compression method number ${b} not implemented`);
34
- }
35
- return method;
10
+ const util_1 = require("./util");
11
+ function cramFileDefinition() {
12
+ return {
13
+ parser: (buffer, _startOffset = 0) => {
14
+ const b = buffer;
15
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
16
+ let offset = 0;
17
+ const magic = buffer.subarray(offset, offset + 4).toString();
18
+ offset += 4;
19
+ const majorVersion = dataView.getUint8(offset);
20
+ offset += 1;
21
+ const minorVersion = dataView.getUint8(offset);
22
+ offset += 1;
23
+ const fileId = b
24
+ .subarray(offset, offset + 20)
25
+ .toString()
26
+ .replaceAll('\0', '');
27
+ offset += 20;
28
+ return {
29
+ value: {
30
+ magic,
31
+ majorVersion,
32
+ minorVersion,
33
+ fileId,
34
+ },
35
+ offset,
36
+ };
36
37
  },
37
- })
38
- .uint8('contentType', {
39
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ b => {
40
- const type = [
41
- 'FILE_HEADER',
42
- 'COMPRESSION_HEADER',
43
- 'MAPPED_SLICE_HEADER',
44
- 'UNMAPPED_SLICE_HEADER', // < only used in cram v1
45
- 'EXTERNAL_DATA',
46
- 'CORE_DATA',
47
- ][b];
48
- if (!type) {
49
- throw new Error(`invalid block content type id ${b}`);
50
- }
51
- return type;
38
+ maxLength: 26,
39
+ };
40
+ }
41
+ function cramBlockHeader() {
42
+ const parser = (buffer, _startOffset = 0) => {
43
+ const b = buffer;
44
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
45
+ let offset = 0;
46
+ const d = dataView.getUint8(offset);
47
+ const compressionMethod = [
48
+ 'raw',
49
+ 'gzip',
50
+ 'bzip2',
51
+ 'lzma',
52
+ 'rans',
53
+ 'rans4x16',
54
+ 'arith',
55
+ 'fqzcomp',
56
+ 'tok3',
57
+ ][d];
58
+ if (!compressionMethod) {
59
+ throw new Error(`compression method number ${d} not implemented`);
60
+ }
61
+ offset += 1;
62
+ const c = dataView.getUint8(offset);
63
+ const contentType = [
64
+ 'FILE_HEADER',
65
+ 'COMPRESSION_HEADER',
66
+ 'MAPPED_SLICE_HEADER',
67
+ 'UNMAPPED_SLICE_HEADER', // < only used in cram v1
68
+ 'EXTERNAL_DATA',
69
+ 'CORE_DATA',
70
+ ][c];
71
+ if (!contentType) {
72
+ throw new Error(`invalid block content type id ${c}`);
73
+ }
74
+ offset += 1;
75
+ const [contentId, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
76
+ offset += newOffset1;
77
+ const [compressedSize, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
78
+ offset += newOffset2;
79
+ const [uncompressedSize, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
80
+ offset += newOffset3;
81
+ return {
82
+ offset,
83
+ value: {
84
+ uncompressedSize,
85
+ compressedSize,
86
+ contentId,
87
+ contentType: contentType,
88
+ compressionMethod: compressionMethod,
89
+ },
90
+ };
91
+ };
92
+ return { parser, maxLength: 17 };
93
+ }
94
+ function cramBlockCrc32() {
95
+ return {
96
+ parser: (buffer, offset) => {
97
+ const b = buffer;
98
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
99
+ const crc32 = dataView.getUint32(offset, true);
100
+ offset += 4;
101
+ return {
102
+ offset,
103
+ value: {
104
+ crc32,
105
+ },
106
+ };
52
107
  },
53
- })
54
- .itf8('contentId')
55
- .itf8('compressedSize')
56
- .itf8('uncompressedSize'),
57
- maxLength: 17,
58
- };
59
- const cramBlockCrc32 = {
60
- parser: new binary_parser_1.Parser().uint32('crc32'),
61
- maxLength: 4,
62
- };
63
- const cramTagDictionary = new binary_parser_1.Parser().itf8('size').buffer('ents', {
64
- length: 'size',
65
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ buffer => {
66
- function makeTagSet(stringStart, stringEnd) {
67
- const str = buffer.toString('utf8', stringStart, stringEnd);
68
- const tags = [];
69
- for (let i = 0; i < str.length; i += 3) {
70
- tags.push(str.slice(i, i + 3));
108
+ maxLength: 4,
109
+ };
110
+ }
111
+ function makeTagSet(buffer, stringStart, stringEnd) {
112
+ const str = buffer.toString('utf8', stringStart, stringEnd);
113
+ const tags = [];
114
+ for (let i = 0; i < str.length; i += 3) {
115
+ tags.push(str.slice(i, i + 3));
116
+ }
117
+ return tags;
118
+ }
119
+ function cramTagDictionary() {
120
+ return {
121
+ parser: (buffer, offset) => {
122
+ const [size, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
123
+ offset += newOffset1;
124
+ const subbuf = buffer.subarray(offset, offset + size);
125
+ offset += size;
126
+ const tagSets = [];
127
+ let stringStart = 0;
128
+ let i = 0;
129
+ for (; i < subbuf.length; i++) {
130
+ if (!subbuf[i]) {
131
+ tagSets.push(makeTagSet(subbuf, stringStart, i));
132
+ stringStart = i + 1;
133
+ }
71
134
  }
72
- return tags;
73
- }
74
- /* eslint-disable */
75
- var tagSets = [];
76
- var stringStart = 0;
77
- var i;
78
- /* eslint-enable */
79
- for (i = 0; i < buffer.length; i += 1) {
80
- if (!buffer[i]) {
81
- tagSets.push(makeTagSet(stringStart, i));
82
- stringStart = i + 1;
135
+ if (i > stringStart) {
136
+ tagSets.push(makeTagSet(subbuf, stringStart, i));
83
137
  }
84
- }
85
- if (i > stringStart) {
86
- tagSets.push(makeTagSet(stringStart, i));
87
- }
88
- return tagSets;
89
- },
90
- });
91
- // const cramPreservationMapKeys = 'XX RN AP RR SM TD'.split(' ')
92
- const parseByteAsBool = new binary_parser_1.Parser().uint8(null, {
93
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ val => !!val,
94
- });
95
- const cramPreservationMap = new binary_parser_1.Parser()
96
- .itf8('mapSize')
97
- .itf8('mapCount')
98
- .array('ents', {
99
- length: 'mapCount',
100
- type: new binary_parser_1.Parser()
101
- .string('key', {
102
- length: 2,
103
- stripNull: false,
104
- // formatter: val => cramPreservationMapKeys[val] || 0,
105
- })
106
- .choice('value', {
107
- tag: 'key',
108
- choices: {
109
- MI: parseByteAsBool,
110
- UI: parseByteAsBool,
111
- PI: parseByteAsBool,
112
- RN: parseByteAsBool,
113
- AP: parseByteAsBool,
114
- RR: parseByteAsBool,
115
- SM: new binary_parser_1.Parser().array(null, { type: 'uint8', length: 5 }),
116
- TD: new binary_parser_1.Parser().nest(null, {
117
- type: cramTagDictionary,
118
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ data => data.ents,
119
- }),
138
+ return {
139
+ value: {
140
+ size,
141
+ ents: tagSets,
142
+ },
143
+ offset,
144
+ };
145
+ },
146
+ };
147
+ }
148
+ function cramPreservationMap() {
149
+ return {
150
+ parser: (buffer, offset) => {
151
+ const b = buffer;
152
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
153
+ const [mapSize, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
154
+ offset += newOffset1;
155
+ const [mapCount, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
156
+ offset += newOffset2;
157
+ const ents = [];
158
+ for (let i = 0; i < mapCount; i++) {
159
+ const key = String.fromCharCode(buffer[offset]) +
160
+ String.fromCharCode(buffer[offset + 1]);
161
+ offset += 2;
162
+ if (key === 'MI' ||
163
+ key === 'UI' ||
164
+ key === 'PI' ||
165
+ key === 'RN' ||
166
+ key === 'AP' ||
167
+ key === 'RR') {
168
+ ents.push({
169
+ key,
170
+ value: !!dataView.getUint8(offset),
171
+ });
172
+ offset += 1;
173
+ }
174
+ else if (key === 'SM') {
175
+ ents.push({
176
+ key,
177
+ value: [
178
+ dataView.getUint8(offset),
179
+ dataView.getUint8(offset + 1),
180
+ dataView.getUint8(offset + 2),
181
+ dataView.getUint8(offset + 3),
182
+ dataView.getUint8(offset + 4),
183
+ ],
184
+ });
185
+ offset += 5;
186
+ }
187
+ else if (key === 'TD') {
188
+ const { offset: offsetRet, value } = cramTagDictionary().parser(buffer, offset);
189
+ ents.push({ key, value: value.ents });
190
+ offset = offsetRet;
191
+ }
192
+ else {
193
+ throw new Error(`unknown key ${key}`);
194
+ }
195
+ }
196
+ return {
197
+ value: {
198
+ mapSize,
199
+ mapCount,
200
+ ents,
201
+ },
202
+ offset,
203
+ };
120
204
  },
121
- }),
122
- });
123
- /* istanbul ignore next */
205
+ };
206
+ }
124
207
  function formatMap(data) {
125
208
  const map = {};
126
209
  for (const { key, value } of data.ents) {
@@ -131,211 +214,446 @@ function formatMap(data) {
131
214
  }
132
215
  return map;
133
216
  }
134
- const unversionedParsers = {
135
- cramFileDefinition,
136
- cramBlockHeader,
137
- cramBlockCrc32,
138
- };
139
217
  function isMappedSliceHeader(header) {
140
218
  return typeof header.refSeqId === 'number';
141
219
  }
142
- // each of these is a function of the major and minor version
143
- const versionedParsers = {
144
- // assemble a section parser for the unmapped slice header, with slight
145
- // variations depending on the major version of the cram file
146
- cramUnmappedSliceHeader(majorVersion) {
147
- let maxLength = 0;
148
- let parser = new binary_parser_1.Parser().itf8('numRecords');
149
- maxLength += 5;
220
+ // assemble a section parser for the unmapped slice header, with slight
221
+ // variations depending on the major version of the cram file
222
+ function cramUnmappedSliceHeader(majorVersion) {
223
+ let maxLength = 0;
224
+ maxLength += 5;
225
+ maxLength += 9;
226
+ maxLength += 5 * 2;
227
+ maxLength += 16;
228
+ const parser = (buffer, offset) => {
229
+ const [numRecords, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
230
+ offset += newOffset1;
231
+ let recordCounter = 0;
150
232
  // recordCounter is itf8 in a CRAM v2 file, absent in CRAM v1
151
233
  if (majorVersion >= 3) {
152
- parser = parser.ltf8('recordCounter');
153
- maxLength += 9;
234
+ const [rc, newOffset2] = (0, util_1.parseLtf8)(buffer, offset);
235
+ offset += newOffset2;
236
+ recordCounter = rc;
154
237
  }
155
238
  else if (majorVersion === 2) {
156
- parser = parser.itf8('recordCounter');
157
- maxLength += 5;
158
- }
159
- parser = parser
160
- .itf8('numBlocks')
161
- .itf8('numContentIds')
162
- .array('contentIds', {
163
- type: singleItf8,
164
- length: 'numContentIds',
165
- });
166
- maxLength += 5 * 2; // + numContentIds*5
167
- // the md5 sum is missing in cram v1
168
- if (majorVersion >= 2) {
169
- parser = parser.array('md5', { type: 'uint8', length: 16 });
170
- maxLength += 16;
239
+ const [rc, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
240
+ offset += newOffset2;
241
+ recordCounter = rc;
171
242
  }
172
- const maxLengthFunc = (numContentIds) => maxLength + numContentIds * 5;
173
- return { parser, maxLength: maxLengthFunc }; // : p, maxLength: numContentIds => 5 + 9 + 5 * 2 + 5 * numContentIds + 16 }
174
- },
175
- // assembles a section parser for the unmapped slice header, with slight
176
- // variations depending on the major version of the cram file
177
- cramMappedSliceHeader(majorVersion) {
178
- let parser = new binary_parser_1.Parser()
179
- .itf8('refSeqId')
180
- .itf8('refSeqStart')
181
- .itf8('refSeqSpan')
182
- .itf8('numRecords');
183
- let maxLength = 5 * 4;
184
- if (majorVersion >= 3) {
185
- parser = parser.ltf8('recordCounter');
186
- maxLength += 9;
243
+ else {
244
+ console.warn('recordCounter=0');
187
245
  }
188
- else if (majorVersion === 2) {
189
- parser = parser.itf8('recordCounter');
190
- maxLength += 5;
246
+ const [numBlocks, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
247
+ offset += newOffset3;
248
+ const [numContentIds, newOffset4] = (0, util_1.parseItf8)(buffer, offset);
249
+ offset += newOffset4;
250
+ const contentIds = [];
251
+ for (let i = 0; i < numContentIds; i++) {
252
+ const [id, newOffset5] = (0, util_1.parseItf8)(buffer, offset);
253
+ offset += newOffset5;
254
+ contentIds.push(id);
191
255
  }
192
- parser = parser
193
- .itf8('numBlocks')
194
- .itf8('numContentIds')
195
- .array('contentIds', {
196
- type: singleItf8,
197
- length: 'numContentIds',
198
- })
199
- .itf8('refBaseBlockId');
200
- maxLength += 5 * 3;
201
256
  // the md5 sum is missing in cram v1
257
+ let md5;
202
258
  if (majorVersion >= 2) {
203
- parser = parser.array('md5', { type: 'uint8', length: 16 });
204
- maxLength += 16;
259
+ md5 = [...buffer.subarray(offset, offset + 16)];
260
+ offset += 16;
205
261
  }
206
- const maxLengthFunc = (numContentIds) => maxLength + numContentIds * 5;
207
- return { parser, maxLength: maxLengthFunc };
208
- },
209
- cramEncoding(_majorVersion) {
210
- const parser = new binary_parser_1.Parser()
211
- .namely('cramEncoding')
212
- .itf8('codecId')
213
- .itf8('parametersBytes')
214
- .choice('parameters', {
215
- tag: 'codecId',
216
- choices: {
217
- 0: new binary_parser_1.Parser(), // NULL
218
- 1: new binary_parser_1.Parser().itf8('blockContentId'), // EXTERNAL
219
- 2: new binary_parser_1.Parser().itf8('offset').itf8('M'), // GOLOMB,
220
- // HUFFMAN_INT
221
- 3: binary_parser_1.Parser.start()
222
- .itf8('numCodes')
223
- .array('symbols', { length: 'numCodes', type: singleItf8 })
224
- .itf8('numLengths')
225
- .array('bitLengths', { length: 'numLengths', type: singleItf8 }),
226
- 4: binary_parser_1.Parser.start() // BYTE_ARRAY_LEN
227
- .nest('lengthsEncoding', { type: 'cramEncoding' })
228
- .nest('valuesEncoding', { type: 'cramEncoding' }),
229
- // BYTE_ARRAY_STOP is a little different for CRAM v1
230
- 5: new binary_parser_1.Parser().uint8('stopByte').itf8('blockContentId'),
231
- 6: new binary_parser_1.Parser().itf8('offset').itf8('length'), // BETA
232
- 7: new binary_parser_1.Parser().itf8('offset').itf8('K'), // SUBEXP
233
- 8: new binary_parser_1.Parser().itf8('offset').itf8('log2m'), // GOLOMB_RICE
234
- 9: new binary_parser_1.Parser().itf8('offset'), // GAMMA
262
+ return {
263
+ value: {
264
+ recordCounter,
265
+ md5,
266
+ contentIds,
267
+ numContentIds,
268
+ numBlocks,
269
+ numRecords,
235
270
  },
236
- });
237
- return { parser };
238
- },
239
- cramDataSeriesEncodingMap(majorVersion) {
240
- return new binary_parser_1.Parser()
241
- .itf8('mapSize')
242
- .itf8('mapCount')
243
- .array('ents', {
244
- length: 'mapCount',
245
- type: new binary_parser_1.Parser()
246
- .string('key', { length: 2, stripNull: false })
247
- .nest('value', { type: this.cramEncoding(majorVersion).parser }),
248
- });
249
- },
250
- cramTagEncodingMap(majorVersion) {
251
- return new binary_parser_1.Parser()
252
- .itf8('mapSize')
253
- .itf8('mapCount')
254
- .array('ents', {
255
- length: 'mapCount',
256
- type: new binary_parser_1.Parser()
257
- .itf8('key', {
258
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ integerRepresentation =>
259
- /* istanbul ignore next */
260
- String.fromCharCode((integerRepresentation >> 16) & 0xff) +
261
- String.fromCharCode((integerRepresentation >> 8) & 0xff) +
262
- String.fromCharCode(integerRepresentation & 0xff),
263
- })
264
- .nest('value', { type: this.cramEncoding(majorVersion).parser }),
265
- });
266
- },
267
- cramCompressionHeader(majorVersion) {
268
- let parser = new binary_parser_1.Parser();
269
- // TODO: if we want to support CRAM v1, we will need to refactor
270
- // compression header into 2 parts to parse the landmarks,
271
- // like the container header
272
- parser = parser
273
- .nest('preservation', {
274
- type: cramPreservationMap,
275
- formatter: formatMap,
276
- })
277
- .nest('dataSeriesEncoding', {
278
- type: this.cramDataSeriesEncodingMap(majorVersion),
279
- formatter: formatMap,
280
- })
281
- .nest('tagEncoding', {
282
- type: this.cramTagEncodingMap(majorVersion),
283
- formatter: formatMap,
284
- });
285
- return { parser };
286
- },
287
- cramContainerHeader1(majorVersion) {
288
- let parser = new binary_parser_1.Parser()
289
- .int32('length') // byte size of the container data (blocks)
290
- .itf8('refSeqId') // reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences
291
- .itf8('refSeqStart') // the alignment start position or 0 for unmapped reads
292
- .itf8('alignmentSpan') // the length of the alignment or 0 for unmapped reads
293
- .itf8('numRecords'); // number of records in the container
294
- let maxLength = 4 + 5 * 4;
295
- if (majorVersion >= 3) {
296
- parser = parser.ltf8('recordCounter'); // 1-based sequential index of records in the file/stream.
297
- maxLength += 9;
298
- }
299
- else if (majorVersion === 2) {
300
- parser = parser.itf8('recordCounter');
301
- maxLength += 5;
302
- }
303
- if (majorVersion > 1) {
304
- parser = parser.ltf8('numBases'); // number of read bases
305
- maxLength += 9;
271
+ offset,
272
+ };
273
+ };
274
+ return {
275
+ parser,
276
+ maxLength: (numContentIds) => maxLength + numContentIds * 5,
277
+ };
278
+ }
279
+ // assembles a section parser for the unmapped slice header, with slight
280
+ // variations depending on the major version of the cram file
281
+ function cramMappedSliceHeader(majorVersion) {
282
+ let maxLength = 0;
283
+ maxLength += 5 * 4; // EL0
284
+ maxLength += 9; // EL1
285
+ maxLength += 5 * 3; // EL2 ITF8s
286
+ maxLength += 16; // MD5
287
+ return {
288
+ parser: (buffer, offset) => {
289
+ // L0
290
+ const [refSeqId, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
291
+ offset += newOffset1;
292
+ const [refSeqStart, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
293
+ offset += newOffset2;
294
+ const [refSeqSpan, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
295
+ offset += newOffset3;
296
+ const [numRecords, newOffset4] = (0, util_1.parseItf8)(buffer, offset);
297
+ offset += newOffset4;
298
+ // EL0
299
+ // L1
300
+ let recordCounter = 0;
301
+ if (majorVersion >= 3) {
302
+ const [rc, newOffset5] = (0, util_1.parseLtf8)(buffer, offset);
303
+ offset += newOffset5;
304
+ recordCounter = rc;
305
+ }
306
+ else if (majorVersion === 2) {
307
+ const [rc, newOffset5] = (0, util_1.parseItf8)(buffer, offset);
308
+ offset += newOffset5;
309
+ recordCounter = rc;
310
+ }
311
+ else {
312
+ console.warn('majorVersion is <2, recordCounter set to 0');
313
+ }
314
+ // EL1
315
+ // L2
316
+ const [numBlocks, newOffset6] = (0, util_1.parseItf8)(buffer, offset);
317
+ offset += newOffset6;
318
+ const [numContentIds, newOffset7] = (0, util_1.parseItf8)(buffer, offset);
319
+ offset += newOffset7;
320
+ const contentIds = [];
321
+ for (let i = 0; i < numContentIds; i++) {
322
+ const [id, newOffset5] = (0, util_1.parseItf8)(buffer, offset);
323
+ offset += newOffset5;
324
+ contentIds.push(id);
325
+ }
326
+ const [refBaseBlockId, newOffset8] = (0, util_1.parseItf8)(buffer, offset);
327
+ offset += newOffset8;
328
+ // EL2
329
+ // the md5 sum is missing in cram v1
330
+ let md5;
331
+ if (majorVersion >= 2) {
332
+ md5 = [...buffer.subarray(offset, offset + 16)];
333
+ offset += 16;
334
+ }
335
+ return {
336
+ value: {
337
+ md5,
338
+ numBlocks,
339
+ numRecords,
340
+ numContentIds,
341
+ refSeqSpan,
342
+ refSeqId,
343
+ refSeqStart,
344
+ recordCounter,
345
+ refBaseBlockId,
346
+ contentIds,
347
+ },
348
+ offset,
349
+ };
350
+ },
351
+ maxLength: (numContentIds) => maxLength + numContentIds * 5,
352
+ };
353
+ }
354
+ function cramEncoding() {
355
+ return {
356
+ parser: (buffer, offset) => cramEncodingSub(buffer, offset),
357
+ };
358
+ }
359
+ function cramEncodingSub(buffer, offset) {
360
+ const b = buffer;
361
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
362
+ const [codecId, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
363
+ offset += newOffset1;
364
+ const [parametersBytes, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
365
+ offset += newOffset2;
366
+ const parameters = {};
367
+ if (codecId === 0) {
368
+ // NULL
369
+ }
370
+ else if (codecId === 1) {
371
+ // EXTERNAL
372
+ const [bc, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
373
+ parameters.blockContentId = bc;
374
+ offset += newOffset3;
375
+ }
376
+ else if (codecId === 2) {
377
+ // GOLUMB
378
+ const [off, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
379
+ parameters.offset = off;
380
+ offset += newOffset3;
381
+ const [M2, newOffset4] = (0, util_1.parseItf8)(buffer, offset);
382
+ parameters.M = M2;
383
+ offset += newOffset4;
384
+ }
385
+ else if (codecId === 3) {
386
+ // HUFFMAN_INT
387
+ const val = (0, util_1.parseItf8)(buffer, offset);
388
+ const numCodes = val[0];
389
+ offset += val[1];
390
+ const symbols = [];
391
+ for (let i = 0; i < numCodes; i++) {
392
+ const code = (0, util_1.parseItf8)(buffer, offset);
393
+ symbols.push(code[0]);
394
+ offset += code[1];
306
395
  }
307
- parser = parser
308
- .itf8('numBlocks') // the number of blocks
309
- .itf8('numLandmarks'); // the number of landmarks
310
- maxLength += 5 + 5;
311
- return { parser, maxLength };
312
- },
313
- cramContainerHeader2(majorVersion) {
314
- let parser = new binary_parser_1.Parser()
315
- .itf8('numLandmarks') // the number of blocks
316
- // Each integer value of this array is a byte offset
317
- // into the blocks byte array. Landmarks are used for
318
- // random access indexing.
319
- .array('landmarks', {
320
- type: new binary_parser_1.Parser().itf8(),
321
- length: 'numLandmarks',
322
- });
323
- let crcLength = 0;
324
- if (majorVersion >= 3) {
325
- parser = parser.uint32('crc32');
326
- crcLength = 4;
396
+ parameters.symbols = symbols;
397
+ const val2 = (0, util_1.parseItf8)(buffer, offset);
398
+ const numLengths = val[0];
399
+ parameters.numLengths = numLengths;
400
+ parameters.numCodes = numCodes;
401
+ parameters.numLengths = numLengths;
402
+ offset += val2[1];
403
+ const bitLengths = [];
404
+ for (let i = 0; i < numLengths; i++) {
405
+ const len = (0, util_1.parseItf8)(buffer, offset);
406
+ offset += len[1];
407
+ bitLengths.push(len[0]);
327
408
  }
328
- return {
329
- parser,
330
- maxLength: (numLandmarks) => 5 + numLandmarks * 5 + crcLength,
331
- };
332
- },
333
- };
409
+ parameters.bitLengths = bitLengths;
410
+ }
411
+ else if (codecId === 4) {
412
+ // BYTE_ARRAY_LEN
413
+ const { value: lengthsEncoding, offset: newOffset1 } = cramEncodingSub(buffer, offset);
414
+ parameters.lengthsEncoding = lengthsEncoding;
415
+ offset = newOffset1;
416
+ const { value: valuesEncoding, offset: newOffset2 } = cramEncodingSub(buffer, offset);
417
+ parameters.valuesEncoding = valuesEncoding;
418
+ offset = newOffset2;
419
+ }
420
+ else if (codecId === 5) {
421
+ // BYTE_ARRAY_STOP
422
+ parameters.stopByte = dataView.getUint8(offset);
423
+ offset += 1;
424
+ const [blockContentId, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
425
+ parameters.blockContentId = blockContentId;
426
+ offset += newOffset1;
427
+ }
428
+ else if (codecId === 6) {
429
+ // BETA
430
+ const [off, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
431
+ parameters.offset = off;
432
+ offset += newOffset1;
433
+ const [len, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
434
+ parameters.length = len;
435
+ offset += newOffset2;
436
+ }
437
+ else if (codecId === 7) {
438
+ // SUBEXP
439
+ const [off, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
440
+ parameters.offset = off;
441
+ offset += newOffset1;
442
+ const [K, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
443
+ parameters.K = K;
444
+ offset += newOffset2;
445
+ }
446
+ else if (codecId === 8) {
447
+ // GOLOMB_RICE
448
+ const [off, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
449
+ parameters.offset = off;
450
+ offset += newOffset1;
451
+ const [l2m, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
452
+ parameters.log2m = l2m;
453
+ offset += newOffset2;
454
+ }
455
+ else if (codecId === 9) {
456
+ // GAMMA
457
+ const [off, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
458
+ parameters.offset = off;
459
+ offset += newOffset1;
460
+ }
461
+ else {
462
+ throw new Error(`unknown codecId ${codecId}`);
463
+ }
464
+ return {
465
+ value: {
466
+ codecId,
467
+ parametersBytes,
468
+ parameters,
469
+ },
470
+ offset,
471
+ };
472
+ }
473
+ function cramDataSeriesEncodingMap() {
474
+ return {
475
+ parser: (buffer, offset) => {
476
+ const [mapSize, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
477
+ offset += newOffset1;
478
+ const [mapCount, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
479
+ offset += newOffset2;
480
+ const ents = [];
481
+ for (let i = 0; i < mapCount; i++) {
482
+ const key = String.fromCharCode(buffer[offset]) +
483
+ String.fromCharCode(buffer[offset + 1]);
484
+ offset += 2;
485
+ const { value, offset: newOffset4 } = cramEncodingSub(buffer, offset);
486
+ offset = newOffset4;
487
+ ents.push({ key, value });
488
+ }
489
+ return {
490
+ value: {
491
+ mapSize,
492
+ ents,
493
+ mapCount,
494
+ },
495
+ offset,
496
+ };
497
+ },
498
+ };
499
+ }
500
+ function cramTagEncodingMap() {
501
+ return {
502
+ parser: (buffer, offset) => {
503
+ const [mapSize, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
504
+ offset += newOffset1;
505
+ const [mapCount, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
506
+ offset += newOffset2;
507
+ const ents = [];
508
+ for (let i = 0; i < mapCount; i++) {
509
+ const [k0, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
510
+ offset += newOffset3;
511
+ const key = String.fromCharCode((k0 >> 16) & 0xff) +
512
+ String.fromCharCode((k0 >> 8) & 0xff) +
513
+ String.fromCharCode(k0 & 0xff);
514
+ const { value, offset: newOffset4 } = cramEncodingSub(buffer, offset);
515
+ offset = newOffset4;
516
+ ents.push({ key, value });
517
+ }
518
+ return {
519
+ value: {
520
+ mapSize,
521
+ ents,
522
+ mapCount,
523
+ },
524
+ offset,
525
+ };
526
+ },
527
+ };
528
+ }
529
+ function cramCompressionHeader() {
530
+ return {
531
+ parser: (buffer, offset) => {
532
+ // TODO: if we want to support CRAM v1, we will need to refactor
533
+ // compression header into 2 parts to parse the landmarks, like the
534
+ // container header
535
+ const { value: preservation, offset: newOffset1 } = cramPreservationMap().parser(buffer, offset);
536
+ offset = newOffset1;
537
+ const { value: dataSeriesEncoding, offset: newOffset2 } = cramDataSeriesEncodingMap().parser(buffer, offset);
538
+ offset = newOffset2;
539
+ const { value: tagEncoding, offset: newOffset3 } = cramTagEncodingMap().parser(buffer, offset);
540
+ offset = newOffset3;
541
+ return {
542
+ value: {
543
+ dataSeriesEncoding: formatMap(dataSeriesEncoding),
544
+ preservation: formatMap(preservation),
545
+ tagEncoding: formatMap(tagEncoding),
546
+ },
547
+ offset,
548
+ };
549
+ },
550
+ };
551
+ }
552
+ function cramContainerHeader1(majorVersion) {
553
+ let maxLength = 4;
554
+ maxLength += 5 * 4;
555
+ maxLength += 9;
556
+ maxLength += 9;
557
+ maxLength += 5 + 5;
558
+ return {
559
+ maxLength,
560
+ parser: (buffer, offset) => {
561
+ const b = buffer;
562
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
563
+ // byte size of the container data (blocks)
564
+ const length = dataView.getInt32(offset, true);
565
+ offset += 4;
566
+ // reference sequence identifier, -1 for unmapped reads, -2 for multiple
567
+ // reference sequences
568
+ const [refSeqId, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
569
+ offset += newOffset1;
570
+ const [refSeqStart, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
571
+ offset += newOffset2;
572
+ const [alignmentSpan, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
573
+ offset += newOffset3;
574
+ const [numRecords, newOffset4] = (0, util_1.parseItf8)(buffer, offset);
575
+ offset += newOffset4;
576
+ let recordCounter = 0;
577
+ if (majorVersion >= 3) {
578
+ const [rc, newOffset5] = (0, util_1.parseLtf8)(buffer, offset);
579
+ recordCounter = rc;
580
+ offset += newOffset5;
581
+ }
582
+ else if (majorVersion === 2) {
583
+ const [rc, newOffset5] = (0, util_1.parseItf8)(buffer, offset);
584
+ recordCounter = rc;
585
+ offset += newOffset5;
586
+ }
587
+ else {
588
+ console.warn('setting recordCounter=0');
589
+ }
590
+ let numBases;
591
+ if (majorVersion > 1) {
592
+ const [n, newOffset5] = (0, util_1.parseLtf8)(buffer, offset);
593
+ numBases = n;
594
+ offset += newOffset5;
595
+ }
596
+ const [numBlocks, newOffset6] = (0, util_1.parseItf8)(buffer, offset);
597
+ offset += newOffset6;
598
+ const [numLandmarks, newOffset7] = (0, util_1.parseItf8)(buffer, offset);
599
+ offset += newOffset7;
600
+ return {
601
+ value: {
602
+ length,
603
+ refSeqId,
604
+ refSeqStart,
605
+ alignmentSpan,
606
+ numBlocks,
607
+ numLandmarks,
608
+ numBases,
609
+ recordCounter,
610
+ numRecords,
611
+ },
612
+ offset,
613
+ };
614
+ },
615
+ };
616
+ }
617
+ function cramContainerHeader2(majorVersion) {
618
+ return {
619
+ parser: (buffer, offset) => {
620
+ const b = buffer;
621
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
622
+ const [numLandmarks, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
623
+ offset += newOffset1;
624
+ const landmarks = [];
625
+ for (let i = 0; i < numLandmarks; i++) {
626
+ const [landmark, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
627
+ offset += newOffset2;
628
+ landmarks.push(landmark);
629
+ }
630
+ let crc32;
631
+ if (majorVersion >= 3) {
632
+ crc32 = dataView.getUint32(offset, true);
633
+ offset += 4;
634
+ }
635
+ return {
636
+ value: Object.assign(Object.assign({}, (crc32 === undefined ? {} : { crc32 })), { numLandmarks,
637
+ landmarks }),
638
+ offset,
639
+ };
640
+ },
641
+ maxLength: (numLandmarks) => 5 + 5 * numLandmarks + 4,
642
+ };
643
+ }
334
644
  function getSectionParsers(majorVersion) {
335
- const parsers = Object.assign({}, unversionedParsers);
336
- Object.keys(versionedParsers).forEach(parserName => {
337
- parsers[parserName] = versionedParsers[parserName](majorVersion);
338
- });
339
- return parsers;
645
+ return {
646
+ cramFileDefinition: cramFileDefinition(),
647
+ cramBlockHeader: cramBlockHeader(),
648
+ cramBlockCrc32: cramBlockCrc32(),
649
+ cramDataSeriesEncodingMap: cramDataSeriesEncodingMap(),
650
+ cramTagEncodingMap: cramTagEncodingMap(),
651
+ cramCompressionHeader: cramCompressionHeader(),
652
+ cramEncoding: cramEncoding(),
653
+ cramUnmappedSliceHeader: cramUnmappedSliceHeader(majorVersion),
654
+ cramMappedSliceHeader: cramMappedSliceHeader(majorVersion),
655
+ cramContainerHeader1: cramContainerHeader1(majorVersion),
656
+ cramContainerHeader2: cramContainerHeader2(majorVersion),
657
+ };
340
658
  }
341
659
  //# sourceMappingURL=sectionParsers.js.map