@gmod/cram 2.0.4 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/CHANGELOG.md +10 -0
  2. package/dist/cram-bundle.js +1 -1
  3. package/dist/cramFile/codecs/byteArrayLength.js +1 -1
  4. package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
  5. package/dist/cramFile/codecs/byteArrayStop.js +1 -1
  6. package/dist/cramFile/codecs/external.js +1 -1
  7. package/dist/cramFile/codecs/external.js.map +1 -1
  8. package/dist/cramFile/codecs/huffman.js +3 -2
  9. package/dist/cramFile/codecs/huffman.js.map +1 -1
  10. package/dist/cramFile/codecs/subexp.js.map +1 -1
  11. package/dist/cramFile/container/compressionScheme.d.ts +0 -3
  12. package/dist/cramFile/container/compressionScheme.js +0 -4
  13. package/dist/cramFile/container/compressionScheme.js.map +1 -1
  14. package/dist/cramFile/container/index.d.ts +57 -3
  15. package/dist/cramFile/container/index.js +21 -12
  16. package/dist/cramFile/container/index.js.map +1 -1
  17. package/dist/cramFile/file.d.ts +25 -59
  18. package/dist/cramFile/file.js +33 -37
  19. package/dist/cramFile/file.js.map +1 -1
  20. package/dist/cramFile/record.d.ts +1 -1
  21. package/dist/cramFile/record.js +2 -2
  22. package/dist/cramFile/record.js.map +1 -1
  23. package/dist/cramFile/sectionParsers.d.ts +195 -48
  24. package/dist/cramFile/sectionParsers.js +621 -303
  25. package/dist/cramFile/sectionParsers.js.map +1 -1
  26. package/dist/cramFile/slice/decodeRecord.js +5 -4
  27. package/dist/cramFile/slice/decodeRecord.js.map +1 -1
  28. package/dist/cramFile/slice/index.d.ts +23 -1
  29. package/dist/cramFile/slice/index.js +11 -8
  30. package/dist/cramFile/slice/index.js.map +1 -1
  31. package/dist/cramFile/util.d.ts +6 -4
  32. package/dist/cramFile/util.js +88 -6
  33. package/dist/cramFile/util.js.map +1 -1
  34. package/dist/rans/d04.js.map +1 -1
  35. package/dist/rans/decoding.d.ts +4 -4
  36. package/dist/rans/decoding.js +5 -6
  37. package/dist/rans/decoding.js.map +1 -1
  38. package/dist/rans/index.js +4 -3
  39. package/dist/rans/index.js.map +1 -1
  40. package/esm/cramFile/codecs/byteArrayLength.js +1 -1
  41. package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
  42. package/esm/cramFile/codecs/byteArrayStop.js +1 -1
  43. package/esm/cramFile/codecs/external.js +1 -1
  44. package/esm/cramFile/codecs/external.js.map +1 -1
  45. package/esm/cramFile/codecs/huffman.js +3 -2
  46. package/esm/cramFile/codecs/huffman.js.map +1 -1
  47. package/esm/cramFile/codecs/subexp.js.map +1 -1
  48. package/esm/cramFile/container/compressionScheme.d.ts +0 -3
  49. package/esm/cramFile/container/compressionScheme.js +0 -4
  50. package/esm/cramFile/container/compressionScheme.js.map +1 -1
  51. package/esm/cramFile/container/index.d.ts +57 -3
  52. package/esm/cramFile/container/index.js +19 -10
  53. package/esm/cramFile/container/index.js.map +1 -1
  54. package/esm/cramFile/file.d.ts +25 -59
  55. package/esm/cramFile/file.js +27 -29
  56. package/esm/cramFile/file.js.map +1 -1
  57. package/esm/cramFile/record.d.ts +1 -1
  58. package/esm/cramFile/record.js +2 -2
  59. package/esm/cramFile/record.js.map +1 -1
  60. package/esm/cramFile/sectionParsers.d.ts +195 -48
  61. package/esm/cramFile/sectionParsers.js +620 -303
  62. package/esm/cramFile/sectionParsers.js.map +1 -1
  63. package/esm/cramFile/slice/decodeRecord.js +5 -4
  64. package/esm/cramFile/slice/decodeRecord.js.map +1 -1
  65. package/esm/cramFile/slice/index.d.ts +23 -1
  66. package/esm/cramFile/slice/index.js +12 -9
  67. package/esm/cramFile/slice/index.js.map +1 -1
  68. package/esm/cramFile/util.d.ts +6 -4
  69. package/esm/cramFile/util.js +87 -6
  70. package/esm/cramFile/util.js.map +1 -1
  71. package/esm/rans/d04.js.map +1 -1
  72. package/esm/rans/decoding.d.ts +4 -4
  73. package/esm/rans/decoding.js +5 -6
  74. package/esm/rans/decoding.js.map +1 -1
  75. package/esm/rans/index.js +3 -2
  76. package/esm/rans/index.js.map +1 -1
  77. package/package.json +7 -8
  78. package/src/cramFile/codecs/byteArrayLength.ts +1 -2
  79. package/src/cramFile/codecs/byteArrayStop.ts +1 -1
  80. package/src/cramFile/codecs/external.ts +1 -1
  81. package/src/cramFile/codecs/huffman.ts +3 -2
  82. package/src/cramFile/codecs/subexp.ts +2 -2
  83. package/src/cramFile/container/compressionScheme.ts +1 -8
  84. package/src/cramFile/container/index.ts +23 -12
  85. package/src/cramFile/declare.d.ts +1 -0
  86. package/src/cramFile/file.ts +37 -53
  87. package/src/cramFile/record.ts +4 -7
  88. package/src/cramFile/sectionParsers.ts +668 -390
  89. package/src/cramFile/slice/decodeRecord.ts +20 -12
  90. package/src/cramFile/slice/index.ts +13 -7
  91. package/src/cramFile/util.ts +91 -92
  92. package/src/rans/d04.ts +1 -1
  93. package/src/rans/decoding.ts +5 -7
  94. package/src/rans/index.ts +3 -2
  95. package/src/typings/binary-parser.d.ts +0 -44
@@ -1,120 +1,200 @@
1
- import { Parser } from '@gmod/binary-parser';
2
- const singleItf8 = new Parser().itf8();
3
- const cramFileDefinition = {
4
- parser: new Parser()
5
- .string('magic', { length: 4 })
6
- .uint8('majorVersion')
7
- .uint8('minorVersion')
8
- .string('fileId', { length: 20, stripNull: true }),
9
- maxLength: 26,
10
- };
11
- const cramBlockHeader = {
12
- parser: new Parser()
13
- .uint8('compressionMethod', {
14
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ b => {
15
- const method = [
16
- 'raw',
17
- 'gzip',
18
- 'bzip2',
19
- 'lzma',
20
- 'rans',
21
- 'rans4x16',
22
- 'arith',
23
- 'fqzcomp',
24
- 'tok3',
25
- ][b];
26
- if (!method) {
27
- throw new Error(`compression method number ${b} not implemented`);
28
- }
29
- return method;
1
+ import { parseItf8, parseLtf8 } from './util';
2
+ export function cramFileDefinition() {
3
+ return {
4
+ parser: (buffer, _startOffset = 0) => {
5
+ const b = buffer;
6
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
7
+ let offset = 0;
8
+ const magic = buffer.subarray(offset, offset + 4).toString();
9
+ offset += 4;
10
+ const majorVersion = dataView.getUint8(offset);
11
+ offset += 1;
12
+ const minorVersion = dataView.getUint8(offset);
13
+ offset += 1;
14
+ const fileId = b
15
+ .subarray(offset, offset + 20)
16
+ .toString()
17
+ .replaceAll('\0', '');
18
+ offset += 20;
19
+ return {
20
+ value: {
21
+ magic,
22
+ majorVersion,
23
+ minorVersion,
24
+ fileId,
25
+ },
26
+ offset,
27
+ };
30
28
  },
31
- })
32
- .uint8('contentType', {
33
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ b => {
34
- const type = [
35
- 'FILE_HEADER',
36
- 'COMPRESSION_HEADER',
37
- 'MAPPED_SLICE_HEADER',
38
- 'UNMAPPED_SLICE_HEADER', // < only used in cram v1
39
- 'EXTERNAL_DATA',
40
- 'CORE_DATA',
41
- ][b];
42
- if (!type) {
43
- throw new Error(`invalid block content type id ${b}`);
44
- }
45
- return type;
29
+ maxLength: 26,
30
+ };
31
+ }
32
+ export function cramBlockHeader() {
33
+ const parser = (buffer, _startOffset = 0) => {
34
+ const b = buffer;
35
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
36
+ let offset = 0;
37
+ const d = dataView.getUint8(offset);
38
+ const compressionMethod = [
39
+ 'raw',
40
+ 'gzip',
41
+ 'bzip2',
42
+ 'lzma',
43
+ 'rans',
44
+ 'rans4x16',
45
+ 'arith',
46
+ 'fqzcomp',
47
+ 'tok3',
48
+ ][d];
49
+ if (!compressionMethod) {
50
+ throw new Error(`compression method number ${d} not implemented`);
51
+ }
52
+ offset += 1;
53
+ const c = dataView.getUint8(offset);
54
+ const contentType = [
55
+ 'FILE_HEADER',
56
+ 'COMPRESSION_HEADER',
57
+ 'MAPPED_SLICE_HEADER',
58
+ 'UNMAPPED_SLICE_HEADER', // < only used in cram v1
59
+ 'EXTERNAL_DATA',
60
+ 'CORE_DATA',
61
+ ][c];
62
+ if (!contentType) {
63
+ throw new Error(`invalid block content type id ${c}`);
64
+ }
65
+ offset += 1;
66
+ const [contentId, newOffset1] = parseItf8(buffer, offset);
67
+ offset += newOffset1;
68
+ const [compressedSize, newOffset2] = parseItf8(buffer, offset);
69
+ offset += newOffset2;
70
+ const [uncompressedSize, newOffset3] = parseItf8(buffer, offset);
71
+ offset += newOffset3;
72
+ return {
73
+ offset,
74
+ value: {
75
+ uncompressedSize,
76
+ compressedSize,
77
+ contentId,
78
+ contentType: contentType,
79
+ compressionMethod: compressionMethod,
80
+ },
81
+ };
82
+ };
83
+ return { parser, maxLength: 17 };
84
+ }
85
+ export function cramBlockCrc32() {
86
+ return {
87
+ parser: (buffer, offset) => {
88
+ const b = buffer;
89
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
90
+ const crc32 = dataView.getUint32(offset, true);
91
+ offset += 4;
92
+ return {
93
+ offset,
94
+ value: {
95
+ crc32,
96
+ },
97
+ };
46
98
  },
47
- })
48
- .itf8('contentId')
49
- .itf8('compressedSize')
50
- .itf8('uncompressedSize'),
51
- maxLength: 17,
52
- };
53
- const cramBlockCrc32 = {
54
- parser: new Parser().uint32('crc32'),
55
- maxLength: 4,
56
- };
57
- const cramTagDictionary = new Parser().itf8('size').buffer('ents', {
58
- length: 'size',
59
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ buffer => {
60
- function makeTagSet(stringStart, stringEnd) {
61
- const str = buffer.toString('utf8', stringStart, stringEnd);
62
- const tags = [];
63
- for (let i = 0; i < str.length; i += 3) {
64
- tags.push(str.slice(i, i + 3));
99
+ maxLength: 4,
100
+ };
101
+ }
102
+ function makeTagSet(buffer, stringStart, stringEnd) {
103
+ const str = buffer.toString('utf8', stringStart, stringEnd);
104
+ const tags = [];
105
+ for (let i = 0; i < str.length; i += 3) {
106
+ tags.push(str.slice(i, i + 3));
107
+ }
108
+ return tags;
109
+ }
110
+ export function cramTagDictionary() {
111
+ return {
112
+ parser: (buffer, offset) => {
113
+ const [size, newOffset1] = parseItf8(buffer, offset);
114
+ offset += newOffset1;
115
+ const subbuf = buffer.subarray(offset, offset + size);
116
+ offset += size;
117
+ const tagSets = [];
118
+ let stringStart = 0;
119
+ let i = 0;
120
+ for (; i < subbuf.length; i++) {
121
+ if (!subbuf[i]) {
122
+ tagSets.push(makeTagSet(subbuf, stringStart, i));
123
+ stringStart = i + 1;
124
+ }
65
125
  }
66
- return tags;
67
- }
68
- /* eslint-disable */
69
- var tagSets = [];
70
- var stringStart = 0;
71
- var i;
72
- /* eslint-enable */
73
- for (i = 0; i < buffer.length; i += 1) {
74
- if (!buffer[i]) {
75
- tagSets.push(makeTagSet(stringStart, i));
76
- stringStart = i + 1;
126
+ if (i > stringStart) {
127
+ tagSets.push(makeTagSet(subbuf, stringStart, i));
77
128
  }
78
- }
79
- if (i > stringStart) {
80
- tagSets.push(makeTagSet(stringStart, i));
81
- }
82
- return tagSets;
83
- },
84
- });
85
- // const cramPreservationMapKeys = 'XX RN AP RR SM TD'.split(' ')
86
- const parseByteAsBool = new Parser().uint8(null, {
87
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ val => !!val,
88
- });
89
- const cramPreservationMap = new Parser()
90
- .itf8('mapSize')
91
- .itf8('mapCount')
92
- .array('ents', {
93
- length: 'mapCount',
94
- type: new Parser()
95
- .string('key', {
96
- length: 2,
97
- stripNull: false,
98
- // formatter: val => cramPreservationMapKeys[val] || 0,
99
- })
100
- .choice('value', {
101
- tag: 'key',
102
- choices: {
103
- MI: parseByteAsBool,
104
- UI: parseByteAsBool,
105
- PI: parseByteAsBool,
106
- RN: parseByteAsBool,
107
- AP: parseByteAsBool,
108
- RR: parseByteAsBool,
109
- SM: new Parser().array(null, { type: 'uint8', length: 5 }),
110
- TD: new Parser().nest(null, {
111
- type: cramTagDictionary,
112
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ data => data.ents,
113
- }),
129
+ return {
130
+ value: {
131
+ size,
132
+ ents: tagSets,
133
+ },
134
+ offset,
135
+ };
114
136
  },
115
- }),
116
- });
117
- /* istanbul ignore next */
137
+ };
138
+ }
139
+ export function cramPreservationMap() {
140
+ return {
141
+ parser: (buffer, offset) => {
142
+ const b = buffer;
143
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
144
+ const [mapSize, newOffset1] = parseItf8(buffer, offset);
145
+ offset += newOffset1;
146
+ const [mapCount, newOffset2] = parseItf8(buffer, offset);
147
+ offset += newOffset2;
148
+ const ents = [];
149
+ for (let i = 0; i < mapCount; i++) {
150
+ const key = String.fromCharCode(buffer[offset]) +
151
+ String.fromCharCode(buffer[offset + 1]);
152
+ offset += 2;
153
+ if (key === 'MI' ||
154
+ key === 'UI' ||
155
+ key === 'PI' ||
156
+ key === 'RN' ||
157
+ key === 'AP' ||
158
+ key === 'RR') {
159
+ ents.push({
160
+ key,
161
+ value: !!dataView.getUint8(offset),
162
+ });
163
+ offset += 1;
164
+ }
165
+ else if (key === 'SM') {
166
+ ents.push({
167
+ key,
168
+ value: [
169
+ dataView.getUint8(offset),
170
+ dataView.getUint8(offset + 1),
171
+ dataView.getUint8(offset + 2),
172
+ dataView.getUint8(offset + 3),
173
+ dataView.getUint8(offset + 4),
174
+ ],
175
+ });
176
+ offset += 5;
177
+ }
178
+ else if (key === 'TD') {
179
+ const { offset: offsetRet, value } = cramTagDictionary().parser(buffer, offset);
180
+ ents.push({ key, value: value.ents });
181
+ offset = offsetRet;
182
+ }
183
+ else {
184
+ throw new Error(`unknown key ${key}`);
185
+ }
186
+ }
187
+ return {
188
+ value: {
189
+ mapSize,
190
+ mapCount,
191
+ ents,
192
+ },
193
+ offset,
194
+ };
195
+ },
196
+ };
197
+ }
118
198
  function formatMap(data) {
119
199
  const map = {};
120
200
  for (const { key, value } of data.ents) {
@@ -125,212 +205,449 @@ function formatMap(data) {
125
205
  }
126
206
  return map;
127
207
  }
128
- const unversionedParsers = {
129
- cramFileDefinition,
130
- cramBlockHeader,
131
- cramBlockCrc32,
132
- };
133
208
  export function isMappedSliceHeader(header) {
134
209
  return typeof header.refSeqId === 'number';
135
210
  }
136
- // each of these is a function of the major and minor version
137
- const versionedParsers = {
138
- // assemble a section parser for the unmapped slice header, with slight
139
- // variations depending on the major version of the cram file
140
- cramUnmappedSliceHeader(majorVersion) {
141
- let maxLength = 0;
142
- let parser = new Parser().itf8('numRecords');
143
- maxLength += 5;
211
+ // assemble a section parser for the unmapped slice header, with slight
212
+ // variations depending on the major version of the cram file
213
+ function cramUnmappedSliceHeader(majorVersion) {
214
+ let maxLength = 0;
215
+ maxLength += 5;
216
+ maxLength += 9;
217
+ maxLength += 5 * 2;
218
+ maxLength += 16;
219
+ const parser = (buffer, offset) => {
220
+ const [numRecords, newOffset1] = parseItf8(buffer, offset);
221
+ offset += newOffset1;
222
+ let recordCounter = 0;
144
223
  // recordCounter is itf8 in a CRAM v2 file, absent in CRAM v1
145
224
  if (majorVersion >= 3) {
146
- parser = parser.ltf8('recordCounter');
147
- maxLength += 9;
225
+ const [rc, newOffset2] = parseLtf8(buffer, offset);
226
+ offset += newOffset2;
227
+ recordCounter = rc;
148
228
  }
149
229
  else if (majorVersion === 2) {
150
- parser = parser.itf8('recordCounter');
151
- maxLength += 5;
230
+ const [rc, newOffset2] = parseItf8(buffer, offset);
231
+ offset += newOffset2;
232
+ recordCounter = rc;
152
233
  }
153
- parser = parser
154
- .itf8('numBlocks')
155
- .itf8('numContentIds')
156
- .array('contentIds', {
157
- type: singleItf8,
158
- length: 'numContentIds',
159
- });
160
- maxLength += 5 * 2; // + numContentIds*5
161
- // the md5 sum is missing in cram v1
162
- if (majorVersion >= 2) {
163
- parser = parser.array('md5', { type: 'uint8', length: 16 });
164
- maxLength += 16;
234
+ else {
235
+ console.warn('recordCounter=0');
165
236
  }
166
- const maxLengthFunc = (numContentIds) => maxLength + numContentIds * 5;
167
- return { parser, maxLength: maxLengthFunc }; // : p, maxLength: numContentIds => 5 + 9 + 5 * 2 + 5 * numContentIds + 16 }
168
- },
169
- // assembles a section parser for the unmapped slice header, with slight
170
- // variations depending on the major version of the cram file
171
- cramMappedSliceHeader(majorVersion) {
172
- let parser = new Parser()
173
- .itf8('refSeqId')
174
- .itf8('refSeqStart')
175
- .itf8('refSeqSpan')
176
- .itf8('numRecords');
177
- let maxLength = 5 * 4;
178
- if (majorVersion >= 3) {
179
- parser = parser.ltf8('recordCounter');
180
- maxLength += 9;
181
- }
182
- else if (majorVersion === 2) {
183
- parser = parser.itf8('recordCounter');
184
- maxLength += 5;
237
+ const [numBlocks, newOffset3] = parseItf8(buffer, offset);
238
+ offset += newOffset3;
239
+ const [numContentIds, newOffset4] = parseItf8(buffer, offset);
240
+ offset += newOffset4;
241
+ const contentIds = [];
242
+ for (let i = 0; i < numContentIds; i++) {
243
+ const [id, newOffset5] = parseItf8(buffer, offset);
244
+ offset += newOffset5;
245
+ contentIds.push(id);
185
246
  }
186
- parser = parser
187
- .itf8('numBlocks')
188
- .itf8('numContentIds')
189
- .array('contentIds', {
190
- type: singleItf8,
191
- length: 'numContentIds',
192
- })
193
- .itf8('refBaseBlockId');
194
- maxLength += 5 * 3;
195
247
  // the md5 sum is missing in cram v1
248
+ let md5;
196
249
  if (majorVersion >= 2) {
197
- parser = parser.array('md5', { type: 'uint8', length: 16 });
198
- maxLength += 16;
250
+ md5 = [...buffer.subarray(offset, offset + 16)];
251
+ offset += 16;
199
252
  }
200
- const maxLengthFunc = (numContentIds) => maxLength + numContentIds * 5;
201
- return { parser, maxLength: maxLengthFunc };
202
- },
203
- cramEncoding(_majorVersion) {
204
- const parser = new Parser()
205
- .namely('cramEncoding')
206
- .itf8('codecId')
207
- .itf8('parametersBytes')
208
- .choice('parameters', {
209
- tag: 'codecId',
210
- choices: {
211
- 0: new Parser(), // NULL
212
- 1: new Parser().itf8('blockContentId'), // EXTERNAL
213
- 2: new Parser().itf8('offset').itf8('M'), // GOLOMB,
214
- // HUFFMAN_INT
215
- 3: Parser.start()
216
- .itf8('numCodes')
217
- .array('symbols', { length: 'numCodes', type: singleItf8 })
218
- .itf8('numLengths')
219
- .array('bitLengths', { length: 'numLengths', type: singleItf8 }),
220
- 4: Parser.start() // BYTE_ARRAY_LEN
221
- .nest('lengthsEncoding', { type: 'cramEncoding' })
222
- .nest('valuesEncoding', { type: 'cramEncoding' }),
223
- // BYTE_ARRAY_STOP is a little different for CRAM v1
224
- 5: new Parser().uint8('stopByte').itf8('blockContentId'),
225
- 6: new Parser().itf8('offset').itf8('length'), // BETA
226
- 7: new Parser().itf8('offset').itf8('K'), // SUBEXP
227
- 8: new Parser().itf8('offset').itf8('log2m'), // GOLOMB_RICE
228
- 9: new Parser().itf8('offset'), // GAMMA
253
+ return {
254
+ value: {
255
+ recordCounter,
256
+ md5,
257
+ contentIds,
258
+ numContentIds,
259
+ numBlocks,
260
+ numRecords,
229
261
  },
230
- });
231
- return { parser };
232
- },
233
- cramDataSeriesEncodingMap(majorVersion) {
234
- return new Parser()
235
- .itf8('mapSize')
236
- .itf8('mapCount')
237
- .array('ents', {
238
- length: 'mapCount',
239
- type: new Parser()
240
- .string('key', { length: 2, stripNull: false })
241
- .nest('value', { type: this.cramEncoding(majorVersion).parser }),
242
- });
243
- },
244
- cramTagEncodingMap(majorVersion) {
245
- return new Parser()
246
- .itf8('mapSize')
247
- .itf8('mapCount')
248
- .array('ents', {
249
- length: 'mapCount',
250
- type: new Parser()
251
- .itf8('key', {
252
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ integerRepresentation =>
253
- /* istanbul ignore next */
254
- String.fromCharCode((integerRepresentation >> 16) & 0xff) +
255
- String.fromCharCode((integerRepresentation >> 8) & 0xff) +
256
- String.fromCharCode(integerRepresentation & 0xff),
257
- })
258
- .nest('value', { type: this.cramEncoding(majorVersion).parser }),
259
- });
260
- },
261
- cramCompressionHeader(majorVersion) {
262
- let parser = new Parser();
263
- // TODO: if we want to support CRAM v1, we will need to refactor
264
- // compression header into 2 parts to parse the landmarks,
265
- // like the container header
266
- parser = parser
267
- .nest('preservation', {
268
- type: cramPreservationMap,
269
- formatter: formatMap,
270
- })
271
- .nest('dataSeriesEncoding', {
272
- type: this.cramDataSeriesEncodingMap(majorVersion),
273
- formatter: formatMap,
274
- })
275
- .nest('tagEncoding', {
276
- type: this.cramTagEncodingMap(majorVersion),
277
- formatter: formatMap,
278
- });
279
- return { parser };
280
- },
281
- cramContainerHeader1(majorVersion) {
282
- let parser = new Parser()
283
- .int32('length') // byte size of the container data (blocks)
284
- .itf8('refSeqId') // reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences
285
- .itf8('refSeqStart') // the alignment start position or 0 for unmapped reads
286
- .itf8('alignmentSpan') // the length of the alignment or 0 for unmapped reads
287
- .itf8('numRecords'); // number of records in the container
288
- let maxLength = 4 + 5 * 4;
289
- if (majorVersion >= 3) {
290
- parser = parser.ltf8('recordCounter'); // 1-based sequential index of records in the file/stream.
291
- maxLength += 9;
292
- }
293
- else if (majorVersion === 2) {
294
- parser = parser.itf8('recordCounter');
295
- maxLength += 5;
296
- }
297
- if (majorVersion > 1) {
298
- parser = parser.ltf8('numBases'); // number of read bases
299
- maxLength += 9;
262
+ offset,
263
+ };
264
+ };
265
+ return {
266
+ parser,
267
+ maxLength: (numContentIds) => maxLength + numContentIds * 5,
268
+ };
269
+ }
270
+ // assembles a section parser for the unmapped slice header, with slight
271
+ // variations depending on the major version of the cram file
272
+ function cramMappedSliceHeader(majorVersion) {
273
+ let maxLength = 0;
274
+ maxLength += 5 * 4; // EL0
275
+ maxLength += 9; // EL1
276
+ maxLength += 5 * 3; // EL2 ITF8s
277
+ maxLength += 16; // MD5
278
+ return {
279
+ parser: (buffer, offset) => {
280
+ // L0
281
+ const [refSeqId, newOffset1] = parseItf8(buffer, offset);
282
+ offset += newOffset1;
283
+ const [refSeqStart, newOffset2] = parseItf8(buffer, offset);
284
+ offset += newOffset2;
285
+ const [refSeqSpan, newOffset3] = parseItf8(buffer, offset);
286
+ offset += newOffset3;
287
+ const [numRecords, newOffset4] = parseItf8(buffer, offset);
288
+ offset += newOffset4;
289
+ // EL0
290
+ // L1
291
+ let recordCounter = 0;
292
+ if (majorVersion >= 3) {
293
+ const [rc, newOffset5] = parseLtf8(buffer, offset);
294
+ offset += newOffset5;
295
+ recordCounter = rc;
296
+ }
297
+ else if (majorVersion === 2) {
298
+ const [rc, newOffset5] = parseItf8(buffer, offset);
299
+ offset += newOffset5;
300
+ recordCounter = rc;
301
+ }
302
+ else {
303
+ console.warn('majorVersion is <2, recordCounter set to 0');
304
+ }
305
+ // EL1
306
+ // L2
307
+ const [numBlocks, newOffset6] = parseItf8(buffer, offset);
308
+ offset += newOffset6;
309
+ const [numContentIds, newOffset7] = parseItf8(buffer, offset);
310
+ offset += newOffset7;
311
+ const contentIds = [];
312
+ for (let i = 0; i < numContentIds; i++) {
313
+ const [id, newOffset5] = parseItf8(buffer, offset);
314
+ offset += newOffset5;
315
+ contentIds.push(id);
316
+ }
317
+ const [refBaseBlockId, newOffset8] = parseItf8(buffer, offset);
318
+ offset += newOffset8;
319
+ // EL2
320
+ // the md5 sum is missing in cram v1
321
+ let md5;
322
+ if (majorVersion >= 2) {
323
+ md5 = [...buffer.subarray(offset, offset + 16)];
324
+ offset += 16;
325
+ }
326
+ return {
327
+ value: {
328
+ md5,
329
+ numBlocks,
330
+ numRecords,
331
+ numContentIds,
332
+ refSeqSpan,
333
+ refSeqId,
334
+ refSeqStart,
335
+ recordCounter,
336
+ refBaseBlockId,
337
+ contentIds,
338
+ },
339
+ offset,
340
+ };
341
+ },
342
+ maxLength: (numContentIds) => maxLength + numContentIds * 5,
343
+ };
344
+ }
345
+ function cramEncoding() {
346
+ return {
347
+ parser: (buffer, offset) => cramEncodingSub(buffer, offset),
348
+ };
349
+ }
350
+ function cramEncodingSub(buffer, offset) {
351
+ const b = buffer;
352
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
353
+ const [codecId, newOffset1] = parseItf8(buffer, offset);
354
+ offset += newOffset1;
355
+ const [parametersBytes, newOffset2] = parseItf8(buffer, offset);
356
+ offset += newOffset2;
357
+ const parameters = {};
358
+ if (codecId === 0) {
359
+ // NULL
360
+ }
361
+ else if (codecId === 1) {
362
+ // EXTERNAL
363
+ const [bc, newOffset3] = parseItf8(buffer, offset);
364
+ parameters.blockContentId = bc;
365
+ offset += newOffset3;
366
+ }
367
+ else if (codecId === 2) {
368
+ // GOLUMB
369
+ const [off, newOffset3] = parseItf8(buffer, offset);
370
+ parameters.offset = off;
371
+ offset += newOffset3;
372
+ const [M2, newOffset4] = parseItf8(buffer, offset);
373
+ parameters.M = M2;
374
+ offset += newOffset4;
375
+ }
376
+ else if (codecId === 3) {
377
+ // HUFFMAN_INT
378
+ const val = parseItf8(buffer, offset);
379
+ const numCodes = val[0];
380
+ offset += val[1];
381
+ const symbols = [];
382
+ for (let i = 0; i < numCodes; i++) {
383
+ const code = parseItf8(buffer, offset);
384
+ symbols.push(code[0]);
385
+ offset += code[1];
300
386
  }
301
- parser = parser
302
- .itf8('numBlocks') // the number of blocks
303
- .itf8('numLandmarks'); // the number of landmarks
304
- maxLength += 5 + 5;
305
- return { parser, maxLength };
306
- },
307
- cramContainerHeader2(majorVersion) {
308
- let parser = new Parser()
309
- .itf8('numLandmarks') // the number of blocks
310
- // Each integer value of this array is a byte offset
311
- // into the blocks byte array. Landmarks are used for
312
- // random access indexing.
313
- .array('landmarks', {
314
- type: new Parser().itf8(),
315
- length: 'numLandmarks',
316
- });
317
- let crcLength = 0;
318
- if (majorVersion >= 3) {
319
- parser = parser.uint32('crc32');
320
- crcLength = 4;
387
+ parameters.symbols = symbols;
388
+ const val2 = parseItf8(buffer, offset);
389
+ const numLengths = val[0];
390
+ parameters.numLengths = numLengths;
391
+ parameters.numCodes = numCodes;
392
+ parameters.numLengths = numLengths;
393
+ offset += val2[1];
394
+ const bitLengths = [];
395
+ for (let i = 0; i < numLengths; i++) {
396
+ const len = parseItf8(buffer, offset);
397
+ offset += len[1];
398
+ bitLengths.push(len[0]);
321
399
  }
322
- return {
323
- parser,
324
- maxLength: (numLandmarks) => 5 + numLandmarks * 5 + crcLength,
325
- };
326
- },
327
- };
328
- function getSectionParsers(majorVersion) {
329
- const parsers = Object.assign({}, unversionedParsers);
330
- Object.keys(versionedParsers).forEach(parserName => {
331
- parsers[parserName] = versionedParsers[parserName](majorVersion);
332
- });
333
- return parsers;
400
+ parameters.bitLengths = bitLengths;
401
+ }
402
+ else if (codecId === 4) {
403
+ // BYTE_ARRAY_LEN
404
+ const { value: lengthsEncoding, offset: newOffset1 } = cramEncodingSub(buffer, offset);
405
+ parameters.lengthsEncoding = lengthsEncoding;
406
+ offset = newOffset1;
407
+ const { value: valuesEncoding, offset: newOffset2 } = cramEncodingSub(buffer, offset);
408
+ parameters.valuesEncoding = valuesEncoding;
409
+ offset = newOffset2;
410
+ }
411
+ else if (codecId === 5) {
412
+ // BYTE_ARRAY_STOP
413
+ parameters.stopByte = dataView.getUint8(offset);
414
+ offset += 1;
415
+ const [blockContentId, newOffset1] = parseItf8(buffer, offset);
416
+ parameters.blockContentId = blockContentId;
417
+ offset += newOffset1;
418
+ }
419
+ else if (codecId === 6) {
420
+ // BETA
421
+ const [off, newOffset1] = parseItf8(buffer, offset);
422
+ parameters.offset = off;
423
+ offset += newOffset1;
424
+ const [len, newOffset2] = parseItf8(buffer, offset);
425
+ parameters.length = len;
426
+ offset += newOffset2;
427
+ }
428
+ else if (codecId === 7) {
429
+ // SUBEXP
430
+ const [off, newOffset1] = parseItf8(buffer, offset);
431
+ parameters.offset = off;
432
+ offset += newOffset1;
433
+ const [K, newOffset2] = parseItf8(buffer, offset);
434
+ parameters.K = K;
435
+ offset += newOffset2;
436
+ }
437
+ else if (codecId === 8) {
438
+ // GOLOMB_RICE
439
+ const [off, newOffset1] = parseItf8(buffer, offset);
440
+ parameters.offset = off;
441
+ offset += newOffset1;
442
+ const [l2m, newOffset2] = parseItf8(buffer, offset);
443
+ parameters.log2m = l2m;
444
+ offset += newOffset2;
445
+ }
446
+ else if (codecId === 9) {
447
+ // GAMMA
448
+ const [off, newOffset1] = parseItf8(buffer, offset);
449
+ parameters.offset = off;
450
+ offset += newOffset1;
451
+ }
452
+ else {
453
+ throw new Error(`unknown codecId ${codecId}`);
454
+ }
455
+ return {
456
+ value: {
457
+ codecId,
458
+ parametersBytes,
459
+ parameters,
460
+ },
461
+ offset,
462
+ };
463
+ }
464
+ function cramDataSeriesEncodingMap() {
465
+ return {
466
+ parser: (buffer, offset) => {
467
+ const [mapSize, newOffset1] = parseItf8(buffer, offset);
468
+ offset += newOffset1;
469
+ const [mapCount, newOffset2] = parseItf8(buffer, offset);
470
+ offset += newOffset2;
471
+ const ents = [];
472
+ for (let i = 0; i < mapCount; i++) {
473
+ const key = String.fromCharCode(buffer[offset]) +
474
+ String.fromCharCode(buffer[offset + 1]);
475
+ offset += 2;
476
+ const { value, offset: newOffset4 } = cramEncodingSub(buffer, offset);
477
+ offset = newOffset4;
478
+ ents.push({ key, value });
479
+ }
480
+ return {
481
+ value: {
482
+ mapSize,
483
+ ents,
484
+ mapCount,
485
+ },
486
+ offset,
487
+ };
488
+ },
489
+ };
490
+ }
491
+ function cramTagEncodingMap() {
492
+ return {
493
+ parser: (buffer, offset) => {
494
+ const [mapSize, newOffset1] = parseItf8(buffer, offset);
495
+ offset += newOffset1;
496
+ const [mapCount, newOffset2] = parseItf8(buffer, offset);
497
+ offset += newOffset2;
498
+ const ents = [];
499
+ for (let i = 0; i < mapCount; i++) {
500
+ const [k0, newOffset3] = parseItf8(buffer, offset);
501
+ offset += newOffset3;
502
+ const key = String.fromCharCode((k0 >> 16) & 0xff) +
503
+ String.fromCharCode((k0 >> 8) & 0xff) +
504
+ String.fromCharCode(k0 & 0xff);
505
+ const { value, offset: newOffset4 } = cramEncodingSub(buffer, offset);
506
+ offset = newOffset4;
507
+ ents.push({ key, value });
508
+ }
509
+ return {
510
+ value: {
511
+ mapSize,
512
+ ents,
513
+ mapCount,
514
+ },
515
+ offset,
516
+ };
517
+ },
518
+ };
519
+ }
520
+ function cramCompressionHeader() {
521
+ return {
522
+ parser: (buffer, offset) => {
523
+ // TODO: if we want to support CRAM v1, we will need to refactor
524
+ // compression header into 2 parts to parse the landmarks, like the
525
+ // container header
526
+ const { value: preservation, offset: newOffset1 } = cramPreservationMap().parser(buffer, offset);
527
+ offset = newOffset1;
528
+ const { value: dataSeriesEncoding, offset: newOffset2 } = cramDataSeriesEncodingMap().parser(buffer, offset);
529
+ offset = newOffset2;
530
+ const { value: tagEncoding, offset: newOffset3 } = cramTagEncodingMap().parser(buffer, offset);
531
+ offset = newOffset3;
532
+ return {
533
+ value: {
534
+ dataSeriesEncoding: formatMap(dataSeriesEncoding),
535
+ preservation: formatMap(preservation),
536
+ tagEncoding: formatMap(tagEncoding),
537
+ },
538
+ offset,
539
+ };
540
+ },
541
+ };
542
+ }
543
+ function cramContainerHeader1(majorVersion) {
544
+ let maxLength = 4;
545
+ maxLength += 5 * 4;
546
+ maxLength += 9;
547
+ maxLength += 9;
548
+ maxLength += 5 + 5;
549
+ return {
550
+ maxLength,
551
+ parser: (buffer, offset) => {
552
+ const b = buffer;
553
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
554
+ // byte size of the container data (blocks)
555
+ const length = dataView.getInt32(offset, true);
556
+ offset += 4;
557
+ // reference sequence identifier, -1 for unmapped reads, -2 for multiple
558
+ // reference sequences
559
+ const [refSeqId, newOffset1] = parseItf8(buffer, offset);
560
+ offset += newOffset1;
561
+ const [refSeqStart, newOffset2] = parseItf8(buffer, offset);
562
+ offset += newOffset2;
563
+ const [alignmentSpan, newOffset3] = parseItf8(buffer, offset);
564
+ offset += newOffset3;
565
+ const [numRecords, newOffset4] = parseItf8(buffer, offset);
566
+ offset += newOffset4;
567
+ let recordCounter = 0;
568
+ if (majorVersion >= 3) {
569
+ const [rc, newOffset5] = parseLtf8(buffer, offset);
570
+ recordCounter = rc;
571
+ offset += newOffset5;
572
+ }
573
+ else if (majorVersion === 2) {
574
+ const [rc, newOffset5] = parseItf8(buffer, offset);
575
+ recordCounter = rc;
576
+ offset += newOffset5;
577
+ }
578
+ else {
579
+ console.warn('setting recordCounter=0');
580
+ }
581
+ let numBases;
582
+ if (majorVersion > 1) {
583
+ const [n, newOffset5] = parseLtf8(buffer, offset);
584
+ numBases = n;
585
+ offset += newOffset5;
586
+ }
587
+ const [numBlocks, newOffset6] = parseItf8(buffer, offset);
588
+ offset += newOffset6;
589
+ const [numLandmarks, newOffset7] = parseItf8(buffer, offset);
590
+ offset += newOffset7;
591
+ return {
592
+ value: {
593
+ length,
594
+ refSeqId,
595
+ refSeqStart,
596
+ alignmentSpan,
597
+ numBlocks,
598
+ numLandmarks,
599
+ numBases,
600
+ recordCounter,
601
+ numRecords,
602
+ },
603
+ offset,
604
+ };
605
+ },
606
+ };
607
+ }
608
+ function cramContainerHeader2(majorVersion) {
609
+ return {
610
+ parser: (buffer, offset) => {
611
+ const b = buffer;
612
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
613
+ const [numLandmarks, newOffset1] = parseItf8(buffer, offset);
614
+ offset += newOffset1;
615
+ const landmarks = [];
616
+ for (let i = 0; i < numLandmarks; i++) {
617
+ const [landmark, newOffset2] = parseItf8(buffer, offset);
618
+ offset += newOffset2;
619
+ landmarks.push(landmark);
620
+ }
621
+ let crc32;
622
+ if (majorVersion >= 3) {
623
+ crc32 = dataView.getUint32(offset, true);
624
+ offset += 4;
625
+ }
626
+ return {
627
+ value: {
628
+ ...(crc32 === undefined ? {} : { crc32 }),
629
+ numLandmarks,
630
+ landmarks,
631
+ },
632
+ offset,
633
+ };
634
+ },
635
+ maxLength: (numLandmarks) => 5 + 5 * numLandmarks + 4,
636
+ };
637
+ }
638
+ export function getSectionParsers(majorVersion) {
639
+ return {
640
+ cramFileDefinition: cramFileDefinition(),
641
+ cramBlockHeader: cramBlockHeader(),
642
+ cramBlockCrc32: cramBlockCrc32(),
643
+ cramDataSeriesEncodingMap: cramDataSeriesEncodingMap(),
644
+ cramTagEncodingMap: cramTagEncodingMap(),
645
+ cramCompressionHeader: cramCompressionHeader(),
646
+ cramEncoding: cramEncoding(),
647
+ cramUnmappedSliceHeader: cramUnmappedSliceHeader(majorVersion),
648
+ cramMappedSliceHeader: cramMappedSliceHeader(majorVersion),
649
+ cramContainerHeader1: cramContainerHeader1(majorVersion),
650
+ cramContainerHeader2: cramContainerHeader2(majorVersion),
651
+ };
334
652
  }
335
- export { cramFileDefinition, getSectionParsers };
336
653
  //# sourceMappingURL=sectionParsers.js.map