@gmod/cram 2.0.3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/CHANGELOG.md +10 -0
  2. package/README.md +0 -4
  3. package/dist/craiIndex.js +1 -1
  4. package/dist/craiIndex.js.map +1 -1
  5. package/dist/cram-bundle.js +1 -1
  6. package/dist/cram-bundle.js.LICENSE.txt +0 -2
  7. package/dist/cramFile/codecs/byteArrayLength.js +1 -1
  8. package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
  9. package/dist/cramFile/codecs/byteArrayStop.d.ts +0 -1
  10. package/dist/cramFile/codecs/external.js +1 -1
  11. package/dist/cramFile/codecs/external.js.map +1 -1
  12. package/dist/cramFile/codecs/getBits.d.ts +0 -1
  13. package/dist/cramFile/codecs/getBits.js +2 -2
  14. package/dist/cramFile/codecs/getBits.js.map +1 -1
  15. package/dist/cramFile/codecs/huffman.js +2 -1
  16. package/dist/cramFile/codecs/huffman.js.map +1 -1
  17. package/dist/cramFile/codecs/index.js +1 -2
  18. package/dist/cramFile/codecs/index.js.map +1 -1
  19. package/dist/cramFile/container/compressionScheme.d.ts +0 -3
  20. package/dist/cramFile/container/compressionScheme.js +0 -4
  21. package/dist/cramFile/container/compressionScheme.js.map +1 -1
  22. package/dist/cramFile/container/index.d.ts +57 -5
  23. package/dist/cramFile/container/index.js +15 -9
  24. package/dist/cramFile/container/index.js.map +1 -1
  25. package/dist/cramFile/file.d.ts +24 -61
  26. package/dist/cramFile/file.js +23 -28
  27. package/dist/cramFile/file.js.map +1 -1
  28. package/dist/cramFile/record.d.ts +1 -1
  29. package/dist/cramFile/sectionParsers.d.ts +195 -49
  30. package/dist/cramFile/sectionParsers.js +622 -304
  31. package/dist/cramFile/sectionParsers.js.map +1 -1
  32. package/dist/cramFile/slice/decodeRecord.js +1 -1
  33. package/dist/cramFile/slice/decodeRecord.js.map +1 -1
  34. package/dist/cramFile/slice/index.d.ts +23 -1
  35. package/dist/cramFile/slice/index.js +9 -6
  36. package/dist/cramFile/slice/index.js.map +1 -1
  37. package/dist/cramFile/util.d.ts +6 -6
  38. package/dist/cramFile/util.js +93 -12
  39. package/dist/cramFile/util.js.map +1 -1
  40. package/dist/indexedCramFile.d.ts +0 -3
  41. package/dist/indexedCramFile.js +4 -16
  42. package/dist/indexedCramFile.js.map +1 -1
  43. package/dist/io/index.d.ts +1 -3
  44. package/dist/io/index.js +3 -16
  45. package/dist/io/index.js.map +1 -1
  46. package/dist/rans/d04.js +1 -1
  47. package/dist/rans/d04.js.map +1 -1
  48. package/dist/rans/d14.js +1 -1
  49. package/dist/rans/d14.js.map +1 -1
  50. package/dist/rans/frequencies.js +2 -3
  51. package/dist/rans/frequencies.js.map +1 -1
  52. package/dist/rans/index.js +1 -1
  53. package/dist/rans/index.js.map +1 -1
  54. package/dist/sam.js +1 -2
  55. package/dist/sam.js.map +1 -1
  56. package/dist/typescript.d.ts +0 -1
  57. package/dist/typescript.js +0 -8
  58. package/dist/typescript.js.map +1 -1
  59. package/dist/unzip-pako.d.ts +0 -1
  60. package/dist/unzip-pako.js +1 -2
  61. package/dist/unzip-pako.js.map +1 -1
  62. package/dist/unzip.d.ts +0 -1
  63. package/esm/craiIndex.js +1 -1
  64. package/esm/craiIndex.js.map +1 -1
  65. package/esm/cramFile/codecs/byteArrayLength.js +1 -1
  66. package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
  67. package/esm/cramFile/codecs/byteArrayStop.d.ts +0 -1
  68. package/esm/cramFile/codecs/external.js +1 -1
  69. package/esm/cramFile/codecs/external.js.map +1 -1
  70. package/esm/cramFile/codecs/getBits.d.ts +0 -1
  71. package/esm/cramFile/codecs/huffman.js +2 -1
  72. package/esm/cramFile/codecs/huffman.js.map +1 -1
  73. package/esm/cramFile/container/compressionScheme.d.ts +0 -3
  74. package/esm/cramFile/container/compressionScheme.js +0 -4
  75. package/esm/cramFile/container/compressionScheme.js.map +1 -1
  76. package/esm/cramFile/container/index.d.ts +57 -5
  77. package/esm/cramFile/container/index.js +15 -9
  78. package/esm/cramFile/container/index.js.map +1 -1
  79. package/esm/cramFile/file.d.ts +24 -61
  80. package/esm/cramFile/file.js +22 -25
  81. package/esm/cramFile/file.js.map +1 -1
  82. package/esm/cramFile/record.d.ts +1 -1
  83. package/esm/cramFile/sectionParsers.d.ts +195 -49
  84. package/esm/cramFile/sectionParsers.js +620 -303
  85. package/esm/cramFile/sectionParsers.js.map +1 -1
  86. package/esm/cramFile/slice/index.d.ts +23 -1
  87. package/esm/cramFile/slice/index.js +10 -7
  88. package/esm/cramFile/slice/index.js.map +1 -1
  89. package/esm/cramFile/util.d.ts +6 -6
  90. package/esm/cramFile/util.js +87 -6
  91. package/esm/cramFile/util.js.map +1 -1
  92. package/esm/indexedCramFile.d.ts +0 -3
  93. package/esm/indexedCramFile.js +3 -15
  94. package/esm/indexedCramFile.js.map +1 -1
  95. package/esm/io/index.d.ts +1 -3
  96. package/esm/io/index.js +2 -11
  97. package/esm/io/index.js.map +1 -1
  98. package/esm/typescript.d.ts +0 -1
  99. package/esm/typescript.js +1 -6
  100. package/esm/typescript.js.map +1 -1
  101. package/esm/unzip-pako.d.ts +0 -1
  102. package/esm/unzip.d.ts +0 -1
  103. package/package.json +9 -15
  104. package/src/craiIndex.ts +1 -1
  105. package/src/cramFile/codecs/byteArrayLength.ts +1 -2
  106. package/src/cramFile/codecs/external.ts +1 -1
  107. package/src/cramFile/codecs/huffman.ts +2 -1
  108. package/src/cramFile/container/compressionScheme.ts +1 -8
  109. package/src/cramFile/container/index.ts +21 -10
  110. package/src/cramFile/file.ts +28 -43
  111. package/src/cramFile/record.ts +1 -1
  112. package/src/cramFile/sectionParsers.ts +668 -390
  113. package/src/cramFile/slice/index.ts +11 -5
  114. package/src/cramFile/util.ts +90 -91
  115. package/src/indexedCramFile.ts +3 -25
  116. package/src/io/index.ts +2 -12
  117. package/src/typescript.ts +0 -7
  118. package/src/typings/binary-parser.d.ts +0 -44
@@ -1,124 +1,209 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.getSectionParsers = exports.cramFileDefinition = exports.isMappedSliceHeader = void 0;
4
- const binary_parser_1 = require("@gmod/binary-parser");
5
- const singleItf8 = new binary_parser_1.Parser().itf8();
6
- const cramFileDefinition = {
7
- parser: new binary_parser_1.Parser()
8
- .string('magic', { length: 4 })
9
- .uint8('majorVersion')
10
- .uint8('minorVersion')
11
- .string('fileId', { length: 20, stripNull: true }),
12
- maxLength: 26,
13
- };
14
3
  exports.cramFileDefinition = cramFileDefinition;
15
- const cramBlockHeader = {
16
- parser: new binary_parser_1.Parser()
17
- .uint8('compressionMethod', {
18
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ b => {
19
- const method = [
20
- 'raw',
21
- 'gzip',
22
- 'bzip2',
23
- 'lzma',
24
- 'rans',
25
- 'rans4x16',
26
- 'arith',
27
- 'fqzcomp',
28
- 'tok3',
29
- ][b];
30
- if (!method) {
31
- throw new Error(`compression method number ${b} not implemented`);
32
- }
33
- return method;
4
+ exports.cramBlockHeader = cramBlockHeader;
5
+ exports.cramBlockCrc32 = cramBlockCrc32;
6
+ exports.cramTagDictionary = cramTagDictionary;
7
+ exports.cramPreservationMap = cramPreservationMap;
8
+ exports.isMappedSliceHeader = isMappedSliceHeader;
9
+ exports.getSectionParsers = getSectionParsers;
10
+ const util_1 = require("./util");
11
+ function cramFileDefinition() {
12
+ return {
13
+ parser: (buffer, _startOffset = 0) => {
14
+ const b = buffer;
15
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
16
+ let offset = 0;
17
+ const magic = buffer.subarray(offset, offset + 4).toString();
18
+ offset += 4;
19
+ const majorVersion = dataView.getUint8(offset);
20
+ offset += 1;
21
+ const minorVersion = dataView.getUint8(offset);
22
+ offset += 1;
23
+ const fileId = b
24
+ .subarray(offset, offset + 20)
25
+ .toString()
26
+ .replaceAll('\0', '');
27
+ offset += 20;
28
+ return {
29
+ value: {
30
+ magic,
31
+ majorVersion,
32
+ minorVersion,
33
+ fileId,
34
+ },
35
+ offset,
36
+ };
34
37
  },
35
- })
36
- .uint8('contentType', {
37
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ b => {
38
- const type = [
39
- 'FILE_HEADER',
40
- 'COMPRESSION_HEADER',
41
- 'MAPPED_SLICE_HEADER',
42
- 'UNMAPPED_SLICE_HEADER', // < only used in cram v1
43
- 'EXTERNAL_DATA',
44
- 'CORE_DATA',
45
- ][b];
46
- if (!type) {
47
- throw new Error(`invalid block content type id ${b}`);
48
- }
49
- return type;
38
+ maxLength: 26,
39
+ };
40
+ }
41
+ function cramBlockHeader() {
42
+ const parser = (buffer, _startOffset = 0) => {
43
+ const b = buffer;
44
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
45
+ let offset = 0;
46
+ const d = dataView.getUint8(offset);
47
+ const compressionMethod = [
48
+ 'raw',
49
+ 'gzip',
50
+ 'bzip2',
51
+ 'lzma',
52
+ 'rans',
53
+ 'rans4x16',
54
+ 'arith',
55
+ 'fqzcomp',
56
+ 'tok3',
57
+ ][d];
58
+ if (!compressionMethod) {
59
+ throw new Error(`compression method number ${d} not implemented`);
60
+ }
61
+ offset += 1;
62
+ const c = dataView.getUint8(offset);
63
+ const contentType = [
64
+ 'FILE_HEADER',
65
+ 'COMPRESSION_HEADER',
66
+ 'MAPPED_SLICE_HEADER',
67
+ 'UNMAPPED_SLICE_HEADER', // < only used in cram v1
68
+ 'EXTERNAL_DATA',
69
+ 'CORE_DATA',
70
+ ][c];
71
+ if (!contentType) {
72
+ throw new Error(`invalid block content type id ${c}`);
73
+ }
74
+ offset += 1;
75
+ const [contentId, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
76
+ offset += newOffset1;
77
+ const [compressedSize, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
78
+ offset += newOffset2;
79
+ const [uncompressedSize, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
80
+ offset += newOffset3;
81
+ return {
82
+ offset,
83
+ value: {
84
+ uncompressedSize,
85
+ compressedSize,
86
+ contentId,
87
+ contentType: contentType,
88
+ compressionMethod: compressionMethod,
89
+ },
90
+ };
91
+ };
92
+ return { parser, maxLength: 17 };
93
+ }
94
+ function cramBlockCrc32() {
95
+ return {
96
+ parser: (buffer, offset) => {
97
+ const b = buffer;
98
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
99
+ const crc32 = dataView.getUint32(offset, true);
100
+ offset += 4;
101
+ return {
102
+ offset,
103
+ value: {
104
+ crc32,
105
+ },
106
+ };
50
107
  },
51
- })
52
- .itf8('contentId')
53
- .itf8('compressedSize')
54
- .itf8('uncompressedSize'),
55
- maxLength: 17,
56
- };
57
- const cramBlockCrc32 = {
58
- parser: new binary_parser_1.Parser().uint32('crc32'),
59
- maxLength: 4,
60
- };
61
- const cramTagDictionary = new binary_parser_1.Parser().itf8('size').buffer('ents', {
62
- length: 'size',
63
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ buffer => {
64
- function makeTagSet(stringStart, stringEnd) {
65
- const str = buffer.toString('utf8', stringStart, stringEnd);
66
- const tags = [];
67
- for (let i = 0; i < str.length; i += 3) {
68
- tags.push(str.slice(i, i + 3));
108
+ maxLength: 4,
109
+ };
110
+ }
111
+ function makeTagSet(buffer, stringStart, stringEnd) {
112
+ const str = buffer.toString('utf8', stringStart, stringEnd);
113
+ const tags = [];
114
+ for (let i = 0; i < str.length; i += 3) {
115
+ tags.push(str.slice(i, i + 3));
116
+ }
117
+ return tags;
118
+ }
119
+ function cramTagDictionary() {
120
+ return {
121
+ parser: (buffer, offset) => {
122
+ const [size, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
123
+ offset += newOffset1;
124
+ const subbuf = buffer.subarray(offset, offset + size);
125
+ offset += size;
126
+ const tagSets = [];
127
+ let stringStart = 0;
128
+ let i = 0;
129
+ for (; i < subbuf.length; i++) {
130
+ if (!subbuf[i]) {
131
+ tagSets.push(makeTagSet(subbuf, stringStart, i));
132
+ stringStart = i + 1;
133
+ }
69
134
  }
70
- return tags;
71
- }
72
- /* eslint-disable */
73
- var tagSets = [];
74
- var stringStart = 0;
75
- var i;
76
- /* eslint-enable */
77
- for (i = 0; i < buffer.length; i += 1) {
78
- if (!buffer[i]) {
79
- tagSets.push(makeTagSet(stringStart, i));
80
- stringStart = i + 1;
135
+ if (i > stringStart) {
136
+ tagSets.push(makeTagSet(subbuf, stringStart, i));
81
137
  }
82
- }
83
- if (i > stringStart) {
84
- tagSets.push(makeTagSet(stringStart, i));
85
- }
86
- return tagSets;
87
- },
88
- });
89
- // const cramPreservationMapKeys = 'XX RN AP RR SM TD'.split(' ')
90
- const parseByteAsBool = new binary_parser_1.Parser().uint8(null, {
91
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ val => !!val,
92
- });
93
- const cramPreservationMap = new binary_parser_1.Parser()
94
- .itf8('mapSize')
95
- .itf8('mapCount')
96
- .array('ents', {
97
- length: 'mapCount',
98
- type: new binary_parser_1.Parser()
99
- .string('key', {
100
- length: 2,
101
- stripNull: false,
102
- // formatter: val => cramPreservationMapKeys[val] || 0,
103
- })
104
- .choice('value', {
105
- tag: 'key',
106
- choices: {
107
- MI: parseByteAsBool,
108
- UI: parseByteAsBool,
109
- PI: parseByteAsBool,
110
- RN: parseByteAsBool,
111
- AP: parseByteAsBool,
112
- RR: parseByteAsBool,
113
- SM: new binary_parser_1.Parser().array(null, { type: 'uint8', length: 5 }),
114
- TD: new binary_parser_1.Parser().nest(null, {
115
- type: cramTagDictionary,
116
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ data => data.ents,
117
- }),
138
+ return {
139
+ value: {
140
+ size,
141
+ ents: tagSets,
142
+ },
143
+ offset,
144
+ };
145
+ },
146
+ };
147
+ }
148
+ function cramPreservationMap() {
149
+ return {
150
+ parser: (buffer, offset) => {
151
+ const b = buffer;
152
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
153
+ const [mapSize, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
154
+ offset += newOffset1;
155
+ const [mapCount, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
156
+ offset += newOffset2;
157
+ const ents = [];
158
+ for (let i = 0; i < mapCount; i++) {
159
+ const key = String.fromCharCode(buffer[offset]) +
160
+ String.fromCharCode(buffer[offset + 1]);
161
+ offset += 2;
162
+ if (key === 'MI' ||
163
+ key === 'UI' ||
164
+ key === 'PI' ||
165
+ key === 'RN' ||
166
+ key === 'AP' ||
167
+ key === 'RR') {
168
+ ents.push({
169
+ key,
170
+ value: !!dataView.getUint8(offset),
171
+ });
172
+ offset += 1;
173
+ }
174
+ else if (key === 'SM') {
175
+ ents.push({
176
+ key,
177
+ value: [
178
+ dataView.getUint8(offset),
179
+ dataView.getUint8(offset + 1),
180
+ dataView.getUint8(offset + 2),
181
+ dataView.getUint8(offset + 3),
182
+ dataView.getUint8(offset + 4),
183
+ ],
184
+ });
185
+ offset += 5;
186
+ }
187
+ else if (key === 'TD') {
188
+ const { offset: offsetRet, value } = cramTagDictionary().parser(buffer, offset);
189
+ ents.push({ key, value: value.ents });
190
+ offset = offsetRet;
191
+ }
192
+ else {
193
+ throw new Error(`unknown key ${key}`);
194
+ }
195
+ }
196
+ return {
197
+ value: {
198
+ mapSize,
199
+ mapCount,
200
+ ents,
201
+ },
202
+ offset,
203
+ };
118
204
  },
119
- }),
120
- });
121
- /* istanbul ignore next */
205
+ };
206
+ }
122
207
  function formatMap(data) {
123
208
  const map = {};
124
209
  for (const { key, value } of data.ents) {
@@ -129,213 +214,446 @@ function formatMap(data) {
129
214
  }
130
215
  return map;
131
216
  }
132
- const unversionedParsers = {
133
- cramFileDefinition,
134
- cramBlockHeader,
135
- cramBlockCrc32,
136
- };
137
217
  function isMappedSliceHeader(header) {
138
218
  return typeof header.refSeqId === 'number';
139
219
  }
140
- exports.isMappedSliceHeader = isMappedSliceHeader;
141
- // each of these is a function of the major and minor version
142
- const versionedParsers = {
143
- // assemble a section parser for the unmapped slice header, with slight
144
- // variations depending on the major version of the cram file
145
- cramUnmappedSliceHeader(majorVersion) {
146
- let maxLength = 0;
147
- let parser = new binary_parser_1.Parser().itf8('numRecords');
148
- maxLength += 5;
220
+ // assemble a section parser for the unmapped slice header, with slight
221
+ // variations depending on the major version of the cram file
222
+ function cramUnmappedSliceHeader(majorVersion) {
223
+ let maxLength = 0;
224
+ maxLength += 5;
225
+ maxLength += 9;
226
+ maxLength += 5 * 2;
227
+ maxLength += 16;
228
+ const parser = (buffer, offset) => {
229
+ const [numRecords, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
230
+ offset += newOffset1;
231
+ let recordCounter = 0;
149
232
  // recordCounter is itf8 in a CRAM v2 file, absent in CRAM v1
150
233
  if (majorVersion >= 3) {
151
- parser = parser.ltf8('recordCounter');
152
- maxLength += 9;
234
+ const [rc, newOffset2] = (0, util_1.parseLtf8)(buffer, offset);
235
+ offset += newOffset2;
236
+ recordCounter = rc;
153
237
  }
154
238
  else if (majorVersion === 2) {
155
- parser = parser.itf8('recordCounter');
156
- maxLength += 5;
157
- }
158
- parser = parser
159
- .itf8('numBlocks')
160
- .itf8('numContentIds')
161
- .array('contentIds', {
162
- type: singleItf8,
163
- length: 'numContentIds',
164
- });
165
- maxLength += 5 * 2; // + numContentIds*5
166
- // the md5 sum is missing in cram v1
167
- if (majorVersion >= 2) {
168
- parser = parser.array('md5', { type: 'uint8', length: 16 });
169
- maxLength += 16;
239
+ const [rc, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
240
+ offset += newOffset2;
241
+ recordCounter = rc;
170
242
  }
171
- const maxLengthFunc = (numContentIds) => maxLength + numContentIds * 5;
172
- return { parser, maxLength: maxLengthFunc }; // : p, maxLength: numContentIds => 5 + 9 + 5 * 2 + 5 * numContentIds + 16 }
173
- },
174
- // assembles a section parser for the unmapped slice header, with slight
175
- // variations depending on the major version of the cram file
176
- cramMappedSliceHeader(majorVersion) {
177
- let parser = new binary_parser_1.Parser()
178
- .itf8('refSeqId')
179
- .itf8('refSeqStart')
180
- .itf8('refSeqSpan')
181
- .itf8('numRecords');
182
- let maxLength = 5 * 4;
183
- if (majorVersion >= 3) {
184
- parser = parser.ltf8('recordCounter');
185
- maxLength += 9;
243
+ else {
244
+ console.warn('recordCounter=0');
186
245
  }
187
- else if (majorVersion === 2) {
188
- parser = parser.itf8('recordCounter');
189
- maxLength += 5;
246
+ const [numBlocks, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
247
+ offset += newOffset3;
248
+ const [numContentIds, newOffset4] = (0, util_1.parseItf8)(buffer, offset);
249
+ offset += newOffset4;
250
+ const contentIds = [];
251
+ for (let i = 0; i < numContentIds; i++) {
252
+ const [id, newOffset5] = (0, util_1.parseItf8)(buffer, offset);
253
+ offset += newOffset5;
254
+ contentIds.push(id);
190
255
  }
191
- parser = parser
192
- .itf8('numBlocks')
193
- .itf8('numContentIds')
194
- .array('contentIds', {
195
- type: singleItf8,
196
- length: 'numContentIds',
197
- })
198
- .itf8('refBaseBlockId');
199
- maxLength += 5 * 3;
200
256
  // the md5 sum is missing in cram v1
257
+ let md5;
201
258
  if (majorVersion >= 2) {
202
- parser = parser.array('md5', { type: 'uint8', length: 16 });
203
- maxLength += 16;
259
+ md5 = [...buffer.subarray(offset, offset + 16)];
260
+ offset += 16;
204
261
  }
205
- const maxLengthFunc = (numContentIds) => maxLength + numContentIds * 5;
206
- return { parser, maxLength: maxLengthFunc };
207
- },
208
- cramEncoding(_majorVersion) {
209
- const parser = new binary_parser_1.Parser()
210
- .namely('cramEncoding')
211
- .itf8('codecId')
212
- .itf8('parametersBytes')
213
- .choice('parameters', {
214
- tag: 'codecId',
215
- choices: {
216
- 0: new binary_parser_1.Parser(), // NULL
217
- 1: new binary_parser_1.Parser().itf8('blockContentId'), // EXTERNAL
218
- 2: new binary_parser_1.Parser().itf8('offset').itf8('M'), // GOLOMB,
219
- // HUFFMAN_INT
220
- 3: binary_parser_1.Parser.start()
221
- .itf8('numCodes')
222
- .array('symbols', { length: 'numCodes', type: singleItf8 })
223
- .itf8('numLengths')
224
- .array('bitLengths', { length: 'numLengths', type: singleItf8 }),
225
- 4: binary_parser_1.Parser.start() // BYTE_ARRAY_LEN
226
- .nest('lengthsEncoding', { type: 'cramEncoding' })
227
- .nest('valuesEncoding', { type: 'cramEncoding' }),
228
- // BYTE_ARRAY_STOP is a little different for CRAM v1
229
- 5: new binary_parser_1.Parser().uint8('stopByte').itf8('blockContentId'),
230
- 6: new binary_parser_1.Parser().itf8('offset').itf8('length'), // BETA
231
- 7: new binary_parser_1.Parser().itf8('offset').itf8('K'), // SUBEXP
232
- 8: new binary_parser_1.Parser().itf8('offset').itf8('log2m'), // GOLOMB_RICE
233
- 9: new binary_parser_1.Parser().itf8('offset'), // GAMMA
262
+ return {
263
+ value: {
264
+ recordCounter,
265
+ md5,
266
+ contentIds,
267
+ numContentIds,
268
+ numBlocks,
269
+ numRecords,
234
270
  },
235
- });
236
- return { parser };
237
- },
238
- cramDataSeriesEncodingMap(majorVersion) {
239
- return new binary_parser_1.Parser()
240
- .itf8('mapSize')
241
- .itf8('mapCount')
242
- .array('ents', {
243
- length: 'mapCount',
244
- type: new binary_parser_1.Parser()
245
- .string('key', { length: 2, stripNull: false })
246
- .nest('value', { type: this.cramEncoding(majorVersion).parser }),
247
- });
248
- },
249
- cramTagEncodingMap(majorVersion) {
250
- return new binary_parser_1.Parser()
251
- .itf8('mapSize')
252
- .itf8('mapCount')
253
- .array('ents', {
254
- length: 'mapCount',
255
- type: new binary_parser_1.Parser()
256
- .itf8('key', {
257
- formatter: /* istanbul ignore next */ /* istanbul ignore next */ integerRepresentation =>
258
- /* istanbul ignore next */
259
- String.fromCharCode((integerRepresentation >> 16) & 0xff) +
260
- String.fromCharCode((integerRepresentation >> 8) & 0xff) +
261
- String.fromCharCode(integerRepresentation & 0xff),
262
- })
263
- .nest('value', { type: this.cramEncoding(majorVersion).parser }),
264
- });
265
- },
266
- cramCompressionHeader(majorVersion) {
267
- let parser = new binary_parser_1.Parser();
268
- // TODO: if we want to support CRAM v1, we will need to refactor
269
- // compression header into 2 parts to parse the landmarks,
270
- // like the container header
271
- parser = parser
272
- .nest('preservation', {
273
- type: cramPreservationMap,
274
- formatter: formatMap,
275
- })
276
- .nest('dataSeriesEncoding', {
277
- type: this.cramDataSeriesEncodingMap(majorVersion),
278
- formatter: formatMap,
279
- })
280
- .nest('tagEncoding', {
281
- type: this.cramTagEncodingMap(majorVersion),
282
- formatter: formatMap,
283
- });
284
- return { parser };
285
- },
286
- cramContainerHeader1(majorVersion) {
287
- let parser = new binary_parser_1.Parser()
288
- .int32('length') // byte size of the container data (blocks)
289
- .itf8('refSeqId') // reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences
290
- .itf8('refSeqStart') // the alignment start position or 0 for unmapped reads
291
- .itf8('alignmentSpan') // the length of the alignment or 0 for unmapped reads
292
- .itf8('numRecords'); // number of records in the container
293
- let maxLength = 4 + 5 * 4;
294
- if (majorVersion >= 3) {
295
- parser = parser.ltf8('recordCounter'); // 1-based sequential index of records in the file/stream.
296
- maxLength += 9;
297
- }
298
- else if (majorVersion === 2) {
299
- parser = parser.itf8('recordCounter');
300
- maxLength += 5;
301
- }
302
- if (majorVersion > 1) {
303
- parser = parser.ltf8('numBases'); // number of read bases
304
- maxLength += 9;
271
+ offset,
272
+ };
273
+ };
274
+ return {
275
+ parser,
276
+ maxLength: (numContentIds) => maxLength + numContentIds * 5,
277
+ };
278
+ }
279
+ // assembles a section parser for the mapped slice header, with slight
280
+ // variations depending on the major version of the cram file
281
+ function cramMappedSliceHeader(majorVersion) {
282
+ let maxLength = 0;
283
+ maxLength += 5 * 4; // EL0
284
+ maxLength += 9; // EL1
285
+ maxLength += 5 * 3; // EL2 ITF8s
286
+ maxLength += 16; // MD5
287
+ return {
288
+ parser: (buffer, offset) => {
289
+ // L0
290
+ const [refSeqId, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
291
+ offset += newOffset1;
292
+ const [refSeqStart, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
293
+ offset += newOffset2;
294
+ const [refSeqSpan, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
295
+ offset += newOffset3;
296
+ const [numRecords, newOffset4] = (0, util_1.parseItf8)(buffer, offset);
297
+ offset += newOffset4;
298
+ // EL0
299
+ // L1
300
+ let recordCounter = 0;
301
+ if (majorVersion >= 3) {
302
+ const [rc, newOffset5] = (0, util_1.parseLtf8)(buffer, offset);
303
+ offset += newOffset5;
304
+ recordCounter = rc;
305
+ }
306
+ else if (majorVersion === 2) {
307
+ const [rc, newOffset5] = (0, util_1.parseItf8)(buffer, offset);
308
+ offset += newOffset5;
309
+ recordCounter = rc;
310
+ }
311
+ else {
312
+ console.warn('majorVersion is <2, recordCounter set to 0');
313
+ }
314
+ // EL1
315
+ // L2
316
+ const [numBlocks, newOffset6] = (0, util_1.parseItf8)(buffer, offset);
317
+ offset += newOffset6;
318
+ const [numContentIds, newOffset7] = (0, util_1.parseItf8)(buffer, offset);
319
+ offset += newOffset7;
320
+ const contentIds = [];
321
+ for (let i = 0; i < numContentIds; i++) {
322
+ const [id, newOffset5] = (0, util_1.parseItf8)(buffer, offset);
323
+ offset += newOffset5;
324
+ contentIds.push(id);
325
+ }
326
+ const [refBaseBlockId, newOffset8] = (0, util_1.parseItf8)(buffer, offset);
327
+ offset += newOffset8;
328
+ // EL2
329
+ // the md5 sum is missing in cram v1
330
+ let md5;
331
+ if (majorVersion >= 2) {
332
+ md5 = [...buffer.subarray(offset, offset + 16)];
333
+ offset += 16;
334
+ }
335
+ return {
336
+ value: {
337
+ md5,
338
+ numBlocks,
339
+ numRecords,
340
+ numContentIds,
341
+ refSeqSpan,
342
+ refSeqId,
343
+ refSeqStart,
344
+ recordCounter,
345
+ refBaseBlockId,
346
+ contentIds,
347
+ },
348
+ offset,
349
+ };
350
+ },
351
+ maxLength: (numContentIds) => maxLength + numContentIds * 5,
352
+ };
353
+ }
354
+ function cramEncoding() {
355
+ return {
356
+ parser: (buffer, offset) => cramEncodingSub(buffer, offset),
357
+ };
358
+ }
359
+ function cramEncodingSub(buffer, offset) {
360
+ const b = buffer;
361
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length);
362
+ const [codecId, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
363
+ offset += newOffset1;
364
+ const [parametersBytes, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
365
+ offset += newOffset2;
366
+ const parameters = {};
367
+ if (codecId === 0) {
368
+ // NULL
369
+ }
370
+ else if (codecId === 1) {
371
+ // EXTERNAL
372
+ const [bc, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
373
+ parameters.blockContentId = bc;
374
+ offset += newOffset3;
375
+ }
376
+ else if (codecId === 2) {
377
+ // GOLOMB
378
+ const [off, newOffset3] = (0, util_1.parseItf8)(buffer, offset);
379
+ parameters.offset = off;
380
+ offset += newOffset3;
381
+ const [M2, newOffset4] = (0, util_1.parseItf8)(buffer, offset);
382
+ parameters.M = M2;
383
+ offset += newOffset4;
384
+ }
385
+ else if (codecId === 3) {
386
+ // HUFFMAN_INT
387
+ const val = (0, util_1.parseItf8)(buffer, offset);
388
+ const numCodes = val[0];
389
+ offset += val[1];
390
+ const symbols = [];
391
+ for (let i = 0; i < numCodes; i++) {
392
+ const code = (0, util_1.parseItf8)(buffer, offset);
393
+ symbols.push(code[0]);
394
+ offset += code[1];
305
395
  }
306
- parser = parser
307
- .itf8('numBlocks') // the number of blocks
308
- .itf8('numLandmarks'); // the number of landmarks
309
- maxLength += 5 + 5;
310
- return { parser, maxLength };
311
- },
312
- cramContainerHeader2(majorVersion) {
313
- let parser = new binary_parser_1.Parser()
314
- .itf8('numLandmarks') // the number of blocks
315
- // Each integer value of this array is a byte offset
316
- // into the blocks byte array. Landmarks are used for
317
- // random access indexing.
318
- .array('landmarks', {
319
- type: new binary_parser_1.Parser().itf8(),
320
- length: 'numLandmarks',
321
- });
322
- let crcLength = 0;
323
- if (majorVersion >= 3) {
324
- parser = parser.uint32('crc32');
325
- crcLength = 4;
396
+ parameters.symbols = symbols;
397
+ const val2 = (0, util_1.parseItf8)(buffer, offset);
398
+ const numLengths = val2[0];
399
+ parameters.numLengths = numLengths;
400
+ parameters.numCodes = numCodes;
401
+ parameters.numLengths = numLengths;
402
+ offset += val2[1];
403
+ const bitLengths = [];
404
+ for (let i = 0; i < numLengths; i++) {
405
+ const len = (0, util_1.parseItf8)(buffer, offset);
406
+ offset += len[1];
407
+ bitLengths.push(len[0]);
326
408
  }
327
- return {
328
- parser,
329
- maxLength: (numLandmarks) => 5 + numLandmarks * 5 + crcLength,
330
- };
331
- },
332
- };
409
+ parameters.bitLengths = bitLengths;
410
+ }
411
+ else if (codecId === 4) {
412
+ // BYTE_ARRAY_LEN
413
+ const { value: lengthsEncoding, offset: newOffset1 } = cramEncodingSub(buffer, offset);
414
+ parameters.lengthsEncoding = lengthsEncoding;
415
+ offset = newOffset1;
416
+ const { value: valuesEncoding, offset: newOffset2 } = cramEncodingSub(buffer, offset);
417
+ parameters.valuesEncoding = valuesEncoding;
418
+ offset = newOffset2;
419
+ }
420
+ else if (codecId === 5) {
421
+ // BYTE_ARRAY_STOP
422
+ parameters.stopByte = dataView.getUint8(offset);
423
+ offset += 1;
424
+ const [blockContentId, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
425
+ parameters.blockContentId = blockContentId;
426
+ offset += newOffset1;
427
+ }
428
+ else if (codecId === 6) {
429
+ // BETA
430
+ const [off, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
431
+ parameters.offset = off;
432
+ offset += newOffset1;
433
+ const [len, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
434
+ parameters.length = len;
435
+ offset += newOffset2;
436
+ }
437
+ else if (codecId === 7) {
438
+ // SUBEXP
439
+ const [off, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
440
+ parameters.offset = off;
441
+ offset += newOffset1;
442
+ const [K, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
443
+ parameters.K = K;
444
+ offset += newOffset2;
445
+ }
446
+ else if (codecId === 8) {
447
+ // GOLOMB_RICE
448
+ const [off, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
449
+ parameters.offset = off;
450
+ offset += newOffset1;
451
+ const [l2m, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
452
+ parameters.log2m = l2m;
453
+ offset += newOffset2;
454
+ }
455
+ else if (codecId === 9) {
456
+ // GAMMA
457
+ const [off, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
458
+ parameters.offset = off;
459
+ offset += newOffset1;
460
+ }
461
+ else {
462
+ throw new Error(`unknown codecId ${codecId}`);
463
+ }
464
+ return {
465
+ value: {
466
+ codecId,
467
+ parametersBytes,
468
+ parameters,
469
+ },
470
+ offset,
471
+ };
472
+ }
473
+ function cramDataSeriesEncodingMap() {
474
+ return {
475
+ parser: (buffer, offset) => {
476
+ const [mapSize, newOffset1] = (0, util_1.parseItf8)(buffer, offset);
477
+ offset += newOffset1;
478
+ const [mapCount, newOffset2] = (0, util_1.parseItf8)(buffer, offset);
479
+ offset += newOffset2;
480
+ const ents = [];
481
+ for (let i = 0; i < mapCount; i++) {
482
+ const key = String.fromCharCode(buffer[offset]) +
483
+ String.fromCharCode(buffer[offset + 1]);
484
+ offset += 2;
485
+ const { value, offset: newOffset4 } = cramEncodingSub(buffer, offset);
486
+ offset = newOffset4;
487
+ ents.push({ key, value });
488
+ }
489
+ return {
490
+ value: {
491
+ mapSize,
492
+ ents,
493
+ mapCount,
494
+ },
495
+ offset,
496
+ };
497
+ },
498
+ };
499
+ }
500
/**
 * Creates the section parser for the tag encoding map.
 *
 * The returned `parser(buffer, offset)` reads an ITF8 `mapSize`, an ITF8
 * `mapCount`, and then `mapCount` entries. Each entry starts with an ITF8
 * integer whose three low-order bytes (most significant first) become a
 * three-character key, followed by an encoding decoded by `cramEncodingSub`.
 * `mapSize` is returned as read and is not used to bound the parsing.
 *
 * @returns {{parser: function(Uint8Array, number): {value: {mapSize: number, ents: Array<{key: string, value: *}>, mapCount: number}, offset: number}}}
 *   `offset` in the result is the position just past the last entry.
 */
function cramTagEncodingMap() {
    const parser = (buffer, offset) => {
        let pos = offset;

        // parseItf8 yields [decoded value, amount to advance the cursor by]
        const [mapSize, sizeWidth] = (0, util_1.parseItf8)(buffer, pos);
        pos += sizeWidth;
        const [mapCount, countWidth] = (0, util_1.parseItf8)(buffer, pos);
        pos += countWidth;

        const ents = [];
        while (ents.length < mapCount) {
            const [packedKey, keyWidth] = (0, util_1.parseItf8)(buffer, pos);
            pos += keyWidth;
            // unpack bits 23..16, 15..8 and 7..0 into one character each
            const key = String.fromCharCode((packedKey >> 16) & 0xff, (packedKey >> 8) & 0xff, packedKey & 0xff);
            const encoding = cramEncodingSub(buffer, pos);
            pos = encoding.offset;
            ents.push({ key, value: encoding.value });
        }

        return {
            value: { mapSize, ents, mapCount },
            offset: pos,
        };
    };
    return { parser };
}
529
/**
 * Creates the section parser for a compression header.
 *
 * The returned `parser(buffer, offset)` runs three sub-parsers back to back,
 * each one starting where the previous one stopped: the preservation map,
 * the data-series encoding map and the tag encoding map. Every parsed map is
 * passed through `formatMap` before being returned.
 *
 * @returns {{parser: function(Uint8Array, number): {value: {dataSeriesEncoding: *, preservation: *, tagEncoding: *}, offset: number}}}
 *   `offset` in the result is the position reported by the tag encoding map
 *   parser.
 */
function cramCompressionHeader() {
    const parser = (buffer, offset) => {
        // TODO: if we want to support CRAM v1, we will need to refactor
        // compression header into 2 parts to parse the landmarks, like the
        // container header
        const preservationResult = cramPreservationMap().parser(buffer, offset);
        const dataSeriesResult = cramDataSeriesEncodingMap().parser(buffer, preservationResult.offset);
        const tagResult = cramTagEncodingMap().parser(buffer, dataSeriesResult.offset);

        return {
            value: {
                dataSeriesEncoding: formatMap(dataSeriesResult.value),
                preservation: formatMap(preservationResult.value),
                tagEncoding: formatMap(tagResult.value),
            },
            offset: tagResult.offset,
        };
    };
    return { parser };
}
552
/**
 * Creates the parser for the first part of a container header (everything up
 * to and including the landmark count).
 *
 * Fields are read in this order: a little-endian int32 `length`, then ITF8
 * `refSeqId`, `refSeqStart`, `alignmentSpan` and `numRecords`, then
 * `recordCounter` (LTF8 when `majorVersion >= 3`, ITF8 when it is exactly 2,
 * otherwise left at 0 with a console warning), then LTF8 `numBases` (only
 * when `majorVersion > 1`, otherwise `undefined`), then ITF8 `numBlocks` and
 * `numLandmarks`.
 *
 * @param {number} majorVersion major version of the CRAM file being read
 * @returns {{maxLength: number, parser: function(Uint8Array, number): {value: object, offset: number}}}
 *   `maxLength` is a fixed byte allowance for this section; `parser` returns
 *   the decoded fields and the position just past them.
 */
function cramContainerHeader1(majorVersion) {
    // 4 (int32 length) + 5 * 4 + 9 + 9 + 5 + 5; the terms line up with the
    // four ITF8, two LTF8 and two ITF8 fields read below (presumably 5 and 9
    // are the maximum encoded widths of ITF8 and LTF8 - confirm in util)
    const maxLength = 4 + 5 * 4 + 9 + 9 + (5 + 5);

    const parser = (buffer, offset) => {
        const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length);
        let pos = offset;
        // decode one variable-length integer at the cursor and step past it
        const read = (decode) => {
            const [decoded, width] = decode(buffer, pos);
            pos += width;
            return decoded;
        };

        // byte size of the container data (blocks)
        const length = view.getInt32(pos, true);
        pos += 4;

        // reference sequence identifier, -1 for unmapped reads, -2 for multiple
        // reference sequences
        const refSeqId = read(util_1.parseItf8);
        const refSeqStart = read(util_1.parseItf8);
        const alignmentSpan = read(util_1.parseItf8);
        const numRecords = read(util_1.parseItf8);

        let recordCounter = 0;
        if (majorVersion >= 3) {
            recordCounter = read(util_1.parseLtf8);
        }
        else if (majorVersion === 2) {
            recordCounter = read(util_1.parseItf8);
        }
        else {
            console.warn('setting recordCounter=0');
        }

        const numBases = majorVersion > 1 ? read(util_1.parseLtf8) : undefined;
        const numBlocks = read(util_1.parseItf8);
        const numLandmarks = read(util_1.parseItf8);

        return {
            value: {
                length,
                refSeqId,
                refSeqStart,
                alignmentSpan,
                numBlocks,
                numLandmarks,
                numBases,
                recordCounter,
                numRecords,
            },
            offset: pos,
        };
    };

    return { maxLength, parser };
}
617
/**
 * Creates the parser for the second part of a container header: the landmark
 * list and, for `majorVersion >= 3`, a trailing little-endian uint32 `crc32`.
 *
 * The parser starts at the ITF8 landmark count, reads that many ITF8
 * landmarks, and then the checksum when applicable. The `crc32` property is
 * present on the result only when it was read.
 *
 * @param {number} majorVersion major version of the CRAM file being read
 * @returns {{parser: function(Uint8Array, number): {value: {crc32?: number, numLandmarks: number, landmarks: number[]}, offset: number}, maxLength: function(number): number}}
 *   `maxLength(numLandmarks)` returns `5 + 5 * numLandmarks + 4`.
 */
function cramContainerHeader2(majorVersion) {
    const parser = (buffer, offset) => {
        const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length);
        let pos = offset;

        const [numLandmarks, countWidth] = (0, util_1.parseItf8)(buffer, pos);
        pos += countWidth;

        const landmarks = [];
        while (landmarks.length < numLandmarks) {
            const [landmark, width] = (0, util_1.parseItf8)(buffer, pos);
            pos += width;
            landmarks.push(landmark);
        }

        // crc32 goes first so the property order matches: crc32, numLandmarks,
        // landmarks
        const value = {};
        if (majorVersion >= 3) {
            value.crc32 = view.getUint32(pos, true);
            pos += 4;
        }
        value.numLandmarks = numLandmarks;
        value.landmarks = landmarks;

        return { value, offset: pos };
    };

    const maxLength = (numLandmarks) => 5 + 5 * numLandmarks + 4;

    return { parser, maxLength };
}
333
644
  function getSectionParsers(majorVersion) {
334
- const parsers = Object.assign({}, unversionedParsers);
335
- Object.keys(versionedParsers).forEach(parserName => {
336
- parsers[parserName] = versionedParsers[parserName](majorVersion);
337
- });
338
- return parsers;
645
+ return {
646
+ cramFileDefinition: cramFileDefinition(),
647
+ cramBlockHeader: cramBlockHeader(),
648
+ cramBlockCrc32: cramBlockCrc32(),
649
+ cramDataSeriesEncodingMap: cramDataSeriesEncodingMap(),
650
+ cramTagEncodingMap: cramTagEncodingMap(),
651
+ cramCompressionHeader: cramCompressionHeader(),
652
+ cramEncoding: cramEncoding(),
653
+ cramUnmappedSliceHeader: cramUnmappedSliceHeader(majorVersion),
654
+ cramMappedSliceHeader: cramMappedSliceHeader(majorVersion),
655
+ cramContainerHeader1: cramContainerHeader1(majorVersion),
656
+ cramContainerHeader2: cramContainerHeader2(majorVersion),
657
+ };
339
658
  }
340
- exports.getSectionParsers = getSectionParsers;
341
659
  //# sourceMappingURL=sectionParsers.js.map