@gmod/cram 2.0.4 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/dist/cram-bundle.js +1 -1
- package/dist/cramFile/codecs/byteArrayLength.js +1 -1
- package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/dist/cramFile/codecs/external.js +1 -1
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/codecs/huffman.js +2 -1
- package/dist/cramFile/codecs/huffman.js.map +1 -1
- package/dist/cramFile/container/compressionScheme.d.ts +0 -3
- package/dist/cramFile/container/compressionScheme.js +0 -4
- package/dist/cramFile/container/compressionScheme.js.map +1 -1
- package/dist/cramFile/container/index.d.ts +56 -3
- package/dist/cramFile/container/index.js +15 -9
- package/dist/cramFile/container/index.js.map +1 -1
- package/dist/cramFile/file.d.ts +24 -59
- package/dist/cramFile/file.js +21 -26
- package/dist/cramFile/file.js.map +1 -1
- package/dist/cramFile/record.d.ts +1 -1
- package/dist/cramFile/sectionParsers.d.ts +195 -48
- package/dist/cramFile/sectionParsers.js +621 -303
- package/dist/cramFile/sectionParsers.js.map +1 -1
- package/dist/cramFile/slice/index.d.ts +23 -1
- package/dist/cramFile/slice/index.js +9 -6
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/cramFile/util.d.ts +6 -4
- package/dist/cramFile/util.js +88 -6
- package/dist/cramFile/util.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/esm/cramFile/codecs/external.js +1 -1
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/codecs/huffman.js +2 -1
- package/esm/cramFile/codecs/huffman.js.map +1 -1
- package/esm/cramFile/container/compressionScheme.d.ts +0 -3
- package/esm/cramFile/container/compressionScheme.js +0 -4
- package/esm/cramFile/container/compressionScheme.js.map +1 -1
- package/esm/cramFile/container/index.d.ts +56 -3
- package/esm/cramFile/container/index.js +15 -9
- package/esm/cramFile/container/index.js.map +1 -1
- package/esm/cramFile/file.d.ts +24 -59
- package/esm/cramFile/file.js +22 -25
- package/esm/cramFile/file.js.map +1 -1
- package/esm/cramFile/record.d.ts +1 -1
- package/esm/cramFile/sectionParsers.d.ts +195 -48
- package/esm/cramFile/sectionParsers.js +620 -303
- package/esm/cramFile/sectionParsers.js.map +1 -1
- package/esm/cramFile/slice/index.d.ts +23 -1
- package/esm/cramFile/slice/index.js +10 -7
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/cramFile/util.d.ts +6 -4
- package/esm/cramFile/util.js +87 -6
- package/esm/cramFile/util.js.map +1 -1
- package/package.json +7 -8
- package/src/cramFile/codecs/byteArrayLength.ts +1 -2
- package/src/cramFile/codecs/external.ts +1 -1
- package/src/cramFile/codecs/huffman.ts +2 -1
- package/src/cramFile/container/compressionScheme.ts +1 -8
- package/src/cramFile/container/index.ts +21 -10
- package/src/cramFile/file.ts +28 -43
- package/src/cramFile/record.ts +1 -1
- package/src/cramFile/sectionParsers.ts +668 -390
- package/src/cramFile/slice/index.ts +11 -5
- package/src/cramFile/util.ts +90 -91
- package/src/typings/binary-parser.d.ts +0 -44
|
@@ -1,120 +1,200 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
'
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
}
|
|
29
|
-
return method;
|
|
1
|
+
import { parseItf8, parseLtf8 } from './util';
|
|
2
|
+
/**
 * Build the section parser for the fixed-size CRAM file definition.
 *
 * Layout read by the parser: 4 bytes of magic text, 1 byte major version,
 * 1 byte minor version, 20 bytes of file id (NUL bytes are stripped from the
 * decoded id).
 *
 * Text fields are decoded with TextDecoder rather than `Buffer#toString`, so
 * the parser gives the same answer for a Node `Buffer` and for a plain
 * `Uint8Array` (whose `toString()` would return comma-joined byte values).
 *
 * @returns {{parser: Function, maxLength: number}} `parser(buffer)` returns
 *   `{ value: { magic, majorVersion, minorVersion, fileId }, offset }`;
 *   `maxLength` is the 26 bytes the section occupies.
 */
export function cramFileDefinition() {
  // ignoreBOM: true keeps a leading U+FEFF in the decoded text instead of
  // silently dropping it, so no input bytes disappear from the result.
  const utf8 = new TextDecoder('utf-8', { ignoreBOM: true });
  return {
    // The second argument is accepted for signature parity with the other
    // section parsers but is not used: reading always starts at byte 0.
    parser: (buffer, _startOffset = 0) => {
      const dataView = new DataView(
        buffer.buffer,
        buffer.byteOffset,
        buffer.length,
      );
      let offset = 0;
      const magic = utf8.decode(buffer.subarray(offset, offset + 4));
      offset += 4;
      const majorVersion = dataView.getUint8(offset);
      offset += 1;
      const minorVersion = dataView.getUint8(offset);
      offset += 1;
      const fileId = utf8
        .decode(buffer.subarray(offset, offset + 20))
        .replaceAll('\0', '');
      offset += 20;
      return {
        value: {
          magic,
          majorVersion,
          minorVersion,
          fileId,
        },
        offset,
      };
    },
    maxLength: 26,
  };
}
|
|
32
|
+
/**
 * Build the section parser for a CRAM block header.
 *
 * The header is: 1 byte compression-method id, 1 byte content-type id, then
 * three ITF8 integers (content id, compressed size, uncompressed size).
 * `parseItf8` is used here as returning `[value, bytesRead]`.
 *
 * @returns {{parser: Function, maxLength: number}} `parser(buffer)` returns
 *   `{ offset, value }` and throws an Error for an unrecognised compression
 *   method or content type id.
 */
export function cramBlockHeader() {
  // Index in each table is the numeric id stored in the file.
  const compressionMethods = [
    'raw',
    'gzip',
    'bzip2',
    'lzma',
    'rans',
    'rans4x16',
    'arith',
    'fqzcomp',
    'tok3',
  ];
  const contentTypes = [
    'FILE_HEADER',
    'COMPRESSION_HEADER',
    'MAPPED_SLICE_HEADER',
    'UNMAPPED_SLICE_HEADER', // < only used in cram v1
    'EXTERNAL_DATA',
    'CORE_DATA',
  ];

  // The second argument is accepted but unused: reading starts at byte 0.
  const parser = (buffer, _startOffset = 0) => {
    const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length);

    const methodId = view.getUint8(0);
    const compressionMethod = compressionMethods[methodId];
    if (!compressionMethod) {
      throw new Error(`compression method number ${methodId} not implemented`);
    }

    const typeId = view.getUint8(1);
    const contentType = contentTypes[typeId];
    if (!contentType) {
      throw new Error(`invalid block content type id ${typeId}`);
    }

    // The three variable-length integers follow the two id bytes.
    let cursor = 2;
    const [contentId, contentIdBytes] = parseItf8(buffer, cursor);
    cursor += contentIdBytes;
    const [compressedSize, compressedSizeBytes] = parseItf8(buffer, cursor);
    cursor += compressedSizeBytes;
    const [uncompressedSize, uncompressedSizeBytes] = parseItf8(buffer, cursor);
    cursor += uncompressedSizeBytes;

    return {
      offset: cursor,
      value: {
        uncompressedSize,
        compressedSize,
        contentId,
        contentType,
        compressionMethod,
      },
    };
  };

  // 2 id bytes + 3 integers of at most 5 bytes each
  return { parser, maxLength: 17 };
}
|
|
85
|
+
/**
 * Build the section parser for the CRC32 value that trails a block.
 *
 * @returns {{parser: Function, maxLength: number}} `parser(buffer, offset)`
 *   reads a little-endian unsigned 32-bit integer at `offset` and returns
 *   `{ offset, value: { crc32 } }` with `offset` advanced by 4.
 */
export function cramBlockCrc32() {
  return {
    // `offset` defaults to 0, like the other fixed-layout section parsers in
    // this file. Without the default, an omitted offset would read at byte 0
    // but hand back `NaN` as the next offset.
    parser: (buffer, offset = 0) => {
      const dataView = new DataView(
        buffer.buffer,
        buffer.byteOffset,
        buffer.length,
      );
      const crc32 = dataView.getUint32(offset, true);
      return {
        offset: offset + 4,
        value: {
          crc32,
        },
      };
    },
    maxLength: 4,
  };
}
|
|
102
|
+
/**
 * Decode one entry of the tag dictionary into its 3-character pieces.
 *
 * The bytes in `[stringStart, stringEnd)` are decoded as UTF-8 and the
 * resulting string is cut into consecutive 3-character chunks (the last chunk
 * may be shorter if the length is not a multiple of 3).
 *
 * Decoding goes through TextDecoder on a `subarray` view instead of
 * `Buffer#toString(encoding, start, end)`, so a plain `Uint8Array` is handled
 * the same way as a Node `Buffer`.
 *
 * @param {Uint8Array} buffer bytes holding the dictionary
 * @param {number} stringStart index of the first byte of the entry
 * @param {number} stringEnd index one past the last byte of the entry
 * @returns {string[]} the 3-character chunks, in order
 */
function makeTagSet(buffer, stringStart, stringEnd) {
  // ignoreBOM: true keeps a leading U+FEFF rather than dropping it.
  const str = new TextDecoder('utf-8', { ignoreBOM: true }).decode(
    buffer.subarray(stringStart, stringEnd),
  );
  const tags = [];
  for (let i = 0; i < str.length; i += 3) {
    tags.push(str.slice(i, i + 3));
  }
  return tags;
}
|
|
110
|
+
/**
 * Build the section parser for the tag dictionary.
 *
 * The dictionary is an ITF8 byte count followed by that many bytes. Inside
 * those bytes every zero byte terminates one entry; each entry is turned into
 * a list of tags by `makeTagSet`. Bytes left after the last zero byte form a
 * final entry.
 *
 * @returns {{parser: Function}} `parser(buffer, offset)` returns
 *   `{ value: { size, ents }, offset }` where `offset` points just past the
 *   dictionary bytes.
 */
export function cramTagDictionary() {
  const parser = (buffer, offset) => {
    const [size, sizeBytes] = parseItf8(buffer, offset);
    const dictStart = offset + sizeBytes;
    const dictEnd = dictStart + size;
    const dictBytes = buffer.subarray(dictStart, dictEnd);

    const ents = [];
    let entryStart = 0;
    // Jump from one zero terminator to the next.
    let terminator = dictBytes.indexOf(0, entryStart);
    while (terminator !== -1) {
      ents.push(makeTagSet(dictBytes, entryStart, terminator));
      entryStart = terminator + 1;
      terminator = dictBytes.indexOf(0, entryStart);
    }
    // Unterminated trailing entry, if any bytes remain.
    if (dictBytes.length > entryStart) {
      ents.push(makeTagSet(dictBytes, entryStart, dictBytes.length));
    }

    return {
      value: {
        size,
        ents,
      },
      offset: dictEnd,
    };
  };
  return { parser };
}
|
|
139
|
+
/**
 * Build the section parser for the preservation map.
 *
 * The map is two ITF8 integers (`mapSize`, `mapCount`) followed by `mapCount`
 * entries. Each entry starts with a two-character key that decides how the
 * value is read:
 *   - MI, UI, PI, RN, AP, RR: one byte, converted to a boolean
 *   - SM: five bytes, returned as an array of numbers
 *   - TD: a tag dictionary, parsed by `cramTagDictionary`
 *
 * @returns {{parser: Function}} `parser(buffer, offset)` returns
 *   `{ value: { mapSize, mapCount, ents }, offset }` and throws an Error on
 *   any other key.
 */
export function cramPreservationMap() {
  const parser = (buffer, offset) => {
    const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length);
    let pos = offset;

    const [mapSize, mapSizeBytes] = parseItf8(buffer, pos);
    pos += mapSizeBytes;
    const [mapCount, mapCountBytes] = parseItf8(buffer, pos);
    pos += mapCountBytes;

    const ents = [];
    for (let entry = 0; entry < mapCount; entry++) {
      const key = String.fromCharCode(buffer[pos], buffer[pos + 1]);
      pos += 2;

      switch (key) {
        case 'MI':
        case 'UI':
        case 'PI':
        case 'RN':
        case 'AP':
        case 'RR': {
          ents.push({ key, value: view.getUint8(pos) !== 0 });
          pos += 1;
          break;
        }
        case 'SM': {
          const matrix = [];
          for (let k = 0; k < 5; k++) {
            matrix.push(view.getUint8(pos + k));
          }
          ents.push({ key, value: matrix });
          pos += 5;
          break;
        }
        case 'TD': {
          const dictionary = cramTagDictionary().parser(buffer, pos);
          ents.push({ key, value: dictionary.value.ents });
          // The dictionary parser reports an absolute offset.
          pos = dictionary.offset;
          break;
        }
        default:
          throw new Error(`unknown key ${key}`);
      }
    }

    return {
      value: {
        mapSize,
        mapCount,
        ents,
      },
      offset: pos,
    };
  };
  return { parser };
}
|
|
118
198
|
function formatMap(data) {
|
|
119
199
|
const map = {};
|
|
120
200
|
for (const { key, value } of data.ents) {
|
|
@@ -125,212 +205,449 @@ function formatMap(data) {
|
|
|
125
205
|
}
|
|
126
206
|
return map;
|
|
127
207
|
}
|
|
128
|
-
const unversionedParsers = {
|
|
129
|
-
cramFileDefinition,
|
|
130
|
-
cramBlockHeader,
|
|
131
|
-
cramBlockCrc32,
|
|
132
|
-
};
|
|
133
208
|
/**
 * Tell whether a parsed slice header is a mapped slice header.
 *
 * @param {object} header a parsed slice header
 * @returns {boolean} true when `header.refSeqId` is a number
 */
export function isMappedSliceHeader(header) {
  const { refSeqId } = header;
  return typeof refSeqId === 'number';
}
|
|
136
|
-
//
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
211
|
+
// assemble a section parser for the unmapped slice header, with slight
|
|
212
|
+
// variations depending on the major version of the cram file
|
|
213
|
+
/**
 * Build the section parser for an unmapped slice header.
 *
 * Fields, in file order: numRecords (ITF8); recordCounter (LTF8 when
 * majorVersion >= 3, ITF8 when majorVersion is 2, otherwise not read and left
 * at 0 with a console warning); numBlocks (ITF8); numContentIds (ITF8); that
 * many content ids (ITF8 each); and, when majorVersion >= 2, 16 bytes of md5.
 *
 * @param {number} majorVersion major version of the CRAM file
 * @returns {{parser: Function, maxLength: Function}} `parser(buffer, offset)`
 *   returns `{ value, offset }`; `maxLength(numContentIds)` is the fixed upper
 *   bound plus 5 bytes per content id.
 */
function cramUnmappedSliceHeader(majorVersion) {
  // numRecords + recordCounter + (numBlocks, numContentIds) + md5
  const fixedMaxLength = 5 + 9 + 5 * 2 + 16;

  // Pick the integer reader for recordCounter once; undefined means the
  // field is not present for this version.
  let readRecordCounter;
  if (majorVersion >= 3) {
    readRecordCounter = parseLtf8;
  } else if (majorVersion === 2) {
    readRecordCounter = parseItf8;
  }

  const parser = (buffer, offset) => {
    let pos = offset;
    // Reads one ITF8 value at the cursor and advances past it.
    const nextItf8 = () => {
      const [value, bytesRead] = parseItf8(buffer, pos);
      pos += bytesRead;
      return value;
    };

    const numRecords = nextItf8();

    let recordCounter = 0;
    if (readRecordCounter) {
      const [counter, bytesRead] = readRecordCounter(buffer, pos);
      pos += bytesRead;
      recordCounter = counter;
    } else {
      console.warn('recordCounter=0');
    }

    const numBlocks = nextItf8();
    const numContentIds = nextItf8();
    const contentIds = [];
    while (contentIds.length < numContentIds) {
      contentIds.push(nextItf8());
    }

    // the md5 sum is missing in cram v1
    let md5;
    if (majorVersion >= 2) {
      md5 = Array.from(buffer.subarray(pos, pos + 16));
      pos += 16;
    }

    return {
      value: {
        recordCounter,
        md5,
        contentIds,
        numContentIds,
        numBlocks,
        numRecords,
      },
      offset: pos,
    };
  };

  return {
    parser,
    maxLength: (numContentIds) => fixedMaxLength + numContentIds * 5,
  };
}
|
|
270
|
+
// assembles a section parser for the mapped slice header, with slight
|
|
271
|
+
// variations depending on the major version of the cram file
|
|
272
|
+
/**
 * Build the section parser for a mapped slice header.
 *
 * Fields, in file order: refSeqId, refSeqStart, refSeqSpan, numRecords (ITF8
 * each); recordCounter (LTF8 when majorVersion >= 3, ITF8 when majorVersion
 * is 2, otherwise not read and left at 0 with a console warning); numBlocks,
 * numContentIds (ITF8 each); that many content ids (ITF8 each);
 * refBaseBlockId (ITF8); and, when majorVersion >= 2, 16 bytes of md5.
 *
 * @param {number} majorVersion major version of the CRAM file
 * @returns {{parser: Function, maxLength: Function}} `parser(buffer, offset)`
 *   returns `{ value, offset }`; `maxLength(numContentIds)` is the fixed upper
 *   bound plus 5 bytes per content id.
 */
function cramMappedSliceHeader(majorVersion) {
  // 4 leading ITF8s + recordCounter + 3 more ITF8s + md5
  const fixedMaxLength = 5 * 4 + 9 + 5 * 3 + 16;

  // Pick the integer reader for recordCounter once; undefined means the
  // field is not present for this version.
  let readRecordCounter;
  if (majorVersion >= 3) {
    readRecordCounter = parseLtf8;
  } else if (majorVersion === 2) {
    readRecordCounter = parseItf8;
  }

  const parser = (buffer, offset) => {
    let pos = offset;
    // Reads one ITF8 value at the cursor and advances past it.
    const nextItf8 = () => {
      const [value, bytesRead] = parseItf8(buffer, pos);
      pos += bytesRead;
      return value;
    };

    const refSeqId = nextItf8();
    const refSeqStart = nextItf8();
    const refSeqSpan = nextItf8();
    const numRecords = nextItf8();

    let recordCounter = 0;
    if (readRecordCounter) {
      const [counter, bytesRead] = readRecordCounter(buffer, pos);
      pos += bytesRead;
      recordCounter = counter;
    } else {
      console.warn('majorVersion is <2, recordCounter set to 0');
    }

    const numBlocks = nextItf8();
    const numContentIds = nextItf8();
    const contentIds = [];
    while (contentIds.length < numContentIds) {
      contentIds.push(nextItf8());
    }
    const refBaseBlockId = nextItf8();

    // the md5 sum is missing in cram v1
    let md5;
    if (majorVersion >= 2) {
      md5 = Array.from(buffer.subarray(pos, pos + 16));
      pos += 16;
    }

    return {
      value: {
        md5,
        numBlocks,
        numRecords,
        numContentIds,
        refSeqSpan,
        refSeqId,
        refSeqStart,
        recordCounter,
        refBaseBlockId,
        contentIds,
      },
      offset: pos,
    };
  };

  return {
    parser,
    maxLength: (numContentIds) => fixedMaxLength + numContentIds * 5,
  };
}
|
|
345
|
+
/**
 * Wrap `cramEncodingSub` in the `{ parser }` shape used by the other section
 * parsers in this file.
 *
 * @returns {{parser: Function}} `parser(buffer, offset)` forwards both
 *   arguments to `cramEncodingSub` and returns its result.
 */
function cramEncoding() {
  const parser = (buffer, offset) => cramEncodingSub(buffer, offset);
  return { parser };
}
|
|
350
|
+
/**
 * Parse one encoding description starting at `offset`.
 *
 * An encoding is an ITF8 codec id, an ITF8 byte count for the parameters
 * (returned as `parametersBytes`, not otherwise used here), and then
 * codec-specific parameters:
 *   0 NULL            - none
 *   1 EXTERNAL        - blockContentId
 *   2 GOLOMB          - offset, M
 *   3 HUFFMAN_INT     - numCodes + symbols[], numLengths + bitLengths[]
 *   4 BYTE_ARRAY_LEN  - two nested encodings (lengths, values)
 *   5 BYTE_ARRAY_STOP - stopByte (1 byte), blockContentId
 *   6 BETA            - offset, length
 *   7 SUBEXP          - offset, K
 *   8 GOLOMB_RICE     - offset, log2m
 *   9 GAMMA           - offset
 * All integers are ITF8 unless noted. `parseItf8` is used here as returning
 * `[value, bytesRead]`.
 *
 * @param {Uint8Array} buffer bytes containing the encoding
 * @param {number} offset index of the first byte of the encoding
 * @returns {{value: {codecId: number, parametersBytes: number,
 *   parameters: object}, offset: number}} the parsed encoding and the offset
 *   of the first byte after it
 * @throws {Error} when the codec id is not one of 0-9
 */
function cramEncodingSub(buffer, offset) {
  const dataView = new DataView(buffer.buffer, buffer.byteOffset, buffer.length);
  let pos = offset;

  // Reads one ITF8 value at the cursor and advances past it.
  const nextItf8 = () => {
    const [value, bytesRead] = parseItf8(buffer, pos);
    pos += bytesRead;
    return value;
  };
  // Reads `count` consecutive ITF8 values.
  const nextItf8Array = (count) => {
    const items = [];
    for (let i = 0; i < count; i++) {
      items.push(nextItf8());
    }
    return items;
  };

  const codecId = nextItf8();
  const parametersBytes = nextItf8();
  const parameters = {};

  switch (codecId) {
    case 0:
      // NULL
      break;
    case 1:
      // EXTERNAL
      parameters.blockContentId = nextItf8();
      break;
    case 2:
      // GOLOMB
      parameters.offset = nextItf8();
      parameters.M = nextItf8();
      break;
    case 3: {
      // HUFFMAN_INT
      const numCodes = nextItf8();
      parameters.symbols = nextItf8Array(numCodes);
      // The bit-length array carries its own count. It must be taken from
      // the value just read, not from numCodes, or a header whose two counts
      // differ would be mis-parsed from here on.
      const numLengths = nextItf8();
      parameters.numLengths = numLengths;
      parameters.numCodes = numCodes;
      parameters.bitLengths = nextItf8Array(numLengths);
      break;
    }
    case 4: {
      // BYTE_ARRAY_LEN: two nested encodings; each reports an absolute offset
      const lengths = cramEncodingSub(buffer, pos);
      parameters.lengthsEncoding = lengths.value;
      pos = lengths.offset;
      const values = cramEncodingSub(buffer, pos);
      parameters.valuesEncoding = values.value;
      pos = values.offset;
      break;
    }
    case 5:
      // BYTE_ARRAY_STOP
      parameters.stopByte = dataView.getUint8(pos);
      pos += 1;
      parameters.blockContentId = nextItf8();
      break;
    case 6:
      // BETA
      parameters.offset = nextItf8();
      parameters.length = nextItf8();
      break;
    case 7:
      // SUBEXP
      parameters.offset = nextItf8();
      parameters.K = nextItf8();
      break;
    case 8:
      // GOLOMB_RICE
      parameters.offset = nextItf8();
      parameters.log2m = nextItf8();
      break;
    case 9:
      // GAMMA
      parameters.offset = nextItf8();
      break;
    default:
      throw new Error(`unknown codecId ${codecId}`);
  }

  return {
    value: {
      codecId,
      parametersBytes,
      parameters,
    },
    offset: pos,
  };
}
|
|
464
|
+
/**
 * Build the section parser for the data series encoding map.
 *
 * The map is two ITF8 integers (`mapSize`, `mapCount`) followed by `mapCount`
 * entries, each a two-character key and an encoding parsed by
 * `cramEncodingSub`.
 *
 * @returns {{parser: Function}} `parser(buffer, offset)` returns
 *   `{ value: { mapSize, ents, mapCount }, offset }`.
 */
function cramDataSeriesEncodingMap() {
  const parser = (buffer, offset) => {
    let pos = offset;

    const [mapSize, mapSizeBytes] = parseItf8(buffer, pos);
    pos += mapSizeBytes;
    const [mapCount, mapCountBytes] = parseItf8(buffer, pos);
    pos += mapCountBytes;

    const ents = [];
    for (let entry = 0; entry < mapCount; entry++) {
      const key = String.fromCharCode(buffer[pos], buffer[pos + 1]);
      const encoding = cramEncodingSub(buffer, pos + 2);
      // cramEncodingSub reports an absolute offset.
      pos = encoding.offset;
      ents.push({ key, value: encoding.value });
    }

    return {
      value: {
        mapSize,
        ents,
        mapCount,
      },
      offset: pos,
    };
  };
  return { parser };
}
|
|
491
|
+
/**
 * Build the section parser for the tag encoding map.
 *
 * The map is two ITF8 integers (`mapSize`, `mapCount`) followed by `mapCount`
 * entries. Each entry is an ITF8 integer whose low three bytes are turned
 * into a 3-character key (most significant of the three first), followed by
 * an encoding parsed by `cramEncodingSub`.
 *
 * @returns {{parser: Function}} `parser(buffer, offset)` returns
 *   `{ value: { mapSize, ents, mapCount }, offset }`.
 */
function cramTagEncodingMap() {
  const parser = (buffer, offset) => {
    let pos = offset;

    const [mapSize, mapSizeBytes] = parseItf8(buffer, pos);
    pos += mapSizeBytes;
    const [mapCount, mapCountBytes] = parseItf8(buffer, pos);
    pos += mapCountBytes;

    const ents = [];
    for (let entry = 0; entry < mapCount; entry++) {
      const [packedKey, packedKeyBytes] = parseItf8(buffer, pos);
      pos += packedKeyBytes;
      const key = String.fromCharCode(
        (packedKey >> 16) & 0xff,
        (packedKey >> 8) & 0xff,
        packedKey & 0xff,
      );
      const encoding = cramEncodingSub(buffer, pos);
      // cramEncodingSub reports an absolute offset.
      pos = encoding.offset;
      ents.push({ key, value: encoding.value });
    }

    return {
      value: {
        mapSize,
        ents,
        mapCount,
      },
      offset: pos,
    };
  };
  return { parser };
}
|
|
520
|
+
/**
 * Build the section parser for the compression header.
 *
 * The header is three maps read back to back: the preservation map, the data
 * series encoding map and the tag encoding map. Each parsed map is passed
 * through `formatMap` before being returned.
 *
 * @returns {{parser: Function}} `parser(buffer, offset)` returns
 *   `{ value: { dataSeriesEncoding, preservation, tagEncoding }, offset }`.
 */
function cramCompressionHeader() {
  const parser = (buffer, offset) => {
    // TODO: if we want to support CRAM v1, we will need to refactor
    // compression header into 2 parts to parse the landmarks, like the
    // container header

    // Each map parser reports the absolute offset where the next one starts.
    const preservationMap = cramPreservationMap().parser(buffer, offset);
    const dataSeriesMap = cramDataSeriesEncodingMap().parser(
      buffer,
      preservationMap.offset,
    );
    const tagMap = cramTagEncodingMap().parser(buffer, dataSeriesMap.offset);

    return {
      value: {
        dataSeriesEncoding: formatMap(dataSeriesMap.value),
        preservation: formatMap(preservationMap.value),
        tagEncoding: formatMap(tagMap.value),
      },
      offset: tagMap.offset,
    };
  };
  return { parser };
}
|
|
543
|
+
/**
 * Build the section parser for the first part of a container header.
 *
 * Fields, in file order: length (little-endian signed 32-bit); refSeqId,
 * refSeqStart, alignmentSpan, numRecords (ITF8 each); recordCounter (LTF8
 * when majorVersion >= 3, ITF8 when majorVersion is 2, otherwise not read and
 * left at 0 with a console warning); numBases (LTF8, only when
 * majorVersion > 1, otherwise undefined); numBlocks and numLandmarks (ITF8).
 *
 * @param {number} majorVersion major version of the CRAM file
 * @returns {{maxLength: number, parser: Function}} `parser(buffer, offset)`
 *   returns `{ value, offset }`.
 */
function cramContainerHeader1(majorVersion) {
  // length + 4 ITF8s + recordCounter + numBases + (numBlocks, numLandmarks)
  const maxLength = 4 + 5 * 4 + 9 + 9 + (5 + 5);

  // Pick the integer reader for recordCounter once; undefined means the
  // field is not present for this version.
  let readRecordCounter;
  if (majorVersion >= 3) {
    readRecordCounter = parseLtf8;
  } else if (majorVersion === 2) {
    readRecordCounter = parseItf8;
  }

  const parser = (buffer, offset) => {
    const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length);
    let pos = offset;
    // Reads one ITF8 value at the cursor and advances past it.
    const nextItf8 = () => {
      const [value, bytesRead] = parseItf8(buffer, pos);
      pos += bytesRead;
      return value;
    };

    // byte size of the container data (blocks)
    const length = view.getInt32(pos, true);
    pos += 4;

    // reference sequence identifier, -1 for unmapped reads, -2 for multiple
    // reference sequences
    const refSeqId = nextItf8();
    const refSeqStart = nextItf8();
    const alignmentSpan = nextItf8();
    const numRecords = nextItf8();

    let recordCounter = 0;
    if (readRecordCounter) {
      const [counter, bytesRead] = readRecordCounter(buffer, pos);
      recordCounter = counter;
      pos += bytesRead;
    } else {
      console.warn('setting recordCounter=0');
    }

    let numBases;
    if (majorVersion > 1) {
      const [bases, bytesRead] = parseLtf8(buffer, pos);
      numBases = bases;
      pos += bytesRead;
    }

    const numBlocks = nextItf8();
    const numLandmarks = nextItf8();

    return {
      value: {
        length,
        refSeqId,
        refSeqStart,
        alignmentSpan,
        numBlocks,
        numLandmarks,
        numBases,
        recordCounter,
        numRecords,
      },
      offset: pos,
    };
  };

  return { maxLength, parser };
}
|
|
608
|
+
/**
 * Build the section parser for the second part of a container header.
 *
 * Fields, in file order: numLandmarks (ITF8); that many landmarks (ITF8
 * each); and, when majorVersion >= 3, a little-endian unsigned 32-bit crc32.
 * The `crc32` key is only present in the result when it was read.
 *
 * @param {number} majorVersion major version of the CRAM file
 * @returns {{parser: Function, maxLength: Function}} `parser(buffer, offset)`
 *   returns `{ value, offset }`; `maxLength(numLandmarks)` is an upper bound
 *   on the section size in bytes.
 */
function cramContainerHeader2(majorVersion) {
  const parser = (buffer, offset) => {
    const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length);
    let pos = offset;
    // Reads one ITF8 value at the cursor and advances past it.
    const nextItf8 = () => {
      const [value, bytesRead] = parseItf8(buffer, pos);
      pos += bytesRead;
      return value;
    };

    const numLandmarks = nextItf8();
    const landmarks = [];
    while (landmarks.length < numLandmarks) {
      landmarks.push(nextItf8());
    }

    // crc32 goes in first so it stays the leading key when present.
    const value = {};
    if (majorVersion >= 3) {
      value.crc32 = view.getUint32(pos, true);
      pos += 4;
    }
    value.numLandmarks = numLandmarks;
    value.landmarks = landmarks;

    return { value, offset: pos };
  };

  return {
    parser,
    // count + 5 bytes per landmark + crc32
    maxLength: (numLandmarks) => 5 + 5 * numLandmarks + 4,
  };
}
|
|
638
|
+
/**
 * Assemble every section parser for a CRAM file of the given major version.
 *
 * Parsers whose layout does not depend on the version are built without
 * arguments; the slice-header and container-header parsers receive
 * `majorVersion`.
 *
 * @param {number} majorVersion major version of the CRAM file
 * @returns {object} section parsers keyed by section name
 */
export function getSectionParsers(majorVersion) {
  const versionIndependent = {
    cramFileDefinition: cramFileDefinition(),
    cramBlockHeader: cramBlockHeader(),
    cramBlockCrc32: cramBlockCrc32(),
    cramDataSeriesEncodingMap: cramDataSeriesEncodingMap(),
    cramTagEncodingMap: cramTagEncodingMap(),
    cramCompressionHeader: cramCompressionHeader(),
    cramEncoding: cramEncoding(),
  };
  const versionDependent = {
    cramUnmappedSliceHeader: cramUnmappedSliceHeader(majorVersion),
    cramMappedSliceHeader: cramMappedSliceHeader(majorVersion),
    cramContainerHeader1: cramContainerHeader1(majorVersion),
    cramContainerHeader2: cramContainerHeader2(majorVersion),
  };
  return { ...versionIndependent, ...versionDependent };
}
|
|
335
|
-
export { cramFileDefinition, getSectionParsers };
|
|
336
653
|
//# sourceMappingURL=sectionParsers.js.map
|