@gmod/cram 2.0.4 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/dist/cram-bundle.js +1 -1
- package/dist/cramFile/codecs/byteArrayLength.js +1 -1
- package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/dist/cramFile/codecs/byteArrayStop.js +1 -1
- package/dist/cramFile/codecs/external.js +1 -1
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/codecs/huffman.js +3 -2
- package/dist/cramFile/codecs/huffman.js.map +1 -1
- package/dist/cramFile/codecs/subexp.js.map +1 -1
- package/dist/cramFile/container/compressionScheme.d.ts +0 -3
- package/dist/cramFile/container/compressionScheme.js +0 -4
- package/dist/cramFile/container/compressionScheme.js.map +1 -1
- package/dist/cramFile/container/index.d.ts +57 -3
- package/dist/cramFile/container/index.js +21 -12
- package/dist/cramFile/container/index.js.map +1 -1
- package/dist/cramFile/file.d.ts +25 -59
- package/dist/cramFile/file.js +33 -37
- package/dist/cramFile/file.js.map +1 -1
- package/dist/cramFile/record.d.ts +1 -1
- package/dist/cramFile/record.js +2 -2
- package/dist/cramFile/record.js.map +1 -1
- package/dist/cramFile/sectionParsers.d.ts +195 -48
- package/dist/cramFile/sectionParsers.js +621 -303
- package/dist/cramFile/sectionParsers.js.map +1 -1
- package/dist/cramFile/slice/decodeRecord.js +5 -4
- package/dist/cramFile/slice/decodeRecord.js.map +1 -1
- package/dist/cramFile/slice/index.d.ts +23 -1
- package/dist/cramFile/slice/index.js +11 -8
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/cramFile/util.d.ts +6 -4
- package/dist/cramFile/util.js +88 -6
- package/dist/cramFile/util.js.map +1 -1
- package/dist/rans/d04.js.map +1 -1
- package/dist/rans/decoding.d.ts +4 -4
- package/dist/rans/decoding.js +5 -6
- package/dist/rans/decoding.js.map +1 -1
- package/dist/rans/index.js +4 -3
- package/dist/rans/index.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayStop.js +1 -1
- package/esm/cramFile/codecs/external.js +1 -1
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/codecs/huffman.js +3 -2
- package/esm/cramFile/codecs/huffman.js.map +1 -1
- package/esm/cramFile/codecs/subexp.js.map +1 -1
- package/esm/cramFile/container/compressionScheme.d.ts +0 -3
- package/esm/cramFile/container/compressionScheme.js +0 -4
- package/esm/cramFile/container/compressionScheme.js.map +1 -1
- package/esm/cramFile/container/index.d.ts +57 -3
- package/esm/cramFile/container/index.js +19 -10
- package/esm/cramFile/container/index.js.map +1 -1
- package/esm/cramFile/file.d.ts +25 -59
- package/esm/cramFile/file.js +27 -29
- package/esm/cramFile/file.js.map +1 -1
- package/esm/cramFile/record.d.ts +1 -1
- package/esm/cramFile/record.js +2 -2
- package/esm/cramFile/record.js.map +1 -1
- package/esm/cramFile/sectionParsers.d.ts +195 -48
- package/esm/cramFile/sectionParsers.js +620 -303
- package/esm/cramFile/sectionParsers.js.map +1 -1
- package/esm/cramFile/slice/decodeRecord.js +5 -4
- package/esm/cramFile/slice/decodeRecord.js.map +1 -1
- package/esm/cramFile/slice/index.d.ts +23 -1
- package/esm/cramFile/slice/index.js +12 -9
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/cramFile/util.d.ts +6 -4
- package/esm/cramFile/util.js +87 -6
- package/esm/cramFile/util.js.map +1 -1
- package/esm/rans/d04.js.map +1 -1
- package/esm/rans/decoding.d.ts +4 -4
- package/esm/rans/decoding.js +5 -6
- package/esm/rans/decoding.js.map +1 -1
- package/esm/rans/index.js +3 -2
- package/esm/rans/index.js.map +1 -1
- package/package.json +7 -8
- package/src/cramFile/codecs/byteArrayLength.ts +1 -2
- package/src/cramFile/codecs/byteArrayStop.ts +1 -1
- package/src/cramFile/codecs/external.ts +1 -1
- package/src/cramFile/codecs/huffman.ts +3 -2
- package/src/cramFile/codecs/subexp.ts +2 -2
- package/src/cramFile/container/compressionScheme.ts +1 -8
- package/src/cramFile/container/index.ts +23 -12
- package/src/cramFile/declare.d.ts +1 -0
- package/src/cramFile/file.ts +37 -53
- package/src/cramFile/record.ts +4 -7
- package/src/cramFile/sectionParsers.ts +668 -390
- package/src/cramFile/slice/decodeRecord.ts +20 -12
- package/src/cramFile/slice/index.ts +13 -7
- package/src/cramFile/util.ts +91 -92
- package/src/rans/d04.ts +1 -1
- package/src/rans/decoding.ts +5 -7
- package/src/rans/index.ts +3 -2
- package/src/typings/binary-parser.d.ts +0 -44
|
@@ -1,117 +1,160 @@
|
|
|
1
|
-
import { Parser } from '@gmod/binary-parser'
|
|
2
1
|
import { TupleOf } from '../typescript'
|
|
3
|
-
import {
|
|
2
|
+
import { parseItf8, parseLtf8 } from './util'
|
|
4
3
|
import { DataSeriesEncodingMap } from './codecs/dataSeriesTypes'
|
|
5
4
|
import { CramEncoding } from './encoding'
|
|
6
5
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
6
|
+
/**
 * Builds the section parser for the fixed-size CRAM file definition.
 *
 * The parser reads, from the very start of the buffer: a 4-byte magic string,
 * one byte each for the major and minor version, and a 20-byte file id whose
 * NUL characters are stripped. The second parser argument is accepted but not
 * used; parsing always begins at byte 0.
 *
 * @returns an object holding the `parser` and the section's `maxLength` (26)
 */
export function cramFileDefinition() {
  const parser = (buffer: Buffer, _startOffset = 0) => {
    const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length)

    const magic = buffer.subarray(0, 4).toString()
    const majorVersion = view.getUint8(4)
    const minorVersion = view.getUint8(5)
    // the id field is NUL-padded; drop every NUL character
    const fileId = buffer.subarray(6, 26).toString().replaceAll('\0', '')

    return {
      value: { magic, majorVersion, minorVersion, fileId },
      // 4 (magic) + 1 + 1 (versions) + 20 (file id)
      offset: 26,
    }
  }

  return { parser, maxLength: 26 }
}
|
|
17
|
-
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
.
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
.
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
36
|
+
/**
 * Builds the section parser for a CRAM block header.
 *
 * The parser reads, from the start of the buffer: one byte selecting the
 * compression method, one byte selecting the content type, then three ITF8
 * integers (content id, compressed size, uncompressed size). The second parser
 * argument is accepted but not used; parsing always begins at byte 0.
 *
 * @returns an object holding the `parser` and the section's `maxLength` (17)
 * @throws Error from the parser when either selector byte has no table entry
 */
export function cramBlockHeader() {
  // lookup tables indexed by the raw selector bytes
  const compressionMethodNames = [
    'raw',
    'gzip',
    'bzip2',
    'lzma',
    'rans',
    'rans4x16',
    'arith',
    'fqzcomp',
    'tok3',
  ]
  const contentTypeNames = [
    'FILE_HEADER',
    'COMPRESSION_HEADER',
    'MAPPED_SLICE_HEADER',
    'UNMAPPED_SLICE_HEADER', // < only used in cram v1
    'EXTERNAL_DATA',
    'CORE_DATA',
  ]

  const parser = (buffer: Buffer, _startOffset = 0) => {
    const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length)

    const methodId = view.getUint8(0)
    const compressionMethod = compressionMethodNames[methodId]
    if (!compressionMethod) {
      throw new Error(`compression method number ${methodId} not implemented`)
    }

    const typeId = view.getUint8(1)
    const contentType = contentTypeNames[typeId]
    if (!contentType) {
      throw new Error(`invalid block content type id ${typeId}`)
    }

    // cursor sits just past the two selector bytes
    let offset = 2
    // reads one ITF8 integer at the cursor and advances past it
    const nextItf8 = () => {
      const [val, width] = parseItf8(buffer, offset)
      offset += width
      return val
    }
    const contentId = nextItf8()
    const compressedSize = nextItf8()
    const uncompressedSize = nextItf8()

    return {
      offset,
      value: {
        uncompressedSize,
        compressedSize,
        contentId,
        contentType: contentType as
          | 'FILE_HEADER'
          | 'COMPRESSION_HEADER'
          | 'MAPPED_SLICE_HEADER'
          | 'UNMAPPED_SLICE_HEADER' // < only used in cram v1
          | 'EXTERNAL_DATA'
          | 'CORE_DATA',
        compressionMethod: compressionMethod as CompressionMethod,
      },
    }
  }

  return { parser, maxLength: 17 }
}
|
|
60
97
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
98
|
+
/**
 * Builds the section parser for a block's CRC32 trailer.
 *
 * The parser reads a little-endian unsigned 32-bit integer at `offset` and
 * reports the offset advanced by four bytes.
 *
 * @returns an object holding the `parser` and the section's `maxLength` (4)
 */
export function cramBlockCrc32() {
  const parser = (buffer: Buffer, offset: number) => {
    const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length)
    const crc32 = view.getUint32(offset, true)
    return {
      offset: offset + 4,
      value: { crc32 },
    }
  }

  return { parser, maxLength: 4 }
}
|
|
65
115
|
|
|
66
|
-
// const ENCODING_NAMES = [
|
|
67
|
-
// 'NULL', // 0
|
|
68
|
-
// 'EXTERNAL', // 1
|
|
69
|
-
// 'GOLOMB', // 2
|
|
70
|
-
// 'HUFFMAN_INT', // 3
|
|
71
|
-
// 'BYTE_ARRAY_LEN', // 4
|
|
72
|
-
// 'BYTE_ARRAY_STOP', // 5
|
|
73
|
-
// 'BETA', // 6
|
|
74
|
-
// 'SUBEXP', // 7
|
|
75
|
-
// 'GOLOMB_RICE', // 8
|
|
76
|
-
// 'GAMMA', // 9
|
|
77
|
-
// ]
|
|
78
|
-
|
|
79
116
|
export type CramTagDictionary = string[][]
|
|
80
117
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
}
|
|
90
|
-
return tags
|
|
91
|
-
}
|
|
118
|
+
/**
 * Decodes `buffer[stringStart, stringEnd)` as UTF-8 and cuts the text into
 * consecutive three-character entries. A trailing remainder shorter than
 * three characters is kept as a final, shorter entry.
 *
 * @param buffer - bytes holding the tag list
 * @param stringStart - index of the first byte to decode
 * @param stringEnd - index one past the last byte to decode
 * @returns the entries in order of appearance
 */
function makeTagSet(buffer: Buffer, stringStart: number, stringEnd: number) {
  const text = buffer.toString('utf8', stringStart, stringEnd)
  const entryCount = Math.ceil(text.length / 3)
  return Array.from({ length: entryCount }, (_unused, entry) =>
    text.slice(entry * 3, entry * 3 + 3),
  )
}
|
|
92
126
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
127
|
+
/**
 * Builds the section parser for the tag dictionary.
 *
 * The parser reads an ITF8 byte count, takes that many following bytes, and
 * splits them at every zero byte. Each piece is handed to `makeTagSet`; a
 * non-empty piece after the last zero byte is included as well.
 *
 * @returns an object holding the `parser`; its result carries the byte count
 *   as `size`, the tag sets as `ents`, and the offset just past the dictionary
 */
export function cramTagDictionary() {
  const parser = (buffer: Buffer, offset: number) => {
    const [size, sizeWidth] = parseItf8(buffer, offset)
    const dictStart = offset + sizeWidth
    const dictEnd = dictStart + size
    const dict = buffer.subarray(dictStart, dictEnd)

    const ents: string[][] = []
    let pieceStart = 0
    // every zero byte terminates one tag set
    for (
      let nul = dict.indexOf(0, pieceStart);
      nul !== -1;
      nul = dict.indexOf(0, pieceStart)
    ) {
      ents.push(makeTagSet(dict, pieceStart, nul))
      pieceStart = nul + 1
    }
    // bytes after the final zero byte form one more, unterminated tag set
    if (dict.length > pieceStart) {
      ents.push(makeTagSet(dict, pieceStart, dict.length))
    }

    return {
      value: { size, ents },
      offset: dictEnd,
    }
  }

  return { parser }
}
|
|
115
158
|
|
|
116
159
|
export interface CramPreservationMap {
|
|
117
160
|
MI: boolean
|
|
@@ -124,38 +167,72 @@ export interface CramPreservationMap {
|
|
|
124
167
|
TD: CramTagDictionary
|
|
125
168
|
}
|
|
126
169
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
170
|
+
/**
 * Builds the section parser for the preservation map.
 *
 * The parser reads two ITF8 integers (map size and entry count) and then one
 * entry per count. Every entry starts with a two-character key that selects
 * how its value is read:
 *  - `MI`, `UI`, `PI`, `RN`, `AP`, `RR`: one byte, converted to a boolean
 *  - `SM`: five bytes, kept as an array of numbers
 *  - `TD`: a tag dictionary, parsed by `cramTagDictionary`
 *
 * @returns an object holding the `parser`
 * @throws Error from the parser when an entry has any other key
 */
export function cramPreservationMap() {
  const parser = (buffer: Buffer, offset: number) => {
    const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length)

    const [mapSize, mapSizeWidth] = parseItf8(buffer, offset)
    offset += mapSizeWidth
    const [mapCount, mapCountWidth] = parseItf8(buffer, offset)
    offset += mapCountWidth

    const ents = []
    for (let entry = 0; entry < mapCount; entry++) {
      const key = String.fromCharCode(buffer[offset], buffer[offset + 1])
      offset += 2

      switch (key) {
        case 'MI':
        case 'UI':
        case 'PI':
        case 'RN':
        case 'AP':
        case 'RR': {
          // single-byte flag
          ents.push({ key, value: !!view.getUint8(offset) })
          offset += 1
          break
        }
        case 'SM': {
          // five raw bytes
          const value = [0, 1, 2, 3, 4].map(delta =>
            view.getUint8(offset + delta),
          )
          ents.push({ key, value })
          offset += 5
          break
        }
        case 'TD': {
          const dictionary = cramTagDictionary().parser(buffer, offset)
          ents.push({ key, value: dictionary.value.ents })
          offset = dictionary.offset
          break
        }
        default:
          throw new Error(`unknown key ${key}`)
      }
    }

    return {
      value: { mapSize, mapCount, ents },
      offset,
    }
  }

  return { parser }
}
|
|
155
233
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
const map: Record<string, T> = {}
|
|
234
|
+
function formatMap(data: { ents: { key: string; value: unknown }[] }) {
|
|
235
|
+
const map: Record<string, unknown> = {}
|
|
159
236
|
for (const { key, value } of data.ents) {
|
|
160
237
|
if (map[key]) {
|
|
161
238
|
console.warn(`duplicate key ${key} in map`)
|
|
@@ -165,12 +242,6 @@ function formatMap<T>(data: { ents: { key: string; value: T }[] }) {
|
|
|
165
242
|
return map
|
|
166
243
|
}
|
|
167
244
|
|
|
168
|
-
const unversionedParsers = {
|
|
169
|
-
cramFileDefinition,
|
|
170
|
-
cramBlockHeader,
|
|
171
|
-
cramBlockCrc32,
|
|
172
|
-
}
|
|
173
|
-
|
|
174
245
|
export interface MappedSliceHeader {
|
|
175
246
|
refSeqId: number
|
|
176
247
|
refSeqStart: number
|
|
@@ -181,7 +252,7 @@ export interface MappedSliceHeader {
|
|
|
181
252
|
numContentIds: number
|
|
182
253
|
contentIds: number[]
|
|
183
254
|
refBaseBlockId: number
|
|
184
|
-
md5
|
|
255
|
+
md5?: TupleOf<number, 16>
|
|
185
256
|
}
|
|
186
257
|
|
|
187
258
|
export interface UnmappedSliceHeader {
|
|
@@ -190,227 +261,477 @@ export interface UnmappedSliceHeader {
|
|
|
190
261
|
numBlocks: number
|
|
191
262
|
numContentIds: number
|
|
192
263
|
contentIds: number[]
|
|
193
|
-
md5
|
|
264
|
+
md5?: TupleOf<number, 16>
|
|
194
265
|
}
|
|
195
266
|
|
|
196
267
|
/**
 * Type guard telling mapped slice headers apart from other values: a value is
 * treated as a mapped slice header when it carries a numeric `refSeqId`.
 *
 * `null` and `undefined` are rejected instead of raising a TypeError, which
 * matters because the parameter is typed `unknown`.
 *
 * @param header - the value to test
 * @returns true when `header.refSeqId` is a number
 */
export function isMappedSliceHeader(
  header: unknown,
): header is MappedSliceHeader {
  return (
    header != null &&
    typeof (header as { refSeqId?: unknown }).refSeqId === 'number'
  )
}
|
|
201
272
|
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
273
|
+
/** One parsed encoding descriptor: a codec id plus that codec's parameters. */
interface Value {
  /** numeric id selecting the codec */
  codecId: number
  /** byte count stored alongside the codec id for its parameters */
  parametersBytes: number
  /** codec-specific parameters, keyed by parameter name */
  parameters: Record<string, unknown>
}
|
|
278
|
+
/**
 * Builds the section parser for the unmapped slice header. The layout varies
 * slightly with the major version of the CRAM file:
 *  - the record counter is LTF8 for version 3 and above, ITF8 for version 2,
 *    and absent otherwise (it stays 0 and a warning is logged)
 *  - the 16-byte md5 is read only for version 2 and above
 *
 * @param majorVersion - major version of the CRAM file being read
 * @returns an object holding the `parser` and a `maxLength` function that
 *   maps the number of content ids to the header's maximum byte length
 */
function cramUnmappedSliceHeader(majorVersion: number) {
  // presumably: numRecords (5) + record counter (9) + numBlocks and
  // numContentIds (5 * 2) + md5 (16) — the original sums were unlabelled
  const fixedMaxLength = 5 + 9 + 5 * 2 + 16

  const parser = (buffer: Buffer, offset: number) => {
    // reads one ITF8 integer at the cursor and advances past it
    const nextItf8 = () => {
      const [val, width] = parseItf8(buffer, offset)
      offset += width
      return val
    }

    const numRecords = nextItf8()

    let recordCounter = 0
    if (majorVersion >= 3) {
      const [counter, width] = parseLtf8(buffer, offset)
      offset += width
      recordCounter = counter
    } else if (majorVersion === 2) {
      recordCounter = nextItf8()
    } else {
      console.warn('recordCounter=0')
    }

    const numBlocks = nextItf8()
    const numContentIds = nextItf8()
    const contentIds: number[] = []
    while (contentIds.length < numContentIds) {
      contentIds.push(nextItf8())
    }

    // the md5 sum is missing in cram v1
    let md5: TupleOf<number, 16> | undefined
    if (majorVersion >= 2) {
      md5 = Array.from(buffer.subarray(offset, offset + 16)) as TupleOf<
        number,
        16
      >
      offset += 16
    }

    return {
      value: {
        recordCounter,
        md5,
        contentIds,
        numContentIds,
        numBlocks,
        numRecords,
      },
      offset,
    }
  }

  return {
    parser,
    // each content id may take up to 5 bytes
    maxLength: (numContentIds: number) => fixedMaxLength + numContentIds * 5,
  }
}
|
|
237
340
|
|
|
238
|
-
|
|
239
|
-
|
|
341
|
+
/**
 * Builds the section parser for the mapped slice header. The layout varies
 * slightly with the major version of the CRAM file:
 *  - the record counter is LTF8 for version 3 and above, ITF8 for version 2,
 *    and absent otherwise (it stays 0 and a warning is logged)
 *  - the 16-byte md5 is read only for version 2 and above
 *
 * @param majorVersion - major version of the CRAM file being read
 * @returns an object holding the `parser` and a `maxLength` function that
 *   maps the number of content ids to the header's maximum byte length
 */
function cramMappedSliceHeader(majorVersion: number) {
  const fixedMaxLength =
    5 * 4 + // refSeqId, refSeqStart, refSeqSpan, numRecords
    9 + // record counter
    5 * 3 + // remaining fixed ITF8 fields
    16 // md5

  const parser = (buffer: Buffer, offset: number) => {
    // reads one ITF8 integer at the cursor and advances past it
    const nextItf8 = () => {
      const [val, width] = parseItf8(buffer, offset)
      offset += width
      return val
    }

    const refSeqId = nextItf8()
    const refSeqStart = nextItf8()
    const refSeqSpan = nextItf8()
    const numRecords = nextItf8()

    let recordCounter = 0
    if (majorVersion >= 3) {
      const [counter, width] = parseLtf8(buffer, offset)
      offset += width
      recordCounter = counter
    } else if (majorVersion === 2) {
      recordCounter = nextItf8()
    } else {
      console.warn('majorVersion is <2, recordCounter set to 0')
    }

    const numBlocks = nextItf8()
    const numContentIds = nextItf8()
    const contentIds: number[] = []
    while (contentIds.length < numContentIds) {
      contentIds.push(nextItf8())
    }
    const refBaseBlockId = nextItf8()

    // the md5 sum is missing in cram v1
    let md5: TupleOf<number, 16> | undefined
    if (majorVersion >= 2) {
      md5 = Array.from(buffer.subarray(offset, offset + 16)) as TupleOf<
        number,
        16
      >
      offset += 16
    }

    return {
      value: {
        md5,
        numBlocks,
        numRecords,
        numContentIds,
        refSeqSpan,
        refSeqId,
        refSeqStart,
        recordCounter,
        refBaseBlockId,
        contentIds,
      },
      offset,
    }
  }

  return {
    parser,
    // each content id may take up to 5 bytes
    maxLength: (numContentIds: number) => fixedMaxLength + numContentIds * 5,
  }
}
|
|
250
419
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
420
|
+
/**
 * Wraps `cramEncodingSub` in the `{ parser }` shape used by the other section
 * parser factories in this file.
 *
 * @returns an object whose `parser` forwards its buffer and offset to
 *   `cramEncodingSub` and returns that result
 */
function cramEncoding() {
  const parser = (buffer: Buffer, offset: number) =>
    cramEncodingSub(buffer, offset)
  return { parser }
}
|
|
425
|
+
|
|
426
|
+
/**
 * Parses one encoding descriptor starting at `offset`: an ITF8 codec id, an
 * ITF8 parameter byte count, and then the parameters of that codec.
 *
 * The parameter byte count is returned but is not used to bound parsing; each
 * codec branch consumes exactly the fields it knows about. BYTE_ARRAY_LEN
 * holds two nested descriptors and is parsed recursively.
 *
 * @param buffer - bytes containing the descriptor
 * @param offset - index of the descriptor's first byte
 * @returns the parsed descriptor and the offset just past it
 * @throws Error when the codec id is not one of 0 through 9
 */
function cramEncodingSub(
  buffer: Buffer,
  offset: number,
): { value: Value; offset: number } {
  const dataView = new DataView(
    buffer.buffer,
    buffer.byteOffset,
    buffer.length,
  )

  // reads one ITF8 integer at the cursor and advances past it
  const readItf8 = () => {
    const [val, width] = parseItf8(buffer, offset)
    offset += width
    return val
  }
  // reads `count` consecutive ITF8 integers
  const readItf8List = (count: number) => {
    const items: number[] = []
    for (let i = 0; i < count; i++) {
      items.push(readItf8())
    }
    return items
  }

  const codecId = readItf8()
  const parametersBytes = readItf8()
  const parameters: Record<string, unknown> = {}

  switch (codecId) {
    case 0: // NULL
      break
    case 1: // EXTERNAL
      parameters.blockContentId = readItf8()
      break
    case 2: // GOLOMB
      parameters.offset = readItf8()
      parameters.M = readItf8()
      break
    case 3: {
      // HUFFMAN_INT: a counted list of symbols, then a counted list of bit
      // lengths. Each list is sized by its OWN count; the bit-length list
      // used to be sized by the symbol count.
      const numCodes = readItf8()
      parameters.symbols = readItf8List(numCodes)
      const numLengths = readItf8()
      parameters.numLengths = numLengths
      parameters.numCodes = numCodes
      parameters.bitLengths = readItf8List(numLengths)
      break
    }
    case 4: {
      // BYTE_ARRAY_LEN: nested descriptors for the lengths and the values
      const lengths = cramEncodingSub(buffer, offset)
      parameters.lengthsEncoding = lengths.value
      const values = cramEncodingSub(buffer, lengths.offset)
      parameters.valuesEncoding = values.value
      offset = values.offset
      break
    }
    case 5: // BYTE_ARRAY_STOP
      parameters.stopByte = dataView.getUint8(offset)
      offset += 1
      parameters.blockContentId = readItf8()
      break
    case 6: // BETA
      parameters.offset = readItf8()
      parameters.length = readItf8()
      break
    case 7: // SUBEXP
      parameters.offset = readItf8()
      parameters.K = readItf8()
      break
    case 8: // GOLOMB_RICE
      parameters.offset = readItf8()
      parameters.log2m = readItf8()
      break
    case 9: // GAMMA
      parameters.offset = readItf8()
      break
    default:
      throw new Error(`unknown codecId ${codecId}`)
  }

  return {
    value: {
      codecId,
      parametersBytes,
      parameters,
    },
    offset,
  }
}
|
|
268
542
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
543
|
+
/**
 * Builds the section parser for the data series encoding map.
 *
 * The parser reads two ITF8 integers (map size and entry count) and then one
 * entry per count. Each entry is a two-character key followed by an encoding
 * descriptor parsed by `cramEncodingSub`.
 *
 * @returns an object holding the `parser`
 */
function cramDataSeriesEncodingMap() {
  const parser = (buffer: Buffer, offset: number) => {
    const [mapSize, mapSizeWidth] = parseItf8(buffer, offset)
    offset += mapSizeWidth
    const [mapCount, mapCountWidth] = parseItf8(buffer, offset)
    offset += mapCountWidth

    const ents: { key: string; value: Value }[] = []
    for (let entry = 0; entry < mapCount; entry++) {
      const key = String.fromCharCode(buffer[offset], buffer[offset + 1])
      // the descriptor starts right after the two key bytes
      const encoding = cramEncodingSub(buffer, offset + 2)
      offset = encoding.offset
      ents.push({ key, value: encoding.value })
    }

    return {
      value: { mapSize, ents, mapCount },
      offset,
    }
  }

  return { parser }
}
|
|
274
572
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
.nest('valuesEncoding', { type: 'cramEncoding' }),
|
|
301
|
-
// BYTE_ARRAY_STOP is a little different for CRAM v1
|
|
302
|
-
5: new Parser().uint8('stopByte').itf8('blockContentId'),
|
|
303
|
-
6: new Parser().itf8('offset').itf8('length'), // BETA
|
|
304
|
-
7: new Parser().itf8('offset').itf8('K'), // SUBEXP
|
|
305
|
-
8: new Parser().itf8('offset').itf8('log2m'), // GOLOMB_RICE
|
|
306
|
-
9: new Parser().itf8('offset'), // GAMMA
|
|
573
|
+
function cramTagEncodingMap() {
|
|
574
|
+
return {
|
|
575
|
+
parser: (buffer: Buffer, offset: number) => {
|
|
576
|
+
const [mapSize, newOffset1] = parseItf8(buffer, offset)
|
|
577
|
+
offset += newOffset1
|
|
578
|
+
const [mapCount, newOffset2] = parseItf8(buffer, offset)
|
|
579
|
+
offset += newOffset2
|
|
580
|
+
const ents = []
|
|
581
|
+
for (let i = 0; i < mapCount; i++) {
|
|
582
|
+
const [k0, newOffset3] = parseItf8(buffer, offset)
|
|
583
|
+
offset += newOffset3
|
|
584
|
+
const key =
|
|
585
|
+
String.fromCharCode((k0 >> 16) & 0xff) +
|
|
586
|
+
String.fromCharCode((k0 >> 8) & 0xff) +
|
|
587
|
+
String.fromCharCode(k0 & 0xff)
|
|
588
|
+
|
|
589
|
+
const { value, offset: newOffset4 } = cramEncodingSub(buffer, offset)
|
|
590
|
+
offset = newOffset4
|
|
591
|
+
ents.push({ key, value })
|
|
592
|
+
}
|
|
593
|
+
return {
|
|
594
|
+
value: {
|
|
595
|
+
mapSize,
|
|
596
|
+
ents,
|
|
597
|
+
mapCount,
|
|
307
598
|
},
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
cramDataSeriesEncodingMap(majorVersion: number) {
|
|
314
|
-
return new Parser()
|
|
315
|
-
.itf8('mapSize')
|
|
316
|
-
.itf8('mapCount')
|
|
317
|
-
.array('ents', {
|
|
318
|
-
length: 'mapCount',
|
|
319
|
-
type: new Parser()
|
|
320
|
-
.string('key', { length: 2, stripNull: false })
|
|
321
|
-
.nest('value', { type: this.cramEncoding(majorVersion).parser }),
|
|
322
|
-
})
|
|
323
|
-
},
|
|
324
|
-
|
|
325
|
-
cramTagEncodingMap(majorVersion: number) {
|
|
326
|
-
return new Parser()
|
|
327
|
-
.itf8('mapSize')
|
|
328
|
-
.itf8('mapCount')
|
|
329
|
-
.array('ents', {
|
|
330
|
-
length: 'mapCount',
|
|
331
|
-
type: new Parser()
|
|
332
|
-
.itf8('key', {
|
|
333
|
-
formatter: /* istanbul ignore next */ integerRepresentation =>
|
|
334
|
-
/* istanbul ignore next */
|
|
335
|
-
String.fromCharCode((integerRepresentation >> 16) & 0xff) +
|
|
336
|
-
String.fromCharCode((integerRepresentation >> 8) & 0xff) +
|
|
337
|
-
String.fromCharCode(integerRepresentation & 0xff),
|
|
338
|
-
})
|
|
339
|
-
.nest('value', { type: this.cramEncoding(majorVersion).parser }),
|
|
340
|
-
})
|
|
341
|
-
},
|
|
342
|
-
|
|
343
|
-
cramCompressionHeader(majorVersion: number) {
|
|
344
|
-
let parser = new Parser()
|
|
345
|
-
// TODO: if we want to support CRAM v1, we will need to refactor
|
|
346
|
-
// compression header into 2 parts to parse the landmarks,
|
|
347
|
-
// like the container header
|
|
348
|
-
parser = parser
|
|
349
|
-
.nest('preservation', {
|
|
350
|
-
type: cramPreservationMap,
|
|
351
|
-
formatter: formatMap,
|
|
352
|
-
})
|
|
353
|
-
.nest('dataSeriesEncoding', {
|
|
354
|
-
type: this.cramDataSeriesEncodingMap(majorVersion),
|
|
355
|
-
formatter: formatMap,
|
|
356
|
-
})
|
|
357
|
-
.nest('tagEncoding', {
|
|
358
|
-
type: this.cramTagEncodingMap(majorVersion),
|
|
359
|
-
formatter: formatMap,
|
|
360
|
-
})
|
|
361
|
-
return { parser }
|
|
362
|
-
},
|
|
363
|
-
|
|
364
|
-
cramContainerHeader1(majorVersion: number) {
|
|
365
|
-
let parser = new Parser()
|
|
366
|
-
.int32('length') // byte size of the container data (blocks)
|
|
367
|
-
.itf8('refSeqId') // reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences
|
|
368
|
-
.itf8('refSeqStart') // the alignment start position or 0 for unmapped reads
|
|
369
|
-
.itf8('alignmentSpan') // the length of the alignment or 0 for unmapped reads
|
|
370
|
-
.itf8('numRecords') // number of records in the container
|
|
371
|
-
let maxLength = 4 + 5 * 4
|
|
599
|
+
offset,
|
|
600
|
+
}
|
|
601
|
+
},
|
|
602
|
+
}
|
|
603
|
+
}
|
|
372
604
|
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
605
|
+
/**
 * Builds the section parser for the compression header.
 *
 * The parser runs three sub-parsers back to back, each starting where the
 * previous one stopped: the preservation map, the data series encoding map,
 * and the tag encoding map. Each result's `ents` list is folded into a keyed
 * object with `formatMap`.
 *
 * @returns an object holding the `parser`
 */
function cramCompressionHeader() {
  const parser = (buffer: Buffer, offset: number) => {
    // TODO: if we want to support CRAM v1, we will need to refactor
    // compression header into 2 parts to parse the landmarks, like the
    // container header
    const preservationSection = cramPreservationMap().parser(buffer, offset)
    const dataSeriesSection = cramDataSeriesEncodingMap().parser(
      buffer,
      preservationSection.offset,
    )
    const tagSection = cramTagEncodingMap().parser(
      buffer,
      dataSeriesSection.offset,
    )

    return {
      value: {
        dataSeriesEncoding: formatMap(
          dataSeriesSection.value,
        ) as DataSeriesEncodingMap,
        preservation: formatMap(
          preservationSection.value,
        ) as unknown as CramPreservationMap,
        tagEncoding: formatMap(tagSection.value) as Record<
          string,
          CramEncoding
        >,
      },
      offset: tagSection.offset,
    }
  }

  return { parser }
}
|
|
380
638
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
maxLength
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
//
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
639
|
+
/**
 * Builds the parser for the first part of a CRAM container header (everything
 * up to and including the landmark count).
 *
 * @param majorVersion - CRAM major version; decides how `recordCounter` is
 *   read (LTF8 for >= 3, ITF8 for exactly 2, not read otherwise) and whether
 *   `numBases` is read at all (only when > 1)
 * @returns `{ maxLength, parser }`; `parser(buffer, offset)` yields
 *   `{ value, offset }` with `offset` advanced past every field it consumed
 */
function cramContainerHeader1(majorVersion: number) {
  // Size budget for this section: one 4-byte int32, then 5 * 4, 9, 9 and 5 + 5
  // for the variable-length fields. NOTE(review): the terms look like
  // worst-case ITF8 (5 bytes) / LTF8 (9 bytes) widths - confirm against the
  // CRAM spec.
  const maxLength = 4 + 5 * 4 + 9 + 9 + 5 + 5

  return {
    maxLength,
    parser: (buffer: Buffer, offset: number) => {
      const view = new DataView(
        buffer.buffer,
        buffer.byteOffset,
        buffer.length,
      )
      let cursor = offset

      // parseItf8 / parseLtf8 return a [value, n] pair; n is added to the
      // running position after each read
      const readItf8 = () => {
        const [decoded, step] = parseItf8(buffer, cursor)
        cursor += step
        return decoded
      }
      const readLtf8 = () => {
        const [decoded, step] = parseLtf8(buffer, cursor)
        cursor += step
        return decoded
      }

      // byte size of the container data (blocks), little-endian int32
      const length = view.getInt32(cursor, true)
      cursor += 4

      // reference sequence identifier, -1 for unmapped reads, -2 for multiple
      // reference sequences
      const refSeqId = readItf8()
      // the alignment start position or 0 for unmapped reads
      const refSeqStart = readItf8()
      // the length of the alignment or 0 for unmapped reads
      const alignmentSpan = readItf8()
      // number of records in the container
      const numRecords = readItf8()

      let recordCounter = 0
      if (majorVersion >= 3) {
        recordCounter = readLtf8()
      } else if (majorVersion === 2) {
        recordCounter = readItf8()
      } else {
        // nothing is read from the buffer here; the counter stays at 0
        console.warn('setting recordCounter=0')
      }

      // left undefined when majorVersion <= 1
      const numBases: number | undefined =
        majorVersion > 1 ? readLtf8() : undefined

      const numBlocks = readItf8()
      const numLandmarks = readItf8()

      return {
        value: {
          length,
          refSeqId,
          refSeqStart,
          alignmentSpan,
          numBlocks,
          numLandmarks,
          numBases,
          recordCounter,
          numRecords,
        },
        offset: cursor,
      }
    },
  }
}
|
|
704
|
+
|
|
705
|
+
/**
 * Builds the parser for the second part of a CRAM container header: the
 * landmark count, the landmark list and, for major version >= 3, a CRC32.
 *
 * @param majorVersion - CRAM major version; the 4-byte CRC32 is only read
 *   when this is >= 3
 * @returns `{ parser, maxLength }`; `parser(buffer, offset)` yields
 *   `{ value, offset }`, and `maxLength(numLandmarks)` gives the size budget
 *   for a header with that many landmarks
 */
function cramContainerHeader2(majorVersion: number) {
  const parser = (buffer: Buffer, offset: number) => {
    const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length)
    let cursor = offset

    // parseItf8 returns a [value, n] pair; n is added to the running position
    const readItf8 = () => {
      const [decoded, step] = parseItf8(buffer, cursor)
      cursor += step
      return decoded
    }

    const numLandmarks = readItf8()
    const landmarks: number[] = []
    while (landmarks.length < numLandmarks) {
      landmarks.push(readItf8())
    }

    let crc32: number | undefined
    if (majorVersion >= 3) {
      // little-endian unsigned 32-bit checksum
      crc32 = view.getUint32(cursor, true)
      cursor += 4
    }

    // the crc32 key is omitted entirely (not set to undefined) when not read
    const crcField = crc32 === undefined ? {} : { crc32 }

    return {
      value: {
        ...crcField,
        numLandmarks,
        landmarks,
      },
      offset: cursor,
    }
  }

  // 5 for the count, 5 per landmark, plus 4 for the CRC32 (counted
  // regardless of version)
  const maxLength = (numLandmarks: number) => 5 + 5 * numLandmarks + 4

  return { parser, maxLength }
}
|
|
415
736
|
|
|
416
737
|
export type CompressionMethod =
|
|
@@ -438,69 +759,26 @@ export interface BlockHeader {
|
|
|
438
759
|
uncompressedSize: number
|
|
439
760
|
}
|
|
440
761
|
|
|
441
|
-
export
|
|
762
|
+
/**
 * Shape of a decoded CRAM compression header.
 *
 * The three map fields match the keys returned by the `cramCompressionHeader`
 * parser in this file.
 */
export interface CramCompressionHeader {
  /** Preservation map of the compression header. */
  preservation: CramPreservationMap
  /** Encodings keyed by data series. */
  dataSeriesEncoding: DataSeriesEncodingMap
  /** Encodings for tags, keyed by string. */
  tagEncoding: Record<string, CramEncoding>
  // NOTE(review): `_size` and `_endPosition` are not produced by the
  // cramCompressionHeader parser itself; presumably byte-size / end-offset
  // bookkeeping attached by the caller - confirm.
  _size: number
  _endPosition: number
}
|
|
505
769
|
|
|
506
|
-
export
|
|
770
|
+
/**
 * Assembles the full set of CRAM section parsers for one major version.
 *
 * @param majorVersion - CRAM major version, forwarded to the slice-header and
 *   container-header parser factories; the remaining factories take no
 *   arguments
 * @returns an object with one ready-to-use parser descriptor per section
 */
export function getSectionParsers(majorVersion: number) {
  // factories that do not depend on the file version
  const unversioned = {
    cramFileDefinition: cramFileDefinition(),
    cramBlockHeader: cramBlockHeader(),
    cramBlockCrc32: cramBlockCrc32(),
    cramDataSeriesEncodingMap: cramDataSeriesEncodingMap(),
    cramTagEncodingMap: cramTagEncodingMap(),
    cramCompressionHeader: cramCompressionHeader(),
    cramEncoding: cramEncoding(),
  }

  // factories that take the major version
  const versioned = {
    cramUnmappedSliceHeader: cramUnmappedSliceHeader(majorVersion),
    cramMappedSliceHeader: cramMappedSliceHeader(majorVersion),
    cramContainerHeader1: cramContainerHeader1(majorVersion),
    cramContainerHeader2: cramContainerHeader2(majorVersion),
  }

  return { ...unversioned, ...versioned }
}
|