@gmod/cram 4.0.4 → 4.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -51
- package/dist/cram-bundle.js +1 -1
- package/dist/cramFile/codecs/_base.d.ts +1 -1
- package/dist/cramFile/codecs/byteArrayLength.js +3 -2
- package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/dist/cramFile/codecs/external.d.ts +1 -1
- package/dist/cramFile/codecs/external.js +1 -4
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/container/index.d.ts +16 -34
- package/dist/cramFile/container/index.js +5 -20
- package/dist/cramFile/container/index.js.map +1 -1
- package/dist/cramFile/file.d.ts +8 -6
- package/dist/cramFile/file.js +37 -73
- package/dist/cramFile/file.js.map +1 -1
- package/dist/cramFile/record.js.map +1 -1
- package/dist/cramFile/sectionParsers.js +3 -2
- package/dist/cramFile/sectionParsers.js.map +1 -1
- package/dist/cramFile/slice/decodeRecord.d.ts +2 -2
- package/dist/cramFile/slice/decodeRecord.js +7 -4
- package/dist/cramFile/slice/decodeRecord.js.map +1 -1
- package/dist/cramFile/slice/index.js +14 -16
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/cramFile/util.d.ts +5 -0
- package/dist/cramFile/util.js +75 -51
- package/dist/cramFile/util.js.map +1 -1
- package/esm/cramFile/codecs/_base.d.ts +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js +3 -2
- package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/esm/cramFile/codecs/external.d.ts +1 -1
- package/esm/cramFile/codecs/external.js +2 -5
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/container/index.d.ts +16 -34
- package/esm/cramFile/container/index.js +5 -20
- package/esm/cramFile/container/index.js.map +1 -1
- package/esm/cramFile/file.d.ts +8 -6
- package/esm/cramFile/file.js +37 -73
- package/esm/cramFile/file.js.map +1 -1
- package/esm/cramFile/record.js.map +1 -1
- package/esm/cramFile/sectionParsers.js +3 -2
- package/esm/cramFile/sectionParsers.js.map +1 -1
- package/esm/cramFile/slice/decodeRecord.d.ts +2 -2
- package/esm/cramFile/slice/decodeRecord.js +7 -4
- package/esm/cramFile/slice/decodeRecord.js.map +1 -1
- package/esm/cramFile/slice/index.js +14 -16
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/cramFile/util.d.ts +5 -0
- package/esm/cramFile/util.js +74 -51
- package/esm/cramFile/util.js.map +1 -1
- package/package.json +3 -6
- package/src/cramFile/codecs/_base.ts +1 -1
- package/src/cramFile/codecs/byteArrayLength.ts +4 -12
- package/src/cramFile/codecs/external.ts +3 -7
- package/src/cramFile/container/index.ts +5 -24
- package/src/cramFile/file.ts +41 -77
- package/src/cramFile/record.ts +1 -1
- package/src/cramFile/sectionParsers.ts +5 -2
- package/src/cramFile/slice/decodeRecord.ts +26 -23
- package/src/cramFile/slice/index.ts +25 -31
- package/src/cramFile/util.ts +107 -73
- package/errors.js +0 -27
@@ -32,22 +32,14 @@ export default class ByteArrayStopCodec extends CramCodec<
     cursors: Cursors,
   ) {
     const lengthCodec = this._getLengthCodec()
-    const arrayLength = lengthCodec.decode(
-      slice,
-      coreDataBlock,
-      blocksByContentId,
-      cursors,
-    )
+    const arrayLength =
+      lengthCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0
 
     const dataCodec = this._getDataCodec()
     const data = new Uint8Array(arrayLength)
     for (let i = 0; i < arrayLength; i += 1) {
-      data[i] = dataCodec.decode(
-        slice,
-        coreDataBlock,
-        blocksByContentId,
-        cursors,
-      )
+      data[i] =
+        dataCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0
     }
 
     return data
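The shape of this change, decode calls that may now yield undefined with the caller substituting 0, can be sketched standalone. This is an illustration only, not the package's codec API; the decoder type and function names below are hypothetical:

```ts
// Hypothetical stand-in for a codec decode call that may return undefined.
type MaybeDecode = () => number | undefined

// Mirrors the `|| 0` fallback in the hunk above: a missing length or byte
// value is treated as 0 rather than throwing.
function decodeByteArray(decodeLength: MaybeDecode, decodeByte: MaybeDecode) {
  const arrayLength = decodeLength() || 0
  const data = new Uint8Array(arrayLength)
  for (let i = 0; i < arrayLength; i += 1) {
    data[i] = decodeByte() || 0
  }
  return data
}

// usage: the byte decoder runs dry after two values, the rest fall back to 0
const bytes = [7, 9]
console.log(decodeByteArray(() => 4, () => bytes.shift())) // Uint8Array [7, 9, 0, 0]
```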
@@ -1,5 +1,5 @@
 import CramCodec, { Cursor, Cursors } from './_base'
-import { CramMalformedError, CramUnimplementedError } from '../../errors'
+import { CramUnimplementedError } from '../../errors'
 import { CramFileBlock } from '../file'
 import CramSlice from '../slice'
 import { parseItf8 } from '../util'

@@ -39,13 +39,9 @@ export default class ExternalCodec extends CramCodec<
   ) {
     const { blockContentId } = this.parameters
     const contentBlock = blocksByContentId[blockContentId]
-
-      throw new CramMalformedError(
-        `no block found with content ID ${blockContentId}}`,
-      )
-    }
+
     const cursor = cursors.externalBlocks.getCursor(blockContentId)
-    return this._decodeData(contentBlock, cursor)
+    return contentBlock ? this._decodeData(contentBlock, cursor) : undefined
   }
 
   _decodeInt(contentBlock: CramFileBlock, cursor: Cursor) {
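A reduced sketch of the new control flow in the external codec: a missing block no longer triggers a thrown error, the decode simply yields undefined. The types and names below are illustrative stand-ins, not the package's real interfaces:

```ts
// Minimal stand-ins for the block map and cursor bookkeeping.
interface Block {
  data: Uint8Array
}
interface Cursor {
  bytePosition: number
}

function decodeExternal(
  blocksByContentId: Record<number, Block | undefined>,
  blockContentId: number,
  cursor: Cursor,
): number | undefined {
  const contentBlock = blocksByContentId[blockContentId]
  // previously the missing-block case threw; now it falls through to undefined
  return contentBlock ? contentBlock.data[cursor.bytePosition++] : undefined
}

console.log(decodeExternal({}, 5, { bytePosition: 0 })) // undefined
console.log(decodeExternal({ 5: { data: new Uint8Array([42]) } }, 5, { bytePosition: 0 })) // 42
```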
@@ -21,16 +21,13 @@ export default class CramContainer {
 
     // if there are no records in the container, there will be no compression
     // header
-    if (!containerHeader
+    if (!containerHeader.numRecords) {
       return null
     }
     const { majorVersion } = await this.file.getDefinition()
     const sectionParsers = getSectionParsers(majorVersion)
 
     const block = await this.getFirstBlock()
-    if (block === undefined) {
-      return undefined
-    }
     if (block.contentType !== 'COMPRESSION_HEADER') {
       throw new CramMalformedError(
         `invalid content type ${block.contentType} in compression header block`,

@@ -51,9 +48,6 @@ export default class CramContainer {
 
   async getFirstBlock() {
     const containerHeader = await this.getHeader()
-    if (!containerHeader) {
-      return undefined
-    }
     return this.file.readBlock(containerHeader._endPosition)
   }
 

@@ -78,12 +72,6 @@ export default class CramContainer {
     const { majorVersion } = await this.file.getDefinition()
     const sectionParsers = getSectionParsers(majorVersion)
     const { cramContainerHeader1, cramContainerHeader2 } = sectionParsers
-    const { size: fileSize } = await this.file.stat()
-
-    if (position >= fileSize) {
-      console.warn(`pos:${position}>=fileSize:${fileSize} in cram container`)
-      return undefined
-    }
 
     // parse the container header. do it in 2 pieces because you cannot tell
     // how much to buffer until you read numLandmarks

@@ -93,13 +81,6 @@ export default class CramContainer {
     )
     const header1 = parseItem(bytes1, cramContainerHeader1.parser)
     const numLandmarksSize = itf8Size(header1.numLandmarks)
-    if (position + header1.length >= fileSize) {
-      // header indicates container goes beyond fileSize
-      console.warn(
-        `container at ${position} is beyond fileSize:${fileSize}, skipping`,
-      )
-      return undefined
-    }
 
     const bytes2 = await this.file.read(
       cramContainerHeader2.maxLength(header1.numLandmarks),

@@ -116,12 +97,12 @@
       )
     }
 
-
+    return {
+      ...header1,
+      ...header2,
       _size: header1._size + header2._size - numLandmarksSize,
       _endPosition: header1._size + header2._size - numLandmarksSize + position,
-    }
-
-    return completeHeader
+    }
   }
 }
 
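The reworked tail of the container-header parser now returns a single spread object instead of assembling an intermediate variable. A self-contained sketch of that return shape (field names beyond _size/_endPosition are placeholders):

```ts
// Two parsed header pieces are merged and annotated with the combined size
// and the file offset where the header ends.
interface ParsedPiece {
  _size: number
}

function assembleHeader<A extends ParsedPiece, B extends ParsedPiece>(
  header1: A,
  header2: B,
  numLandmarksSize: number,
  position: number,
) {
  return {
    ...header1,
    ...header2,
    _size: header1._size + header2._size - numLandmarksSize,
    _endPosition: header1._size + header2._size - numLandmarksSize + position,
  }
}

const merged = assembleHeader({ _size: 10, length: 100 }, { _size: 6 }, 2, 26)
console.log(merged._size, merged._endPosition) // 14 40
```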
package/src/cramFile/file.ts CHANGED

@@ -102,12 +102,6 @@ export default class CramFile {
     }
   }
 
-  // can just stat this object like a filehandle
-  stat() {
-    return this.file.stat()
-  }
-
-  // can just stat this object like a filehandle
   read(length: number, position: number) {
     return this.file.read(length, position)
   }

@@ -133,20 +127,17 @@ export default class CramFile {
     }
 
     const firstBlock = await firstContainer.getFirstBlock()
-
-
-
-
-
-
-
-
-
-
-
-    this.header = text
-    return parseHeaderText(text)
-    }
+
+    const content = firstBlock.content
+    const dataView = new DataView(content.buffer)
+    const headerLength = dataView.getInt32(0, true)
+    const textStart = 4
+    const decoder = new TextDecoder('utf8')
+    const text = decoder.decode(
+      content.subarray(textStart, textStart + headerLength),
+    )
+    this.header = text
+    return parseHeaderText(text)
   }
 
   async getHeaderText() {
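The new header-text path reads a 4-byte little-endian length prefix and decodes that many bytes of UTF-8. A standalone version of the same decoding (the function name is ours, and it takes the block content directly):

```ts
// Decode "<int32 length><utf8 text>" the way the new getSamHeader body does.
function decodeHeaderText(content: Uint8Array): string {
  const dataView = new DataView(content.buffer, content.byteOffset, content.byteLength)
  const headerLength = dataView.getInt32(0, true) // little-endian length prefix
  const textStart = 4
  return new TextDecoder('utf8').decode(
    content.subarray(textStart, textStart + headerLength),
  )
}

// round-trip check with a tiny synthetic header block
const headerText = '@HD\tVN:1.6\n@SQ\tSN:ctgA\tLN:50001\n'
const encoded = new TextEncoder().encode(headerText)
const block = new Uint8Array(4 + encoded.length)
new DataView(block.buffer).setInt32(0, encoded.length, true)
block.set(encoded, 4)
console.log(decodeHeaderText(block) === headerText) // true
```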
@@ -158,25 +149,19 @@ export default class CramFile {
     const { majorVersion } = await this.getDefinition()
     const sectionParsers = getSectionParsers(majorVersion)
     let position = sectionParsers.cramFileDefinition.maxLength
-    const { size: fileSize } = await this.file.stat()
-    const { cramContainerHeader1 } = sectionParsers
 
     // skip with a series of reads to the proper container
     let currentContainer: CramContainer | undefined
     for (let i = 0; i <= containerNumber; i++) {
       // if we are about to go off the end of the file
       // and have not found that container, it does not exist
-      if (position + cramContainerHeader1.maxLength + 8 >= fileSize) {
-        return undefined
-      }
+      // if (position + cramContainerHeader1.maxLength + 8 >= fileSize) {
+      //   return undefined
+      // }
 
       currentContainer = this.getContainerAtPosition(position)
       const currentHeader = await currentContainer.getHeader()
-
-        throw new CramMalformedError(
-          `container ${containerNumber} not found in file`,
-        )
-      }
+
       // if this is the first container, read all the blocks in the container
       // to determine its length, because we cannot trust the container
       // header's given length due to a bug somewhere in htslib

@@ -184,9 +169,6 @@ export default class CramFile {
       position = currentHeader._endPosition
       for (let j = 0; j < currentHeader.numBlocks; j++) {
         const block = await this.readBlock(position)
-        if (block === undefined) {
-          return undefined
-        }
         position = block._endPosition
       }
     } else {

@@ -219,39 +201,41 @@ export default class CramFile {
 
   /**
    * @returns {Promise[number]} the number of containers in the file
+   *
+   * note: this is currently used only in unit tests, and after removing file
+   * length check, relies on a try catch to read return an error to break
    */
   async containerCount(): Promise<number | undefined> {
     const { majorVersion } = await this.getDefinition()
     const sectionParsers = getSectionParsers(majorVersion)
-    const { size: fileSize } = await this.file.stat()
-    const { cramContainerHeader1 } = sectionParsers
 
     let containerCount = 0
     let position = sectionParsers.cramFileDefinition.maxLength
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-      return undefined
+    try {
+      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
+      while (true) {
+        const currentHeader =
+          await this.getContainerAtPosition(position).getHeader()
+
+        // if this is the first container, read all the blocks in the container,
+        // because we cannot trust the container header's given length due to a
+        // bug somewhere in htslib
+        if (containerCount === 0) {
+          position = currentHeader._endPosition
+          for (let j = 0; j < currentHeader.numBlocks; j++) {
+            const block = await this.readBlock(position)
+            position = block._endPosition
           }
-
+        } else {
+          // otherwise, just traverse to the next container using the container's
+          // length
+          position += currentHeader._size + currentHeader.length
        }
-
-      // otherwise, just traverse to the next container using the container's
-      // length
-      position += currentHeader._size + currentHeader.length
+        containerCount += 1
      }
-
+    } catch (e) {
+      containerCount--
+      /* do nothing */
     }
 
     return containerCount
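containerCount no longer compares positions against a stat()'d file size; it walks containers until a read fails and lets the catch terminate the loop. A generic sketch of that walk-until-throw pattern (the reader callback is hypothetical; the real method additionally decrements the count once in the catch to discard a final partial read):

```ts
// Count fixed-layout items by advancing through them until a read throws.
function countItems(readItemAt: (position: number) => { size: number }): number {
  let count = 0
  let position = 0
  try {
    while (true) {
      position += readItemAt(position).size
      count += 1
    }
  } catch (e) {
    /* reached the end of the data */
  }
  return count
}

// usage against an in-memory "file" holding three 10-byte items
const fileSize = 30
const count = countItems(position => {
  if (position >= fileSize) {
    throw new Error(`read past end of file at ${position}`)
  }
  return { size: 10 }
})
console.log(count) // 3
```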
@@ -265,11 +249,6 @@ export default class CramFile {
     const { majorVersion } = await this.getDefinition()
     const sectionParsers = getSectionParsers(majorVersion)
     const { cramBlockHeader } = sectionParsers
-    const { size: fileSize } = await this.file.stat()
-
-    if (position + cramBlockHeader.maxLength >= fileSize) {
-      return undefined
-    }
 
     const buffer = await this.file.read(cramBlockHeader.maxLength, position)
     return parseItem(buffer, cramBlockHeader.parser, 0, position)

@@ -287,16 +266,7 @@ export default class CramFile {
     size = section.maxLength,
     preReadBuffer?: Uint8Array,
   ) {
-
-    if (preReadBuffer) {
-      buffer = preReadBuffer
-    } else {
-      const { size: fileSize } = await this.file.stat()
-      if (position + size >= fileSize) {
-        return undefined
-      }
-      buffer = await this.file.read(size, position)
-    }
+    const buffer = preReadBuffer ?? (await this.file.read(size, position))
     const data = parseItem(buffer, section.parser, 0, position)
     if (data._size !== size) {
       throw new CramMalformedError(
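The section-buffer selection collapses into a single nullish-coalescing expression: use the caller-provided buffer if present, otherwise read from the file. Reduced to its essentials (the read signature is a stand-in):

```ts
// Prefer an already-read buffer; fall back to reading `size` bytes at `position`.
async function getSectionBuffer(
  read: (size: number, position: number) => Promise<Uint8Array>,
  size: number,
  position: number,
  preReadBuffer?: Uint8Array,
): Promise<Uint8Array> {
  return preReadBuffer ?? (await read(size, position))
}

const fakeRead = async (size: number) => new Uint8Array(size).fill(1)
getSectionBuffer(fakeRead, 8, 0).then(buf => console.log(buf.length)) // 8
getSectionBuffer(fakeRead, 8, 0, new Uint8Array(3)).then(buf => console.log(buf.length)) // 3
```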
@@ -356,9 +326,6 @@ export default class CramFile {
     const { majorVersion } = await this.getDefinition()
     const sectionParsers = getSectionParsers(majorVersion)
     const blockHeader = await this.readBlockHeader(position)
-    if (blockHeader === undefined) {
-      return undefined
-    }
     const blockContentPosition = blockHeader._endPosition
 
     const d = await this.file.read(

@@ -386,9 +353,6 @@ export default class CramFile {
       sectionParsers.cramBlockCrc32,
       blockContentPosition + blockHeader.compressedSize,
     )
-    if (crc === undefined) {
-      return undefined
-    }
     block.crc32 = crc.crc32
 
     // check the block data crc32
package/src/cramFile/record.ts CHANGED

@@ -271,7 +271,7 @@ export default class CramRecord {
 
     this.readGroupId = readGroupId
     this.readName = readName
-    this.sequenceId = sequenceId
+    this.sequenceId = sequenceId!
     this.uniqueId = uniqueId
     this.templateSize = templateSize
     this.alignmentStart = alignmentStart
@@ -652,11 +652,14 @@ function cramContainerHeader1(majorVersion: number) {
     parser: (buffer: Uint8Array, offset: number) => {
       const b = buffer
       const dataView = new DataView(b.buffer, b.byteOffset, b.length)
+
       // byte size of the container data (blocks)
       const length = dataView.getInt32(offset, true)
       offset += 4
-
-      // reference
+
+      // reference sequence identifier:
+      // -1 for unmapped reads,
+      // -2 for multiple reference sequences
       const [refSeqId, newOffset1] = parseItf8(buffer, offset)
       offset += newOffset1
       const [refSeqStart, newOffset2] = parseItf8(buffer, offset)
@@ -211,7 +211,7 @@ function decodeReadFeatures(
 
 export type DataSeriesDecoder = <T extends DataSeriesEncodingKey>(
   dataSeriesName: T,
-) => DataTypeMapping[DataSeriesTypes[T]]
+) => DataTypeMapping[DataSeriesTypes[T]] | undefined
 
 export default function decodeRecord(
   slice: CramSlice,
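With DataSeriesDecoder now returning a possibly-undefined value, the call sites in decodeRecord (next hunks) assert presence with TypeScript's non-null operator for series that must exist. A reduced model of that contract (series names and lookup here are illustrative):

```ts
// A decoder that may come up empty, and a caller that asserts presence for a
// mandatory series -- mirroring decodeDataSeries('BF')! in the hunks below.
type SeriesDecoder = (dataSeriesName: string) => number | undefined

function decodeMandatoryFlags(decode: SeriesDecoder): number {
  // `!` only silences the type checker; if the series really is missing this
  // still yields undefined at runtime, so it encodes an assumption
  return decode('BF')!
}

const series: Record<string, number | undefined> = { BF: 67, CF: 3 }
console.log(decodeMandatoryFlags(name => series[name])) // 67
```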
@@ -224,12 +224,11 @@ export default function decodeRecord(
   majorVersion: number,
   recordNumber: number,
 ) {
-  let flags = decodeDataSeries('BF')
+  let flags = decodeDataSeries('BF')!
 
-  // note: the C data type of compressionFlags is byte in cram v1
-  //
-
-  const cramFlags = decodeDataSeries('CF')
+  // note: the C data type of compressionFlags is byte in cram v1 and int32 in
+  // cram v2+, but that does not matter for us here in javascript land.
+  const cramFlags = decodeDataSeries('CF')!
 
   if (!isMappedSliceHeader(sliceHeader.parsedContent)) {
     throw new Error('slice header not mapped')

@@ -240,18 +239,18 @@ export default function decodeRecord(
       ? decodeDataSeries('RI')
       : sliceHeader.parsedContent.refSeqId
 
-  const readLength = decodeDataSeries('RL')
+  const readLength = decodeDataSeries('RL')!
   // if APDelta, will calculate the true start in a second pass
-  let alignmentStart = decodeDataSeries('AP')
+  let alignmentStart = decodeDataSeries('AP')!
   if (compressionScheme.APdelta) {
     alignmentStart = alignmentStart + cursors.lastAlignmentStart
   }
   cursors.lastAlignmentStart = alignmentStart
-  const readGroupId = decodeDataSeries('RG')
+  const readGroupId = decodeDataSeries('RG')!
 
   let readName: string | undefined
   if (compressionScheme.readNamesIncluded) {
-    readName = readNullTerminatedString(decodeDataSeries('RN'))
+    readName = readNullTerminatedString(decodeDataSeries('RN')!)
   }
 
   let mateToUse:

@@ -268,14 +267,14 @@ export default function decodeRecord(
   if (CramFlagsDecoder.isDetached(cramFlags)) {
     // note: the MF is a byte in 1.0, int32 in 2+, but once again this doesn't
     // matter for javascript
-    const mateFlags = decodeDataSeries('MF')
+    const mateFlags = decodeDataSeries('MF')!
     let mateReadName: string | undefined
     if (!compressionScheme.readNamesIncluded) {
-      mateReadName = readNullTerminatedString(decodeDataSeries('RN'))
+      mateReadName = readNullTerminatedString(decodeDataSeries('RN')!)
       readName = mateReadName
     }
-    const mateSequenceId = decodeDataSeries('NS')
-    const mateAlignmentStart = decodeDataSeries('NP')
+    const mateSequenceId = decodeDataSeries('NS')!
+    const mateAlignmentStart = decodeDataSeries('NP')!
     if (mateFlags || mateSequenceId > -1) {
       mateToUse = {
         mateFlags,

@@ -285,7 +284,7 @@ export default function decodeRecord(
       }
     }
 
-    templateSize = decodeDataSeries('TS')
+    templateSize = decodeDataSeries('TS')!
 
     // set mate unmapped if needed
     if (MateFlagsDecoder.isUnmapped(mateFlags)) {

@@ -298,12 +297,12 @@ export default function decodeRecord(
 
     // detachedCount++
   } else if (CramFlagsDecoder.isWithMateDownstream(cramFlags)) {
-    mateRecordNumber = decodeDataSeries('NF') + recordNumber + 1
+    mateRecordNumber = decodeDataSeries('NF')! + recordNumber + 1
   }
 
   // TODO: the aux tag parsing will have to be refactored if we want to support
   // cram v1
-  const TLindex = decodeDataSeries('TL')
+  const TLindex = decodeDataSeries('TL')!
   if (TLindex < 0) {
     /* TODO: check nTL: TLindex >= compressionHeader.tagEncoding.size */
     throw new CramMalformedError('invalid TL index')
@@ -322,7 +321,11 @@ export default function decodeRecord(
       .getCodecForTag(tagId)
       .decode(slice, coreDataBlock, blocksByContentId, cursors)
     tags[tagName] =
-
+      tagData === undefined
+        ? undefined
+        : typeof tagData === 'number'
+          ? tagData
+          : parseTagData(tagType, tagData)
   }
 
   let readFeatures: ReadFeature[] | undefined
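The tag assignment above is a three-way conditional: undefined passes through, numeric values are stored directly, anything else goes through the tag parser. Extracted into a standalone helper with a stand-in parser:

```ts
// Normalize a decoded tag value the way the new assignment does.
function normalizeTagValue(
  tagType: string,
  tagData: number | Uint8Array | undefined,
  parseTagData: (type: string, data: Uint8Array) => unknown,
): unknown {
  return tagData === undefined
    ? undefined
    : typeof tagData === 'number'
      ? tagData
      : parseTagData(tagType, tagData)
}

const parse = (_type: string, data: Uint8Array) => new TextDecoder().decode(data)
console.log(normalizeTagValue('Z', new Uint8Array([104, 105]), parse)) // "hi"
console.log(normalizeTagValue('i', 42, parse)) // 42
console.log(normalizeTagValue('Z', undefined, parse)) // undefined
```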
@@ -332,7 +335,7 @@ export default function decodeRecord(
   let readBases = undefined
   if (!BamFlagsDecoder.isSegmentUnmapped(flags)) {
     // reading read features
-    const readFeatureCount = decodeDataSeries('FN')
+    const readFeatureCount = decodeDataSeries('FN')!
     if (readFeatureCount) {
       readFeatures = decodeReadFeatures(
         alignmentStart,

@@ -367,11 +370,11 @@ export default function decodeRecord(
     }
 
     // mapping quality
-    mappingQuality = decodeDataSeries('MQ')
+    mappingQuality = decodeDataSeries('MQ')!
     if (CramFlagsDecoder.isPreservingQualityScores(cramFlags)) {
       qualityScores = new Array(readLength)
       for (let i = 0; i < qualityScores.length; i++) {
-        qualityScores[i] = decodeDataSeries('QS')
+        qualityScores[i] = decodeDataSeries('QS')!
       }
     }
   } else if (CramFlagsDecoder.isDecodeSequenceAsStar(cramFlags)) {

@@ -380,14 +383,14 @@ export default function decodeRecord(
   } else {
     const bases = new Array(readLength) as number[]
     for (let i = 0; i < bases.length; i++) {
-      bases[i] = decodeDataSeries('BA')
+      bases[i] = decodeDataSeries('BA')!
     }
     readBases = String.fromCharCode(...bases)
 
     if (CramFlagsDecoder.isPreservingQualityScores(cramFlags)) {
       qualityScores = new Array(readLength)
       for (let i = 0; i < bases.length; i++) {
-        qualityScores[i] = decodeDataSeries('QS')
+        qualityScores[i] = decodeDataSeries('QS')!
       }
     }
   }
@@ -20,6 +20,13 @@ export type SliceHeader = CramFileBlock & {
   parsedContent: MappedSliceHeader | UnmappedSliceHeader
 }
 
+interface RefRegion {
+  id: number
+  start: number
+  end: number
+  seq: string | null
+}
+
 /**
  * @private
  * Try to estimate the template length from a bunch of interrelated multi-segment reads.
@@ -191,16 +198,10 @@ export default class CramSlice {
     const { majorVersion } = await this.file.getDefinition()
     const sectionParsers = getSectionParsers(majorVersion)
     const containerHeader = await this.container.getHeader()
-    if (!containerHeader) {
-      throw new Error('no container header detected')
-    }
 
     const header = await this.file.readBlock(
       containerHeader._endPosition + this.containerPosition,
     )
-    if (header === undefined) {
-      throw new Error('block header undefined')
-    }
     if (header.contentType === 'MAPPED_SLICE_HEADER') {
       const content = parseItem(
         header.content,

@@ -232,9 +233,6 @@ export default class CramSlice {
     const blocks: CramFileBlock[] = new Array(header.parsedContent.numBlocks)
     for (let i = 0; i < blocks.length; i++) {
       const block = await this.file.readBlock(blockPosition)
-      if (block === undefined) {
-        throw new Error('block undefined')
-      }
       blocks[i] = block
       blockPosition = blocks[i]!._endPosition
     }
@@ -404,21 +402,14 @@ export default class CramSlice {
       T extends DataSeriesEncodingKey,
     >(
       dataSeriesName: T,
-    ): DataTypeMapping[DataSeriesTypes[T]] => {
+    ): DataTypeMapping[DataSeriesTypes[T]] | undefined => {
       const codec = compressionScheme.getCodecForDataSeries(dataSeriesName)
       if (!codec) {
         throw new CramMalformedError(
           `no codec defined for ${dataSeriesName} data series`,
         )
       }
-
-      const decoded = codec.decode(
-        this,
-        coreDataBlock,
-        blocksByContentId,
-        cursors,
-      )
-      return decoded
+      return codec.decode(this, coreDataBlock, blocksByContentId, cursors)
     }
     const records: CramRecord[] = new Array(
       sliceHeader.parsedContent.numRecords,
@@ -457,16 +448,22 @@ export default class CramSlice {
     }
 
     // interpret `recordsToNextFragment` attributes to make standard `mate`
-    // objects
+    // objects
+    //
+    // Resolve mate pair cross-references between records in this slice
     for (let i = 0; i < records.length; i += 1) {
-      const
-
-
-
-
-
-
-
+      const r = records[i]
+      // check for !!r added after removal of "stat" file size check: found
+      // some undefined entries
+      if (r) {
+        const { mateRecordNumber } = r
+        if (
+          mateRecordNumber !== undefined &&
+          mateRecordNumber >= 0 &&
+          records[mateRecordNumber]
+        ) {
+          associateIntraSliceMate(records, i, r, records[mateRecordNumber])
+        }
       }
     }
 
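A reduced model of the guarded mate-resolution pass above: a pair is only linked when both the record and its referenced mate actually exist. The record shape and linking below are simplified stand-ins, not the package's CramRecord handling:

```ts
interface Rec {
  mateRecordNumber?: number
  mate?: Rec
}

// Cross-link records to their intra-slice mates, skipping sparse entries.
function resolveMates(records: (Rec | undefined)[]): void {
  for (let i = 0; i < records.length; i += 1) {
    const r = records[i]
    if (r) {
      const { mateRecordNumber } = r
      if (
        mateRecordNumber !== undefined &&
        mateRecordNumber >= 0 &&
        records[mateRecordNumber]
      ) {
        r.mate = records[mateRecordNumber]
      }
    }
  }
}

const recs: (Rec | undefined)[] = [{ mateRecordNumber: 2 }, undefined, {}]
resolveMates(recs)
console.log(recs[0]?.mate === recs[2]) // true
```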
@@ -501,10 +498,7 @@ export default class CramSlice {
     if (compressionScheme === undefined) {
       throw new Error('compression scheme undefined')
     }
-    const refRegions: Record<
-      string,
-      { id: number; start: number; end: number; seq: string | null }
-    > = {}
+    const refRegions: Record<string, RefRegion> = {}
 
     // iterate over the records to find the spans of the reference
     // sequences we need to fetch