@gmod/cram 4.0.3 → 4.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -51
- package/dist/cram-bundle.js +1 -1
- package/dist/cramFile/codecs/_base.d.ts +1 -1
- package/dist/cramFile/codecs/byteArrayLength.js +3 -2
- package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/dist/cramFile/codecs/external.d.ts +1 -1
- package/dist/cramFile/codecs/external.js +1 -4
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/container/index.d.ts +16 -34
- package/dist/cramFile/container/index.js +5 -20
- package/dist/cramFile/container/index.js.map +1 -1
- package/dist/cramFile/file.d.ts +8 -6
- package/dist/cramFile/file.js +80 -82
- package/dist/cramFile/file.js.map +1 -1
- package/dist/cramFile/record.js.map +1 -1
- package/dist/cramFile/sectionParsers.js +3 -2
- package/dist/cramFile/sectionParsers.js.map +1 -1
- package/dist/cramFile/slice/decodeRecord.d.ts +2 -2
- package/dist/cramFile/slice/decodeRecord.js +10 -7
- package/dist/cramFile/slice/decodeRecord.js.map +1 -1
- package/dist/cramFile/slice/index.js +14 -16
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/cramFile/util.d.ts +5 -2
- package/dist/cramFile/util.js +75 -79
- package/dist/cramFile/util.js.map +1 -1
- package/dist/htscodecs/arith_gen.d.ts +5 -7
- package/dist/htscodecs/arith_gen.js +122 -105
- package/dist/htscodecs/arith_gen.js.map +1 -1
- package/dist/htscodecs/arith_sh.d.ts +1 -8
- package/dist/htscodecs/arith_sh.js +16 -10
- package/dist/htscodecs/arith_sh.js.map +1 -1
- package/dist/htscodecs/byte_model.d.ts +1 -6
- package/dist/htscodecs/byte_model.js +25 -17
- package/dist/htscodecs/byte_model.js.map +1 -1
- package/dist/htscodecs/fqzcomp.d.ts +1 -1
- package/dist/htscodecs/fqzcomp.js +98 -77
- package/dist/htscodecs/fqzcomp.js.map +1 -1
- package/dist/htscodecs/index.d.ts +5 -5
- package/dist/htscodecs/index.js +53 -16
- package/dist/htscodecs/index.js.map +1 -1
- package/dist/htscodecs/iostream.d.ts +9 -20
- package/dist/htscodecs/iostream.js +21 -116
- package/dist/htscodecs/iostream.js.map +1 -1
- package/dist/htscodecs/rans.d.ts +1 -1
- package/dist/htscodecs/rans.js +65 -54
- package/dist/htscodecs/rans.js.map +1 -1
- package/dist/htscodecs/rans4x16.d.ts +1 -1
- package/dist/htscodecs/rans4x16.js +151 -111
- package/dist/htscodecs/rans4x16.js.map +1 -1
- package/dist/htscodecs/tok3.d.ts +1 -2
- package/dist/htscodecs/tok3.js +82 -239
- package/dist/htscodecs/tok3.js.map +1 -1
- package/dist/util.d.ts +1 -0
- package/dist/util.js +20 -0
- package/dist/util.js.map +1 -0
- package/esm/cramFile/codecs/_base.d.ts +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js +3 -2
- package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/esm/cramFile/codecs/external.d.ts +1 -1
- package/esm/cramFile/codecs/external.js +2 -5
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/container/index.d.ts +16 -34
- package/esm/cramFile/container/index.js +5 -20
- package/esm/cramFile/container/index.js.map +1 -1
- package/esm/cramFile/file.d.ts +8 -6
- package/esm/cramFile/file.js +40 -75
- package/esm/cramFile/file.js.map +1 -1
- package/esm/cramFile/record.js.map +1 -1
- package/esm/cramFile/sectionParsers.js +3 -2
- package/esm/cramFile/sectionParsers.js.map +1 -1
- package/esm/cramFile/slice/decodeRecord.d.ts +2 -2
- package/esm/cramFile/slice/decodeRecord.js +10 -7
- package/esm/cramFile/slice/decodeRecord.js.map +1 -1
- package/esm/cramFile/slice/index.js +14 -16
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/cramFile/util.d.ts +5 -2
- package/esm/cramFile/util.js +74 -77
- package/esm/cramFile/util.js.map +1 -1
- package/esm/htscodecs/arith_gen.d.ts +5 -7
- package/esm/htscodecs/arith_gen.js +108 -97
- package/esm/htscodecs/arith_gen.js.map +1 -1
- package/esm/htscodecs/arith_sh.d.ts +1 -8
- package/esm/htscodecs/arith_sh.js +14 -11
- package/esm/htscodecs/arith_sh.js.map +1 -1
- package/esm/htscodecs/byte_model.d.ts +1 -6
- package/esm/htscodecs/byte_model.js +23 -18
- package/esm/htscodecs/byte_model.js.map +1 -1
- package/esm/htscodecs/fqzcomp.d.ts +1 -1
- package/esm/htscodecs/fqzcomp.js +91 -76
- package/esm/htscodecs/fqzcomp.js.map +1 -1
- package/esm/htscodecs/index.d.ts +5 -5
- package/esm/htscodecs/index.js +14 -20
- package/esm/htscodecs/index.js.map +1 -1
- package/esm/htscodecs/iostream.d.ts +9 -20
- package/esm/htscodecs/iostream.js +19 -117
- package/esm/htscodecs/iostream.js.map +1 -1
- package/esm/htscodecs/rans.d.ts +1 -1
- package/esm/htscodecs/rans.js +61 -56
- package/esm/htscodecs/rans.js.map +1 -1
- package/esm/htscodecs/rans4x16.d.ts +1 -1
- package/esm/htscodecs/rans4x16.js +143 -109
- package/esm/htscodecs/rans4x16.js.map +1 -1
- package/esm/htscodecs/tok3.d.ts +1 -2
- package/esm/htscodecs/tok3.js +41 -237
- package/esm/htscodecs/tok3.js.map +1 -1
- package/esm/util.d.ts +1 -0
- package/esm/util.js +17 -0
- package/esm/util.js.map +1 -0
- package/package.json +3 -6
- package/src/cramFile/codecs/_base.ts +1 -1
- package/src/cramFile/codecs/byteArrayLength.ts +4 -12
- package/src/cramFile/codecs/external.ts +3 -7
- package/src/cramFile/container/index.ts +5 -24
- package/src/cramFile/file.ts +44 -79
- package/src/cramFile/record.ts +1 -1
- package/src/cramFile/sectionParsers.ts +5 -2
- package/src/cramFile/slice/decodeRecord.ts +29 -28
- package/src/cramFile/slice/index.ts +25 -31
- package/src/cramFile/util.ts +107 -106
- package/src/htscodecs/{arith_gen.js → arith_gen.ts} +133 -95
- package/src/htscodecs/{arith_sh.js → arith_sh.ts} +17 -9
- package/src/htscodecs/{byte_model.js → byte_model.ts} +26 -16
- package/src/htscodecs/{fqzcomp.js → fqzcomp.ts} +108 -74
- package/src/htscodecs/{index.js → index.ts} +14 -20
- package/src/htscodecs/iostream.ts +159 -0
- package/src/htscodecs/{rans.js → rans.ts} +73 -56
- package/src/htscodecs/{rans4x16.js → rans4x16.ts} +180 -111
- package/src/htscodecs/tok3.ts +197 -0
- package/src/util.ts +16 -0
- package/errors.js +0 -27
- package/src/htscodecs/iostream.js +0 -257
- package/src/htscodecs/tok3.js +0 -413
package/src/cramFile/file.ts
CHANGED
|
@@ -4,11 +4,12 @@ import QuickLRU from 'quick-lru'
|
|
|
4
4
|
import { XzReadableStream } from 'xz-decompress'
|
|
5
5
|
|
|
6
6
|
import { CramMalformedError, CramUnimplementedError } from '../errors'
|
|
7
|
-
import htscodecs from '../htscodecs'
|
|
7
|
+
import * as htscodecs from '../htscodecs'
|
|
8
8
|
import { open } from '../io'
|
|
9
9
|
import ransuncompress from '../rans'
|
|
10
10
|
import { parseHeaderText } from '../sam'
|
|
11
11
|
import { unzip } from '../unzip'
|
|
12
|
+
import { concatUint8Array } from '../util'
|
|
12
13
|
import CramContainer from './container'
|
|
13
14
|
import CramRecord from './record'
|
|
14
15
|
import {
|
|
@@ -17,7 +18,7 @@ import {
|
|
|
17
18
|
cramFileDefinition,
|
|
18
19
|
getSectionParsers,
|
|
19
20
|
} from './sectionParsers'
|
|
20
|
-
import {
|
|
21
|
+
import { parseItem, tinyMemoize } from './util'
|
|
21
22
|
|
|
22
23
|
import type { GenericFilehandle } from 'generic-filehandle2'
|
|
23
24
|
|
|
@@ -101,12 +102,6 @@ export default class CramFile {
|
|
|
101
102
|
}
|
|
102
103
|
}
|
|
103
104
|
|
|
104
|
-
// can just stat this object like a filehandle
|
|
105
|
-
stat() {
|
|
106
|
-
return this.file.stat()
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
// can just stat this object like a filehandle
|
|
110
105
|
read(length: number, position: number) {
|
|
111
106
|
return this.file.read(length, position)
|
|
112
107
|
}
|
|
@@ -132,20 +127,17 @@ export default class CramFile {
|
|
|
132
127
|
}
|
|
133
128
|
|
|
134
129
|
const firstBlock = await firstContainer.getFirstBlock()
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
this.header = text
|
|
147
|
-
return parseHeaderText(text)
|
|
148
|
-
}
|
|
130
|
+
|
|
131
|
+
const content = firstBlock.content
|
|
132
|
+
const dataView = new DataView(content.buffer)
|
|
133
|
+
const headerLength = dataView.getInt32(0, true)
|
|
134
|
+
const textStart = 4
|
|
135
|
+
const decoder = new TextDecoder('utf8')
|
|
136
|
+
const text = decoder.decode(
|
|
137
|
+
content.subarray(textStart, textStart + headerLength),
|
|
138
|
+
)
|
|
139
|
+
this.header = text
|
|
140
|
+
return parseHeaderText(text)
|
|
149
141
|
}
|
|
150
142
|
|
|
151
143
|
async getHeaderText() {
|
|
@@ -157,25 +149,19 @@ export default class CramFile {
|
|
|
157
149
|
const { majorVersion } = await this.getDefinition()
|
|
158
150
|
const sectionParsers = getSectionParsers(majorVersion)
|
|
159
151
|
let position = sectionParsers.cramFileDefinition.maxLength
|
|
160
|
-
const { size: fileSize } = await this.file.stat()
|
|
161
|
-
const { cramContainerHeader1 } = sectionParsers
|
|
162
152
|
|
|
163
153
|
// skip with a series of reads to the proper container
|
|
164
154
|
let currentContainer: CramContainer | undefined
|
|
165
155
|
for (let i = 0; i <= containerNumber; i++) {
|
|
166
156
|
// if we are about to go off the end of the file
|
|
167
157
|
// and have not found that container, it does not exist
|
|
168
|
-
if (position + cramContainerHeader1.maxLength + 8 >= fileSize) {
|
|
169
|
-
|
|
170
|
-
}
|
|
158
|
+
// if (position + cramContainerHeader1.maxLength + 8 >= fileSize) {
|
|
159
|
+
// return undefined
|
|
160
|
+
// }
|
|
171
161
|
|
|
172
162
|
currentContainer = this.getContainerAtPosition(position)
|
|
173
163
|
const currentHeader = await currentContainer.getHeader()
|
|
174
|
-
|
|
175
|
-
throw new CramMalformedError(
|
|
176
|
-
`container ${containerNumber} not found in file`,
|
|
177
|
-
)
|
|
178
|
-
}
|
|
164
|
+
|
|
179
165
|
// if this is the first container, read all the blocks in the container
|
|
180
166
|
// to determine its length, because we cannot trust the container
|
|
181
167
|
// header's given length due to a bug somewhere in htslib
|
|
@@ -183,9 +169,6 @@ export default class CramFile {
|
|
|
183
169
|
position = currentHeader._endPosition
|
|
184
170
|
for (let j = 0; j < currentHeader.numBlocks; j++) {
|
|
185
171
|
const block = await this.readBlock(position)
|
|
186
|
-
if (block === undefined) {
|
|
187
|
-
return undefined
|
|
188
|
-
}
|
|
189
172
|
position = block._endPosition
|
|
190
173
|
}
|
|
191
174
|
} else {
|
|
@@ -218,39 +201,41 @@ export default class CramFile {
|
|
|
218
201
|
|
|
219
202
|
/**
|
|
220
203
|
* @returns {Promise<number>} the number of containers in the file
|
|
204
|
+
*
|
|
205
|
+
* note: this is currently used only in unit tests, and after removing file
|
|
206
|
+
* length check, relies on a try/catch around a failed read to break the loop
|
|
221
207
|
*/
|
|
222
208
|
async containerCount(): Promise<number | undefined> {
|
|
223
209
|
const { majorVersion } = await this.getDefinition()
|
|
224
210
|
const sectionParsers = getSectionParsers(majorVersion)
|
|
225
|
-
const { size: fileSize } = await this.file.stat()
|
|
226
|
-
const { cramContainerHeader1 } = sectionParsers
|
|
227
211
|
|
|
228
212
|
let containerCount = 0
|
|
229
213
|
let position = sectionParsers.cramFileDefinition.maxLength
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
return undefined
|
|
214
|
+
try {
|
|
215
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
216
|
+
while (true) {
|
|
217
|
+
const currentHeader =
|
|
218
|
+
await this.getContainerAtPosition(position).getHeader()
|
|
219
|
+
|
|
220
|
+
// if this is the first container, read all the blocks in the container,
|
|
221
|
+
// because we cannot trust the container header's given length due to a
|
|
222
|
+
// bug somewhere in htslib
|
|
223
|
+
if (containerCount === 0) {
|
|
224
|
+
position = currentHeader._endPosition
|
|
225
|
+
for (let j = 0; j < currentHeader.numBlocks; j++) {
|
|
226
|
+
const block = await this.readBlock(position)
|
|
227
|
+
position = block._endPosition
|
|
245
228
|
}
|
|
246
|
-
|
|
229
|
+
} else {
|
|
230
|
+
// otherwise, just traverse to the next container using the container's
|
|
231
|
+
// length
|
|
232
|
+
position += currentHeader._size + currentHeader.length
|
|
247
233
|
}
|
|
248
|
-
|
|
249
|
-
// otherwise, just traverse to the next container using the container's
|
|
250
|
-
// length
|
|
251
|
-
position += currentHeader._size + currentHeader.length
|
|
234
|
+
containerCount += 1
|
|
252
235
|
}
|
|
253
|
-
|
|
236
|
+
} catch (e) {
|
|
237
|
+
containerCount--
|
|
238
|
+
/* do nothing */
|
|
254
239
|
}
|
|
255
240
|
|
|
256
241
|
return containerCount
|
|
@@ -264,11 +249,6 @@ export default class CramFile {
|
|
|
264
249
|
const { majorVersion } = await this.getDefinition()
|
|
265
250
|
const sectionParsers = getSectionParsers(majorVersion)
|
|
266
251
|
const { cramBlockHeader } = sectionParsers
|
|
267
|
-
const { size: fileSize } = await this.file.stat()
|
|
268
|
-
|
|
269
|
-
if (position + cramBlockHeader.maxLength >= fileSize) {
|
|
270
|
-
return undefined
|
|
271
|
-
}
|
|
272
252
|
|
|
273
253
|
const buffer = await this.file.read(cramBlockHeader.maxLength, position)
|
|
274
254
|
return parseItem(buffer, cramBlockHeader.parser, 0, position)
|
|
@@ -286,16 +266,7 @@ export default class CramFile {
|
|
|
286
266
|
size = section.maxLength,
|
|
287
267
|
preReadBuffer?: Uint8Array,
|
|
288
268
|
) {
|
|
289
|
-
|
|
290
|
-
if (preReadBuffer) {
|
|
291
|
-
buffer = preReadBuffer
|
|
292
|
-
} else {
|
|
293
|
-
const { size: fileSize } = await this.file.stat()
|
|
294
|
-
if (position + size >= fileSize) {
|
|
295
|
-
return undefined
|
|
296
|
-
}
|
|
297
|
-
buffer = await this.file.read(size, position)
|
|
298
|
-
}
|
|
269
|
+
const buffer = preReadBuffer ?? (await this.file.read(size, position))
|
|
299
270
|
const data = parseItem(buffer, section.parser, 0, position)
|
|
300
271
|
if (data._size !== size) {
|
|
301
272
|
throw new CramMalformedError(
|
|
@@ -355,9 +326,6 @@ export default class CramFile {
|
|
|
355
326
|
const { majorVersion } = await this.getDefinition()
|
|
356
327
|
const sectionParsers = getSectionParsers(majorVersion)
|
|
357
328
|
const blockHeader = await this.readBlockHeader(position)
|
|
358
|
-
if (blockHeader === undefined) {
|
|
359
|
-
return undefined
|
|
360
|
-
}
|
|
361
329
|
const blockContentPosition = blockHeader._endPosition
|
|
362
330
|
|
|
363
331
|
const d = await this.file.read(
|
|
@@ -385,9 +353,6 @@ export default class CramFile {
|
|
|
385
353
|
sectionParsers.cramBlockCrc32,
|
|
386
354
|
blockContentPosition + blockHeader.compressedSize,
|
|
387
355
|
)
|
|
388
|
-
if (crc === undefined) {
|
|
389
|
-
return undefined
|
|
390
|
-
}
|
|
391
356
|
block.crc32 = crc.crc32
|
|
392
357
|
|
|
393
358
|
// check the block data crc32
|
package/src/cramFile/record.ts
CHANGED
|
@@ -271,7 +271,7 @@ export default class CramRecord {
|
|
|
271
271
|
|
|
272
272
|
this.readGroupId = readGroupId
|
|
273
273
|
this.readName = readName
|
|
274
|
-
this.sequenceId = sequenceId
|
|
274
|
+
this.sequenceId = sequenceId!
|
|
275
275
|
this.uniqueId = uniqueId
|
|
276
276
|
this.templateSize = templateSize
|
|
277
277
|
this.alignmentStart = alignmentStart
|
|
@@ -652,11 +652,14 @@ function cramContainerHeader1(majorVersion: number) {
|
|
|
652
652
|
parser: (buffer: Uint8Array, offset: number) => {
|
|
653
653
|
const b = buffer
|
|
654
654
|
const dataView = new DataView(b.buffer, b.byteOffset, b.length)
|
|
655
|
+
|
|
655
656
|
// byte size of the container data (blocks)
|
|
656
657
|
const length = dataView.getInt32(offset, true)
|
|
657
658
|
offset += 4
|
|
658
|
-
|
|
659
|
-
// reference
|
|
659
|
+
|
|
660
|
+
// reference sequence identifier:
|
|
661
|
+
// -1 for unmapped reads,
|
|
662
|
+
// -2 for multiple reference sequences
|
|
660
663
|
const [refSeqId, newOffset1] = parseItf8(buffer, offset)
|
|
661
664
|
offset += newOffset1
|
|
662
665
|
const [refSeqStart, newOffset2] = parseItf8(buffer, offset)
|
|
@@ -141,16 +141,14 @@ function decodeReadFeatures(
|
|
|
141
141
|
const data = decodeDataSeries(dataSeriesName)
|
|
142
142
|
if (type === 'character') {
|
|
143
143
|
return String.fromCharCode(data)
|
|
144
|
-
}
|
|
145
|
-
if (type === 'string') {
|
|
144
|
+
} else if (type === 'string') {
|
|
146
145
|
let r = ''
|
|
147
146
|
for (let i = 0; i < data.byteLength; i++) {
|
|
148
147
|
r += String.fromCharCode(data[i])
|
|
149
148
|
}
|
|
150
149
|
return r
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
return data.toArray()
|
|
150
|
+
} else if (type === 'numArray') {
|
|
151
|
+
return Array.from(data)
|
|
154
152
|
}
|
|
155
153
|
// else if (type === 'number') {
|
|
156
154
|
// return data[0]
|
|
@@ -213,7 +211,7 @@ function decodeReadFeatures(
|
|
|
213
211
|
|
|
214
212
|
export type DataSeriesDecoder = <T extends DataSeriesEncodingKey>(
|
|
215
213
|
dataSeriesName: T,
|
|
216
|
-
) => DataTypeMapping[DataSeriesTypes[T]]
|
|
214
|
+
) => DataTypeMapping[DataSeriesTypes[T]] | undefined
|
|
217
215
|
|
|
218
216
|
export default function decodeRecord(
|
|
219
217
|
slice: CramSlice,
|
|
@@ -226,12 +224,11 @@ export default function decodeRecord(
|
|
|
226
224
|
majorVersion: number,
|
|
227
225
|
recordNumber: number,
|
|
228
226
|
) {
|
|
229
|
-
let flags = decodeDataSeries('BF')
|
|
227
|
+
let flags = decodeDataSeries('BF')!
|
|
230
228
|
|
|
231
|
-
// note: the C data type of compressionFlags is byte in cram v1
|
|
232
|
-
//
|
|
233
|
-
|
|
234
|
-
const cramFlags = decodeDataSeries('CF')
|
|
229
|
+
// note: the C data type of compressionFlags is byte in cram v1 and int32 in
|
|
230
|
+
// cram v2+, but that does not matter for us here in javascript land.
|
|
231
|
+
const cramFlags = decodeDataSeries('CF')!
|
|
235
232
|
|
|
236
233
|
if (!isMappedSliceHeader(sliceHeader.parsedContent)) {
|
|
237
234
|
throw new Error('slice header not mapped')
|
|
@@ -242,18 +239,18 @@ export default function decodeRecord(
|
|
|
242
239
|
? decodeDataSeries('RI')
|
|
243
240
|
: sliceHeader.parsedContent.refSeqId
|
|
244
241
|
|
|
245
|
-
const readLength = decodeDataSeries('RL')
|
|
242
|
+
const readLength = decodeDataSeries('RL')!
|
|
246
243
|
// if APDelta, will calculate the true start in a second pass
|
|
247
|
-
let alignmentStart = decodeDataSeries('AP')
|
|
244
|
+
let alignmentStart = decodeDataSeries('AP')!
|
|
248
245
|
if (compressionScheme.APdelta) {
|
|
249
246
|
alignmentStart = alignmentStart + cursors.lastAlignmentStart
|
|
250
247
|
}
|
|
251
248
|
cursors.lastAlignmentStart = alignmentStart
|
|
252
|
-
const readGroupId = decodeDataSeries('RG')
|
|
249
|
+
const readGroupId = decodeDataSeries('RG')!
|
|
253
250
|
|
|
254
251
|
let readName: string | undefined
|
|
255
252
|
if (compressionScheme.readNamesIncluded) {
|
|
256
|
-
readName = readNullTerminatedString(decodeDataSeries('RN'))
|
|
253
|
+
readName = readNullTerminatedString(decodeDataSeries('RN')!)
|
|
257
254
|
}
|
|
258
255
|
|
|
259
256
|
let mateToUse:
|
|
@@ -270,14 +267,14 @@ export default function decodeRecord(
|
|
|
270
267
|
if (CramFlagsDecoder.isDetached(cramFlags)) {
|
|
271
268
|
// note: the MF is a byte in 1.0, int32 in 2+, but once again this doesn't
|
|
272
269
|
// matter for javascript
|
|
273
|
-
const mateFlags = decodeDataSeries('MF')
|
|
270
|
+
const mateFlags = decodeDataSeries('MF')!
|
|
274
271
|
let mateReadName: string | undefined
|
|
275
272
|
if (!compressionScheme.readNamesIncluded) {
|
|
276
|
-
mateReadName = readNullTerminatedString(decodeDataSeries('RN'))
|
|
273
|
+
mateReadName = readNullTerminatedString(decodeDataSeries('RN')!)
|
|
277
274
|
readName = mateReadName
|
|
278
275
|
}
|
|
279
|
-
const mateSequenceId = decodeDataSeries('NS')
|
|
280
|
-
const mateAlignmentStart = decodeDataSeries('NP')
|
|
276
|
+
const mateSequenceId = decodeDataSeries('NS')!
|
|
277
|
+
const mateAlignmentStart = decodeDataSeries('NP')!
|
|
281
278
|
if (mateFlags || mateSequenceId > -1) {
|
|
282
279
|
mateToUse = {
|
|
283
280
|
mateFlags,
|
|
@@ -287,7 +284,7 @@ export default function decodeRecord(
|
|
|
287
284
|
}
|
|
288
285
|
}
|
|
289
286
|
|
|
290
|
-
templateSize = decodeDataSeries('TS')
|
|
287
|
+
templateSize = decodeDataSeries('TS')!
|
|
291
288
|
|
|
292
289
|
// set mate unmapped if needed
|
|
293
290
|
if (MateFlagsDecoder.isUnmapped(mateFlags)) {
|
|
@@ -300,12 +297,12 @@ export default function decodeRecord(
|
|
|
300
297
|
|
|
301
298
|
// detachedCount++
|
|
302
299
|
} else if (CramFlagsDecoder.isWithMateDownstream(cramFlags)) {
|
|
303
|
-
mateRecordNumber = decodeDataSeries('NF') + recordNumber + 1
|
|
300
|
+
mateRecordNumber = decodeDataSeries('NF')! + recordNumber + 1
|
|
304
301
|
}
|
|
305
302
|
|
|
306
303
|
// TODO: the aux tag parsing will have to be refactored if we want to support
|
|
307
304
|
// cram v1
|
|
308
|
-
const TLindex = decodeDataSeries('TL')
|
|
305
|
+
const TLindex = decodeDataSeries('TL')!
|
|
309
306
|
if (TLindex < 0) {
|
|
310
307
|
/* TODO: check nTL: TLindex >= compressionHeader.tagEncoding.size */
|
|
311
308
|
throw new CramMalformedError('invalid TL index')
|
|
@@ -324,7 +321,11 @@ export default function decodeRecord(
|
|
|
324
321
|
.getCodecForTag(tagId)
|
|
325
322
|
.decode(slice, coreDataBlock, blocksByContentId, cursors)
|
|
326
323
|
tags[tagName] =
|
|
327
|
-
|
|
324
|
+
tagData === undefined
|
|
325
|
+
? undefined
|
|
326
|
+
: typeof tagData === 'number'
|
|
327
|
+
? tagData
|
|
328
|
+
: parseTagData(tagType, tagData)
|
|
328
329
|
}
|
|
329
330
|
|
|
330
331
|
let readFeatures: ReadFeature[] | undefined
|
|
@@ -334,7 +335,7 @@ export default function decodeRecord(
|
|
|
334
335
|
let readBases = undefined
|
|
335
336
|
if (!BamFlagsDecoder.isSegmentUnmapped(flags)) {
|
|
336
337
|
// reading read features
|
|
337
|
-
const readFeatureCount = decodeDataSeries('FN')
|
|
338
|
+
const readFeatureCount = decodeDataSeries('FN')!
|
|
338
339
|
if (readFeatureCount) {
|
|
339
340
|
readFeatures = decodeReadFeatures(
|
|
340
341
|
alignmentStart,
|
|
@@ -369,11 +370,11 @@ export default function decodeRecord(
|
|
|
369
370
|
}
|
|
370
371
|
|
|
371
372
|
// mapping quality
|
|
372
|
-
mappingQuality = decodeDataSeries('MQ')
|
|
373
|
+
mappingQuality = decodeDataSeries('MQ')!
|
|
373
374
|
if (CramFlagsDecoder.isPreservingQualityScores(cramFlags)) {
|
|
374
375
|
qualityScores = new Array(readLength)
|
|
375
376
|
for (let i = 0; i < qualityScores.length; i++) {
|
|
376
|
-
qualityScores[i] = decodeDataSeries('QS')
|
|
377
|
+
qualityScores[i] = decodeDataSeries('QS')!
|
|
377
378
|
}
|
|
378
379
|
}
|
|
379
380
|
} else if (CramFlagsDecoder.isDecodeSequenceAsStar(cramFlags)) {
|
|
@@ -382,14 +383,14 @@ export default function decodeRecord(
|
|
|
382
383
|
} else {
|
|
383
384
|
const bases = new Array(readLength) as number[]
|
|
384
385
|
for (let i = 0; i < bases.length; i++) {
|
|
385
|
-
bases[i] = decodeDataSeries('BA')
|
|
386
|
+
bases[i] = decodeDataSeries('BA')!
|
|
386
387
|
}
|
|
387
388
|
readBases = String.fromCharCode(...bases)
|
|
388
389
|
|
|
389
390
|
if (CramFlagsDecoder.isPreservingQualityScores(cramFlags)) {
|
|
390
391
|
qualityScores = new Array(readLength)
|
|
391
392
|
for (let i = 0; i < bases.length; i++) {
|
|
392
|
-
qualityScores[i] = decodeDataSeries('QS')
|
|
393
|
+
qualityScores[i] = decodeDataSeries('QS')!
|
|
393
394
|
}
|
|
394
395
|
}
|
|
395
396
|
}
|
|
@@ -20,6 +20,13 @@ export type SliceHeader = CramFileBlock & {
|
|
|
20
20
|
parsedContent: MappedSliceHeader | UnmappedSliceHeader
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
+
interface RefRegion {
|
|
24
|
+
id: number
|
|
25
|
+
start: number
|
|
26
|
+
end: number
|
|
27
|
+
seq: string | null
|
|
28
|
+
}
|
|
29
|
+
|
|
23
30
|
/**
|
|
24
31
|
* @private
|
|
25
32
|
* Try to estimate the template length from a bunch of interrelated multi-segment reads.
|
|
@@ -191,16 +198,10 @@ export default class CramSlice {
|
|
|
191
198
|
const { majorVersion } = await this.file.getDefinition()
|
|
192
199
|
const sectionParsers = getSectionParsers(majorVersion)
|
|
193
200
|
const containerHeader = await this.container.getHeader()
|
|
194
|
-
if (!containerHeader) {
|
|
195
|
-
throw new Error('no container header detected')
|
|
196
|
-
}
|
|
197
201
|
|
|
198
202
|
const header = await this.file.readBlock(
|
|
199
203
|
containerHeader._endPosition + this.containerPosition,
|
|
200
204
|
)
|
|
201
|
-
if (header === undefined) {
|
|
202
|
-
throw new Error('block header undefined')
|
|
203
|
-
}
|
|
204
205
|
if (header.contentType === 'MAPPED_SLICE_HEADER') {
|
|
205
206
|
const content = parseItem(
|
|
206
207
|
header.content,
|
|
@@ -232,9 +233,6 @@ export default class CramSlice {
|
|
|
232
233
|
const blocks: CramFileBlock[] = new Array(header.parsedContent.numBlocks)
|
|
233
234
|
for (let i = 0; i < blocks.length; i++) {
|
|
234
235
|
const block = await this.file.readBlock(blockPosition)
|
|
235
|
-
if (block === undefined) {
|
|
236
|
-
throw new Error('block undefined')
|
|
237
|
-
}
|
|
238
236
|
blocks[i] = block
|
|
239
237
|
blockPosition = blocks[i]!._endPosition
|
|
240
238
|
}
|
|
@@ -404,21 +402,14 @@ export default class CramSlice {
|
|
|
404
402
|
T extends DataSeriesEncodingKey,
|
|
405
403
|
>(
|
|
406
404
|
dataSeriesName: T,
|
|
407
|
-
): DataTypeMapping[DataSeriesTypes[T]] => {
|
|
405
|
+
): DataTypeMapping[DataSeriesTypes[T]] | undefined => {
|
|
408
406
|
const codec = compressionScheme.getCodecForDataSeries(dataSeriesName)
|
|
409
407
|
if (!codec) {
|
|
410
408
|
throw new CramMalformedError(
|
|
411
409
|
`no codec defined for ${dataSeriesName} data series`,
|
|
412
410
|
)
|
|
413
411
|
}
|
|
414
|
-
|
|
415
|
-
const decoded = codec.decode(
|
|
416
|
-
this,
|
|
417
|
-
coreDataBlock,
|
|
418
|
-
blocksByContentId,
|
|
419
|
-
cursors,
|
|
420
|
-
)
|
|
421
|
-
return decoded
|
|
412
|
+
return codec.decode(this, coreDataBlock, blocksByContentId, cursors)
|
|
422
413
|
}
|
|
423
414
|
const records: CramRecord[] = new Array(
|
|
424
415
|
sliceHeader.parsedContent.numRecords,
|
|
@@ -457,16 +448,22 @@ export default class CramSlice {
|
|
|
457
448
|
}
|
|
458
449
|
|
|
459
450
|
// interpret `recordsToNextFragment` attributes to make standard `mate`
|
|
460
|
-
// objects
|
|
451
|
+
// objects
|
|
452
|
+
//
|
|
453
|
+
// Resolve mate pair cross-references between records in this slice
|
|
461
454
|
for (let i = 0; i < records.length; i += 1) {
|
|
462
|
-
const
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
455
|
+
const r = records[i]
|
|
456
|
+
// check for !!r added after removal of "stat" file size check: found
|
|
457
|
+
// some undefined entries
|
|
458
|
+
if (r) {
|
|
459
|
+
const { mateRecordNumber } = r
|
|
460
|
+
if (
|
|
461
|
+
mateRecordNumber !== undefined &&
|
|
462
|
+
mateRecordNumber >= 0 &&
|
|
463
|
+
records[mateRecordNumber]
|
|
464
|
+
) {
|
|
465
|
+
associateIntraSliceMate(records, i, r, records[mateRecordNumber])
|
|
466
|
+
}
|
|
470
467
|
}
|
|
471
468
|
}
|
|
472
469
|
|
|
@@ -501,10 +498,7 @@ export default class CramSlice {
|
|
|
501
498
|
if (compressionScheme === undefined) {
|
|
502
499
|
throw new Error('compression scheme undefined')
|
|
503
500
|
}
|
|
504
|
-
const refRegions: Record<
|
|
505
|
-
string,
|
|
506
|
-
{ id: number; start: number; end: number; seq: string | null }
|
|
507
|
-
> = {}
|
|
501
|
+
const refRegions: Record<string, RefRegion> = {}
|
|
508
502
|
|
|
509
503
|
// iterate over the records to find the spans of the reference
|
|
510
504
|
// sequences we need to fetch
|