@gmod/cram 1.6.3 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/dist/craiIndex.d.ts +19 -12
- package/dist/craiIndex.js +63 -123
- package/dist/craiIndex.js.map +1 -1
- package/dist/cram-bundle.js +2 -17
- package/dist/cram-bundle.js.LICENSE.txt +17 -0
- package/dist/cramFile/codecs/_base.d.ts +26 -5
- package/dist/cramFile/codecs/_base.js +3 -39
- package/dist/cramFile/codecs/_base.js.map +1 -1
- package/dist/cramFile/codecs/beta.d.ts +7 -3
- package/dist/cramFile/codecs/beta.js +13 -31
- package/dist/cramFile/codecs/beta.js.map +1 -1
- package/dist/cramFile/codecs/byteArrayLength.d.ts +13 -7
- package/dist/cramFile/codecs/byteArrayLength.js +22 -41
- package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/dist/cramFile/codecs/byteArrayStop.d.ts +9 -5
- package/dist/cramFile/codecs/byteArrayStop.js +25 -46
- package/dist/cramFile/codecs/byteArrayStop.js.map +1 -1
- package/dist/cramFile/codecs/dataSeriesTypes.d.ts +4 -0
- package/dist/cramFile/codecs/dataSeriesTypes.js +3 -0
- package/dist/cramFile/codecs/dataSeriesTypes.js.map +1 -0
- package/dist/cramFile/codecs/external.d.ts +10 -6
- package/dist/cramFile/codecs/external.js +26 -44
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/codecs/gamma.d.ts +7 -3
- package/dist/cramFile/codecs/gamma.js +16 -34
- package/dist/cramFile/codecs/gamma.js.map +1 -1
- package/dist/cramFile/codecs/getBits.d.ts +7 -0
- package/dist/cramFile/codecs/getBits.js +26 -0
- package/dist/cramFile/codecs/getBits.js.map +1 -0
- package/dist/cramFile/codecs/huffman.d.ts +17 -13
- package/dist/cramFile/codecs/huffman.js +76 -85
- package/dist/cramFile/codecs/huffman.js.map +1 -1
- package/dist/cramFile/codecs/index.d.ts +4 -2
- package/dist/cramFile/codecs/index.js +12 -13
- package/dist/cramFile/codecs/index.js.map +1 -1
- package/dist/cramFile/codecs/subexp.d.ts +7 -3
- package/dist/cramFile/codecs/subexp.js +19 -36
- package/dist/cramFile/codecs/subexp.js.map +1 -1
- package/dist/cramFile/constants.d.ts +35 -35
- package/dist/cramFile/constants.js +1 -1
- package/dist/cramFile/constants.js.map +1 -1
- package/dist/cramFile/container/compressionScheme.d.ts +57 -11
- package/dist/cramFile/container/compressionScheme.js +37 -32
- package/dist/cramFile/container/compressionScheme.js.map +1 -1
- package/dist/cramFile/container/index.d.ts +23 -9
- package/dist/cramFile/container/index.js +74 -144
- package/dist/cramFile/container/index.js.map +1 -1
- package/dist/cramFile/encoding.d.ts +78 -0
- package/dist/cramFile/encoding.js +3 -0
- package/dist/cramFile/encoding.js.map +1 -0
- package/dist/cramFile/file.d.ts +91 -41
- package/dist/cramFile/file.js +234 -368
- package/dist/cramFile/file.js.map +1 -1
- package/dist/cramFile/filehandle.d.ts +2 -0
- package/dist/cramFile/filehandle.js +3 -0
- package/dist/cramFile/filehandle.js.map +1 -0
- package/dist/cramFile/index.d.ts +1 -1
- package/dist/cramFile/index.js +1 -1
- package/dist/cramFile/index.js.map +1 -1
- package/dist/cramFile/record.d.ts +61 -17
- package/dist/cramFile/record.js +153 -77
- package/dist/cramFile/record.js.map +1 -1
- package/dist/cramFile/sectionParsers.d.ts +99 -8
- package/dist/cramFile/sectionParsers.js +70 -80
- package/dist/cramFile/sectionParsers.js.map +1 -1
- package/dist/cramFile/slice/decodeRecord.d.ts +30 -2
- package/dist/cramFile/slice/decodeRecord.js +148 -118
- package/dist/cramFile/slice/decodeRecord.js.map +1 -1
- package/dist/cramFile/slice/index.d.ts +21 -14
- package/dist/cramFile/slice/index.js +286 -381
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/cramFile/util.d.ts +11 -5
- package/dist/cramFile/util.js +19 -97
- package/dist/cramFile/util.js.map +1 -1
- package/dist/errors.d.ts +5 -10
- package/dist/errors.js +11 -62
- package/dist/errors.js.map +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.js +3 -3
- package/dist/index.js.map +1 -1
- package/dist/indexedCramFile.d.ts +37 -12
- package/dist/indexedCramFile.js +114 -154
- package/dist/indexedCramFile.js.map +1 -1
- package/dist/io/index.d.ts +5 -5
- package/dist/io/index.js +9 -9
- package/dist/io/index.js.map +1 -1
- package/dist/rans/constants.js +3 -3
- package/dist/rans/constants.js.map +1 -1
- package/dist/rans/d04.js +15 -15
- package/dist/rans/d04.js.map +1 -1
- package/dist/rans/d14.js +21 -21
- package/dist/rans/d14.js.map +1 -1
- package/dist/rans/decoding.js +27 -30
- package/dist/rans/decoding.js.map +1 -1
- package/dist/rans/frequencies.js +11 -11
- package/dist/rans/frequencies.js.map +1 -1
- package/dist/rans/index.js +46 -49
- package/dist/rans/index.js.map +1 -1
- package/dist/sam.d.ts +8 -1
- package/dist/sam.js +7 -7
- package/dist/sam.js.map +1 -1
- package/dist/typescript.d.ts +3 -0
- package/dist/typescript.js +11 -0
- package/dist/typescript.js.map +1 -0
- package/dist/unzip-pako.js +1 -1
- package/dist/unzip-pako.js.map +1 -1
- package/dist/unzip.js +1 -1
- package/dist/unzip.js.map +1 -1
- package/errors.js +11 -62
- package/esm/craiIndex.d.ts +19 -12
- package/esm/craiIndex.js +8 -24
- package/esm/craiIndex.js.map +1 -1
- package/esm/cramFile/codecs/_base.d.ts +26 -5
- package/esm/cramFile/codecs/_base.js +1 -35
- package/esm/cramFile/codecs/_base.js.map +1 -1
- package/esm/cramFile/codecs/beta.d.ts +7 -3
- package/esm/cramFile/codecs/beta.js +4 -3
- package/esm/cramFile/codecs/beta.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayLength.d.ts +13 -7
- package/esm/cramFile/codecs/byteArrayLength.js +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayStop.d.ts +9 -5
- package/esm/cramFile/codecs/byteArrayStop.js +7 -9
- package/esm/cramFile/codecs/byteArrayStop.js.map +1 -1
- package/esm/cramFile/codecs/dataSeriesTypes.d.ts +4 -0
- package/esm/cramFile/codecs/dataSeriesTypes.js +2 -0
- package/esm/cramFile/codecs/dataSeriesTypes.js.map +1 -0
- package/esm/cramFile/codecs/external.d.ts +10 -6
- package/esm/cramFile/codecs/external.js +4 -3
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/codecs/gamma.d.ts +7 -3
- package/esm/cramFile/codecs/gamma.js +5 -4
- package/esm/cramFile/codecs/gamma.js.map +1 -1
- package/esm/cramFile/codecs/getBits.d.ts +7 -0
- package/esm/cramFile/codecs/getBits.js +21 -0
- package/esm/cramFile/codecs/getBits.js.map +1 -0
- package/esm/cramFile/codecs/huffman.d.ts +17 -13
- package/esm/cramFile/codecs/huffman.js +22 -9
- package/esm/cramFile/codecs/huffman.js.map +1 -1
- package/esm/cramFile/codecs/index.d.ts +4 -2
- package/esm/cramFile/codecs/index.js +1 -1
- package/esm/cramFile/codecs/index.js.map +1 -1
- package/esm/cramFile/codecs/subexp.d.ts +7 -3
- package/esm/cramFile/codecs/subexp.js +7 -5
- package/esm/cramFile/codecs/subexp.js.map +1 -1
- package/esm/cramFile/constants.d.ts +35 -35
- package/esm/cramFile/constants.js.map +1 -1
- package/esm/cramFile/container/compressionScheme.d.ts +57 -11
- package/esm/cramFile/container/compressionScheme.js +15 -8
- package/esm/cramFile/container/compressionScheme.js.map +1 -1
- package/esm/cramFile/container/index.d.ts +23 -9
- package/esm/cramFile/container/index.js +11 -9
- package/esm/cramFile/container/index.js.map +1 -1
- package/esm/cramFile/encoding.d.ts +78 -0
- package/esm/cramFile/encoding.js +2 -0
- package/esm/cramFile/encoding.js.map +1 -0
- package/esm/cramFile/file.d.ts +91 -41
- package/esm/cramFile/file.js +59 -47
- package/esm/cramFile/file.js.map +1 -1
- package/esm/cramFile/filehandle.d.ts +2 -0
- package/esm/cramFile/filehandle.js +2 -0
- package/esm/cramFile/filehandle.js.map +1 -0
- package/esm/cramFile/index.d.ts +1 -1
- package/esm/cramFile/index.js.map +1 -1
- package/esm/cramFile/record.d.ts +61 -17
- package/esm/cramFile/record.js +83 -5
- package/esm/cramFile/record.js.map +1 -1
- package/esm/cramFile/sectionParsers.d.ts +99 -8
- package/esm/cramFile/sectionParsers.js +7 -17
- package/esm/cramFile/sectionParsers.js.map +1 -1
- package/esm/cramFile/slice/decodeRecord.d.ts +30 -2
- package/esm/cramFile/slice/decodeRecord.js +102 -70
- package/esm/cramFile/slice/decodeRecord.js.map +1 -1
- package/esm/cramFile/slice/index.d.ts +21 -14
- package/esm/cramFile/slice/index.js +77 -38
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/cramFile/util.d.ts +11 -5
- package/esm/cramFile/util.js +11 -82
- package/esm/cramFile/util.js.map +1 -1
- package/esm/errors.d.ts +5 -10
- package/esm/errors.js +0 -5
- package/esm/errors.js.map +1 -1
- package/esm/index.d.ts +3 -3
- package/esm/index.js.map +1 -1
- package/esm/indexedCramFile.d.ts +37 -12
- package/esm/indexedCramFile.js +19 -8
- package/esm/indexedCramFile.js.map +1 -1
- package/esm/io/index.d.ts +5 -5
- package/esm/io/index.js +3 -3
- package/esm/io/index.js.map +1 -1
- package/esm/sam.d.ts +8 -1
- package/esm/sam.js.map +1 -1
- package/esm/typescript.d.ts +3 -0
- package/esm/typescript.js +7 -0
- package/esm/typescript.js.map +1 -0
- package/package.json +18 -11
- package/src/{craiIndex.js → craiIndex.ts} +37 -31
- package/src/cramFile/codecs/_base.ts +45 -0
- package/src/cramFile/codecs/beta.ts +34 -0
- package/src/cramFile/codecs/{byteArrayLength.js → byteArrayLength.ts} +27 -5
- package/src/cramFile/codecs/{byteArrayStop.js → byteArrayStop.ts} +25 -12
- package/src/cramFile/codecs/dataSeriesTypes.ts +39 -0
- package/src/cramFile/codecs/{external.js → external.ts} +28 -12
- package/src/cramFile/codecs/gamma.ts +42 -0
- package/src/cramFile/codecs/getBits.ts +28 -0
- package/src/cramFile/codecs/{huffman.js → huffman.ts} +48 -15
- package/src/cramFile/codecs/{index.js → index.ts} +9 -3
- package/src/cramFile/codecs/subexp.ts +45 -0
- package/src/cramFile/{constants.js → constants.ts} +0 -0
- package/src/cramFile/container/{compressionScheme.js → compressionScheme.ts} +50 -18
- package/src/cramFile/container/{index.js → index.ts} +13 -13
- package/src/cramFile/encoding.ts +98 -0
- package/src/cramFile/{file.js → file.ts} +136 -62
- package/src/cramFile/filehandle.ts +3 -0
- package/src/cramFile/{index.js → index.ts} +0 -0
- package/src/cramFile/{record.js → record.ts} +185 -14
- package/src/cramFile/{sectionParsers.js → sectionParsers.ts} +148 -20
- package/src/cramFile/slice/{decodeRecord.js → decodeRecord.ts} +158 -105
- package/src/cramFile/slice/{index.js → index.ts} +138 -63
- package/src/cramFile/{util.js → util.ts} +28 -17
- package/src/{errors.js → errors.ts} +0 -5
- package/src/{index.js → index.ts} +0 -0
- package/src/{indexedCramFile.js → indexedCramFile.ts} +79 -19
- package/src/io/{index.js → index.ts} +10 -5
- package/src/{sam.js → sam.ts} +7 -2
- package/src/typescript.ts +17 -0
- package/src/typings/binary-parser.d.ts +44 -0
- package/src/typings/bzip2.d.ts +7 -0
- package/src/typings/htscodecs.d.ts +6 -0
- package/dist/io/bufferCache.d.ts +0 -12
- package/dist/io/bufferCache.js +0 -112
- package/dist/io/bufferCache.js.map +0 -1
- package/dist/io/localFile.d.ts +0 -10
- package/dist/io/localFile.js +0 -108
- package/dist/io/localFile.js.map +0 -1
- package/dist/io/remoteFile.d.ts +0 -16
- package/dist/io/remoteFile.js +0 -143
- package/dist/io/remoteFile.js.map +0 -1
- package/esm/io/bufferCache.d.ts +0 -12
- package/esm/io/bufferCache.js +0 -54
- package/esm/io/bufferCache.js.map +0 -1
- package/esm/io/localFile.d.ts +0 -10
- package/esm/io/localFile.js +0 -31
- package/esm/io/localFile.js.map +0 -1
- package/esm/io/remoteFile.d.ts +0 -16
- package/esm/io/remoteFile.js +0 -64
- package/esm/io/remoteFile.js.map +0 -1
- package/src/cramFile/codecs/_base.js +0 -49
- package/src/cramFile/codecs/beta.js +0 -23
- package/src/cramFile/codecs/gamma.js +0 -30
- package/src/cramFile/codecs/subexp.js +0 -32
- package/src/io/bufferCache.js +0 -66
- package/src/io/localFile.js +0 -35
- package/src/io/remoteFile.js +0 -71
|
@@ -1,12 +1,24 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
CramBufferOverrunError,
|
|
4
|
-
CramArgumentError,
|
|
5
|
-
} from '../../errors'
|
|
6
|
-
import { parseItem, tinyMemoize, sequenceMD5 } from '../util'
|
|
1
|
+
import { CramArgumentError, CramMalformedError } from '../../errors'
|
|
2
|
+
import { parseItem, sequenceMD5, tinyMemoize } from '../util'
|
|
7
3
|
|
|
8
4
|
import Constants from '../constants'
|
|
9
|
-
import decodeRecord from './decodeRecord'
|
|
5
|
+
import decodeRecord, { DataSeriesDecoder } from './decodeRecord'
|
|
6
|
+
import CramRecord from '../record'
|
|
7
|
+
import CramContainer from '../container'
|
|
8
|
+
import CramFile, { CramFileBlock } from '../file'
|
|
9
|
+
import {
|
|
10
|
+
isMappedSliceHeader,
|
|
11
|
+
MappedSliceHeader,
|
|
12
|
+
UnmappedSliceHeader,
|
|
13
|
+
} from '../sectionParsers'
|
|
14
|
+
import { CramBufferOverrunError } from '../codecs/getBits'
|
|
15
|
+
import { Cursors, DataTypeMapping } from '../codecs/_base'
|
|
16
|
+
import { DataSeriesEncodingKey } from '../codecs/dataSeriesTypes'
|
|
17
|
+
import { DataSeriesTypes } from '../container/compressionScheme'
|
|
18
|
+
|
|
19
|
+
export type SliceHeader = CramFileBlock & {
|
|
20
|
+
parsedContent: MappedSliceHeader | UnmappedSliceHeader
|
|
21
|
+
}
|
|
10
22
|
|
|
11
23
|
/**
|
|
12
24
|
* @private
|
|
@@ -16,13 +28,16 @@ import decodeRecord from './decodeRecord'
|
|
|
16
28
|
* @param {CramRecord} thisRecord
|
|
17
29
|
*/
|
|
18
30
|
function calculateMultiSegmentMatedTemplateLength(
|
|
19
|
-
allRecords,
|
|
20
|
-
currentRecordNumber,
|
|
21
|
-
thisRecord,
|
|
31
|
+
allRecords: CramRecord[],
|
|
32
|
+
currentRecordNumber: number,
|
|
33
|
+
thisRecord: CramRecord,
|
|
22
34
|
) {
|
|
23
|
-
function getAllMatedRecords(startRecord) {
|
|
35
|
+
function getAllMatedRecords(startRecord: CramRecord) {
|
|
24
36
|
const records = [startRecord]
|
|
25
|
-
if (
|
|
37
|
+
if (
|
|
38
|
+
startRecord.mateRecordNumber !== undefined &&
|
|
39
|
+
startRecord.mateRecordNumber >= 0
|
|
40
|
+
) {
|
|
26
41
|
const mateRecord = allRecords[startRecord.mateRecordNumber]
|
|
27
42
|
if (!mateRecord) {
|
|
28
43
|
throw new CramMalformedError(
|
|
@@ -57,7 +72,10 @@ function calculateMultiSegmentMatedTemplateLength(
|
|
|
57
72
|
* @param {CramRecord} thisRecord
|
|
58
73
|
* @param {CramRecord} mateRecord
|
|
59
74
|
*/
|
|
60
|
-
function calculateIntraSliceMatePairTemplateLength(
|
|
75
|
+
function calculateIntraSliceMatePairTemplateLength(
|
|
76
|
+
thisRecord: CramRecord,
|
|
77
|
+
mateRecord: CramRecord,
|
|
78
|
+
) {
|
|
61
79
|
// this just estimates the template length by using the simple (non-gapped) end coordinate of each
|
|
62
80
|
// read, because gapping in the alignment doesn't mean the template is longer or shorter
|
|
63
81
|
const start = Math.min(thisRecord.alignmentStart, mateRecord.alignmentStart)
|
|
@@ -76,10 +94,10 @@ function calculateIntraSliceMatePairTemplateLength(thisRecord, mateRecord) {
|
|
|
76
94
|
* just one record having the index in the slice of its mate
|
|
77
95
|
*/
|
|
78
96
|
function associateIntraSliceMate(
|
|
79
|
-
allRecords,
|
|
80
|
-
currentRecordNumber,
|
|
81
|
-
thisRecord,
|
|
82
|
-
mateRecord,
|
|
97
|
+
allRecords: CramRecord[],
|
|
98
|
+
currentRecordNumber: number,
|
|
99
|
+
thisRecord: CramRecord,
|
|
100
|
+
mateRecord: CramRecord,
|
|
83
101
|
) {
|
|
84
102
|
if (!mateRecord) {
|
|
85
103
|
throw new CramMalformedError(
|
|
@@ -162,40 +180,48 @@ function associateIntraSliceMate(
|
|
|
162
180
|
}
|
|
163
181
|
|
|
164
182
|
export default class CramSlice {
|
|
165
|
-
|
|
166
|
-
|
|
183
|
+
private file: CramFile
|
|
184
|
+
|
|
185
|
+
constructor(
|
|
186
|
+
public container: CramContainer,
|
|
187
|
+
public containerPosition: number,
|
|
188
|
+
_unused: number,
|
|
189
|
+
) {
|
|
167
190
|
this.file = container.file
|
|
168
|
-
this.containerPosition = position
|
|
169
191
|
}
|
|
170
192
|
|
|
171
193
|
// memoize
|
|
172
|
-
async getHeader() {
|
|
194
|
+
async getHeader(): Promise<SliceHeader> {
|
|
173
195
|
// fetch and parse the slice header
|
|
174
196
|
const sectionParsers = await this.file.getSectionParsers()
|
|
175
197
|
const containerHeader = await this.container.getHeader()
|
|
176
198
|
const header = await this.file.readBlock(
|
|
177
199
|
containerHeader._endPosition + this.containerPosition,
|
|
178
200
|
)
|
|
201
|
+
if (header === undefined) {
|
|
202
|
+
throw new Error()
|
|
203
|
+
}
|
|
179
204
|
if (header.contentType === 'MAPPED_SLICE_HEADER') {
|
|
180
|
-
|
|
205
|
+
const content = parseItem(
|
|
181
206
|
header.content,
|
|
182
207
|
sectionParsers.cramMappedSliceHeader.parser,
|
|
183
208
|
0,
|
|
184
209
|
containerHeader._endPosition,
|
|
185
210
|
)
|
|
211
|
+
return { ...header, parsedContent: content }
|
|
186
212
|
} else if (header.contentType === 'UNMAPPED_SLICE_HEADER') {
|
|
187
|
-
|
|
213
|
+
const content = parseItem(
|
|
188
214
|
header.content,
|
|
189
215
|
sectionParsers.cramUnmappedSliceHeader.parser,
|
|
190
216
|
0,
|
|
191
217
|
containerHeader._endPosition,
|
|
192
218
|
)
|
|
219
|
+
return { ...header, parsedContent: content }
|
|
193
220
|
} else {
|
|
194
221
|
throw new CramMalformedError(
|
|
195
|
-
`error reading slice header block, invalid content type ${header.
|
|
222
|
+
`error reading slice header block, invalid content type ${header.contentType}`,
|
|
196
223
|
)
|
|
197
224
|
}
|
|
198
|
-
return header
|
|
199
225
|
}
|
|
200
226
|
|
|
201
227
|
// memoize
|
|
@@ -203,9 +229,13 @@ export default class CramSlice {
|
|
|
203
229
|
const header = await this.getHeader()
|
|
204
230
|
// read all the blocks into memory and store them
|
|
205
231
|
let blockPosition = header._endPosition
|
|
206
|
-
const blocks = new Array(header.
|
|
232
|
+
const blocks: CramFileBlock[] = new Array(header.parsedContent.numBlocks)
|
|
207
233
|
for (let i = 0; i < blocks.length; i += 1) {
|
|
208
|
-
|
|
234
|
+
const block = await this.file.readBlock(blockPosition)
|
|
235
|
+
if (block === undefined) {
|
|
236
|
+
throw new Error()
|
|
237
|
+
}
|
|
238
|
+
blocks[i] = block
|
|
209
239
|
blockPosition = blocks[i]._endPosition
|
|
210
240
|
}
|
|
211
241
|
|
|
@@ -220,9 +250,9 @@ export default class CramSlice {
|
|
|
220
250
|
}
|
|
221
251
|
|
|
222
252
|
// memoize
|
|
223
|
-
async _getBlocksContentIdIndex() {
|
|
253
|
+
async _getBlocksContentIdIndex(): Promise<Record<number, CramFileBlock>> {
|
|
224
254
|
const blocks = await this.getBlocks()
|
|
225
|
-
const blocksByContentId = {}
|
|
255
|
+
const blocksByContentId: Record<number, CramFileBlock> = {}
|
|
226
256
|
blocks.forEach(block => {
|
|
227
257
|
if (block.contentType === 'EXTERNAL_DATA') {
|
|
228
258
|
blocksByContentId[block.contentId] = block
|
|
@@ -231,37 +261,47 @@ export default class CramSlice {
|
|
|
231
261
|
return blocksByContentId
|
|
232
262
|
}
|
|
233
263
|
|
|
234
|
-
async getBlockByContentId(id) {
|
|
264
|
+
async getBlockByContentId(id: number) {
|
|
235
265
|
const blocksByContentId = await this._getBlocksContentIdIndex()
|
|
236
266
|
return blocksByContentId[id]
|
|
237
267
|
}
|
|
238
268
|
|
|
239
269
|
async getReferenceRegion() {
|
|
240
270
|
// read the slice header
|
|
241
|
-
const sliceHeader = (await this.getHeader()).
|
|
271
|
+
const sliceHeader = (await this.getHeader()).parsedContent
|
|
272
|
+
if (!isMappedSliceHeader(sliceHeader)) {
|
|
273
|
+
throw new Error()
|
|
274
|
+
}
|
|
242
275
|
|
|
243
276
|
if (sliceHeader.refSeqId < 0) {
|
|
244
277
|
return undefined
|
|
245
278
|
}
|
|
246
279
|
|
|
247
280
|
const compressionScheme = await this.container.getCompressionScheme()
|
|
281
|
+
if (compressionScheme === undefined) {
|
|
282
|
+
throw new Error()
|
|
283
|
+
}
|
|
248
284
|
|
|
249
285
|
// console.log(JSON.stringify(sliceHeader, null, ' '))
|
|
250
286
|
|
|
251
287
|
if (sliceHeader.refBaseBlockId >= 0) {
|
|
252
|
-
const refBlock = this.getBlockByContentId(
|
|
288
|
+
const refBlock = await this.getBlockByContentId(
|
|
289
|
+
sliceHeader.refBaseBlockId,
|
|
290
|
+
)
|
|
253
291
|
if (!refBlock) {
|
|
254
292
|
throw new CramMalformedError(
|
|
255
293
|
'embedded reference specified, but reference block does not exist',
|
|
256
294
|
)
|
|
257
295
|
}
|
|
258
296
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
297
|
+
// TODO: we do not read anything named 'span'
|
|
298
|
+
// if (sliceHeader.span > refBlock.uncompressedSize) {
|
|
299
|
+
// throw new CramMalformedError('Embedded reference is too small')
|
|
300
|
+
// }
|
|
262
301
|
|
|
302
|
+
// TODO verify
|
|
263
303
|
return {
|
|
264
|
-
seq: refBlock.data.toString('utf8'),
|
|
304
|
+
seq: (refBlock as any).data.toString('utf8'),
|
|
265
305
|
start: sliceHeader.refSeqStart,
|
|
266
306
|
end: sliceHeader.refSeqStart + sliceHeader.refSeqSpan - 1,
|
|
267
307
|
span: sliceHeader.refSeqSpan,
|
|
@@ -308,8 +348,14 @@ export default class CramSlice {
|
|
|
308
348
|
const { majorVersion } = await this.file.getDefinition()
|
|
309
349
|
|
|
310
350
|
const compressionScheme = await this.container.getCompressionScheme()
|
|
351
|
+
if (compressionScheme === undefined) {
|
|
352
|
+
throw new Error()
|
|
353
|
+
}
|
|
311
354
|
|
|
312
355
|
const sliceHeader = await this.getHeader()
|
|
356
|
+
if (sliceHeader === undefined) {
|
|
357
|
+
throw new Error()
|
|
358
|
+
}
|
|
313
359
|
|
|
314
360
|
const blocksByContentId = await this._getBlocksContentIdIndex()
|
|
315
361
|
|
|
@@ -317,19 +363,20 @@ export default class CramSlice {
|
|
|
317
363
|
if (
|
|
318
364
|
majorVersion > 1 &&
|
|
319
365
|
this.file.options.checkSequenceMD5 &&
|
|
320
|
-
sliceHeader.
|
|
321
|
-
sliceHeader.
|
|
366
|
+
isMappedSliceHeader(sliceHeader.parsedContent) &&
|
|
367
|
+
sliceHeader.parsedContent.refSeqId >= 0 &&
|
|
368
|
+
sliceHeader.parsedContent.md5.join('') !== '0000000000000000'
|
|
322
369
|
) {
|
|
323
370
|
const refRegion = await this.getReferenceRegion()
|
|
324
371
|
if (refRegion) {
|
|
325
372
|
const { seq, start, end } = refRegion
|
|
326
373
|
const seqMd5 = sequenceMD5(seq)
|
|
327
|
-
const storedMd5 = sliceHeader.
|
|
374
|
+
const storedMd5 = sliceHeader.parsedContent.md5
|
|
328
375
|
.map(byte => (byte < 16 ? '0' : '') + byte.toString(16))
|
|
329
376
|
.join('')
|
|
330
377
|
if (seqMd5 !== storedMd5) {
|
|
331
378
|
throw new CramMalformedError(
|
|
332
|
-
`MD5 checksum reference mismatch for ref ${sliceHeader.
|
|
379
|
+
`MD5 checksum reference mismatch for ref ${sliceHeader.parsedContent.refSeqId} pos ${start}..${end}. recorded MD5: ${storedMd5}, calculated MD5: ${seqMd5}`,
|
|
333
380
|
)
|
|
334
381
|
}
|
|
335
382
|
}
|
|
@@ -340,12 +387,14 @@ export default class CramSlice {
|
|
|
340
387
|
// data note that we are only decoding a single block here, the core
|
|
341
388
|
// data block
|
|
342
389
|
const coreDataBlock = await this.getCoreDataBlock()
|
|
343
|
-
const cursors = {
|
|
344
|
-
lastAlignmentStart: sliceHeader.
|
|
390
|
+
const cursors: Cursors = {
|
|
391
|
+
lastAlignmentStart: isMappedSliceHeader(sliceHeader.parsedContent)
|
|
392
|
+
? sliceHeader.parsedContent.refSeqStart
|
|
393
|
+
: 0,
|
|
345
394
|
coreBlock: { bitPosition: 7, bytePosition: 0 },
|
|
346
395
|
externalBlocks: {
|
|
347
396
|
map: new Map(),
|
|
348
|
-
getCursor(contentId) {
|
|
397
|
+
getCursor(contentId: number) {
|
|
349
398
|
let r = this.map.get(contentId)
|
|
350
399
|
if (r === undefined) {
|
|
351
400
|
r = { bitPosition: 7, bytePosition: 0 }
|
|
@@ -356,7 +405,11 @@ export default class CramSlice {
|
|
|
356
405
|
},
|
|
357
406
|
}
|
|
358
407
|
|
|
359
|
-
const decodeDataSeries =
|
|
408
|
+
const decodeDataSeries: DataSeriesDecoder = <
|
|
409
|
+
T extends DataSeriesEncodingKey,
|
|
410
|
+
>(
|
|
411
|
+
dataSeriesName: T,
|
|
412
|
+
): DataTypeMapping[DataSeriesTypes[T]] => {
|
|
360
413
|
const codec = compressionScheme.getCodecForDataSeries(dataSeriesName)
|
|
361
414
|
if (!codec) {
|
|
362
415
|
throw new CramMalformedError(
|
|
@@ -364,12 +417,18 @@ export default class CramSlice {
|
|
|
364
417
|
)
|
|
365
418
|
}
|
|
366
419
|
// console.log(dataSeriesName, Object.getPrototypeOf(codec))
|
|
367
|
-
|
|
420
|
+
const decoded = codec.decode(
|
|
421
|
+
this,
|
|
422
|
+
coreDataBlock,
|
|
423
|
+
blocksByContentId,
|
|
424
|
+
cursors,
|
|
425
|
+
)
|
|
426
|
+
return decoded
|
|
368
427
|
}
|
|
369
|
-
let records = new Array(sliceHeader.
|
|
428
|
+
let records: CramRecord[] = new Array(sliceHeader.parsedContent.numRecords)
|
|
370
429
|
for (let i = 0; i < records.length; i += 1) {
|
|
371
430
|
try {
|
|
372
|
-
|
|
431
|
+
const init = decodeRecord(
|
|
373
432
|
this,
|
|
374
433
|
decodeDataSeries,
|
|
375
434
|
compressionScheme,
|
|
@@ -380,11 +439,14 @@ export default class CramSlice {
|
|
|
380
439
|
majorVersion,
|
|
381
440
|
i,
|
|
382
441
|
)
|
|
383
|
-
records[i]
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
442
|
+
records[i] = new CramRecord({
|
|
443
|
+
...init,
|
|
444
|
+
uniqueId:
|
|
445
|
+
sliceHeader.contentPosition +
|
|
446
|
+
sliceHeader.parsedContent.recordCounter +
|
|
447
|
+
i +
|
|
448
|
+
1,
|
|
449
|
+
})
|
|
388
450
|
} catch (e) {
|
|
389
451
|
if (e instanceof CramBufferOverrunError) {
|
|
390
452
|
console.warn(
|
|
@@ -402,7 +464,7 @@ export default class CramSlice {
|
|
|
402
464
|
// objects Resolve mate pair cross-references between records in this slice
|
|
403
465
|
for (let i = 0; i < records.length; i += 1) {
|
|
404
466
|
const { mateRecordNumber } = records[i]
|
|
405
|
-
if (mateRecordNumber >= 0) {
|
|
467
|
+
if (mateRecordNumber !== undefined && mateRecordNumber >= 0) {
|
|
406
468
|
associateIntraSliceMate(
|
|
407
469
|
records,
|
|
408
470
|
i,
|
|
@@ -415,30 +477,38 @@ export default class CramSlice {
|
|
|
415
477
|
return records
|
|
416
478
|
}
|
|
417
479
|
|
|
418
|
-
async getRecords(filterFunction) {
|
|
480
|
+
async getRecords(filterFunction: (r: CramRecord) => boolean) {
|
|
419
481
|
// fetch the features if necessary, using the file-level feature cache
|
|
420
482
|
const cacheKey = this.container.filePosition + this.containerPosition
|
|
421
|
-
let recordsPromise = this.file.featureCache.get(cacheKey)
|
|
483
|
+
let recordsPromise = this.file.featureCache.get(cacheKey.toString())
|
|
422
484
|
if (!recordsPromise) {
|
|
423
485
|
recordsPromise = this._fetchRecords()
|
|
424
|
-
this.file.featureCache.set(cacheKey, recordsPromise)
|
|
486
|
+
this.file.featureCache.set(cacheKey.toString(), recordsPromise)
|
|
425
487
|
}
|
|
426
488
|
|
|
427
|
-
const
|
|
489
|
+
const unfiltered = await recordsPromise
|
|
490
|
+
const records = unfiltered.filter(filterFunction)
|
|
428
491
|
|
|
429
492
|
// if we can fetch reference sequence, add the reference sequence to the records
|
|
430
493
|
if (records.length && this.file.fetchReferenceSequenceCallback) {
|
|
431
494
|
const sliceHeader = await this.getHeader()
|
|
432
495
|
if (
|
|
433
|
-
sliceHeader.
|
|
434
|
-
sliceHeader.
|
|
496
|
+
isMappedSliceHeader(sliceHeader.parsedContent) &&
|
|
497
|
+
(sliceHeader.parsedContent.refSeqId >= 0 || // single-ref slice
|
|
498
|
+
sliceHeader.parsedContent.refSeqId === -2) // multi-ref slice
|
|
435
499
|
) {
|
|
436
500
|
const singleRefId =
|
|
437
|
-
sliceHeader.
|
|
438
|
-
? sliceHeader.
|
|
501
|
+
sliceHeader.parsedContent.refSeqId >= 0
|
|
502
|
+
? sliceHeader.parsedContent.refSeqId
|
|
439
503
|
: undefined
|
|
440
504
|
const compressionScheme = await this.container.getCompressionScheme()
|
|
441
|
-
|
|
505
|
+
if (compressionScheme === undefined) {
|
|
506
|
+
throw new Error()
|
|
507
|
+
}
|
|
508
|
+
const refRegions: Record<
|
|
509
|
+
string,
|
|
510
|
+
{ id: number; start: number; end: number; seq: string | null }
|
|
511
|
+
> = {} // seqId => { start, end, seq }
|
|
442
512
|
|
|
443
513
|
// iterate over the records to find the spans of the reference sequences we need to fetch
|
|
444
514
|
for (let i = 0; i < records.length; i += 1) {
|
|
@@ -450,6 +520,7 @@ export default class CramSlice {
|
|
|
450
520
|
id: seqId,
|
|
451
521
|
start: records[i].alignmentStart,
|
|
452
522
|
end: -Infinity,
|
|
523
|
+
seq: null,
|
|
453
524
|
}
|
|
454
525
|
refRegions[seqId] = refRegion
|
|
455
526
|
}
|
|
@@ -485,7 +556,11 @@ export default class CramSlice {
|
|
|
485
556
|
singleRefId !== undefined ? singleRefId : records[i].sequenceId
|
|
486
557
|
const refRegion = refRegions[seqId]
|
|
487
558
|
if (refRegion && refRegion.seq) {
|
|
488
|
-
|
|
559
|
+
const seq = refRegion.seq
|
|
560
|
+
records[i].addReferenceSequence(
|
|
561
|
+
{ ...refRegion, seq },
|
|
562
|
+
compressionScheme,
|
|
563
|
+
)
|
|
489
564
|
}
|
|
490
565
|
}
|
|
491
566
|
}
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import md5 from 'md5'
|
|
2
|
-
import {
|
|
2
|
+
import { Parser } from '@gmod/binary-parser'
|
|
3
|
+
import { CramBufferOverrunError } from './codecs/getBits'
|
|
3
4
|
|
|
4
|
-
export function itf8Size(v) {
|
|
5
|
+
export function itf8Size(v: number) {
|
|
5
6
|
if (!(v & ~0x7f)) {
|
|
6
7
|
return 1
|
|
7
8
|
}
|
|
@@ -17,21 +18,24 @@ export function itf8Size(v) {
|
|
|
17
18
|
return 5
|
|
18
19
|
}
|
|
19
20
|
|
|
20
|
-
export function parseItf8(
|
|
21
|
+
export function parseItf8(
|
|
22
|
+
buffer: Uint8Array,
|
|
23
|
+
initialOffset: number,
|
|
24
|
+
): [number, number] {
|
|
21
25
|
let offset = initialOffset
|
|
22
26
|
const countFlags = buffer[offset]
|
|
23
27
|
let result
|
|
24
28
|
if (countFlags < 0x80) {
|
|
25
29
|
result = countFlags
|
|
26
|
-
offset
|
|
30
|
+
offset = offset + 1
|
|
27
31
|
} else if (countFlags < 0xc0) {
|
|
28
32
|
result = ((countFlags << 8) | buffer[offset + 1]) & 0x3fff
|
|
29
|
-
offset
|
|
33
|
+
offset = offset + 2
|
|
30
34
|
} else if (countFlags < 0xe0) {
|
|
31
35
|
result =
|
|
32
36
|
((countFlags << 16) | (buffer[offset + 1] << 8) | buffer[offset + 2]) &
|
|
33
37
|
0x1fffff
|
|
34
|
-
offset
|
|
38
|
+
offset = offset + 3
|
|
35
39
|
} else if (countFlags < 0xf0) {
|
|
36
40
|
result =
|
|
37
41
|
((countFlags << 24) |
|
|
@@ -39,7 +43,7 @@ export function parseItf8(buffer, initialOffset) {
|
|
|
39
43
|
(buffer[offset + 2] << 8) |
|
|
40
44
|
buffer[offset + 3]) &
|
|
41
45
|
0x0fffffff
|
|
42
|
-
offset
|
|
46
|
+
offset = offset + 4
|
|
43
47
|
} else {
|
|
44
48
|
result =
|
|
45
49
|
((countFlags & 0x0f) << 28) |
|
|
@@ -49,7 +53,7 @@ export function parseItf8(buffer, initialOffset) {
|
|
|
49
53
|
(buffer[offset + 4] & 0x0f)
|
|
50
54
|
// x=((0xff & 0x0f)<<28) | (0xff<<20) | (0xff<<12) | (0xff<<4) | (0x0f & 0x0f);
|
|
51
55
|
// TODO *val_p = uv < 0x80000000UL ? uv : -((int32_t) (0xffffffffUL - uv)) - 1;
|
|
52
|
-
offset
|
|
56
|
+
offset = offset + 5
|
|
53
57
|
}
|
|
54
58
|
if (offset > buffer.length) {
|
|
55
59
|
throw new CramBufferOverrunError(
|
|
@@ -133,23 +137,30 @@ export function parseItf8(buffer, initialOffset) {
|
|
|
133
137
|
// return [result, offset - initialOffset]
|
|
134
138
|
// },
|
|
135
139
|
|
|
136
|
-
export
|
|
137
|
-
|
|
138
|
-
|
|
140
|
+
export type ParsedItem<T> = T & {
|
|
141
|
+
_endPosition: number
|
|
142
|
+
_size: number
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
export function parseItem<T>(
|
|
146
|
+
buffer: Buffer,
|
|
147
|
+
parser: Parser<T>,
|
|
139
148
|
startBufferPosition = 0,
|
|
140
149
|
startFilePosition = 0,
|
|
141
|
-
) {
|
|
150
|
+
): ParsedItem<T> {
|
|
142
151
|
const { offset, result } = parser.parse(buffer)
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
152
|
+
return {
|
|
153
|
+
...result,
|
|
154
|
+
_endPosition: offset + startFilePosition,
|
|
155
|
+
_size: offset - startBufferPosition,
|
|
156
|
+
}
|
|
146
157
|
}
|
|
147
158
|
|
|
148
159
|
// this would be nice as a decorator, but i'm a little worried about
|
|
149
160
|
// babel support for it going away or changing.
|
|
150
161
|
// memoizes a method in the stupidest possible way, with no regard for the
|
|
151
162
|
// arguments. actually, this only works on methods that take no arguments
|
|
152
|
-
export function tinyMemoize(_class, methodName) {
|
|
163
|
+
export function tinyMemoize(_class: any, methodName: any) {
|
|
153
164
|
const method = _class.prototype[methodName]
|
|
154
165
|
const memoAttrName = `_memo_${methodName}`
|
|
155
166
|
_class.prototype[methodName] = function _tinyMemoized() {
|
|
@@ -164,6 +175,6 @@ export function tinyMemoize(_class, methodName) {
|
|
|
164
175
|
}
|
|
165
176
|
}
|
|
166
177
|
|
|
167
|
-
export function sequenceMD5(seq) {
|
|
178
|
+
export function sequenceMD5(seq: string) {
|
|
168
179
|
return md5(seq.toUpperCase().replace(/[^\x21-\x7e]/g, ''))
|
|
169
180
|
}
|
|
@@ -6,11 +6,6 @@ export class CramUnimplementedError extends Error {}
|
|
|
6
6
|
/** An error caused by malformed data. */
|
|
7
7
|
export class CramMalformedError extends CramError {}
|
|
8
8
|
|
|
9
|
-
/**
|
|
10
|
-
* An error caused by attempting to read beyond the end of the defined data.
|
|
11
|
-
*/
|
|
12
|
-
export class CramBufferOverrunError extends CramMalformedError {}
|
|
13
|
-
|
|
14
9
|
/**
|
|
15
10
|
* An error caused by data being too big, exceeding a size limit.
|
|
16
11
|
*/
|
|
File without changes
|