@gmod/cram 8.0.2 → 8.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cram-bundle.js +1 -1
- package/dist/cramFile/codecs/_base.d.ts +1 -0
- package/dist/cramFile/codecs/_base.js +3 -0
- package/dist/cramFile/codecs/_base.js.map +1 -1
- package/dist/cramFile/codecs/byteArrayLength.d.ts +1 -1
- package/dist/cramFile/codecs/byteArrayLength.js +14 -7
- package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/dist/cramFile/codecs/external.d.ts +1 -1
- package/dist/cramFile/codecs/external.js +32 -4
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/codecs/getBits.d.ts +1 -0
- package/dist/cramFile/codecs/getBits.js +4 -0
- package/dist/cramFile/codecs/getBits.js.map +1 -1
- package/dist/cramFile/record.d.ts +39 -9
- package/dist/cramFile/record.js +35 -35
- package/dist/cramFile/record.js.map +1 -1
- package/dist/cramFile/slice/decodeRecord.d.ts +4 -3
- package/dist/cramFile/slice/decodeRecord.js +62 -77
- package/dist/cramFile/slice/decodeRecord.js.map +1 -1
- package/dist/cramFile/slice/index.js +17 -27
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/cramFile/util.d.ts +2 -0
- package/dist/cramFile/util.js +13 -0
- package/dist/cramFile/util.js.map +1 -1
- package/dist/indexedCramFile.js +0 -3
- package/dist/indexedCramFile.js.map +1 -1
- package/esm/cramFile/codecs/_base.d.ts +1 -0
- package/esm/cramFile/codecs/_base.js +3 -0
- package/esm/cramFile/codecs/_base.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayLength.d.ts +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js +14 -7
- package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/esm/cramFile/codecs/external.d.ts +1 -1
- package/esm/cramFile/codecs/external.js +32 -4
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/codecs/getBits.d.ts +1 -0
- package/esm/cramFile/codecs/getBits.js +4 -0
- package/esm/cramFile/codecs/getBits.js.map +1 -1
- package/esm/cramFile/record.d.ts +39 -9
- package/esm/cramFile/record.js +35 -35
- package/esm/cramFile/record.js.map +1 -1
- package/esm/cramFile/slice/decodeRecord.d.ts +4 -3
- package/esm/cramFile/slice/decodeRecord.js +62 -77
- package/esm/cramFile/slice/decodeRecord.js.map +1 -1
- package/esm/cramFile/slice/index.js +17 -27
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/cramFile/util.d.ts +2 -0
- package/esm/cramFile/util.js +11 -0
- package/esm/cramFile/util.js.map +1 -1
- package/esm/indexedCramFile.js +0 -3
- package/esm/indexedCramFile.js.map +1 -1
- package/package.json +1 -1
- package/src/cramFile/codecs/_base.ts +8 -0
- package/src/cramFile/codecs/byteArrayLength.ts +21 -8
- package/src/cramFile/codecs/external.ts +41 -9
- package/src/cramFile/codecs/getBits.ts +3 -1
- package/src/cramFile/record.ts +76 -49
- package/src/cramFile/slice/decodeRecord.ts +77 -96
- package/src/cramFile/slice/index.ts +31 -47
- package/src/cramFile/util.ts +14 -0
- package/src/indexedCramFile.ts +0 -4
|
@@ -14,72 +14,54 @@ import {
|
|
|
14
14
|
import CramSlice, { SliceHeader } from './index.ts'
|
|
15
15
|
import { CramFileBlock } from '../file.ts'
|
|
16
16
|
import { isMappedSliceHeader } from '../sectionParsers.ts'
|
|
17
|
-
|
|
18
|
-
// Reusable TextDecoder instance for string decoding (ASCII/Latin1)
|
|
19
|
-
const textDecoder = new TextDecoder('latin1')
|
|
20
|
-
|
|
21
|
-
/**
|
|
22
|
-
* given a Buffer, read a string up to the first null character
|
|
23
|
-
* @private
|
|
24
|
-
*/
|
|
25
|
-
function readNullTerminatedString(buffer: Uint8Array) {
|
|
26
|
-
// Find the null terminator
|
|
27
|
-
let end = 0
|
|
28
|
-
while (end < buffer.length && buffer[end] !== 0) {
|
|
29
|
-
end++
|
|
30
|
-
}
|
|
31
|
-
// Decode using TextDecoder (faster than char-by-char concatenation)
|
|
32
|
-
return textDecoder.decode(buffer.subarray(0, end))
|
|
33
|
-
}
|
|
17
|
+
import { decodeLatin1, readNullTerminatedStringFromBuffer } from '../util.ts'
|
|
34
18
|
|
|
35
19
|
/**
|
|
36
20
|
* parse a BAM tag's array value from a binary buffer
|
|
37
21
|
* @private
|
|
38
22
|
*/
|
|
23
|
+
// Uses DataView instead of typed arrays (e.g. new Int32Array(buffer.buffer))
|
|
24
|
+
// because the buffer may be a subarray of a larger ArrayBuffer. Typed array
|
|
25
|
+
// constructors like Int32Array interpret .buffer as the entire underlying
|
|
26
|
+
// ArrayBuffer starting at byte 0, ignoring the subarray's byteOffset. This
|
|
27
|
+
// caused silent data corruption when reading tag values. DataView with explicit
|
|
28
|
+
// byteOffset reads from the correct position within the parent buffer.
|
|
39
29
|
function parseTagValueArray(buffer: Uint8Array) {
|
|
40
30
|
const arrayType = String.fromCharCode(buffer[0]!)
|
|
41
31
|
|
|
42
|
-
const
|
|
43
|
-
const
|
|
44
|
-
const length = dataView.getUint32(1, littleEndian)
|
|
32
|
+
const dv = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength)
|
|
33
|
+
const length = dv.getUint32(1, true)
|
|
45
34
|
|
|
46
35
|
const array: number[] = new Array(length)
|
|
47
|
-
|
|
36
|
+
const dataOffset = 5
|
|
48
37
|
|
|
49
38
|
if (arrayType === 'c') {
|
|
50
|
-
const arr = new Int8Array(buffer.buffer)
|
|
51
39
|
for (let i = 0; i < length; i++) {
|
|
52
|
-
array[i] =
|
|
40
|
+
array[i] = dv.getInt8(dataOffset + i)
|
|
53
41
|
}
|
|
54
42
|
} else if (arrayType === 'C') {
|
|
55
|
-
const arr = new Uint8Array(buffer.buffer)
|
|
56
43
|
for (let i = 0; i < length; i++) {
|
|
57
|
-
array[i] =
|
|
44
|
+
array[i] = dv.getUint8(dataOffset + i)
|
|
58
45
|
}
|
|
59
46
|
} else if (arrayType === 's') {
|
|
60
|
-
const arr = new Int16Array(buffer.buffer)
|
|
61
47
|
for (let i = 0; i < length; i++) {
|
|
62
|
-
array[i] =
|
|
48
|
+
array[i] = dv.getInt16(dataOffset + i * 2, true)
|
|
63
49
|
}
|
|
64
50
|
} else if (arrayType === 'S') {
|
|
65
|
-
const arr = new Uint16Array(buffer.buffer)
|
|
66
51
|
for (let i = 0; i < length; i++) {
|
|
67
|
-
array[i] =
|
|
52
|
+
array[i] = dv.getUint16(dataOffset + i * 2, true)
|
|
68
53
|
}
|
|
69
54
|
} else if (arrayType === 'i') {
|
|
70
|
-
const arr = new Int32Array(buffer.buffer)
|
|
71
55
|
for (let i = 0; i < length; i++) {
|
|
72
|
-
array[i] =
|
|
56
|
+
array[i] = dv.getInt32(dataOffset + i * 4, true)
|
|
73
57
|
}
|
|
74
58
|
} else if (arrayType === 'I') {
|
|
75
|
-
const arr = new Uint32Array(buffer.buffer)
|
|
76
59
|
for (let i = 0; i < length; i++) {
|
|
77
|
-
array[i] =
|
|
60
|
+
array[i] = dv.getUint32(dataOffset + i * 4, true)
|
|
78
61
|
}
|
|
79
62
|
} else if (arrayType === 'f') {
|
|
80
|
-
const arr = new Float32Array(buffer.buffer)
|
|
81
63
|
for (let i = 0; i < length; i++) {
|
|
82
|
-
array[i] =
|
|
64
|
+
array[i] = dv.getFloat32(dataOffset + i * 4, true)
|
|
83
65
|
}
|
|
84
66
|
} else {
|
|
85
67
|
throw new Error(`unknown type: ${arrayType}`)
|
|
@@ -90,35 +72,36 @@ function parseTagValueArray(buffer: Uint8Array) {
|
|
|
90
72
|
|
|
91
73
|
function parseTagData(tagType: string, buffer: Uint8Array) {
|
|
92
74
|
if (tagType === 'Z') {
|
|
93
|
-
return
|
|
75
|
+
return readNullTerminatedStringFromBuffer(buffer)
|
|
94
76
|
}
|
|
95
77
|
if (tagType === 'A') {
|
|
96
78
|
return String.fromCharCode(buffer[0]!)
|
|
97
79
|
}
|
|
80
|
+
const dv = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength)
|
|
98
81
|
if (tagType === 'I') {
|
|
99
|
-
return
|
|
82
|
+
return dv.getUint32(0, true)
|
|
100
83
|
}
|
|
101
84
|
if (tagType === 'i') {
|
|
102
|
-
return
|
|
85
|
+
return dv.getInt32(0, true)
|
|
103
86
|
}
|
|
104
87
|
if (tagType === 's') {
|
|
105
|
-
return
|
|
88
|
+
return dv.getInt16(0, true)
|
|
106
89
|
}
|
|
107
90
|
if (tagType === 'S') {
|
|
108
|
-
return
|
|
91
|
+
return dv.getUint16(0, true)
|
|
109
92
|
}
|
|
110
93
|
if (tagType === 'c') {
|
|
111
|
-
return
|
|
94
|
+
return dv.getInt8(0)
|
|
112
95
|
}
|
|
113
96
|
if (tagType === 'C') {
|
|
114
97
|
return buffer[0]!
|
|
115
98
|
}
|
|
116
99
|
if (tagType === 'f') {
|
|
117
|
-
return
|
|
100
|
+
return dv.getFloat32(0, true)
|
|
118
101
|
}
|
|
119
102
|
if (tagType === 'H') {
|
|
120
103
|
return Number.parseInt(
|
|
121
|
-
|
|
104
|
+
readNullTerminatedStringFromBuffer(buffer).replace(/^0x/, ''),
|
|
122
105
|
16,
|
|
123
106
|
)
|
|
124
107
|
}
|
|
@@ -129,22 +112,25 @@ function parseTagData(tagType: string, buffer: Uint8Array) {
|
|
|
129
112
|
throw new CramMalformedError(`Unrecognized tag type ${tagType}`)
|
|
130
113
|
}
|
|
131
114
|
|
|
132
|
-
//
|
|
115
|
+
// Read feature schema lookup tables. Each entry maps a feature code to
|
|
116
|
+
// [dataType, dataSeriesName] where dataType controls how the raw codec
|
|
117
|
+
// output is converted (character→fromCharCode, string→TextDecoder,
|
|
118
|
+
// numArray→Array.from, number→as-is).
|
|
133
119
|
const data1SchemaBase = {
|
|
134
|
-
B: ['character', 'BA'] as const,
|
|
135
|
-
X: ['number', 'BS'] as const,
|
|
136
|
-
D: ['number', 'DL'] as const,
|
|
137
|
-
I: ['string', 'IN'] as const,
|
|
138
|
-
i: ['character', 'BA'] as const,
|
|
139
|
-
b: ['string', 'BB'] as const,
|
|
140
|
-
q: ['numArray', 'QQ'] as const,
|
|
141
|
-
Q: ['number', 'QS'] as const,
|
|
142
|
-
H: ['number', 'HC'] as const,
|
|
143
|
-
P: ['number', 'PD'] as const,
|
|
144
|
-
N: ['number', 'RS'] as const,
|
|
120
|
+
B: ['character', 'BA'] as const, // base substitution (base component)
|
|
121
|
+
X: ['number', 'BS'] as const, // base substitution matrix index
|
|
122
|
+
D: ['number', 'DL'] as const, // deletion length
|
|
123
|
+
I: ['string', 'IN'] as const, // insertion bases
|
|
124
|
+
i: ['character', 'BA'] as const, // single-base insertion
|
|
125
|
+
b: ['string', 'BB'] as const, // stretch of bases
|
|
126
|
+
q: ['numArray', 'QQ'] as const, // stretch of quality scores
|
|
127
|
+
Q: ['number', 'QS'] as const, // single quality score
|
|
128
|
+
H: ['number', 'HC'] as const, // hard clip length
|
|
129
|
+
P: ['number', 'PD'] as const, // padding length
|
|
130
|
+
N: ['number', 'RS'] as const, // reference skip length
|
|
145
131
|
} as const
|
|
146
132
|
|
|
147
|
-
//
|
|
133
|
+
// Soft clip data series changed between CRAM v1 (IN) and v2+ (SC)
|
|
148
134
|
const data1SchemaV1: Record<string, readonly [string, string]> = {
|
|
149
135
|
...data1SchemaBase,
|
|
150
136
|
S: ['string', 'IN'] as const,
|
|
@@ -154,7 +140,7 @@ const data1SchemaV2Plus: Record<string, readonly [string, string]> = {
|
|
|
154
140
|
S: ['string', 'SC'] as const,
|
|
155
141
|
}
|
|
156
142
|
|
|
157
|
-
//
|
|
143
|
+
// Features with a second data item (B has both a base and a quality score)
|
|
158
144
|
const data2Schema: Record<string, readonly [string, string]> = {
|
|
159
145
|
B: ['number', 'QS'] as const,
|
|
160
146
|
}
|
|
@@ -162,8 +148,7 @@ const data2Schema: Record<string, readonly [string, string]> = {
|
|
|
162
148
|
function decodeReadFeatures(
|
|
163
149
|
alignmentStart: number,
|
|
164
150
|
readFeatureCount: number,
|
|
165
|
-
decodeDataSeries:
|
|
166
|
-
_compressionScheme: CramContainerCompressionScheme,
|
|
151
|
+
decodeDataSeries: DataSeriesDecoder,
|
|
167
152
|
majorVersion: number,
|
|
168
153
|
) {
|
|
169
154
|
let currentReadPos = 0
|
|
@@ -176,22 +161,22 @@ function decodeReadFeatures(
|
|
|
176
161
|
function decodeRFData([type, dataSeriesName]: readonly [
|
|
177
162
|
type: string,
|
|
178
163
|
dataSeriesName: string,
|
|
179
|
-
]) {
|
|
180
|
-
const data = decodeDataSeries(dataSeriesName)
|
|
164
|
+
]): string | number | number[] {
|
|
165
|
+
const data = decodeDataSeries(dataSeriesName as DataSeriesEncodingKey)
|
|
181
166
|
if (type === 'character') {
|
|
182
|
-
return String.fromCharCode(data)
|
|
167
|
+
return String.fromCharCode(data as number)
|
|
183
168
|
} else if (type === 'string') {
|
|
184
|
-
return
|
|
169
|
+
return decodeLatin1(data as Uint8Array)
|
|
185
170
|
} else if (type === 'numArray') {
|
|
186
|
-
return Array.from(data)
|
|
171
|
+
return Array.from(data as Uint8Array)
|
|
187
172
|
}
|
|
188
|
-
return data
|
|
173
|
+
return data as number
|
|
189
174
|
}
|
|
190
175
|
|
|
191
176
|
for (let i = 0; i < readFeatureCount; i++) {
|
|
192
|
-
const code = String.fromCharCode(decodeDataSeries('FC'))
|
|
177
|
+
const code = String.fromCharCode(decodeDataSeries('FC')!)
|
|
193
178
|
|
|
194
|
-
const readPosDelta = decodeDataSeries('FP')
|
|
179
|
+
const readPosDelta = decodeDataSeries('FP')!
|
|
195
180
|
|
|
196
181
|
const schema = data1Schema[code]
|
|
197
182
|
|
|
@@ -199,12 +184,13 @@ function decodeReadFeatures(
|
|
|
199
184
|
throw new CramMalformedError(`invalid read feature code "${code}"`)
|
|
200
185
|
}
|
|
201
186
|
|
|
202
|
-
let data:
|
|
187
|
+
let data: string | number | number[] | [string, number] =
|
|
188
|
+
decodeRFData(schema)
|
|
203
189
|
|
|
204
|
-
// if this is a read feature with two data items, make the data
|
|
190
|
+
// if this is a read feature with two data items, make the data a tuple
|
|
205
191
|
const schema2 = data2Schema[code]
|
|
206
192
|
if (schema2) {
|
|
207
|
-
data = [data, decodeRFData(schema2)]
|
|
193
|
+
data = [data as string, decodeRFData(schema2) as number]
|
|
208
194
|
}
|
|
209
195
|
|
|
210
196
|
currentReadPos += readPosDelta
|
|
@@ -215,14 +201,14 @@ function decodeReadFeatures(
|
|
|
215
201
|
|
|
216
202
|
// for gapping features, adjust the reference position for read features that follow
|
|
217
203
|
if (code === 'D' || code === 'N') {
|
|
218
|
-
currentRefPos += data
|
|
204
|
+
currentRefPos += data as number
|
|
219
205
|
} else if (code === 'I' || code === 'S') {
|
|
220
|
-
currentRefPos -= data.length
|
|
206
|
+
currentRefPos -= (data as string).length
|
|
221
207
|
} else if (code === 'i') {
|
|
222
208
|
currentRefPos -= 1
|
|
223
209
|
}
|
|
224
210
|
|
|
225
|
-
readFeatures[i] = { code, pos, refPos, data }
|
|
211
|
+
readFeatures[i] = { code, pos, refPos, data } as ReadFeature
|
|
226
212
|
}
|
|
227
213
|
return readFeatures
|
|
228
214
|
}
|
|
@@ -246,6 +232,7 @@ export default function decodeRecord(
|
|
|
246
232
|
cursors: Cursors,
|
|
247
233
|
majorVersion: number,
|
|
248
234
|
recordNumber: number,
|
|
235
|
+
uniqueId: number,
|
|
249
236
|
decodeOptions?: Required<DecodeOptions>,
|
|
250
237
|
decodeBulkBytesRaw?: BulkByteRawDecoder,
|
|
251
238
|
) {
|
|
@@ -273,9 +260,9 @@ export default function decodeRecord(
|
|
|
273
260
|
cursors.lastAlignmentStart = alignmentStart
|
|
274
261
|
const readGroupId = decodeDataSeries('RG')!
|
|
275
262
|
|
|
276
|
-
let
|
|
263
|
+
let readNameRaw: Uint8Array | undefined
|
|
277
264
|
if (compressionScheme.readNamesIncluded) {
|
|
278
|
-
|
|
265
|
+
readNameRaw = decodeDataSeries('RN')!
|
|
279
266
|
}
|
|
280
267
|
|
|
281
268
|
let mateToUse:
|
|
@@ -295,8 +282,8 @@ export default function decodeRecord(
|
|
|
295
282
|
const mateFlags = decodeDataSeries('MF')!
|
|
296
283
|
let mateReadName: string | undefined
|
|
297
284
|
if (!compressionScheme.readNamesIncluded) {
|
|
298
|
-
|
|
299
|
-
|
|
285
|
+
readNameRaw = decodeDataSeries('RN')!
|
|
286
|
+
mateReadName = readNullTerminatedStringFromBuffer(readNameRaw)
|
|
300
287
|
}
|
|
301
288
|
const mateSequenceId = decodeDataSeries('NS')!
|
|
302
289
|
const mateAlignmentStart = decodeDataSeries('NP')!
|
|
@@ -319,8 +306,6 @@ export default function decodeRecord(
|
|
|
319
306
|
if (MateFlagsDecoder.isOnNegativeStrand(mateFlags)) {
|
|
320
307
|
flags = BamFlagsDecoder.setMateReverseComplemented(flags)
|
|
321
308
|
}
|
|
322
|
-
|
|
323
|
-
// detachedCount++
|
|
324
309
|
} else if (CramFlagsDecoder.isWithMateDownstream(cramFlags)) {
|
|
325
310
|
mateRecordNumber = decodeDataSeries('NF')! + recordNumber + 1
|
|
326
311
|
}
|
|
@@ -333,21 +318,19 @@ export default function decodeRecord(
|
|
|
333
318
|
throw new CramMalformedError('invalid TL index')
|
|
334
319
|
}
|
|
335
320
|
|
|
336
|
-
|
|
321
|
+
type TagValue = string | number | number[] | undefined
|
|
322
|
+
const tags: Record<string, TagValue> = {}
|
|
337
323
|
// TN = tag names
|
|
338
324
|
const TN = compressionScheme.getTagNames(TLindex)!
|
|
339
325
|
const ntags = TN.length
|
|
340
326
|
const shouldDecodeTags = decodeOptions?.decodeTags !== false
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
// Only parse tags if requested (default: true)
|
|
349
|
-
if (shouldDecodeTags) {
|
|
350
|
-
// Use direct character access instead of slice() to avoid string allocation
|
|
327
|
+
if (shouldDecodeTags) {
|
|
328
|
+
for (let i = 0; i < ntags; i++) {
|
|
329
|
+
const tagId = TN[i]!
|
|
330
|
+
const tagData = compressionScheme
|
|
331
|
+
.getCodecForTag(tagId)
|
|
332
|
+
.decode(slice, coreDataBlock, blocksByContentId, cursors)
|
|
333
|
+
|
|
351
334
|
const tagName = tagId[0]! + tagId[1]!
|
|
352
335
|
const tagType = tagId[2]!
|
|
353
336
|
tags[tagName] =
|
|
@@ -372,7 +355,6 @@ export default function decodeRecord(
|
|
|
372
355
|
alignmentStart,
|
|
373
356
|
readFeatureCount,
|
|
374
357
|
decodeDataSeries,
|
|
375
|
-
compressionScheme,
|
|
376
358
|
majorVersion,
|
|
377
359
|
)
|
|
378
360
|
}
|
|
@@ -393,9 +375,7 @@ export default function decodeRecord(
|
|
|
393
375
|
}
|
|
394
376
|
if (Number.isNaN(lengthOnRef)) {
|
|
395
377
|
console.warn(
|
|
396
|
-
`${
|
|
397
|
-
readName || `${sequenceId}:${alignmentStart}`
|
|
398
|
-
} record has invalid read features`,
|
|
378
|
+
`${sequenceId}:${alignmentStart} record has invalid read features`,
|
|
399
379
|
)
|
|
400
380
|
lengthOnRef = readLength
|
|
401
381
|
}
|
|
@@ -423,7 +403,7 @@ export default function decodeRecord(
|
|
|
423
403
|
// Try raw bytes first for TextDecoder (most efficient)
|
|
424
404
|
const rawBA = decodeBulkBytesRaw?.('BA', readLength)
|
|
425
405
|
if (rawBA) {
|
|
426
|
-
readBases =
|
|
406
|
+
readBases = decodeLatin1(rawBA)
|
|
427
407
|
} else {
|
|
428
408
|
// Fallback to single-byte decoding
|
|
429
409
|
let s = ''
|
|
@@ -455,7 +435,7 @@ export default function decodeRecord(
|
|
|
455
435
|
flags,
|
|
456
436
|
alignmentStart,
|
|
457
437
|
readGroupId,
|
|
458
|
-
|
|
438
|
+
readNameRaw,
|
|
459
439
|
mateToUse,
|
|
460
440
|
templateSize,
|
|
461
441
|
mateRecordNumber,
|
|
@@ -465,5 +445,6 @@ export default function decodeRecord(
|
|
|
465
445
|
qualityScores,
|
|
466
446
|
readBases,
|
|
467
447
|
tags,
|
|
448
|
+
uniqueId,
|
|
468
449
|
}
|
|
469
450
|
}
|
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
import { CramArgumentError, CramMalformedError } from '../../errors.ts'
|
|
2
2
|
import { Cursors, DataTypeMapping } from '../codecs/_base.ts'
|
|
3
3
|
import { DataSeriesEncodingKey } from '../codecs/dataSeriesTypes.ts'
|
|
4
|
-
import { CramBufferOverrunError } from '../codecs/getBits.ts'
|
|
5
4
|
import Constants from '../constants.ts'
|
|
6
5
|
import decodeRecord, {
|
|
7
6
|
BulkByteRawDecoder,
|
|
8
7
|
DataSeriesDecoder,
|
|
9
8
|
} from './decodeRecord.ts'
|
|
10
|
-
import ExternalCodec from '../codecs/external.ts'
|
|
11
9
|
import { DataSeriesTypes } from '../container/compressionScheme.ts'
|
|
12
10
|
import CramContainer from '../container/index.ts'
|
|
13
11
|
import CramFile, { CramFileBlock } from '../file.ts'
|
|
@@ -111,10 +109,12 @@ function associateIntraSliceMate(
|
|
|
111
109
|
mateRecord.mateRecordNumber !== currentRecordNumber)
|
|
112
110
|
)
|
|
113
111
|
|
|
114
|
-
// Deal with lossy read names
|
|
112
|
+
// Deal with lossy read names — assign a synthetic name from uniqueId
|
|
113
|
+
// so that paired records share the same name
|
|
115
114
|
if (!thisRecord.readName) {
|
|
116
|
-
|
|
117
|
-
|
|
115
|
+
const syntheticName = String(thisRecord.uniqueId)
|
|
116
|
+
thisRecord._syntheticReadName = syntheticName
|
|
117
|
+
mateRecord._syntheticReadName = syntheticName
|
|
118
118
|
}
|
|
119
119
|
|
|
120
120
|
thisRecord.mate = {
|
|
@@ -446,30 +446,15 @@ export default class CramSlice {
|
|
|
446
446
|
return codec.decode(this, coreDataBlock, blocksByContentId, cursors)
|
|
447
447
|
}
|
|
448
448
|
|
|
449
|
-
//
|
|
449
|
+
// Bulk byte decoder for QS and BA — getBytesSubarray returns a subarray
|
|
450
|
+
// view when the codec supports it (e.g. ExternalCodec), or undefined otherwise
|
|
450
451
|
const qsCodec = compressionScheme.getCodecForDataSeries('QS')
|
|
451
452
|
const baCodec = compressionScheme.getCodecForDataSeries('BA')
|
|
452
|
-
const qsIsExternal = qsCodec instanceof ExternalCodec
|
|
453
|
-
const baIsExternal = baCodec instanceof ExternalCodec
|
|
454
|
-
// Create raw byte decoder for QS/BA decoding
|
|
455
453
|
const decodeBulkBytesRaw: BulkByteRawDecoder | undefined =
|
|
456
|
-
|
|
454
|
+
qsCodec || baCodec
|
|
457
455
|
? (dataSeriesName, length) => {
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
blocksByContentId,
|
|
461
|
-
cursors,
|
|
462
|
-
length,
|
|
463
|
-
)
|
|
464
|
-
}
|
|
465
|
-
if (dataSeriesName === 'BA' && baIsExternal) {
|
|
466
|
-
return baCodec.getBytesSubarray(
|
|
467
|
-
blocksByContentId,
|
|
468
|
-
cursors,
|
|
469
|
-
length,
|
|
470
|
-
)
|
|
471
|
-
}
|
|
472
|
-
return undefined
|
|
456
|
+
const codec = dataSeriesName === 'QS' ? qsCodec : baCodec
|
|
457
|
+
return codec?.getBytesSubarray(blocksByContentId, cursors, length)
|
|
473
458
|
}
|
|
474
459
|
: undefined
|
|
475
460
|
|
|
@@ -478,35 +463,34 @@ export default class CramSlice {
|
|
|
478
463
|
)
|
|
479
464
|
for (let i = 0; i < records.length; i += 1) {
|
|
480
465
|
try {
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
decodeBulkBytesRaw,
|
|
493
|
-
)
|
|
494
|
-
records[i] = new CramRecord({
|
|
495
|
-
...init,
|
|
496
|
-
uniqueId:
|
|
466
|
+
records[i] = new CramRecord(
|
|
467
|
+
decodeRecord(
|
|
468
|
+
this,
|
|
469
|
+
decodeDataSeries,
|
|
470
|
+
compressionScheme,
|
|
471
|
+
sliceHeader,
|
|
472
|
+
coreDataBlock,
|
|
473
|
+
blocksByContentId,
|
|
474
|
+
cursors,
|
|
475
|
+
majorVersion,
|
|
476
|
+
i,
|
|
497
477
|
sliceHeader.contentPosition +
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
478
|
+
sliceHeader.parsedContent.recordCounter +
|
|
479
|
+
i +
|
|
480
|
+
1,
|
|
481
|
+
decodeOptions,
|
|
482
|
+
decodeBulkBytesRaw,
|
|
483
|
+
),
|
|
484
|
+
)
|
|
502
485
|
} catch (e) {
|
|
503
|
-
|
|
486
|
+
const err = e as { code?: string; message?: string }
|
|
487
|
+
if (err.code === 'CRAM_BUFFER_OVERRUN') {
|
|
504
488
|
const recordsDecoded = i
|
|
505
489
|
const recordsExpected = sliceHeader.parsedContent.numRecords
|
|
506
490
|
throw new CramMalformedError(
|
|
507
491
|
`Failed to decode all records in slice. Decoded ${recordsDecoded} of ${recordsExpected} expected records. ` +
|
|
508
492
|
`Buffer overrun suggests either: (1) file is truncated/corrupted, (2) compression scheme is incorrect, ` +
|
|
509
|
-
`or (3) there's a bug in the decoder. Original error: ${
|
|
493
|
+
`or (3) there's a bug in the decoder. Original error: ${err.message}`,
|
|
510
494
|
)
|
|
511
495
|
} else {
|
|
512
496
|
throw e
|
package/src/cramFile/util.ts
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
import md5 from 'md5'
|
|
2
2
|
|
|
3
|
+
const textDecoder = new TextDecoder('latin1')
|
|
4
|
+
|
|
5
|
+
export function readNullTerminatedStringFromBuffer(buffer: Uint8Array) {
|
|
6
|
+
let end = 0
|
|
7
|
+
while (end < buffer.length && buffer[end] !== 0) {
|
|
8
|
+
end++
|
|
9
|
+
}
|
|
10
|
+
return textDecoder.decode(buffer.subarray(0, end))
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export function decodeLatin1(buffer: Uint8Array) {
|
|
14
|
+
return textDecoder.decode(buffer)
|
|
15
|
+
}
|
|
16
|
+
|
|
3
17
|
export const TWO_PWR_16_DBL = 1 << 16
|
|
4
18
|
export const TWO_PWR_32_DBL = TWO_PWR_16_DBL * TWO_PWR_16_DBL
|
|
5
19
|
export const TWO_PWR_64_DBL = TWO_PWR_32_DBL * TWO_PWR_32_DBL
|
package/src/indexedCramFile.ts
CHANGED