@gmod/cram 7.0.2 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cram-bundle.js +1 -1
- package/dist/cramFile/codecs/beta.js +27 -3
- package/dist/cramFile/codecs/beta.js.map +1 -1
- package/dist/cramFile/codecs/external.d.ts +1 -0
- package/dist/cramFile/codecs/external.js +15 -0
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/codecs/gamma.js +44 -8
- package/dist/cramFile/codecs/gamma.js.map +1 -1
- package/dist/cramFile/codecs/getBits.js +18 -2
- package/dist/cramFile/codecs/getBits.js.map +1 -1
- package/dist/cramFile/codecs/huffman.js +37 -3
- package/dist/cramFile/codecs/huffman.js.map +1 -1
- package/dist/cramFile/codecs/subexp.js +37 -15
- package/dist/cramFile/codecs/subexp.js.map +1 -1
- package/dist/cramFile/file.d.ts +1 -1
- package/dist/cramFile/file.js +1 -1
- package/dist/cramFile/file.js.map +1 -1
- package/dist/cramFile/record.d.ts +12 -1
- package/dist/cramFile/record.js +18 -5
- package/dist/cramFile/record.js.map +1 -1
- package/dist/cramFile/slice/decodeRecord.d.ts +4 -3
- package/dist/cramFile/slice/decodeRecord.js +95 -53
- package/dist/cramFile/slice/decodeRecord.js.map +1 -1
- package/dist/cramFile/slice/index.d.ts +3 -3
- package/dist/cramFile/slice/index.js +63 -8
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/indexedCramFile.d.ts +3 -3
- package/dist/indexedCramFile.js +12 -9
- package/dist/indexedCramFile.js.map +1 -1
- package/dist/wasm/noodles-cram/noodles_cram_wasm.d.ts +1 -0
- package/dist/wasm/noodles-cram/noodles_cram_wasm.js +44 -0
- package/dist/wasm/noodles-cram/noodles_cram_wasm.js.map +1 -0
- package/dist/wasm/noodles-cram/noodles_cram_wasm_bg.d.ts +94 -0
- package/dist/wasm/noodles-cram/noodles_cram_wasm_bg.js +578 -0
- package/dist/wasm/noodles-cram/noodles_cram_wasm_bg.js.map +1 -0
- package/esm/cramFile/codecs/beta.js +27 -3
- package/esm/cramFile/codecs/beta.js.map +1 -1
- package/esm/cramFile/codecs/external.d.ts +1 -0
- package/esm/cramFile/codecs/external.js +15 -0
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/codecs/gamma.js +43 -7
- package/esm/cramFile/codecs/gamma.js.map +1 -1
- package/esm/cramFile/codecs/getBits.js +18 -2
- package/esm/cramFile/codecs/getBits.js.map +1 -1
- package/esm/cramFile/codecs/huffman.js +37 -3
- package/esm/cramFile/codecs/huffman.js.map +1 -1
- package/esm/cramFile/codecs/subexp.js +36 -14
- package/esm/cramFile/codecs/subexp.js.map +1 -1
- package/esm/cramFile/file.d.ts +1 -1
- package/esm/cramFile/file.js +1 -1
- package/esm/cramFile/file.js.map +1 -1
- package/esm/cramFile/record.d.ts +12 -1
- package/esm/cramFile/record.js +17 -4
- package/esm/cramFile/record.js.map +1 -1
- package/esm/cramFile/slice/decodeRecord.d.ts +4 -3
- package/esm/cramFile/slice/decodeRecord.js +95 -53
- package/esm/cramFile/slice/decodeRecord.js.map +1 -1
- package/esm/cramFile/slice/index.d.ts +3 -3
- package/esm/cramFile/slice/index.js +30 -8
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/indexedCramFile.d.ts +3 -3
- package/esm/indexedCramFile.js +12 -9
- package/esm/indexedCramFile.js.map +1 -1
- package/esm/wasm/noodles-cram/noodles_cram_wasm.d.ts +1 -0
- package/esm/wasm/noodles-cram/noodles_cram_wasm.js +6 -0
- package/esm/wasm/noodles-cram/noodles_cram_wasm.js.map +1 -0
- package/esm/wasm/noodles-cram/noodles_cram_wasm_bg.d.ts +94 -0
- package/esm/wasm/noodles-cram/noodles_cram_wasm_bg.js +529 -0
- package/esm/wasm/noodles-cram/noodles_cram_wasm_bg.js.map +1 -0
- package/package.json +13 -11
- package/src/cramFile/codecs/beta.ts +38 -4
- package/src/cramFile/codecs/external.ts +25 -0
- package/src/cramFile/codecs/gamma.ts +54 -12
- package/src/cramFile/codecs/getBits.ts +21 -2
- package/src/cramFile/codecs/huffman.ts +45 -3
- package/src/cramFile/codecs/subexp.ts +53 -16
- package/src/cramFile/file.ts +1 -1
- package/src/cramFile/record.ts +26 -11
- package/src/cramFile/slice/decodeRecord.ts +107 -55
- package/src/cramFile/slice/index.ts +51 -9
- package/src/indexedCramFile.ts +35 -27
- package/src/wasm/noodles-cram/.gitignore +1 -0
- package/src/wasm/noodles-cram/noodles_cram_wasm.d.ts +42 -0
- package/src/wasm/noodles-cram/noodles_cram_wasm.js +5 -0
- package/src/wasm/noodles-cram/noodles_cram_wasm_bg.js +541 -0
- package/src/wasm/noodles-cram/noodles_cram_wasm_bg.wasm +0 -0
- package/src/wasm/noodles-cram/noodles_cram_wasm_bg.wasm.d.ts +18 -0
- package/src/wasm/noodles-cram/package.json +17 -0
|
@@ -7,6 +7,7 @@ import CramContainerCompressionScheme, {
|
|
|
7
7
|
import {
|
|
8
8
|
BamFlagsDecoder,
|
|
9
9
|
CramFlagsDecoder,
|
|
10
|
+
DecodeOptions,
|
|
10
11
|
MateFlagsDecoder,
|
|
11
12
|
ReadFeature,
|
|
12
13
|
} from '../record.ts'
|
|
@@ -14,16 +15,21 @@ import CramSlice, { SliceHeader } from './index.ts'
|
|
|
14
15
|
import { CramFileBlock } from '../file.ts'
|
|
15
16
|
import { isMappedSliceHeader } from '../sectionParsers.ts'
|
|
16
17
|
|
|
18
|
+
// Reusable TextDecoder instance for string decoding (ASCII/Latin1)
|
|
19
|
+
const textDecoder = new TextDecoder('latin1')
|
|
20
|
+
|
|
17
21
|
/**
|
|
18
22
|
* given a Buffer, read a string up to the first null character
|
|
19
23
|
* @private
|
|
20
24
|
*/
|
|
21
25
|
function readNullTerminatedString(buffer: Uint8Array) {
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
26
|
+
// Find the null terminator
|
|
27
|
+
let end = 0
|
|
28
|
+
while (end < buffer.length && buffer[end] !== 0) {
|
|
29
|
+
end++
|
|
25
30
|
}
|
|
26
|
-
|
|
31
|
+
// Decode using TextDecoder (faster than char-by-char concatenation)
|
|
32
|
+
return textDecoder.decode(buffer.subarray(0, end))
|
|
27
33
|
}
|
|
28
34
|
|
|
29
35
|
/**
|
|
@@ -123,17 +129,50 @@ function parseTagData(tagType: string, buffer: Uint8Array) {
|
|
|
123
129
|
throw new CramMalformedError(`Unrecognized tag type ${tagType}`)
|
|
124
130
|
}
|
|
125
131
|
|
|
132
|
+
// Pre-defined schema lookup tables (version-independent entries)
|
|
133
|
+
const data1SchemaBase = {
|
|
134
|
+
B: ['character', 'BA'] as const,
|
|
135
|
+
X: ['number', 'BS'] as const,
|
|
136
|
+
D: ['number', 'DL'] as const,
|
|
137
|
+
I: ['string', 'IN'] as const,
|
|
138
|
+
i: ['character', 'BA'] as const,
|
|
139
|
+
b: ['string', 'BB'] as const,
|
|
140
|
+
q: ['numArray', 'QQ'] as const,
|
|
141
|
+
Q: ['number', 'QS'] as const,
|
|
142
|
+
H: ['number', 'HC'] as const,
|
|
143
|
+
P: ['number', 'PD'] as const,
|
|
144
|
+
N: ['number', 'RS'] as const,
|
|
145
|
+
} as const
|
|
146
|
+
|
|
147
|
+
// Version-specific S entry
|
|
148
|
+
const data1SchemaV1: Record<string, readonly [string, string]> = {
|
|
149
|
+
...data1SchemaBase,
|
|
150
|
+
S: ['string', 'IN'] as const,
|
|
151
|
+
}
|
|
152
|
+
const data1SchemaV2Plus: Record<string, readonly [string, string]> = {
|
|
153
|
+
...data1SchemaBase,
|
|
154
|
+
S: ['string', 'SC'] as const,
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
// Second data item schema for read features that have two values
|
|
158
|
+
const data2Schema: Record<string, readonly [string, string]> = {
|
|
159
|
+
B: ['number', 'QS'] as const,
|
|
160
|
+
}
|
|
161
|
+
|
|
126
162
|
function decodeReadFeatures(
|
|
127
163
|
alignmentStart: number,
|
|
128
164
|
readFeatureCount: number,
|
|
129
165
|
decodeDataSeries: any,
|
|
130
|
-
|
|
166
|
+
_compressionScheme: CramContainerCompressionScheme,
|
|
131
167
|
majorVersion: number,
|
|
132
168
|
) {
|
|
133
169
|
let currentReadPos = 0
|
|
134
170
|
let currentRefPos = alignmentStart - 1
|
|
135
171
|
const readFeatures: ReadFeature[] = new Array(readFeatureCount)
|
|
136
172
|
|
|
173
|
+
// Select the appropriate schema based on version (once per call, not per iteration)
|
|
174
|
+
const data1Schema = majorVersion > 1 ? data1SchemaV2Plus : data1SchemaV1
|
|
175
|
+
|
|
137
176
|
function decodeRFData([type, dataSeriesName]: readonly [
|
|
138
177
|
type: string,
|
|
139
178
|
dataSeriesName: string,
|
|
@@ -142,17 +181,10 @@ function decodeReadFeatures(
|
|
|
142
181
|
if (type === 'character') {
|
|
143
182
|
return String.fromCharCode(data)
|
|
144
183
|
} else if (type === 'string') {
|
|
145
|
-
|
|
146
|
-
for (let i = 0; i < data.byteLength; i++) {
|
|
147
|
-
r += String.fromCharCode(data[i])
|
|
148
|
-
}
|
|
149
|
-
return r
|
|
184
|
+
return textDecoder.decode(data)
|
|
150
185
|
} else if (type === 'numArray') {
|
|
151
186
|
return Array.from(data)
|
|
152
187
|
}
|
|
153
|
-
// else if (type === 'number') {
|
|
154
|
-
// return data[0]
|
|
155
|
-
// }
|
|
156
188
|
return data
|
|
157
189
|
}
|
|
158
190
|
|
|
@@ -161,32 +193,18 @@ function decodeReadFeatures(
|
|
|
161
193
|
|
|
162
194
|
const readPosDelta = decodeDataSeries('FP')
|
|
163
195
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
S: ['string', majorVersion > 1 ? 'SC' : 'IN'] as const, // IN if cram v1, SC otherwise
|
|
168
|
-
X: ['number', 'BS'] as const,
|
|
169
|
-
D: ['number', 'DL'] as const,
|
|
170
|
-
I: ['string', 'IN'] as const,
|
|
171
|
-
i: ['character', 'BA'] as const,
|
|
172
|
-
b: ['string', 'BB'] as const,
|
|
173
|
-
q: ['numArray', 'QQ'] as const,
|
|
174
|
-
Q: ['number', 'QS'] as const,
|
|
175
|
-
H: ['number', 'HC'] as const,
|
|
176
|
-
P: ['number', 'PD'] as const,
|
|
177
|
-
N: ['number', 'RS'] as const,
|
|
178
|
-
}[code]
|
|
179
|
-
|
|
180
|
-
if (!data1Schema) {
|
|
196
|
+
const schema = data1Schema[code]
|
|
197
|
+
|
|
198
|
+
if (!schema) {
|
|
181
199
|
throw new CramMalformedError(`invalid read feature code "${code}"`)
|
|
182
200
|
}
|
|
183
201
|
|
|
184
|
-
let data = decodeRFData(
|
|
202
|
+
let data: any = decodeRFData(schema)
|
|
185
203
|
|
|
186
|
-
// if this is a
|
|
187
|
-
const
|
|
188
|
-
if (
|
|
189
|
-
data = [data, decodeRFData(
|
|
204
|
+
// if this is a read feature with two data items, make the data an array
|
|
205
|
+
const schema2 = data2Schema[code]
|
|
206
|
+
if (schema2) {
|
|
207
|
+
data = [data, decodeRFData(schema2)]
|
|
190
208
|
}
|
|
191
209
|
|
|
192
210
|
currentReadPos += readPosDelta
|
|
@@ -213,6 +231,11 @@ export type DataSeriesDecoder = <T extends DataSeriesEncodingKey>(
|
|
|
213
231
|
dataSeriesName: T,
|
|
214
232
|
) => DataTypeMapping[DataSeriesTypes[T]] | undefined
|
|
215
233
|
|
|
234
|
+
export type BulkByteRawDecoder = (
|
|
235
|
+
dataSeriesName: 'QS' | 'BA',
|
|
236
|
+
length: number,
|
|
237
|
+
) => Uint8Array | undefined
|
|
238
|
+
|
|
216
239
|
export default function decodeRecord(
|
|
217
240
|
slice: CramSlice,
|
|
218
241
|
decodeDataSeries: DataSeriesDecoder,
|
|
@@ -223,6 +246,8 @@ export default function decodeRecord(
|
|
|
223
246
|
cursors: Cursors,
|
|
224
247
|
majorVersion: number,
|
|
225
248
|
recordNumber: number,
|
|
249
|
+
decodeOptions?: Required<DecodeOptions>,
|
|
250
|
+
decodeBulkBytesRaw?: BulkByteRawDecoder,
|
|
226
251
|
) {
|
|
227
252
|
let flags = decodeDataSeries('BF')!
|
|
228
253
|
|
|
@@ -312,26 +337,32 @@ export default function decodeRecord(
|
|
|
312
337
|
// TN = tag names
|
|
313
338
|
const TN = compressionScheme.getTagNames(TLindex)!
|
|
314
339
|
const ntags = TN.length
|
|
340
|
+
const shouldDecodeTags = decodeOptions?.decodeTags !== false
|
|
315
341
|
for (let i = 0; i < ntags; i++) {
|
|
316
342
|
const tagId = TN[i]!
|
|
317
|
-
|
|
318
|
-
const tagType = tagId.slice(2, 3)
|
|
319
|
-
|
|
343
|
+
// Always decode to advance cursor position
|
|
320
344
|
const tagData = compressionScheme
|
|
321
345
|
.getCodecForTag(tagId)
|
|
322
346
|
.decode(slice, coreDataBlock, blocksByContentId, cursors)
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
347
|
+
|
|
348
|
+
// Only parse tags if requested (default: true)
|
|
349
|
+
if (shouldDecodeTags) {
|
|
350
|
+
// Use direct character access instead of slice() to avoid string allocation
|
|
351
|
+
const tagName = tagId[0]! + tagId[1]!
|
|
352
|
+
const tagType = tagId[2]!
|
|
353
|
+
tags[tagName] =
|
|
354
|
+
tagData === undefined
|
|
355
|
+
? undefined
|
|
356
|
+
: typeof tagData === 'number'
|
|
357
|
+
? tagData
|
|
358
|
+
: parseTagData(tagType, tagData)
|
|
359
|
+
}
|
|
329
360
|
}
|
|
330
361
|
|
|
331
362
|
let readFeatures: ReadFeature[] | undefined
|
|
332
363
|
let lengthOnRef: number | undefined
|
|
333
364
|
let mappingQuality: number | undefined
|
|
334
|
-
let qualityScores:
|
|
365
|
+
let qualityScores: Uint8Array | undefined | null
|
|
335
366
|
let readBases = undefined
|
|
336
367
|
if (!BamFlagsDecoder.isSegmentUnmapped(flags)) {
|
|
337
368
|
// reading read features
|
|
@@ -373,25 +404,46 @@ export default function decodeRecord(
|
|
|
373
404
|
mappingQuality = decodeDataSeries('MQ')!
|
|
374
405
|
|
|
375
406
|
if (CramFlagsDecoder.isPreservingQualityScores(cramFlags)) {
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
407
|
+
// Try raw bytes first (most efficient - just a subarray view)
|
|
408
|
+
const rawQS = decodeBulkBytesRaw?.('QS', readLength)
|
|
409
|
+
if (rawQS) {
|
|
410
|
+
qualityScores = rawQS
|
|
411
|
+
} else {
|
|
412
|
+
// Fallback to single-byte decoding into new Uint8Array
|
|
413
|
+
qualityScores = new Uint8Array(readLength)
|
|
414
|
+
for (let i = 0; i < readLength; i++) {
|
|
415
|
+
qualityScores[i] = decodeDataSeries('QS')!
|
|
416
|
+
}
|
|
379
417
|
}
|
|
380
418
|
}
|
|
381
419
|
} else if (CramFlagsDecoder.isDecodeSequenceAsStar(cramFlags)) {
|
|
382
420
|
readBases = null
|
|
383
421
|
qualityScores = null
|
|
384
422
|
} else {
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
423
|
+
// Try raw bytes first for TextDecoder (most efficient)
|
|
424
|
+
const rawBA = decodeBulkBytesRaw?.('BA', readLength)
|
|
425
|
+
if (rawBA) {
|
|
426
|
+
readBases = textDecoder.decode(rawBA)
|
|
427
|
+
} else {
|
|
428
|
+
// Fallback to single-byte decoding
|
|
429
|
+
let s = ''
|
|
430
|
+
for (let i = 0; i < readLength; i++) {
|
|
431
|
+
s += String.fromCharCode(decodeDataSeries('BA')!)
|
|
432
|
+
}
|
|
433
|
+
readBases = s
|
|
388
434
|
}
|
|
389
|
-
readBases = String.fromCharCode(...bases)
|
|
390
435
|
|
|
391
436
|
if (CramFlagsDecoder.isPreservingQualityScores(cramFlags)) {
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
437
|
+
// Try raw bytes first (most efficient - just a subarray view)
|
|
438
|
+
const rawQS = decodeBulkBytesRaw?.('QS', readLength)
|
|
439
|
+
if (rawQS) {
|
|
440
|
+
qualityScores = rawQS
|
|
441
|
+
} else {
|
|
442
|
+
// Fallback to single-byte decoding into new Uint8Array
|
|
443
|
+
qualityScores = new Uint8Array(readLength)
|
|
444
|
+
for (let i = 0; i < readLength; i++) {
|
|
445
|
+
qualityScores[i] = decodeDataSeries('QS')!
|
|
446
|
+
}
|
|
395
447
|
}
|
|
396
448
|
}
|
|
397
449
|
}
|
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
import { CramArgumentError, CramMalformedError } from '../../errors.ts'
|
|
2
2
|
import { Cursors, DataTypeMapping } from '../codecs/_base.ts'
|
|
3
|
+
import { DataSeriesEncodingKey } from '../codecs/dataSeriesTypes.ts'
|
|
3
4
|
import { CramBufferOverrunError } from '../codecs/getBits.ts'
|
|
4
5
|
import Constants from '../constants.ts'
|
|
5
|
-
import decodeRecord, {
|
|
6
|
-
|
|
6
|
+
import decodeRecord, {
|
|
7
|
+
BulkByteRawDecoder,
|
|
8
|
+
DataSeriesDecoder,
|
|
9
|
+
} from './decodeRecord.ts'
|
|
10
|
+
import ExternalCodec from '../codecs/external.ts'
|
|
7
11
|
import { DataSeriesTypes } from '../container/compressionScheme.ts'
|
|
8
12
|
import CramContainer from '../container/index.ts'
|
|
9
13
|
import CramFile, { CramFileBlock } from '../file.ts'
|
|
10
|
-
import CramRecord from '../record.ts'
|
|
14
|
+
import CramRecord, { DecodeOptions, defaultDecodeOptions } from '../record.ts'
|
|
11
15
|
import {
|
|
12
16
|
MappedSliceHeader,
|
|
13
17
|
UnmappedSliceHeader,
|
|
@@ -335,7 +339,7 @@ export default class CramSlice {
|
|
|
335
339
|
return this.getRecords(() => true)
|
|
336
340
|
}
|
|
337
341
|
|
|
338
|
-
async _fetchRecords() {
|
|
342
|
+
async _fetchRecords(decodeOptions: Required<DecodeOptions>) {
|
|
339
343
|
const { majorVersion } = await this.file.getDefinition()
|
|
340
344
|
|
|
341
345
|
const compressionScheme = await this.container.getCompressionScheme()
|
|
@@ -412,6 +416,34 @@ export default class CramSlice {
|
|
|
412
416
|
}
|
|
413
417
|
return codec.decode(this, coreDataBlock, blocksByContentId, cursors)
|
|
414
418
|
}
|
|
419
|
+
|
|
420
|
+
// Create bulk byte decoder for QS and BA data series if they use External codec
|
|
421
|
+
const qsCodec = compressionScheme.getCodecForDataSeries('QS')
|
|
422
|
+
const baCodec = compressionScheme.getCodecForDataSeries('BA')
|
|
423
|
+
const qsIsExternal = qsCodec instanceof ExternalCodec
|
|
424
|
+
const baIsExternal = baCodec instanceof ExternalCodec
|
|
425
|
+
// Create raw byte decoder for QS/BA decoding
|
|
426
|
+
const decodeBulkBytesRaw: BulkByteRawDecoder | undefined =
|
|
427
|
+
qsIsExternal || baIsExternal
|
|
428
|
+
? (dataSeriesName, length) => {
|
|
429
|
+
if (dataSeriesName === 'QS' && qsIsExternal) {
|
|
430
|
+
return qsCodec.getBytesSubarray(
|
|
431
|
+
blocksByContentId,
|
|
432
|
+
cursors,
|
|
433
|
+
length,
|
|
434
|
+
)
|
|
435
|
+
}
|
|
436
|
+
if (dataSeriesName === 'BA' && baIsExternal) {
|
|
437
|
+
return baCodec.getBytesSubarray(
|
|
438
|
+
blocksByContentId,
|
|
439
|
+
cursors,
|
|
440
|
+
length,
|
|
441
|
+
)
|
|
442
|
+
}
|
|
443
|
+
return undefined
|
|
444
|
+
}
|
|
445
|
+
: undefined
|
|
446
|
+
|
|
415
447
|
const records: CramRecord[] = new Array(
|
|
416
448
|
sliceHeader.parsedContent.numRecords,
|
|
417
449
|
)
|
|
@@ -427,6 +459,8 @@ export default class CramSlice {
|
|
|
427
459
|
cursors,
|
|
428
460
|
majorVersion,
|
|
429
461
|
i,
|
|
462
|
+
decodeOptions,
|
|
463
|
+
decodeBulkBytesRaw,
|
|
430
464
|
)
|
|
431
465
|
records[i] = new CramRecord({
|
|
432
466
|
...init,
|
|
@@ -474,13 +508,21 @@ export default class CramSlice {
|
|
|
474
508
|
return records
|
|
475
509
|
}
|
|
476
510
|
|
|
477
|
-
async getRecords(
|
|
511
|
+
async getRecords(
|
|
512
|
+
filterFunction: (r: CramRecord) => boolean,
|
|
513
|
+
decodeOptions?: DecodeOptions,
|
|
514
|
+
) {
|
|
515
|
+
// Merge with defaults
|
|
516
|
+
const opts = { ...defaultDecodeOptions, ...decodeOptions }
|
|
517
|
+
|
|
478
518
|
// fetch the features if necessary, using the file-level feature cache
|
|
479
|
-
|
|
480
|
-
|
|
519
|
+
// Include decode options in cache key so different decode configs are cached separately
|
|
520
|
+
const optionsKey = `${opts.decodeTags ? 1 : 0}`
|
|
521
|
+
const cacheKey = `${this.container.filePosition}:${this.containerPosition}:${optionsKey}`
|
|
522
|
+
let recordsPromise = this.file.featureCache.get(cacheKey)
|
|
481
523
|
if (!recordsPromise) {
|
|
482
|
-
recordsPromise = this._fetchRecords()
|
|
483
|
-
this.file.featureCache.set(cacheKey
|
|
524
|
+
recordsPromise = this._fetchRecords(opts)
|
|
525
|
+
this.file.featureCache.set(cacheKey, recordsPromise)
|
|
484
526
|
}
|
|
485
527
|
|
|
486
528
|
const unfiltered = await recordsPromise
|
package/src/indexedCramFile.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Slice } from './craiIndex.ts'
|
|
2
2
|
import { SeqFetch } from './cramFile/file.ts'
|
|
3
3
|
import CramFile from './cramFile/index.ts'
|
|
4
|
-
import CramRecord from './cramFile/record.ts'
|
|
4
|
+
import CramRecord, { DecodeOptions } from './cramFile/record.ts'
|
|
5
5
|
import { CramUnimplementedError } from './errors.ts'
|
|
6
6
|
|
|
7
7
|
import type { GenericFilehandle } from 'generic-filehandle2'
|
|
@@ -86,11 +86,14 @@ export default class IndexedCramFile {
|
|
|
86
86
|
viewAsPairs?: boolean
|
|
87
87
|
pairAcrossChr?: boolean
|
|
88
88
|
maxInsertSize?: number
|
|
89
|
-
} = {},
|
|
89
|
+
} & DecodeOptions = {},
|
|
90
90
|
) {
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
91
|
+
const viewAsPairs = opts.viewAsPairs || false
|
|
92
|
+
const pairAcrossChr = opts.pairAcrossChr || false
|
|
93
|
+
const maxInsertSize = opts.maxInsertSize || 200000
|
|
94
|
+
const decodeOptions: DecodeOptions = {
|
|
95
|
+
decodeTags: opts.decodeTags,
|
|
96
|
+
}
|
|
94
97
|
|
|
95
98
|
if (typeof seq === 'string') {
|
|
96
99
|
// TODO: support string reference sequence names somehow
|
|
@@ -104,32 +107,36 @@ export default class IndexedCramFile {
|
|
|
104
107
|
// fetch all the slices and parse the feature data
|
|
105
108
|
const sliceResults = await Promise.all(
|
|
106
109
|
slices.map(slice =>
|
|
107
|
-
this.getRecordsInSlice(
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
110
|
+
this.getRecordsInSlice(
|
|
111
|
+
slice,
|
|
112
|
+
feature => {
|
|
113
|
+
// Check if feature belongs to this sequence
|
|
114
|
+
if (feature.sequenceId !== seq) {
|
|
115
|
+
return false
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// For unmapped reads (lengthOnRef is undefined), they are placed at their
|
|
119
|
+
// mate's position. Include them if that position is within the range.
|
|
120
|
+
if (feature.lengthOnRef === undefined) {
|
|
121
|
+
return (
|
|
122
|
+
feature.alignmentStart >= start && feature.alignmentStart <= end
|
|
123
|
+
)
|
|
124
|
+
}
|
|
112
125
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
if (feature.lengthOnRef === undefined) {
|
|
126
|
+
// For mapped reads, check if they overlap the requested range
|
|
127
|
+
// Use > instead of >= for start boundary to match samtools behavior
|
|
116
128
|
return (
|
|
117
|
-
feature.alignmentStart
|
|
129
|
+
feature.alignmentStart <= end &&
|
|
130
|
+
feature.alignmentStart + feature.lengthOnRef - 1 > start
|
|
118
131
|
)
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
// Use > instead of >= for start boundary to match samtools behavior
|
|
123
|
-
return (
|
|
124
|
-
feature.alignmentStart <= end &&
|
|
125
|
-
feature.alignmentStart + feature.lengthOnRef - 1 > start
|
|
126
|
-
)
|
|
127
|
-
}),
|
|
132
|
+
},
|
|
133
|
+
decodeOptions,
|
|
134
|
+
),
|
|
128
135
|
),
|
|
129
136
|
)
|
|
130
137
|
|
|
131
138
|
let ret: CramRecord[] = Array.prototype.concat(...sliceResults)
|
|
132
|
-
if (
|
|
139
|
+
if (viewAsPairs) {
|
|
133
140
|
const readNames: Record<string, number> = {}
|
|
134
141
|
const readIds: Record<string, number> = {}
|
|
135
142
|
for (const read of ret) {
|
|
@@ -159,9 +166,9 @@ export default class IndexedCramFile {
|
|
|
159
166
|
if (
|
|
160
167
|
unmatedPairs[name] &&
|
|
161
168
|
cramRecord.mate &&
|
|
162
|
-
(cramRecord.mate.sequenceId === seqId ||
|
|
169
|
+
(cramRecord.mate.sequenceId === seqId || pairAcrossChr) &&
|
|
163
170
|
Math.abs(cramRecord.alignmentStart - cramRecord.mate.alignmentStart) <
|
|
164
|
-
|
|
171
|
+
maxInsertSize
|
|
165
172
|
) {
|
|
166
173
|
const mateSlices = this.index.getEntriesForRange(
|
|
167
174
|
cramRecord.mate.sequenceId,
|
|
@@ -225,10 +232,11 @@ export default class IndexedCramFile {
|
|
|
225
232
|
sliceBytes,
|
|
226
233
|
}: { containerStart: number; sliceStart: number; sliceBytes: number },
|
|
227
234
|
filterFunction: (r: CramRecord) => boolean,
|
|
235
|
+
decodeOptions?: DecodeOptions,
|
|
228
236
|
) {
|
|
229
237
|
const container = this.cram.getContainerAtPosition(containerStart)
|
|
230
238
|
const slice = container.getSlice(sliceStart, sliceBytes)
|
|
231
|
-
return slice.getRecords(filterFunction)
|
|
239
|
+
return slice.getRecords(filterFunction, decodeOptions)
|
|
232
240
|
}
|
|
233
241
|
|
|
234
242
|
/**
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
*
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Get the byte position where the header ends (first data container starts)
|
|
6
|
+
*
|
|
7
|
+
* # Arguments
|
|
8
|
+
* * `data` - The CRAM file data (only needs first ~64KB typically)
|
|
9
|
+
*/
|
|
10
|
+
export function get_header_length(data: Uint8Array): bigint;
|
|
11
|
+
|
|
12
|
+
export function init_panic_hook(): void;
|
|
13
|
+
|
|
14
|
+
export function parse_crai_index(data: Uint8Array): any;
|
|
15
|
+
|
|
16
|
+
export function parse_cram_file(data: Uint8Array): any;
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Parse a CRAM file with reference sequences provided
|
|
20
|
+
*
|
|
21
|
+
* # Arguments
|
|
22
|
+
* * `data` - The CRAM file data as bytes
|
|
23
|
+
* * `references` - Array of {name: string, sequence: string} objects
|
|
24
|
+
*/
|
|
25
|
+
export function parse_cram_file_with_references(data: Uint8Array, references: any): any;
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Parse records from a specific region of a CRAM file
|
|
29
|
+
*
|
|
30
|
+
* This function is designed for indexed access. It takes:
|
|
31
|
+
* - header_data: The CRAM file from start up to the first data container
|
|
32
|
+
* - container_data: The bytes of the container(s) to parse
|
|
33
|
+
* - references: Reference sequences needed for decoding
|
|
34
|
+
*
|
|
35
|
+
* # Arguments
|
|
36
|
+
* * `header_data` - Bytes from file start through header container
|
|
37
|
+
* * `container_data` - Bytes of the data container(s) to parse
|
|
38
|
+
* * `references` - Array of {name: string, sequence: string} objects
|
|
39
|
+
*/
|
|
40
|
+
export function parse_cram_records_from_container(header_data: Uint8Array, container_data: Uint8Array, references: any): any;
|
|
41
|
+
|
|
42
|
+
export function parse_header(data: Uint8Array): any;
|