@gmod/cram 8.0.3 → 8.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cram-bundle.js +1 -1
- package/dist/cramFile/codecs/_base.d.ts +1 -0
- package/dist/cramFile/codecs/_base.js +3 -0
- package/dist/cramFile/codecs/_base.js.map +1 -1
- package/dist/cramFile/codecs/byteArrayLength.d.ts +1 -1
- package/dist/cramFile/codecs/byteArrayLength.js +14 -7
- package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/dist/cramFile/codecs/external.d.ts +1 -1
- package/dist/cramFile/codecs/external.js +32 -4
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/codecs/getBits.d.ts +1 -0
- package/dist/cramFile/codecs/getBits.js +4 -0
- package/dist/cramFile/codecs/getBits.js.map +1 -1
- package/dist/cramFile/record.d.ts +39 -9
- package/dist/cramFile/record.js +23 -19
- package/dist/cramFile/record.js.map +1 -1
- package/dist/cramFile/slice/decodeRecord.d.ts +4 -3
- package/dist/cramFile/slice/decodeRecord.js +62 -77
- package/dist/cramFile/slice/decodeRecord.js.map +1 -1
- package/dist/cramFile/slice/index.js +17 -27
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/cramFile/util.d.ts +2 -0
- package/dist/cramFile/util.js +13 -0
- package/dist/cramFile/util.js.map +1 -1
- package/dist/indexedCramFile.js +0 -3
- package/dist/indexedCramFile.js.map +1 -1
- package/esm/cramFile/codecs/_base.d.ts +1 -0
- package/esm/cramFile/codecs/_base.js +3 -0
- package/esm/cramFile/codecs/_base.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayLength.d.ts +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js +14 -7
- package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/esm/cramFile/codecs/external.d.ts +1 -1
- package/esm/cramFile/codecs/external.js +32 -4
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/codecs/getBits.d.ts +1 -0
- package/esm/cramFile/codecs/getBits.js +4 -0
- package/esm/cramFile/codecs/getBits.js.map +1 -1
- package/esm/cramFile/record.d.ts +39 -9
- package/esm/cramFile/record.js +23 -19
- package/esm/cramFile/record.js.map +1 -1
- package/esm/cramFile/slice/decodeRecord.d.ts +4 -3
- package/esm/cramFile/slice/decodeRecord.js +62 -77
- package/esm/cramFile/slice/decodeRecord.js.map +1 -1
- package/esm/cramFile/slice/index.js +17 -27
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/cramFile/util.d.ts +2 -0
- package/esm/cramFile/util.js +11 -0
- package/esm/cramFile/util.js.map +1 -1
- package/esm/indexedCramFile.js +0 -3
- package/esm/indexedCramFile.js.map +1 -1
- package/package.json +1 -1
- package/src/cramFile/codecs/_base.ts +8 -0
- package/src/cramFile/codecs/byteArrayLength.ts +21 -8
- package/src/cramFile/codecs/external.ts +41 -9
- package/src/cramFile/codecs/getBits.ts +3 -1
- package/src/cramFile/record.ts +64 -36
- package/src/cramFile/slice/decodeRecord.ts +77 -96
- package/src/cramFile/slice/index.ts +31 -47
- package/src/cramFile/util.ts +14 -0
- package/src/indexedCramFile.ts +0 -4
|
@@ -42,4 +42,12 @@ export default abstract class CramCodec<
|
|
|
42
42
|
blocksByContentId: Record<number, CramFileBlock>,
|
|
43
43
|
cursors: Cursors,
|
|
44
44
|
): DataTypeMapping[TResult] | undefined
|
|
45
|
+
|
|
46
|
+
getBytesSubarray(
|
|
47
|
+
_blocksByContentId: Record<number, CramFileBlock>,
|
|
48
|
+
_cursors: Cursors,
|
|
49
|
+
_length: number,
|
|
50
|
+
): Uint8Array | undefined {
|
|
51
|
+
return undefined
|
|
52
|
+
}
|
|
45
53
|
}
|
|
@@ -35,18 +35,31 @@ export default class ByteArrayStopCodec extends CramCodec<
|
|
|
35
35
|
const arrayLength =
|
|
36
36
|
lengthCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0
|
|
37
37
|
|
|
38
|
-
const data = new Uint8Array(arrayLength)
|
|
39
38
|
if (arrayLength > 0) {
|
|
40
39
|
const dataCodec = this._getDataCodec()
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
40
|
+
const subarray = dataCodec.getBytesSubarray(
|
|
41
|
+
blocksByContentId,
|
|
42
|
+
cursors,
|
|
43
|
+
arrayLength,
|
|
44
|
+
)
|
|
45
|
+
if (subarray) {
|
|
46
|
+
return subarray
|
|
47
|
+
} else {
|
|
48
|
+
const data = new Uint8Array(arrayLength)
|
|
49
|
+
for (let i = 0; i < arrayLength; i += 1) {
|
|
50
|
+
data[i] =
|
|
51
|
+
dataCodec.decode(
|
|
52
|
+
slice,
|
|
53
|
+
coreDataBlock,
|
|
54
|
+
blocksByContentId,
|
|
55
|
+
cursors,
|
|
56
|
+
) || 0
|
|
57
|
+
}
|
|
58
|
+
return data
|
|
46
59
|
}
|
|
60
|
+
} else {
|
|
61
|
+
return new Uint8Array(0)
|
|
47
62
|
}
|
|
48
|
-
|
|
49
|
-
return data
|
|
50
63
|
}
|
|
51
64
|
|
|
52
65
|
// memoize
|
|
@@ -1,10 +1,47 @@
|
|
|
1
1
|
import CramCodec, { Cursors } from './_base.ts'
|
|
2
2
|
import { CramUnimplementedError } from '../../errors.ts'
|
|
3
|
+
import { ExternalCramEncoding } from '../encoding.ts'
|
|
3
4
|
import { CramFileBlock } from '../file.ts'
|
|
4
|
-
import CramSlice from '../slice/index.ts'
|
|
5
|
-
import { parseItf8 } from '../util.ts'
|
|
6
5
|
import { CramBufferOverrunError } from './getBits.ts'
|
|
7
|
-
import
|
|
6
|
+
import CramSlice from '../slice/index.ts'
|
|
7
|
+
|
|
8
|
+
function parseItf8Inline(buffer: Uint8Array, cursor: { bytePosition: number }) {
|
|
9
|
+
const offset = cursor.bytePosition
|
|
10
|
+
const countFlags = buffer[offset]!
|
|
11
|
+
if (countFlags < 0x80) {
|
|
12
|
+
cursor.bytePosition = offset + 1
|
|
13
|
+
return countFlags
|
|
14
|
+
}
|
|
15
|
+
if (countFlags < 0xc0) {
|
|
16
|
+
cursor.bytePosition = offset + 2
|
|
17
|
+
return ((countFlags & 0x3f) << 8) | buffer[offset + 1]!
|
|
18
|
+
}
|
|
19
|
+
if (countFlags < 0xe0) {
|
|
20
|
+
cursor.bytePosition = offset + 3
|
|
21
|
+
return (
|
|
22
|
+
((countFlags & 0x1f) << 16) |
|
|
23
|
+
(buffer[offset + 1]! << 8) |
|
|
24
|
+
buffer[offset + 2]!
|
|
25
|
+
)
|
|
26
|
+
}
|
|
27
|
+
if (countFlags < 0xf0) {
|
|
28
|
+
cursor.bytePosition = offset + 4
|
|
29
|
+
return (
|
|
30
|
+
((countFlags & 0x0f) << 24) |
|
|
31
|
+
(buffer[offset + 1]! << 16) |
|
|
32
|
+
(buffer[offset + 2]! << 8) |
|
|
33
|
+
buffer[offset + 3]!
|
|
34
|
+
)
|
|
35
|
+
}
|
|
36
|
+
cursor.bytePosition = offset + 5
|
|
37
|
+
return (
|
|
38
|
+
((countFlags & 0x0f) << 28) |
|
|
39
|
+
(buffer[offset + 1]! << 20) |
|
|
40
|
+
(buffer[offset + 2]! << 12) |
|
|
41
|
+
(buffer[offset + 3]! << 4) |
|
|
42
|
+
(buffer[offset + 4]! & 0x0f)
|
|
43
|
+
)
|
|
44
|
+
}
|
|
8
45
|
|
|
9
46
|
export default class ExternalCodec extends CramCodec<
|
|
10
47
|
'int' | 'byte',
|
|
@@ -37,12 +74,7 @@ export default class ExternalCodec extends CramCodec<
|
|
|
37
74
|
const cursor = cursors.externalBlocks.getCursor(blockContentId)
|
|
38
75
|
|
|
39
76
|
if (this.dataType === 'int') {
|
|
40
|
-
|
|
41
|
-
contentBlock.content,
|
|
42
|
-
cursor.bytePosition,
|
|
43
|
-
)
|
|
44
|
-
cursor.bytePosition += bytesRead
|
|
45
|
-
return result
|
|
77
|
+
return parseItf8Inline(contentBlock.content, cursor)
|
|
46
78
|
} else {
|
|
47
79
|
if (cursor.bytePosition >= contentBlock.content.length) {
|
|
48
80
|
throw new CramBufferOverrunError(
|
package/src/cramFile/record.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import Constants from './constants.ts'
|
|
2
2
|
import CramContainerCompressionScheme from './container/compressionScheme.ts'
|
|
3
|
+
import { readNullTerminatedStringFromBuffer } from './util.ts'
|
|
3
4
|
|
|
4
5
|
import type decodeRecord from './slice/decodeRecord.ts'
|
|
5
6
|
|
|
@@ -20,15 +21,33 @@ export interface RefRegion {
|
|
|
20
21
|
seq: string
|
|
21
22
|
}
|
|
22
23
|
|
|
23
|
-
|
|
24
|
-
code: string
|
|
24
|
+
interface ReadFeatureBase {
|
|
25
25
|
pos: number
|
|
26
26
|
refPos: number
|
|
27
|
-
data: any
|
|
28
|
-
ref?: string
|
|
29
|
-
sub?: string
|
|
30
27
|
}
|
|
31
28
|
|
|
29
|
+
/**
|
|
30
|
+
* Read features describe differences between a read and the reference sequence.
|
|
31
|
+
* Each feature has a code indicating the type of difference, a position in the
|
|
32
|
+
* read (pos), and a position on the reference (refPos).
|
|
33
|
+
*/
|
|
34
|
+
export type ReadFeature =
|
|
35
|
+
/** I=insertion, S=soft clip, b=bases, i=single-base insertion — all carry a sequence string */
|
|
36
|
+
| (ReadFeatureBase & { code: 'I' | 'S' | 'b' | 'i'; data: string })
|
|
37
|
+
/** B=base and quality pair — [substituted base, quality score] */
|
|
38
|
+
| (ReadFeatureBase & { code: 'B'; data: [string, number] })
|
|
39
|
+
/** X=base substitution — data is the substitution matrix index, ref/sub filled in by addReferenceSequence */
|
|
40
|
+
| (ReadFeatureBase & {
|
|
41
|
+
code: 'X'
|
|
42
|
+
data: number
|
|
43
|
+
ref?: string
|
|
44
|
+
sub?: string
|
|
45
|
+
})
|
|
46
|
+
/** D=deletion, N=reference skip, H=hard clip, P=padding, Q=single quality score */
|
|
47
|
+
| (ReadFeatureBase & { code: 'D' | 'N' | 'H' | 'P' | 'Q'; data: number })
|
|
48
|
+
/** q=quality scores for a stretch of bases */
|
|
49
|
+
| (ReadFeatureBase & { code: 'q'; data: number[] })
|
|
50
|
+
|
|
32
51
|
export interface DecodeOptions {
|
|
33
52
|
/** Whether to parse tags. If false, raw tag data is stored for lazy parsing. Default true. */
|
|
34
53
|
decodeTags?: boolean
|
|
@@ -70,40 +89,30 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
|
|
|
70
89
|
currentReadFeature += 1
|
|
71
90
|
|
|
72
91
|
if (feature.code === 'b') {
|
|
73
|
-
// specify a base pair for some reason
|
|
74
92
|
const added = feature.data
|
|
75
93
|
bases += added
|
|
76
94
|
regionPos += added.length
|
|
77
95
|
} else if (feature.code === 'B') {
|
|
78
|
-
// base pair and associated quality
|
|
79
|
-
// TODO: do we need to set the quality in the qual scores?
|
|
80
96
|
bases += feature.data[0]
|
|
81
97
|
regionPos += 1
|
|
82
98
|
} else if (feature.code === 'X') {
|
|
83
|
-
// base substitution
|
|
84
99
|
bases += feature.sub
|
|
85
100
|
regionPos += 1
|
|
86
101
|
} else if (feature.code === 'I') {
|
|
87
|
-
// insertion
|
|
88
102
|
bases += feature.data
|
|
89
103
|
} else if (feature.code === 'D') {
|
|
90
|
-
// deletion
|
|
91
104
|
regionPos += feature.data
|
|
92
105
|
} else if (feature.code === 'i') {
|
|
93
|
-
// insert single base
|
|
94
106
|
bases += feature.data
|
|
95
107
|
} else if (feature.code === 'N') {
|
|
96
|
-
// reference skip. delete some bases
|
|
97
|
-
// do nothing
|
|
98
|
-
// seqBases.splice(feature.pos - 1, feature.data)
|
|
99
108
|
regionPos += feature.data
|
|
100
109
|
} else if (feature.code === 'S') {
|
|
101
|
-
// soft clipped bases that should be present in the read seq
|
|
102
|
-
// seqBases.splice(feature.pos - 1, 0, ...feature.data.split(''))
|
|
103
110
|
bases += feature.data
|
|
104
111
|
} else if (feature.code === 'P') {
|
|
105
112
|
// padding, do nothing
|
|
106
|
-
}
|
|
113
|
+
}
|
|
114
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
115
|
+
else if (feature.code === 'H') {
|
|
107
116
|
// hard clip, do nothing
|
|
108
117
|
}
|
|
109
118
|
} else if (currentReadFeature < cramRecord.readFeatures.length) {
|
|
@@ -129,7 +138,7 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
|
|
|
129
138
|
return bases.toUpperCase()
|
|
130
139
|
}
|
|
131
140
|
|
|
132
|
-
const baseNumbers = {
|
|
141
|
+
const baseNumbers: Record<string, number | undefined> = {
|
|
133
142
|
a: 0,
|
|
134
143
|
A: 0,
|
|
135
144
|
c: 1,
|
|
@@ -146,7 +155,12 @@ function decodeBaseSubstitution(
|
|
|
146
155
|
cramRecord: CramRecord,
|
|
147
156
|
refRegion: RefRegion,
|
|
148
157
|
compressionScheme: CramContainerCompressionScheme,
|
|
149
|
-
readFeature:
|
|
158
|
+
readFeature: ReadFeatureBase & {
|
|
159
|
+
code: 'X'
|
|
160
|
+
data: number
|
|
161
|
+
ref?: string
|
|
162
|
+
sub?: string
|
|
163
|
+
},
|
|
150
164
|
) {
|
|
151
165
|
// decode base substitution code using the substitution matrix
|
|
152
166
|
const refCoord = readFeature.refPos - refRegion.start
|
|
@@ -154,7 +168,7 @@ function decodeBaseSubstitution(
|
|
|
154
168
|
if (refBase) {
|
|
155
169
|
readFeature.ref = refBase
|
|
156
170
|
}
|
|
157
|
-
let baseNumber =
|
|
171
|
+
let baseNumber = baseNumbers[refBase]
|
|
158
172
|
if (baseNumber === undefined) {
|
|
159
173
|
baseNumber = 4
|
|
160
174
|
}
|
|
@@ -240,7 +254,7 @@ export const MateFlagsDecoder = makeFlagsHelper(MateFlags)
|
|
|
240
254
|
* Class of each CRAM record returned by this API.
|
|
241
255
|
*/
|
|
242
256
|
export default class CramRecord {
|
|
243
|
-
public tags: Record<string, string>
|
|
257
|
+
public tags: Record<string, string | number | number[] | undefined>
|
|
244
258
|
public flags: number
|
|
245
259
|
public cramFlags: number
|
|
246
260
|
public readBases?: string | null
|
|
@@ -249,9 +263,13 @@ export default class CramRecord {
|
|
|
249
263
|
public alignmentStart: number
|
|
250
264
|
public lengthOnRef: number | undefined
|
|
251
265
|
public readLength: number
|
|
266
|
+
// templateLength is computed post-hoc for intra-slice mate pairs,
|
|
267
|
+
// templateSize is the raw CRAM-encoded TS data series value
|
|
252
268
|
public templateLength?: number
|
|
253
269
|
public templateSize?: number
|
|
254
|
-
|
|
270
|
+
private _readName?: string
|
|
271
|
+
private _readNameRaw?: Uint8Array
|
|
272
|
+
public _syntheticReadName?: string
|
|
255
273
|
public mateRecordNumber?: number
|
|
256
274
|
public mate?: MateRecord
|
|
257
275
|
public uniqueId: number
|
|
@@ -260,6 +278,18 @@ export default class CramRecord {
|
|
|
260
278
|
public mappingQuality: number | undefined
|
|
261
279
|
public qualityScores: Uint8Array | null | undefined
|
|
262
280
|
|
|
281
|
+
get readName() {
|
|
282
|
+
if (this._readName === undefined) {
|
|
283
|
+
if (this._readNameRaw) {
|
|
284
|
+
this._readName = readNullTerminatedStringFromBuffer(this._readNameRaw)
|
|
285
|
+
this._readNameRaw = undefined
|
|
286
|
+
} else {
|
|
287
|
+
return this._syntheticReadName
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
return this._readName
|
|
291
|
+
}
|
|
292
|
+
|
|
263
293
|
constructor({
|
|
264
294
|
flags,
|
|
265
295
|
cramFlags,
|
|
@@ -272,32 +302,31 @@ export default class CramRecord {
|
|
|
272
302
|
readFeatures,
|
|
273
303
|
mateToUse,
|
|
274
304
|
readGroupId,
|
|
275
|
-
|
|
305
|
+
readNameRaw,
|
|
276
306
|
sequenceId,
|
|
277
307
|
uniqueId,
|
|
278
308
|
templateSize,
|
|
279
309
|
alignmentStart,
|
|
280
310
|
tags,
|
|
281
|
-
}: ReturnType<typeof decodeRecord>
|
|
311
|
+
}: ReturnType<typeof decodeRecord>) {
|
|
282
312
|
this.flags = flags
|
|
283
313
|
this.cramFlags = cramFlags
|
|
284
314
|
this.readLength = readLength
|
|
285
315
|
this.mappingQuality = mappingQuality
|
|
286
316
|
this.lengthOnRef = lengthOnRef
|
|
287
317
|
this.qualityScores = qualityScores
|
|
288
|
-
if (readBases) {
|
|
289
|
-
this.readBases = readBases
|
|
290
|
-
}
|
|
291
|
-
|
|
292
318
|
this.readGroupId = readGroupId
|
|
293
|
-
this.readName = readName
|
|
294
319
|
this.sequenceId = sequenceId!
|
|
295
320
|
this.uniqueId = uniqueId
|
|
296
|
-
this.templateSize = templateSize
|
|
297
321
|
this.alignmentStart = alignmentStart
|
|
298
322
|
this.tags = tags
|
|
299
|
-
|
|
300
|
-
|
|
323
|
+
if (readNameRaw) {
|
|
324
|
+
this._readNameRaw = readNameRaw
|
|
325
|
+
}
|
|
326
|
+
if (readBases) {
|
|
327
|
+
this.readBases = readBases
|
|
328
|
+
}
|
|
329
|
+
this.templateSize = templateSize
|
|
301
330
|
if (readFeatures) {
|
|
302
331
|
this.readFeatures = readFeatures
|
|
303
332
|
}
|
|
@@ -430,9 +459,7 @@ export default class CramRecord {
|
|
|
430
459
|
return undefined
|
|
431
460
|
}
|
|
432
461
|
const isize = this.templateLength || this.templateSize || 0
|
|
433
|
-
return PAIR_ORIENTATION_TABLE[
|
|
434
|
-
((f >> 4) & 0xf) | (isize > 0 ? 16 : 0)
|
|
435
|
-
]
|
|
462
|
+
return PAIR_ORIENTATION_TABLE[((f >> 4) & 0xf) | (isize > 0 ? 16 : 0)]
|
|
436
463
|
}
|
|
437
464
|
|
|
438
465
|
/**
|
|
@@ -488,6 +515,7 @@ export default class CramRecord {
|
|
|
488
515
|
data[k] = (this as any)[k]
|
|
489
516
|
})
|
|
490
517
|
|
|
518
|
+
data.readName = this.readName
|
|
491
519
|
data.readBases = this.getReadBases()
|
|
492
520
|
data.qualityScores = this.qualityScores
|
|
493
521
|
? Array.from(this.qualityScores)
|