@gmod/cram 8.0.2 → 8.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cram-bundle.js +1 -1
- package/dist/cramFile/codecs/_base.d.ts +1 -0
- package/dist/cramFile/codecs/_base.js +3 -0
- package/dist/cramFile/codecs/_base.js.map +1 -1
- package/dist/cramFile/codecs/byteArrayLength.d.ts +1 -1
- package/dist/cramFile/codecs/byteArrayLength.js +14 -7
- package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/dist/cramFile/codecs/external.d.ts +1 -1
- package/dist/cramFile/codecs/external.js +32 -4
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/codecs/getBits.d.ts +1 -0
- package/dist/cramFile/codecs/getBits.js +4 -0
- package/dist/cramFile/codecs/getBits.js.map +1 -1
- package/dist/cramFile/record.d.ts +39 -9
- package/dist/cramFile/record.js +35 -35
- package/dist/cramFile/record.js.map +1 -1
- package/dist/cramFile/slice/decodeRecord.d.ts +4 -3
- package/dist/cramFile/slice/decodeRecord.js +62 -77
- package/dist/cramFile/slice/decodeRecord.js.map +1 -1
- package/dist/cramFile/slice/index.js +17 -27
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/cramFile/util.d.ts +2 -0
- package/dist/cramFile/util.js +13 -0
- package/dist/cramFile/util.js.map +1 -1
- package/dist/indexedCramFile.js +0 -3
- package/dist/indexedCramFile.js.map +1 -1
- package/esm/cramFile/codecs/_base.d.ts +1 -0
- package/esm/cramFile/codecs/_base.js +3 -0
- package/esm/cramFile/codecs/_base.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayLength.d.ts +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js +14 -7
- package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/esm/cramFile/codecs/external.d.ts +1 -1
- package/esm/cramFile/codecs/external.js +32 -4
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/codecs/getBits.d.ts +1 -0
- package/esm/cramFile/codecs/getBits.js +4 -0
- package/esm/cramFile/codecs/getBits.js.map +1 -1
- package/esm/cramFile/record.d.ts +39 -9
- package/esm/cramFile/record.js +35 -35
- package/esm/cramFile/record.js.map +1 -1
- package/esm/cramFile/slice/decodeRecord.d.ts +4 -3
- package/esm/cramFile/slice/decodeRecord.js +62 -77
- package/esm/cramFile/slice/decodeRecord.js.map +1 -1
- package/esm/cramFile/slice/index.js +17 -27
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/cramFile/util.d.ts +2 -0
- package/esm/cramFile/util.js +11 -0
- package/esm/cramFile/util.js.map +1 -1
- package/esm/indexedCramFile.js +0 -3
- package/esm/indexedCramFile.js.map +1 -1
- package/package.json +1 -1
- package/src/cramFile/codecs/_base.ts +8 -0
- package/src/cramFile/codecs/byteArrayLength.ts +21 -8
- package/src/cramFile/codecs/external.ts +41 -9
- package/src/cramFile/codecs/getBits.ts +3 -1
- package/src/cramFile/record.ts +76 -49
- package/src/cramFile/slice/decodeRecord.ts +77 -96
- package/src/cramFile/slice/index.ts +31 -47
- package/src/cramFile/util.ts +14 -0
- package/src/indexedCramFile.ts +0 -4
|
@@ -42,4 +42,12 @@ export default abstract class CramCodec<
|
|
|
42
42
|
blocksByContentId: Record<number, CramFileBlock>,
|
|
43
43
|
cursors: Cursors,
|
|
44
44
|
): DataTypeMapping[TResult] | undefined
|
|
45
|
+
|
|
46
|
+
getBytesSubarray(
|
|
47
|
+
_blocksByContentId: Record<number, CramFileBlock>,
|
|
48
|
+
_cursors: Cursors,
|
|
49
|
+
_length: number,
|
|
50
|
+
): Uint8Array | undefined {
|
|
51
|
+
return undefined
|
|
52
|
+
}
|
|
45
53
|
}
|
|
@@ -35,18 +35,31 @@ export default class ByteArrayStopCodec extends CramCodec<
|
|
|
35
35
|
const arrayLength =
|
|
36
36
|
lengthCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0
|
|
37
37
|
|
|
38
|
-
const data = new Uint8Array(arrayLength)
|
|
39
38
|
if (arrayLength > 0) {
|
|
40
39
|
const dataCodec = this._getDataCodec()
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
40
|
+
const subarray = dataCodec.getBytesSubarray(
|
|
41
|
+
blocksByContentId,
|
|
42
|
+
cursors,
|
|
43
|
+
arrayLength,
|
|
44
|
+
)
|
|
45
|
+
if (subarray) {
|
|
46
|
+
return subarray
|
|
47
|
+
} else {
|
|
48
|
+
const data = new Uint8Array(arrayLength)
|
|
49
|
+
for (let i = 0; i < arrayLength; i += 1) {
|
|
50
|
+
data[i] =
|
|
51
|
+
dataCodec.decode(
|
|
52
|
+
slice,
|
|
53
|
+
coreDataBlock,
|
|
54
|
+
blocksByContentId,
|
|
55
|
+
cursors,
|
|
56
|
+
) || 0
|
|
57
|
+
}
|
|
58
|
+
return data
|
|
46
59
|
}
|
|
60
|
+
} else {
|
|
61
|
+
return new Uint8Array(0)
|
|
47
62
|
}
|
|
48
|
-
|
|
49
|
-
return data
|
|
50
63
|
}
|
|
51
64
|
|
|
52
65
|
// memoize
|
|
@@ -1,10 +1,47 @@
|
|
|
1
1
|
import CramCodec, { Cursors } from './_base.ts'
|
|
2
2
|
import { CramUnimplementedError } from '../../errors.ts'
|
|
3
|
+
import { ExternalCramEncoding } from '../encoding.ts'
|
|
3
4
|
import { CramFileBlock } from '../file.ts'
|
|
4
|
-
import CramSlice from '../slice/index.ts'
|
|
5
|
-
import { parseItf8 } from '../util.ts'
|
|
6
5
|
import { CramBufferOverrunError } from './getBits.ts'
|
|
7
|
-
import
|
|
6
|
+
import CramSlice from '../slice/index.ts'
|
|
7
|
+
|
|
8
|
+
function parseItf8Inline(buffer: Uint8Array, cursor: { bytePosition: number }) {
|
|
9
|
+
const offset = cursor.bytePosition
|
|
10
|
+
const countFlags = buffer[offset]!
|
|
11
|
+
if (countFlags < 0x80) {
|
|
12
|
+
cursor.bytePosition = offset + 1
|
|
13
|
+
return countFlags
|
|
14
|
+
}
|
|
15
|
+
if (countFlags < 0xc0) {
|
|
16
|
+
cursor.bytePosition = offset + 2
|
|
17
|
+
return ((countFlags & 0x3f) << 8) | buffer[offset + 1]!
|
|
18
|
+
}
|
|
19
|
+
if (countFlags < 0xe0) {
|
|
20
|
+
cursor.bytePosition = offset + 3
|
|
21
|
+
return (
|
|
22
|
+
((countFlags & 0x1f) << 16) |
|
|
23
|
+
(buffer[offset + 1]! << 8) |
|
|
24
|
+
buffer[offset + 2]!
|
|
25
|
+
)
|
|
26
|
+
}
|
|
27
|
+
if (countFlags < 0xf0) {
|
|
28
|
+
cursor.bytePosition = offset + 4
|
|
29
|
+
return (
|
|
30
|
+
((countFlags & 0x0f) << 24) |
|
|
31
|
+
(buffer[offset + 1]! << 16) |
|
|
32
|
+
(buffer[offset + 2]! << 8) |
|
|
33
|
+
buffer[offset + 3]!
|
|
34
|
+
)
|
|
35
|
+
}
|
|
36
|
+
cursor.bytePosition = offset + 5
|
|
37
|
+
return (
|
|
38
|
+
((countFlags & 0x0f) << 28) |
|
|
39
|
+
(buffer[offset + 1]! << 20) |
|
|
40
|
+
(buffer[offset + 2]! << 12) |
|
|
41
|
+
(buffer[offset + 3]! << 4) |
|
|
42
|
+
(buffer[offset + 4]! & 0x0f)
|
|
43
|
+
)
|
|
44
|
+
}
|
|
8
45
|
|
|
9
46
|
export default class ExternalCodec extends CramCodec<
|
|
10
47
|
'int' | 'byte',
|
|
@@ -37,12 +74,7 @@ export default class ExternalCodec extends CramCodec<
|
|
|
37
74
|
const cursor = cursors.externalBlocks.getCursor(blockContentId)
|
|
38
75
|
|
|
39
76
|
if (this.dataType === 'int') {
|
|
40
|
-
|
|
41
|
-
contentBlock.content,
|
|
42
|
-
cursor.bytePosition,
|
|
43
|
-
)
|
|
44
|
-
cursor.bytePosition += bytesRead
|
|
45
|
-
return result
|
|
77
|
+
return parseItf8Inline(contentBlock.content, cursor)
|
|
46
78
|
} else {
|
|
47
79
|
if (cursor.bytePosition >= contentBlock.content.length) {
|
|
48
80
|
throw new CramBufferOverrunError(
|
package/src/cramFile/record.ts
CHANGED
|
@@ -1,23 +1,53 @@
|
|
|
1
1
|
import Constants from './constants.ts'
|
|
2
2
|
import CramContainerCompressionScheme from './container/compressionScheme.ts'
|
|
3
|
+
import { readNullTerminatedStringFromBuffer } from './util.ts'
|
|
3
4
|
|
|
4
5
|
import type decodeRecord from './slice/decodeRecord.ts'
|
|
5
6
|
|
|
7
|
+
// precomputed pair orientation strings indexed by ((flags >> 4) & 0xF) | (isize > 0 ? 16 : 0)
|
|
8
|
+
// bits 0-3 encode flag bits 0x10(reverse),0x20(mate reverse),0x40(read1),0x80(read2)
|
|
9
|
+
// bit 4 encodes whether isize > 0
|
|
10
|
+
// prettier-ignore
|
|
11
|
+
const PAIR_ORIENTATION_TABLE = [
|
|
12
|
+
'F F ','F R ','R F ','R R ','F2F1','F2R1','R2F1','R2R1',
|
|
13
|
+
'F1F2','F1R2','R1F2','R1R2','F2F1','F2R1','R2F1','R2R1',
|
|
14
|
+
'F F ','R F ','F R ','R R ','F1F2','R1F2','F1R2','R1R2',
|
|
15
|
+
'F2F1','R2F1','F2R1','R2R1','F1F2','R1F2','F1R2','R1R2',
|
|
16
|
+
]
|
|
17
|
+
|
|
6
18
|
export interface RefRegion {
|
|
7
19
|
start: number
|
|
8
20
|
end: number
|
|
9
21
|
seq: string
|
|
10
22
|
}
|
|
11
23
|
|
|
12
|
-
|
|
13
|
-
code: string
|
|
24
|
+
interface ReadFeatureBase {
|
|
14
25
|
pos: number
|
|
15
26
|
refPos: number
|
|
16
|
-
data: any
|
|
17
|
-
ref?: string
|
|
18
|
-
sub?: string
|
|
19
27
|
}
|
|
20
28
|
|
|
29
|
+
/**
|
|
30
|
+
* Read features describe differences between a read and the reference sequence.
|
|
31
|
+
* Each feature has a code indicating the type of difference, a position in the
|
|
32
|
+
* read (pos), and a position on the reference (refPos).
|
|
33
|
+
*/
|
|
34
|
+
export type ReadFeature =
|
|
35
|
+
/** I=insertion, S=soft clip, b=bases, i=single-base insertion — all carry a sequence string */
|
|
36
|
+
| (ReadFeatureBase & { code: 'I' | 'S' | 'b' | 'i'; data: string })
|
|
37
|
+
/** B=base and quality pair — [substituted base, quality score] */
|
|
38
|
+
| (ReadFeatureBase & { code: 'B'; data: [string, number] })
|
|
39
|
+
/** X=base substitution — data is the substitution matrix index, ref/sub filled in by addReferenceSequence */
|
|
40
|
+
| (ReadFeatureBase & {
|
|
41
|
+
code: 'X'
|
|
42
|
+
data: number
|
|
43
|
+
ref?: string
|
|
44
|
+
sub?: string
|
|
45
|
+
})
|
|
46
|
+
/** D=deletion, N=reference skip, H=hard clip, P=padding, Q=single quality score */
|
|
47
|
+
| (ReadFeatureBase & { code: 'D' | 'N' | 'H' | 'P' | 'Q'; data: number })
|
|
48
|
+
/** q=quality scores for a stretch of bases */
|
|
49
|
+
| (ReadFeatureBase & { code: 'q'; data: number[] })
|
|
50
|
+
|
|
21
51
|
export interface DecodeOptions {
|
|
22
52
|
/** Whether to parse tags. If false, raw tag data is stored for lazy parsing. Default true. */
|
|
23
53
|
decodeTags?: boolean
|
|
@@ -59,40 +89,30 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
|
|
|
59
89
|
currentReadFeature += 1
|
|
60
90
|
|
|
61
91
|
if (feature.code === 'b') {
|
|
62
|
-
// specify a base pair for some reason
|
|
63
92
|
const added = feature.data
|
|
64
93
|
bases += added
|
|
65
94
|
regionPos += added.length
|
|
66
95
|
} else if (feature.code === 'B') {
|
|
67
|
-
// base pair and associated quality
|
|
68
|
-
// TODO: do we need to set the quality in the qual scores?
|
|
69
96
|
bases += feature.data[0]
|
|
70
97
|
regionPos += 1
|
|
71
98
|
} else if (feature.code === 'X') {
|
|
72
|
-
// base substitution
|
|
73
99
|
bases += feature.sub
|
|
74
100
|
regionPos += 1
|
|
75
101
|
} else if (feature.code === 'I') {
|
|
76
|
-
// insertion
|
|
77
102
|
bases += feature.data
|
|
78
103
|
} else if (feature.code === 'D') {
|
|
79
|
-
// deletion
|
|
80
104
|
regionPos += feature.data
|
|
81
105
|
} else if (feature.code === 'i') {
|
|
82
|
-
// insert single base
|
|
83
106
|
bases += feature.data
|
|
84
107
|
} else if (feature.code === 'N') {
|
|
85
|
-
// reference skip. delete some bases
|
|
86
|
-
// do nothing
|
|
87
|
-
// seqBases.splice(feature.pos - 1, feature.data)
|
|
88
108
|
regionPos += feature.data
|
|
89
109
|
} else if (feature.code === 'S') {
|
|
90
|
-
// soft clipped bases that should be present in the read seq
|
|
91
|
-
// seqBases.splice(feature.pos - 1, 0, ...feature.data.split(''))
|
|
92
110
|
bases += feature.data
|
|
93
111
|
} else if (feature.code === 'P') {
|
|
94
112
|
// padding, do nothing
|
|
95
|
-
}
|
|
113
|
+
}
|
|
114
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
115
|
+
else if (feature.code === 'H') {
|
|
96
116
|
// hard clip, do nothing
|
|
97
117
|
}
|
|
98
118
|
} else if (currentReadFeature < cramRecord.readFeatures.length) {
|
|
@@ -118,7 +138,7 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
|
|
|
118
138
|
return bases.toUpperCase()
|
|
119
139
|
}
|
|
120
140
|
|
|
121
|
-
const baseNumbers = {
|
|
141
|
+
const baseNumbers: Record<string, number | undefined> = {
|
|
122
142
|
a: 0,
|
|
123
143
|
A: 0,
|
|
124
144
|
c: 1,
|
|
@@ -135,7 +155,12 @@ function decodeBaseSubstitution(
|
|
|
135
155
|
cramRecord: CramRecord,
|
|
136
156
|
refRegion: RefRegion,
|
|
137
157
|
compressionScheme: CramContainerCompressionScheme,
|
|
138
|
-
readFeature:
|
|
158
|
+
readFeature: ReadFeatureBase & {
|
|
159
|
+
code: 'X'
|
|
160
|
+
data: number
|
|
161
|
+
ref?: string
|
|
162
|
+
sub?: string
|
|
163
|
+
},
|
|
139
164
|
) {
|
|
140
165
|
// decode base substitution code using the substitution matrix
|
|
141
166
|
const refCoord = readFeature.refPos - refRegion.start
|
|
@@ -143,7 +168,7 @@ function decodeBaseSubstitution(
|
|
|
143
168
|
if (refBase) {
|
|
144
169
|
readFeature.ref = refBase
|
|
145
170
|
}
|
|
146
|
-
let baseNumber =
|
|
171
|
+
let baseNumber = baseNumbers[refBase]
|
|
147
172
|
if (baseNumber === undefined) {
|
|
148
173
|
baseNumber = 4
|
|
149
174
|
}
|
|
@@ -229,7 +254,7 @@ export const MateFlagsDecoder = makeFlagsHelper(MateFlags)
|
|
|
229
254
|
* Class of each CRAM record returned by this API.
|
|
230
255
|
*/
|
|
231
256
|
export default class CramRecord {
|
|
232
|
-
public tags: Record<string, string>
|
|
257
|
+
public tags: Record<string, string | number | number[] | undefined>
|
|
233
258
|
public flags: number
|
|
234
259
|
public cramFlags: number
|
|
235
260
|
public readBases?: string | null
|
|
@@ -238,9 +263,13 @@ export default class CramRecord {
|
|
|
238
263
|
public alignmentStart: number
|
|
239
264
|
public lengthOnRef: number | undefined
|
|
240
265
|
public readLength: number
|
|
266
|
+
// templateLength is computed post-hoc for intra-slice mate pairs,
|
|
267
|
+
// templateSize is the raw CRAM-encoded TS data series value
|
|
241
268
|
public templateLength?: number
|
|
242
269
|
public templateSize?: number
|
|
243
|
-
|
|
270
|
+
private _readName?: string
|
|
271
|
+
private _readNameRaw?: Uint8Array
|
|
272
|
+
public _syntheticReadName?: string
|
|
244
273
|
public mateRecordNumber?: number
|
|
245
274
|
public mate?: MateRecord
|
|
246
275
|
public uniqueId: number
|
|
@@ -249,6 +278,18 @@ export default class CramRecord {
|
|
|
249
278
|
public mappingQuality: number | undefined
|
|
250
279
|
public qualityScores: Uint8Array | null | undefined
|
|
251
280
|
|
|
281
|
+
get readName() {
|
|
282
|
+
if (this._readName === undefined) {
|
|
283
|
+
if (this._readNameRaw) {
|
|
284
|
+
this._readName = readNullTerminatedStringFromBuffer(this._readNameRaw)
|
|
285
|
+
this._readNameRaw = undefined
|
|
286
|
+
} else {
|
|
287
|
+
return this._syntheticReadName
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
return this._readName
|
|
291
|
+
}
|
|
292
|
+
|
|
252
293
|
constructor({
|
|
253
294
|
flags,
|
|
254
295
|
cramFlags,
|
|
@@ -261,32 +302,31 @@ export default class CramRecord {
|
|
|
261
302
|
readFeatures,
|
|
262
303
|
mateToUse,
|
|
263
304
|
readGroupId,
|
|
264
|
-
|
|
305
|
+
readNameRaw,
|
|
265
306
|
sequenceId,
|
|
266
307
|
uniqueId,
|
|
267
308
|
templateSize,
|
|
268
309
|
alignmentStart,
|
|
269
310
|
tags,
|
|
270
|
-
}: ReturnType<typeof decodeRecord>
|
|
311
|
+
}: ReturnType<typeof decodeRecord>) {
|
|
271
312
|
this.flags = flags
|
|
272
313
|
this.cramFlags = cramFlags
|
|
273
314
|
this.readLength = readLength
|
|
274
315
|
this.mappingQuality = mappingQuality
|
|
275
316
|
this.lengthOnRef = lengthOnRef
|
|
276
317
|
this.qualityScores = qualityScores
|
|
277
|
-
if (readBases) {
|
|
278
|
-
this.readBases = readBases
|
|
279
|
-
}
|
|
280
|
-
|
|
281
318
|
this.readGroupId = readGroupId
|
|
282
|
-
this.readName = readName
|
|
283
319
|
this.sequenceId = sequenceId!
|
|
284
320
|
this.uniqueId = uniqueId
|
|
285
|
-
this.templateSize = templateSize
|
|
286
321
|
this.alignmentStart = alignmentStart
|
|
287
322
|
this.tags = tags
|
|
288
|
-
|
|
289
|
-
|
|
323
|
+
if (readNameRaw) {
|
|
324
|
+
this._readNameRaw = readNameRaw
|
|
325
|
+
}
|
|
326
|
+
if (readBases) {
|
|
327
|
+
this.readBases = readBases
|
|
328
|
+
}
|
|
329
|
+
this.templateSize = templateSize
|
|
290
330
|
if (readFeatures) {
|
|
291
331
|
this.readFeatures = readFeatures
|
|
292
332
|
}
|
|
@@ -411,29 +451,15 @@ export default class CramRecord {
|
|
|
411
451
|
}
|
|
412
452
|
|
|
413
453
|
// adapted from igv.js
|
|
414
|
-
//
|
|
454
|
+
// uses precomputed lookup table indexed by flag bits + isize sign
|
|
415
455
|
getPairOrientation() {
|
|
416
456
|
const f = this.flags
|
|
417
457
|
// combined check: paired (0x1) set, unmapped (0x4) clear, mate unmapped (0x8) clear
|
|
418
458
|
if ((f & 0xd) !== 0x1 || this.sequenceId !== this.mate?.sequenceId) {
|
|
419
459
|
return undefined
|
|
420
460
|
}
|
|
421
|
-
const s1 = f & 0x10 ? 'R' : 'F'
|
|
422
|
-
const s2 = f & 0x20 ? 'R' : 'F'
|
|
423
|
-
let o1 = ' '
|
|
424
|
-
let o2 = ' '
|
|
425
|
-
if (f & 0x40) {
|
|
426
|
-
o1 = '1'
|
|
427
|
-
o2 = '2'
|
|
428
|
-
} else if (f & 0x80) {
|
|
429
|
-
o1 = '2'
|
|
430
|
-
o2 = '1'
|
|
431
|
-
}
|
|
432
|
-
|
|
433
461
|
const isize = this.templateLength || this.templateSize || 0
|
|
434
|
-
return isize > 0
|
|
435
|
-
? `${s1}${o1}${s2}${o2}`
|
|
436
|
-
: `${s2}${o2}${s1}${o1}`
|
|
462
|
+
return PAIR_ORIENTATION_TABLE[((f >> 4) & 0xf) | (isize > 0 ? 16 : 0)]
|
|
437
463
|
}
|
|
438
464
|
|
|
439
465
|
/**
|
|
@@ -489,6 +515,7 @@ export default class CramRecord {
|
|
|
489
515
|
data[k] = (this as any)[k]
|
|
490
516
|
})
|
|
491
517
|
|
|
518
|
+
data.readName = this.readName
|
|
492
519
|
data.readBases = this.getReadBases()
|
|
493
520
|
data.qualityScores = this.qualityScores
|
|
494
521
|
? Array.from(this.qualityScores)
|