@gmod/cram 1.5.9 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +90 -0
- package/README.md +182 -172
- package/dist/craiIndex.d.ts +37 -0
- package/dist/craiIndex.js +196 -301
- package/dist/craiIndex.js.map +1 -0
- package/dist/cram-bundle.js +6 -15
- package/dist/cramFile/codecs/_base.d.ts +6 -0
- package/dist/cramFile/codecs/_base.js +44 -53
- package/dist/cramFile/codecs/_base.js.map +1 -0
- package/dist/cramFile/codecs/beta.d.ts +4 -0
- package/dist/cramFile/codecs/beta.js +38 -48
- package/dist/cramFile/codecs/beta.js.map +1 -0
- package/dist/cramFile/codecs/byteArrayLength.d.ts +8 -0
- package/dist/cramFile/codecs/byteArrayLength.js +58 -78
- package/dist/cramFile/codecs/byteArrayLength.js.map +1 -0
- package/dist/cramFile/codecs/byteArrayStop.d.ts +6 -0
- package/dist/cramFile/codecs/byteArrayStop.js +62 -76
- package/dist/cramFile/codecs/byteArrayStop.js.map +1 -0
- package/dist/cramFile/codecs/external.d.ts +7 -0
- package/dist/cramFile/codecs/external.js +63 -81
- package/dist/cramFile/codecs/external.js.map +1 -0
- package/dist/cramFile/codecs/gamma.d.ts +4 -0
- package/dist/cramFile/codecs/gamma.js +43 -56
- package/dist/cramFile/codecs/gamma.js.map +1 -0
- package/dist/cramFile/codecs/huffman.d.ts +17 -0
- package/dist/cramFile/codecs/huffman.js +126 -199
- package/dist/cramFile/codecs/huffman.js.map +1 -0
- package/dist/cramFile/codecs/index.d.ts +2 -0
- package/dist/cramFile/codecs/index.js +31 -38
- package/dist/cramFile/codecs/index.js.map +1 -0
- package/dist/cramFile/codecs/subexp.d.ts +4 -0
- package/dist/cramFile/codecs/subexp.js +51 -64
- package/dist/cramFile/codecs/subexp.js.map +1 -0
- package/dist/cramFile/constants.d.ts +36 -0
- package/dist/cramFile/constants.js +52 -50
- package/dist/cramFile/constants.js.map +1 -0
- package/dist/cramFile/container/compressionScheme.d.ts +23 -0
- package/dist/cramFile/container/compressionScheme.js +115 -153
- package/dist/cramFile/container/compressionScheme.js.map +1 -0
- package/dist/cramFile/container/index.d.ts +13 -0
- package/dist/cramFile/container/index.js +169 -283
- package/dist/cramFile/container/index.js.map +1 -0
- package/dist/cramFile/file.d.ts +63 -0
- package/dist/cramFile/file.js +440 -766
- package/dist/cramFile/file.js.map +1 -0
- package/dist/cramFile/index.d.ts +2 -0
- package/dist/cramFile/index.js +7 -4
- package/dist/cramFile/index.js.map +1 -0
- package/dist/cramFile/record.d.ts +79 -0
- package/dist/cramFile/record.js +253 -308
- package/dist/cramFile/record.js.map +1 -0
- package/dist/cramFile/sectionParsers.d.ts +18 -0
- package/dist/cramFile/sectionParsers.js +324 -362
- package/dist/cramFile/sectionParsers.js.map +1 -0
- package/dist/cramFile/slice/decodeRecord.d.ts +2 -0
- package/dist/cramFile/slice/decodeRecord.js +278 -298
- package/dist/cramFile/slice/decodeRecord.js.map +1 -0
- package/dist/cramFile/slice/index.d.ts +20 -0
- package/dist/cramFile/slice/index.js +488 -789
- package/dist/cramFile/slice/index.js.map +1 -0
- package/dist/cramFile/util.d.ts +5 -0
- package/dist/cramFile/util.js +158 -144
- package/dist/cramFile/util.js.map +1 -0
- package/dist/errors.d.ts +23 -0
- package/dist/errors.js +66 -103
- package/dist/errors.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +12 -12
- package/dist/index.js.map +1 -0
- package/dist/indexedCramFile.d.ts +39 -0
- package/dist/indexedCramFile.js +213 -315
- package/dist/indexedCramFile.js.map +1 -0
- package/dist/io/bufferCache.d.ts +12 -0
- package/dist/io/bufferCache.js +108 -128
- package/dist/io/bufferCache.js.map +1 -0
- package/dist/io/index.d.ts +5 -0
- package/dist/io/index.js +29 -27
- package/dist/io/index.js.map +1 -0
- package/dist/io/localFile.d.ts +10 -0
- package/dist/io/localFile.js +105 -162
- package/dist/io/localFile.js.map +1 -0
- package/dist/io/remoteFile.d.ts +16 -0
- package/dist/io/remoteFile.js +137 -206
- package/dist/io/remoteFile.js.map +1 -0
- package/dist/rans/constants.d.ts +3 -0
- package/dist/rans/constants.js +6 -6
- package/dist/rans/constants.js.map +1 -0
- package/dist/rans/d04.d.ts +1 -0
- package/dist/rans/d04.js +70 -99
- package/dist/rans/d04.js.map +1 -0
- package/dist/rans/d14.d.ts +1 -0
- package/dist/rans/d14.js +55 -93
- package/dist/rans/d14.js.map +1 -0
- package/dist/rans/decoding.d.ts +30 -0
- package/dist/rans/decoding.js +112 -159
- package/dist/rans/decoding.js.map +1 -0
- package/dist/rans/frequencies.d.ts +2 -0
- package/dist/rans/frequencies.js +110 -119
- package/dist/rans/frequencies.js.map +1 -0
- package/dist/rans/index.d.ts +1 -0
- package/dist/rans/index.js +111 -174
- package/dist/rans/index.js.map +1 -0
- package/dist/sam.d.ts +1 -0
- package/dist/sam.js +16 -41
- package/dist/sam.js.map +1 -0
- package/dist/unzip-pako.d.ts +2 -0
- package/dist/unzip-pako.js +9 -0
- package/dist/unzip-pako.js.map +1 -0
- package/dist/unzip.d.ts +2 -0
- package/dist/unzip.js +6 -0
- package/dist/unzip.js.map +1 -0
- package/errors.js +66 -103
- package/esm/craiIndex.d.ts +37 -0
- package/esm/craiIndex.js +158 -0
- package/esm/craiIndex.js.map +1 -0
- package/esm/cramFile/codecs/_base.d.ts +6 -0
- package/esm/cramFile/codecs/_base.js +42 -0
- package/esm/cramFile/codecs/_base.js.map +1 -0
- package/esm/cramFile/codecs/beta.d.ts +4 -0
- package/esm/cramFile/codecs/beta.js +15 -0
- package/esm/cramFile/codecs/beta.js.map +1 -0
- package/esm/cramFile/codecs/byteArrayLength.d.ts +8 -0
- package/esm/cramFile/codecs/byteArrayLength.js +35 -0
- package/esm/cramFile/codecs/byteArrayLength.js.map +1 -0
- package/esm/cramFile/codecs/byteArrayStop.d.ts +6 -0
- package/esm/cramFile/codecs/byteArrayStop.js +40 -0
- package/esm/cramFile/codecs/byteArrayStop.js.map +1 -0
- package/esm/cramFile/codecs/external.d.ts +7 -0
- package/esm/cramFile/codecs/external.js +40 -0
- package/esm/cramFile/codecs/external.js.map +1 -0
- package/esm/cramFile/codecs/gamma.d.ts +4 -0
- package/esm/cramFile/codecs/gamma.js +20 -0
- package/esm/cramFile/codecs/gamma.js.map +1 -0
- package/esm/cramFile/codecs/huffman.d.ts +17 -0
- package/esm/cramFile/codecs/huffman.js +107 -0
- package/esm/cramFile/codecs/huffman.js.map +1 -0
- package/esm/cramFile/codecs/index.d.ts +2 -0
- package/esm/cramFile/codecs/index.js +30 -0
- package/esm/cramFile/codecs/index.js.map +1 -0
- package/esm/cramFile/codecs/subexp.d.ts +4 -0
- package/esm/cramFile/codecs/subexp.js +28 -0
- package/esm/cramFile/codecs/subexp.js.map +1 -0
- package/esm/cramFile/constants.d.ts +36 -0
- package/esm/cramFile/constants.js +51 -0
- package/esm/cramFile/constants.js.map +1 -0
- package/esm/cramFile/container/compressionScheme.d.ts +23 -0
- package/esm/cramFile/container/compressionScheme.js +123 -0
- package/esm/cramFile/container/compressionScheme.js.map +1 -0
- package/esm/cramFile/container/index.d.ts +13 -0
- package/esm/cramFile/container/index.js +84 -0
- package/esm/cramFile/container/index.js.map +1 -0
- package/esm/cramFile/file.d.ts +63 -0
- package/esm/cramFile/file.js +281 -0
- package/esm/cramFile/file.js.map +1 -0
- package/esm/cramFile/index.d.ts +2 -0
- package/esm/cramFile/index.js +3 -0
- package/esm/cramFile/index.js.map +1 -0
- package/esm/cramFile/record.d.ts +79 -0
- package/esm/cramFile/record.js +297 -0
- package/esm/cramFile/record.js.map +1 -0
- package/esm/cramFile/sectionParsers.d.ts +18 -0
- package/esm/cramFile/sectionParsers.js +347 -0
- package/esm/cramFile/sectionParsers.js.map +1 -0
- package/esm/cramFile/slice/decodeRecord.d.ts +2 -0
- package/esm/cramFile/slice/decodeRecord.js +299 -0
- package/esm/cramFile/slice/decodeRecord.js.map +1 -0
- package/esm/cramFile/slice/index.d.ts +20 -0
- package/esm/cramFile/slice/index.js +364 -0
- package/esm/cramFile/slice/index.js.map +1 -0
- package/esm/cramFile/util.d.ts +5 -0
- package/esm/cramFile/util.js +161 -0
- package/esm/cramFile/util.js.map +1 -0
- package/esm/errors.d.ts +23 -0
- package/esm/errors.js +24 -0
- package/esm/errors.js.map +1 -0
- package/esm/index.d.ts +4 -0
- package/esm/index.js +5 -0
- package/esm/index.js.map +1 -0
- package/esm/indexedCramFile.d.ts +39 -0
- package/esm/indexedCramFile.js +155 -0
- package/esm/indexedCramFile.js.map +1 -0
- package/esm/io/bufferCache.d.ts +12 -0
- package/esm/io/bufferCache.js +54 -0
- package/esm/io/bufferCache.js.map +1 -0
- package/esm/io/index.d.ts +5 -0
- package/esm/io/index.js +24 -0
- package/esm/io/index.js.map +1 -0
- package/esm/io/localFile.d.ts +10 -0
- package/esm/io/localFile.js +31 -0
- package/esm/io/localFile.js.map +1 -0
- package/esm/io/remoteFile.d.ts +16 -0
- package/esm/io/remoteFile.js +64 -0
- package/esm/io/remoteFile.js.map +1 -0
- package/esm/rans/constants.d.ts +3 -0
- package/esm/rans/constants.js +5 -0
- package/esm/rans/constants.js.map +1 -0
- package/esm/rans/d04.d.ts +1 -0
- package/esm/rans/d04.js +67 -0
- package/esm/rans/d04.js.map +1 -0
- package/esm/rans/d14.d.ts +1 -0
- package/esm/rans/d14.js +52 -0
- package/esm/rans/d14.js.map +1 -0
- package/esm/rans/decoding.d.ts +30 -0
- package/esm/rans/decoding.js +118 -0
- package/esm/rans/decoding.js.map +1 -0
- package/esm/rans/frequencies.d.ts +2 -0
- package/esm/rans/frequencies.js +110 -0
- package/esm/rans/frequencies.js.map +1 -0
- package/esm/rans/index.d.ts +1 -0
- package/esm/rans/index.js +195 -0
- package/esm/rans/index.js.map +1 -0
- package/esm/sam.d.ts +1 -0
- package/esm/sam.js +16 -0
- package/esm/sam.js.map +1 -0
- package/esm/unzip-pako.d.ts +2 -0
- package/esm/unzip-pako.js +5 -0
- package/esm/unzip-pako.js.map +1 -0
- package/esm/unzip.d.ts +2 -0
- package/esm/unzip.js +3 -0
- package/esm/unzip.js.map +1 -0
- package/package.json +38 -35
- package/src/craiIndex.js +180 -0
- package/src/cramFile/codecs/_base.js +49 -0
- package/src/cramFile/codecs/beta.js +23 -0
- package/src/cramFile/codecs/byteArrayLength.js +55 -0
- package/src/cramFile/codecs/byteArrayStop.js +50 -0
- package/src/cramFile/codecs/external.js +54 -0
- package/src/cramFile/codecs/gamma.js +30 -0
- package/src/cramFile/codecs/huffman.js +137 -0
- package/src/cramFile/codecs/index.js +38 -0
- package/src/cramFile/codecs/subexp.js +32 -0
- package/src/cramFile/constants.js +55 -0
- package/src/cramFile/container/compressionScheme.js +144 -0
- package/src/cramFile/container/index.js +119 -0
- package/src/cramFile/file.js +347 -0
- package/src/cramFile/index.js +3 -0
- package/src/cramFile/record.js +337 -0
- package/src/cramFile/sectionParsers.js +379 -0
- package/src/cramFile/slice/decodeRecord.js +362 -0
- package/src/cramFile/slice/index.js +497 -0
- package/src/cramFile/util.js +169 -0
- package/src/errors.js +22 -0
- package/src/index.js +5 -0
- package/src/indexedCramFile.js +191 -0
- package/src/io/bufferCache.js +66 -0
- package/src/io/index.js +26 -0
- package/src/io/localFile.js +35 -0
- package/src/io/remoteFile.js +71 -0
- package/src/rans/README.md +1 -0
- package/src/rans/constants.js +5 -0
- package/src/rans/d04.js +83 -0
- package/src/rans/d14.js +59 -0
- package/src/rans/decoding.js +141 -0
- package/src/rans/frequencies.js +121 -0
- package/src/rans/index.js +249 -0
- package/src/sam.js +15 -0
- package/src/unzip-pako.ts +5 -0
- package/src/unzip.ts +2 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
import Constants from './constants';
|
|
2
|
+
/**
 * Rebuild the bases of a read from a reference region and the record's
 * read features.
 *
 * Reference bases are copied between features; each feature then adds,
 * replaces or skips bases according to its `code`.
 *
 * @param {object} cramRecord record being decoded; this function reads its
 *   `lengthOnRef`, `readLength`, `alignmentStart`, `readFeatures` and calls
 *   `isUnknownBases()`
 * @param {object} refRegion
 * @param {number} refRegion.start coordinate of the first base of `seq`
 * @param {string} refRegion.seq reference bases for the region
 * @returns {string|undefined} upper-cased read bases, or undefined when the
 *   record has no length or is flagged as having unknown bases. If the
 *   reference runs out, or the read features are inconsistent, before
 *   `readLength` bases have been produced, the bases decoded so far are
 *   returned instead of looping forever.
 */
function decodeReadSequence(cramRecord, refRegion) {
  // if it has no length, it has no sequence
  if (!cramRecord.lengthOnRef && !cramRecord.readLength) {
    return undefined;
  }
  if (cramRecord.isUnknownBases()) {
    return undefined;
  }

  // remember: all coordinates are 1-based closed
  const regionSeqOffset = cramRecord.alignmentStart - refRegion.start;

  // no read features: the read is identical to the reference
  if (!cramRecord.readFeatures) {
    return refRegion.seq
      .substr(regionSeqOffset, cramRecord.lengthOnRef)
      .toUpperCase();
  }

  let bases = '';
  let regionPos = regionSeqOffset;
  let currentReadFeature = 0;
  while (bases.length < cramRecord.readLength) {
    if (currentReadFeature < cramRecord.readFeatures.length) {
      const feature = cramRecord.readFeatures[currentReadFeature];
      if (feature.code === 'Q' || feature.code === 'q') {
        // quality-only features do not affect the bases
        currentReadFeature += 1;
      } else if (feature.pos === bases.length + 1) {
        // the feature applies at the current read position: process it
        currentReadFeature += 1;
        switch (feature.code) {
          case 'b': {
            // run of bases given as comma-separated character codes
            const added = String.fromCharCode(...feature.data.split(','));
            bases += added;
            regionPos += added.length;
            break;
          }
          case 'B':
            // base pair and associated quality
            // TODO: do we need to set the quality in the qual scores?
            bases += feature.data[0];
            regionPos += 1;
            break;
          case 'X':
            // base substitution; `sub` is only present once the feature has
            // been decoded against a reference, so fall back to 'N' rather
            // than appending the text "undefined"
            bases += feature.sub || 'N';
            regionPos += 1;
            break;
          case 'I': // insertion
          case 'i': // single-base insertion
          case 'S': // soft-clipped bases that are present in the read
            bases += feature.data;
            break;
          case 'D': // deletion
          case 'N': // reference skip
            regionPos += feature.data;
            break;
          default:
            // 'P' (padding), 'H' (hard clip) and unknown codes add nothing
            break;
        }
      } else {
        // put down a chunk of reference up to the next read feature
        const chunk = refRegion.seq.substr(
          regionPos,
          feature.pos - bases.length - 1,
        );
        if (chunk.length === 0) {
          // no progress is possible (reference exhausted, or the feature
          // lies behind the current read position): stop instead of spinning
          break;
        }
        bases += chunk;
        regionPos += chunk.length;
      }
    } else {
      // put down a chunk of reference up to the full read length
      const chunk = refRegion.seq.substr(
        regionPos,
        cramRecord.readLength - bases.length,
      );
      if (chunk.length === 0) {
        // reference exhausted before the read was complete
        break;
      }
      bases += chunk;
      regionPos += chunk.length;
    }
  }
  return bases.toUpperCase();
}
|
|
93
|
+
// Row index into the substitution matrix for each reference base,
// in either letter case.
const baseNumbers = {
  A: 0,
  a: 0,
  C: 1,
  c: 1,
  G: 2,
  g: 2,
  T: 3,
  t: 3,
  N: 4,
  n: 4,
};

/**
 * Fill in the `ref` and `sub` properties of a base-substitution read
 * feature using the reference sequence and the substitution matrix.
 *
 * Does nothing when no reference region is given. `ref` is only set when a
 * reference base exists at the feature's position, and `sub` only when the
 * matrix has an entry for the feature's substitution code.
 *
 * @param {object} cramRecord unused by this function
 * @param {object} refRegion region with `start` and `seq`
 * @param {object} compressionScheme provides `substitutionMatrix`
 * @param {object} readFeature feature to annotate; mutated in place
 * @returns {undefined} nothing
 */
function decodeBaseSubstitution(cramRecord, refRegion, compressionScheme, readFeature) {
  if (refRegion) {
    const offsetInRegion = readFeature.refPos - refRegion.start;
    const referenceBase = refRegion.seq.charAt(offsetInRegion);
    if (referenceBase) {
      readFeature.ref = referenceBase;
    }

    // bases without an entry in the table are treated like N (row 4)
    const lookedUp = baseNumbers[referenceBase];
    const rowIndex = lookedUp === undefined ? 4 : lookedUp;

    const matrixRow = compressionScheme.substitutionMatrix[rowIndex];
    const substitutedBase = matrixRow[readFeature.data];
    if (substitutedBase) {
      readFeature.sub = substitutedBase;
    }
  }
}
|
|
125
|
+
/**
 * Class of each CRAM record returned by this API.
 */
export default class CramRecord {
  constructor() {
    this.tags = {};
  }

  /**
   * @returns {boolean} true if the read is paired, regardless of whether
   * both segments are mapped
   */
  isPaired() {
    return (this.flags & Constants.BAM_FPAIRED) !== 0;
  }

  /** @returns {boolean} true if the read is paired, and both segments are mapped */
  isProperlyPaired() {
    return (this.flags & Constants.BAM_FPROPER_PAIR) !== 0;
  }

  /** @returns {boolean} true if the read itself is unmapped; conflictive with isProperlyPaired */
  isSegmentUnmapped() {
    return (this.flags & Constants.BAM_FUNMAP) !== 0;
  }

  /** @returns {boolean} true if the mate of this read is unmapped; conflictive with isProperlyPaired */
  isMateUnmapped() {
    return (this.flags & Constants.BAM_FMUNMAP) !== 0;
  }

  /** @returns {boolean} true if the read is mapped to the reverse strand */
  isReverseComplemented() {
    return (this.flags & Constants.BAM_FREVERSE) !== 0;
  }

  /** @returns {boolean} true if the mate is mapped to the reverse strand */
  isMateReverseComplemented() {
    return (this.flags & Constants.BAM_FMREVERSE) !== 0;
  }

  /** @returns {boolean} true if this is read number 1 in a pair */
  isRead1() {
    return (this.flags & Constants.BAM_FREAD1) !== 0;
  }

  /** @returns {boolean} true if this is read number 2 in a pair */
  isRead2() {
    return (this.flags & Constants.BAM_FREAD2) !== 0;
  }

  /** @returns {boolean} true if this is a secondary alignment */
  isSecondary() {
    return (this.flags & Constants.BAM_FSECONDARY) !== 0;
  }

  /** @returns {boolean} true if this read has failed QC checks */
  isFailedQc() {
    return (this.flags & Constants.BAM_FQCFAIL) !== 0;
  }

  /** @returns {boolean} true if the read is an optical or PCR duplicate */
  isDuplicate() {
    return (this.flags & Constants.BAM_FDUP) !== 0;
  }

  /** @returns {boolean} true if this is a supplementary alignment */
  isSupplementary() {
    return (this.flags & Constants.BAM_FSUPPLEMENTARY) !== 0;
  }

  /**
   * @returns {boolean} true if the read is detached
   */
  isDetached() {
    return (this.cramFlags & Constants.CRAM_FLAG_DETACHED) !== 0;
  }

  /** @returns {boolean} true if the read has a mate in this same CRAM segment */
  hasMateDownStream() {
    return (this.cramFlags & Constants.CRAM_FLAG_MATE_DOWNSTREAM) !== 0;
  }

  /** @returns {boolean} true if the read contains qual scores */
  isPreservingQualityScores() {
    return (this.cramFlags & Constants.CRAM_FLAG_PRESERVE_QUAL_SCORES) !== 0;
  }

  /** @returns {boolean} true if the read has no sequence bases */
  isUnknownBases() {
    return (this.cramFlags & Constants.CRAM_FLAG_NO_SEQ) !== 0;
  }

  /**
   * Get the original sequence of this read. The bases are decoded on first
   * use from the reference region stored by `addReferenceSequence()` and
   * then kept on the record.
   * @returns {String} sequence basepairs
   */
  getReadBases() {
    const needsDecoding = !this.readBases && this._refRegion;
    if (needsDecoding) {
      this.readBases = decodeReadSequence(this, this._refRegion);
    }
    return this.readBases;
  }

  /**
   * Get the pair orientation of a paired read. Adapted from igv.js
   * @returns {String} the pair orientation (strand and read number of each
   * segment, e.g. `F1R2`), or null when the read and its mate are not both
   * mapped to the same sequence
   */
  getPairOrientation() {
    if (
      this.isSegmentUnmapped() ||
      !this.isPaired() ||
      this.isMateUnmapped() ||
      !this.mate ||
      this.sequenceId !== this.mate.sequenceId
    ) {
      return null;
    }

    const ownStrand = this.isReverseComplemented() ? 'R' : 'F';
    const mateStrand = this.isMateReverseComplemented() ? 'R' : 'F';

    let ownNumber = ' ';
    let mateNumber = ' ';
    if (this.isRead1()) {
      ownNumber = '1';
      mateNumber = '2';
    } else if (this.isRead2()) {
      ownNumber = '2';
      mateNumber = '1';
    }

    // this read is listed first only when the insert size is positive and
    // the read does not start after its mate
    const insertSize = this.templateLength || this.templateSize;
    const startsAfterMate = this.alignmentStart > this.mate.alignmentStart;
    const ownSegmentFirst = insertSize > 0 && !startsAfterMate;

    return ownSegmentFirst
      ? `${ownStrand}${ownNumber}${mateStrand}${mateNumber}`
      : `${mateStrand}${mateNumber}${ownStrand}${ownNumber}`;
  }

  /**
   * Annotates this feature with the given reference sequence basepair
   * information. This will add a `sub` and a `ref` item to base
   * substitution read features given the actual substituted and reference
   * base pairs, and will make the `getReadBases()` method work.
   *
   * @param {object} refRegion
   * @param {number} refRegion.start
   * @param {number} refRegion.end
   * @param {string} refRegion.seq
   * @param {CramContainerCompressionScheme} compressionScheme
   * @returns {undefined} nothing
   */
  addReferenceSequence(refRegion, compressionScheme) {
    if (this.readFeatures) {
      // use the reference bases to decode the bases
      // substituted in each base substitution
      this.readFeatures.forEach(readFeature => {
        if (readFeature.code !== 'X') {
          return;
        }
        decodeBaseSubstitution(this, refRegion, compressionScheme, readFeature);
      });
    }

    // bases already known: no need to remember the region
    if (this.readBases) {
      return;
    }

    // if this region completely covers this read,
    // keep a reference to it
    const lastBaseOnRef =
      this.alignmentStart + (this.lengthOnRef || this.readLength) - 1;
    if (refRegion.start <= this.alignmentStart && refRegion.end >= lastBaseOnRef) {
      this._refRegion = refRegion;
    }
  }

  /**
   * Build a plain object holding this record's own properties, leaving out
   * those whose name starts with an underscore, and with `readBases` set to
   * the result of `getReadBases()`.
   * @returns {object} plain-object copy of the record
   */
  toJSON() {
    const data = {};
    for (const key of Object.keys(this)) {
      if (key.charAt(0) !== '_') {
        data[key] = this[key];
      }
    }
    data.readBases = this.getReadBases();
    return data;
  }
}
|
|
297
|
+
//# sourceMappingURL=record.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"record.js","sourceRoot":"","sources":["../../src/cramFile/record.js"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,aAAa,CAAA;AAEnC,SAAS,kBAAkB,CAAC,UAAU,EAAE,SAAS;IAC/C,0CAA0C;IAC1C,IAAI,CAAC,UAAU,CAAC,WAAW,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE;QACrD,OAAO,SAAS,CAAA;KACjB;IAED,IAAI,UAAU,CAAC,cAAc,EAAE,EAAE;QAC/B,OAAO,SAAS,CAAA;KACjB;IAED,+CAA+C;IAC/C,MAAM,eAAe,GAAG,UAAU,CAAC,cAAc,GAAG,SAAS,CAAC,KAAK,CAAA;IAEnE,IAAI,CAAC,UAAU,CAAC,YAAY,EAAE;QAC5B,OAAO,SAAS,CAAC,GAAG;aACjB,MAAM,CAAC,eAAe,EAAE,UAAU,CAAC,WAAW,CAAC;aAC/C,WAAW,EAAE,CAAA;KACjB;IAED,IAAI,KAAK,GAAG,EAAE,CAAA;IACd,IAAI,SAAS,GAAG,eAAe,CAAA;IAC/B,IAAI,kBAAkB,GAAG,CAAC,CAAA;IAC1B,OAAO,KAAK,CAAC,MAAM,GAAG,UAAU,CAAC,UAAU,EAAE;QAC3C,IAAI,kBAAkB,GAAG,UAAU,CAAC,YAAY,CAAC,MAAM,EAAE;YACvD,MAAM,OAAO,GAAG,UAAU,CAAC,YAAY,CAAC,kBAAkB,CAAC,CAAA;YAC3D,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;gBAChD,kBAAkB,IAAI,CAAC,CAAA;aACxB;iBAAM,IAAI,OAAO,CAAC,GAAG,KAAK,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;gBAC3C,2BAA2B;gBAC3B,kBAAkB,IAAI,CAAC,CAAA;gBAEvB,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBACxB,sCAAsC;oBACtC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,GAAG,CAAC,CAAA;oBACzC,KAAK,IAAI,KAAK,CAAA;oBACd,SAAS,IAAI,KAAK,CAAC,MAAM,CAAA;iBAC1B;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,mCAAmC;oBACnC,0DAA0D;oBAC1D,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;oBACxB,SAAS,IAAI,CAAC,CAAA;iBACf;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,oBAAoB;oBACpB,KAAK,IAAI,OAAO,CAAC,GAAG,CAAA;oBACpB,SAAS,IAAI,CAAC,CAAA;iBACf;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,YAAY;oBACZ,KAAK,IAAI,OAAO,CAAC,IAAI,CAAA;iBACtB;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,WAAW;oBACX,SAAS,IAAI,OAAO,CAAC,IAAI,CAAA;iBAC1B;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,qBAAqB;oBACrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAA;iBACtB;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,oCAAoC;oBACpC,aAAa;oBACb,iDAAiD;oBACjD,SAAS,IAAI,OAAO,CAAC,IAAI,CAAA;iBAC1B;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GA
AG,EAAE;oBAC/B,4DAA4D;oBAC5D,iEAAiE;oBACjE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAA;iBACtB;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,sBAAsB;iBACvB;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,wBAAwB;iBACzB;aACF;iBAAM,IAAI,kBAAkB,GAAG,UAAU,CAAC,YAAY,CAAC,MAAM,EAAE;gBAC9D,2DAA2D;gBAC3D,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAChC,SAAS,EACT,UAAU,CAAC,YAAY,CAAC,kBAAkB,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CACnE,CAAA;gBACD,KAAK,IAAI,KAAK,CAAA;gBACd,SAAS,IAAI,KAAK,CAAC,MAAM,CAAA;aAC1B;SACF;aAAM;YACL,2DAA2D;YAC3D,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAChC,SAAS,EACT,UAAU,CAAC,UAAU,GAAG,KAAK,CAAC,MAAM,CACrC,CAAA;YACD,KAAK,IAAI,KAAK,CAAA;YACd,SAAS,IAAI,KAAK,CAAC,MAAM,CAAA;SAC1B;KACF;IAED,OAAO,KAAK,CAAC,WAAW,EAAE,CAAA;AAC5B,CAAC;AAED,MAAM,WAAW,GAAG;IAClB,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;CACL,CAAA;AAED,SAAS,sBAAsB,CAC7B,UAAU,EACV,SAAS,EACT,iBAAiB,EACjB,WAAW;IAEX,IAAI,CAAC,SAAS,EAAE;QACd,OAAM;KACP;IAED,8DAA8D;IAC9D,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,SAAS,CAAC,KAAK,CAAA;IACrD,MAAM,OAAO,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;IAC9C,IAAI,OAAO,EAAE;QACX,WAAW,CAAC,GAAG,GAAG,OAAO,CAAA;KAC1B;IACD,IAAI,UAAU,GAAG,WAAW,CAAC,OAAO,CAAC,CAAA;IACrC,IAAI,UAAU,KAAK,SAAS,EAAE;QAC5B,UAAU,GAAG,CAAC,CAAA;KACf;IACD,MAAM,kBAAkB,GAAG,iBAAiB,CAAC,kBAAkB,CAAC,UAAU,CAAC,CAAA;IAC3E,MAAM,IAAI,GAAG,kBAAkB,CAAC,WAAW,CAAC,IAAI,CAAC,CAAA;IACjD,IAAI,IAAI,EAAE;QACR,WAAW,CAAC,GAAG,GAAG,IAAI,CAAA;KACvB;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,OAAO,OAAO,UAAU;IAC7B;QACE,IAAI,CAAC,IAAI,GAAG,EAAE,CAAA;IAChB,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,CAAA;IAC/C,CAAC;IAED,kFAAkF;IAClF,gBAAgB;QACd,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,gBAAgB,CAAC,CAAA;IACpD,CAAC;IAED,gGAAgG;IAChG,iBAAiB;QACf,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,UAAU,CAAC,CAAA;IAC9C,CAAC;IAED,gGAAgG;IAChG,cAAc;
QACZ,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,CAAA;IAC/C,CAAC;IAED,0EAA0E;IAC1E,qBAAqB;QACnB,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,YAAY,CAAC,CAAA;IAChD,CAAC;IAED,0EAA0E;IAC1E,yBAAyB;QACvB,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,aAAa,CAAC,CAAA;IACjD,CAAC;IAED,iEAAiE;IACjE,OAAO;QACL,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,UAAU,CAAC,CAAA;IAC9C,CAAC;IAED,iEAAiE;IACjE,OAAO;QACL,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,UAAU,CAAC,CAAA;IAC9C,CAAC;IAED,+DAA+D;IAC/D,WAAW;QACT,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,cAAc,CAAC,CAAA;IAClD,CAAC;IAED,gEAAgE;IAChE,UAAU;QACR,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,CAAA;IAC/C,CAAC;IAED,yEAAyE;IACzE,WAAW;QACT,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAA;IAC5C,CAAC;IAED,mEAAmE;IACnE,eAAe;QACb,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,kBAAkB,CAAC,CAAA;IACtD,CAAC;IAED;;OAEG;IACH,UAAU;QACR,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,kBAAkB,CAAC,CAAA;IAC1D,CAAC;IAED,+EAA+E;IAC/E,iBAAiB;QACf,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,yBAAyB,CAAC,CAAA;IACjE,CAAC;IAED,+DAA+D;IAC/D,yBAAyB;QACvB,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,8BAA8B,CAAC,CAAA;IACtE,CAAC;IAED,gEAAgE;IAChE,cAAc;QACZ,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,gBAAgB,CAAC,CAAA;IACxD,CAAC;IAED;;;OAGG;IACH,YAAY;QACV,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,UAAU,EAAE;YACtC,IAAI,CAAC,SAAS,GAAG,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,CAAA;SAC3D;QACD,OAAO,IAAI,CAAC,SAAS,CAAA;IACvB,CAAC;IAED;;;OAGG;IACH,kBAAkB;QAChB,IACE,CAAC,IAAI,CAAC,iBAAiB,EAAE;YACzB,IAAI,CAAC,QAAQ,EAAE;YACf,CAAC,IAAI,CAAC,cAAc,EAAE;YACtB,IAAI,CAAC,IAAI;YACT,IAAI,CAAC,UAAU,KAAK,IAAI,CAAC,IAAI,CAAC,UAAU,EACxC;YACA,MAAM,EAAE,GAAG,IAAI,CAAC,qBAAqB,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAA;YACnD,MAAM,EAAE,GAAG,IAAI,CAAC,yBAAyB,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAA;YACvD,IAAI,EAAE,GAAG,GAAG,CAAA;YACZ,IAAI,EAAE,GAAG,GAAG,CAAA;YACZ,IAAI,IAAI,CAAC,OAAO
,EAAE,EAAE;gBAClB,EAAE,GAAG,GAAG,CAAA;gBACR,EAAE,GAAG,GAAG,CAAA;aACT;iBAAM,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE;gBACzB,EAAE,GAAG,GAAG,CAAA;gBACR,EAAE,GAAG,GAAG,CAAA;aACT;YAED,MAAM,GAAG,GAAG,EAAE,CAAA;YACd,IAAI,KAAK,GAAG,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,YAAY,CAAA;YACpD,IAAI,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,IAAI,KAAK,GAAG,CAAC,EAAE;gBAC/D,KAAK,GAAG,CAAC,KAAK,CAAA;aACf;YACD,IAAI,KAAK,GAAG,CAAC,EAAE;gBACb,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;aACZ;iBAAM;gBACL,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;aACZ;YACD,OAAO,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;SACpB;QACD,OAAO,IAAI,CAAA;IACb,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,oBAAoB,CAAC,SAAS,EAAE,iBAAiB;QAC/C,IAAI,IAAI,CAAC,YAAY,EAAE;YACrB,8CAA8C;YAC9C,wCAAwC;YACxC,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE;gBACtC,IAAI,WAAW,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC5B,sBAAsB,CACpB,IAAI,EACJ,SAAS,EACT,iBAAiB,EACjB,WAAW,CACZ,CAAA;iBACF;YACH,CAAC,CAAC,CAAA;SACH;QAED,8CAA8C;QAC9C,yBAAyB;QACzB,IACE,CAAC,IAAI,CAAC,SAAS;YACf,SAAS,CAAC,KAAK,IAAI,IAAI,CAAC,cAAc;YACtC,SAAS,CAAC,GAAG;gBACX,IAAI,CAAC,cAAc,GAAG,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EACjE;YACA,IAAI,CAAC,UAAU,GAAG,SAAS,CAAA;SAC5B;IACH,CAAC;IAED,MAAM;QACJ,MAAM,IAAI,GAAG,EAAE,CAAA;QACf,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;YAC5B,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE;gBACvB,OAAM;aACP;YACD,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QACnB,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAA;QAEpC,OAAO,IAAI,CAAA;IACb,CAAC;CACF"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Declaration of the exported `cramFileDefinition` section: a parser object
 * together with a `maxLength` number.
 * NOTE(review): `maxLength` is presumably the largest encoded size of the
 * section in bytes - confirm against the implementation.
 */
export namespace cramFileDefinition {
    const parser: any;
    const maxLength: number;
}
|
|
5
|
+
/**
 * Declaration of `getSectionParsers`. Given a major version it returns an
 * object of section definitions; the three declared here
 * (`cramFileDefinition`, `cramBlockHeader`, `cramBlockCrc32`) each pair a
 * `parser` with a `maxLength`.
 * NOTE(review): `majorVersion` is presumably the CRAM format major version
 * - confirm against the implementation.
 */
export function getSectionParsers(majorVersion: any): {
    cramFileDefinition: {
        parser: any;
        maxLength: number;
    };
    cramBlockHeader: {
        parser: any;
        maxLength: number;
    };
    cramBlockCrc32: {
        parser: any;
        maxLength: number;
    };
};
|
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
import { Parser } from '@gmod/binary-parser';
|
|
2
|
+
// Parser consisting of a single `itf8` field.
const singleItf8 = new Parser().itf8();
|
|
3
|
+
// File definition section: a 4-long `magic` string, one byte each for the
// major and minor version, and a 20-long `fileId` string with nulls stripped.
// NOTE(review): maxLength 26 matches 4 + 1 + 1 + 20, presumably the section
// size in bytes - confirm.
const cramFileDefinition = {
  parser: new Parser()
    .string('magic', { length: 4 })
    .uint8('majorVersion')
    .uint8('minorVersion')
    .string('fileId', { length: 20, stripNull: true }),
  maxLength: 26,
};
|
|
11
|
+
// Block header section: compression method and content type (one byte each,
// translated to names by the formatters below), followed by three `itf8`
// fields: contentId, compressedSize and uncompressedSize.
// NOTE(review): each formatter keeps its lookup table inside the function
// body; presumably the parser library needs formatters to be self-contained
// - confirm before hoisting the tables out.
// NOTE(review): maxLength 17 is presumably 1 + 1 + 3 * 5 bytes - confirm.
const cramBlockHeader = {
  parser: new Parser()
    .uint8('compressionMethod', {
      // maps the numeric method id to its name; throws for ids with no entry
      formatter: /* istanbul ignore next */ /* istanbul ignore next */ b => {
        const method = [
          'raw',
          'gzip',
          'bzip2',
          'lzma',
          'rans',
          'rans4x16',
          'arith',
          'fqzcomp',
          'tok3',
        ][b];
        if (!method) {
          throw new Error(`compression method number ${b} not implemented`);
        }
        return method;
      },
    })
    .uint8('contentType', {
      // maps the numeric content type id to its name; throws for unknown ids
      formatter: /* istanbul ignore next */ /* istanbul ignore next */ b => {
        const type = [
          'FILE_HEADER',
          'COMPRESSION_HEADER',
          'MAPPED_SLICE_HEADER',
          'UNMAPPED_SLICE_HEADER',
          'EXTERNAL_DATA',
          'CORE_DATA',
        ][b];
        if (!type) {
          throw new Error(`invalid block content type id ${b}`);
        }
        return type;
      },
    })
    .itf8('contentId')
    .itf8('compressedSize')
    .itf8('uncompressedSize'),
  maxLength: 17,
};
|
|
53
|
+
const cramBlockCrc32 = {
|
|
54
|
+
parser: new Parser().uint32('crc32'),
|
|
55
|
+
maxLength: 4,
|
|
56
|
+
};
|
|
57
|
+
// const ENCODING_NAMES = [
|
|
58
|
+
// 'NULL', // 0
|
|
59
|
+
// 'EXTERNAL', // 1
|
|
60
|
+
// 'GOLOMB', // 2
|
|
61
|
+
// 'HUFFMAN_INT', // 3
|
|
62
|
+
// 'BYTE_ARRAY_LEN', // 4
|
|
63
|
+
// 'BYTE_ARRAY_STOP', // 5
|
|
64
|
+
// 'BETA', // 6
|
|
65
|
+
// 'SUBEXP', // 7
|
|
66
|
+
// 'GOLOMB_RICE', // 8
|
|
67
|
+
// 'GAMMA', // 9
|
|
68
|
+
// ]
|
|
69
|
+
const cramTagDictionary = new Parser().itf8('size').buffer('ents', {
|
|
70
|
+
length: 'size',
|
|
71
|
+
formatter: /* istanbul ignore next */ /* istanbul ignore next */ buffer => {
|
|
72
|
+
function makeTagSet(stringStart, stringEnd) {
|
|
73
|
+
const str = buffer.toString('utf8', stringStart, stringEnd);
|
|
74
|
+
const tags = [];
|
|
75
|
+
for (let i = 0; i < str.length; i += 3) {
|
|
76
|
+
tags.push(str.substr(i, 3));
|
|
77
|
+
}
|
|
78
|
+
return tags;
|
|
79
|
+
}
|
|
80
|
+
/* eslint-disable */
|
|
81
|
+
var tagSets = [];
|
|
82
|
+
var stringStart = 0;
|
|
83
|
+
var i;
|
|
84
|
+
/* eslint-enable */
|
|
85
|
+
for (i = 0; i < buffer.length; i += 1) {
|
|
86
|
+
if (!buffer[i]) {
|
|
87
|
+
tagSets.push(makeTagSet(stringStart, i));
|
|
88
|
+
stringStart = i + 1;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
if (i > stringStart) {
|
|
92
|
+
tagSets.push(makeTagSet(stringStart, i));
|
|
93
|
+
}
|
|
94
|
+
return tagSets;
|
|
95
|
+
},
|
|
96
|
+
});
|
|
97
|
+
// const cramPreservationMapKeys = 'XX RN AP RR SM TD'.split(' ')
|
|
98
|
+
const parseByteAsBool = new Parser().uint8(null, {
|
|
99
|
+
formatter: /* istanbul ignore next */ /* istanbul ignore next */ val => !!val,
|
|
100
|
+
});
|
|
101
|
+
const cramPreservationMap = new Parser()
|
|
102
|
+
.itf8('mapSize')
|
|
103
|
+
.itf8('mapCount')
|
|
104
|
+
.array('ents', {
|
|
105
|
+
length: 'mapCount',
|
|
106
|
+
type: new Parser()
|
|
107
|
+
.string('key', {
|
|
108
|
+
length: 2,
|
|
109
|
+
stripNull: false,
|
|
110
|
+
// formatter: val => cramPreservationMapKeys[val] || 0,
|
|
111
|
+
})
|
|
112
|
+
.choice('value', {
|
|
113
|
+
tag: 'key',
|
|
114
|
+
choices: {
|
|
115
|
+
MI: parseByteAsBool,
|
|
116
|
+
UI: parseByteAsBool,
|
|
117
|
+
PI: parseByteAsBool,
|
|
118
|
+
RN: parseByteAsBool,
|
|
119
|
+
AP: parseByteAsBool,
|
|
120
|
+
RR: parseByteAsBool,
|
|
121
|
+
SM: new Parser().array(null, { type: 'uint8', length: 5 }),
|
|
122
|
+
TD: new Parser().nest(null, {
|
|
123
|
+
type: cramTagDictionary,
|
|
124
|
+
formatter: /* istanbul ignore next */ /* istanbul ignore next */ data => data.ents,
|
|
125
|
+
}),
|
|
126
|
+
},
|
|
127
|
+
}),
|
|
128
|
+
});
|
|
129
|
+
/* istanbul ignore next */
|
|
130
|
+
function formatMap(data) {
|
|
131
|
+
const map = {};
|
|
132
|
+
for (let i = 0; i < data.ents.length; i += 1) {
|
|
133
|
+
const { key, value } = data.ents[i];
|
|
134
|
+
if (map[key]) {
|
|
135
|
+
console.warn(`duplicate key ${key} in map`);
|
|
136
|
+
}
|
|
137
|
+
map[key] = value;
|
|
138
|
+
}
|
|
139
|
+
return map;
|
|
140
|
+
}
|
|
141
|
+
const unversionedParsers = {
|
|
142
|
+
cramFileDefinition,
|
|
143
|
+
cramBlockHeader,
|
|
144
|
+
cramBlockCrc32,
|
|
145
|
+
};
|
|
146
|
+
// each of these is a function of the major and minor version
|
|
147
|
+
const versionedParsers = {
|
|
148
|
+
// assemble a section parser for the unmapped slice header, with slight
|
|
149
|
+
// variations depending on the major version of the cram file
|
|
150
|
+
cramUnmappedSliceHeader(majorVersion) {
|
|
151
|
+
let maxLength = 0;
|
|
152
|
+
let parser = new Parser().itf8('numRecords');
|
|
153
|
+
maxLength += 5;
|
|
154
|
+
// recordCounter is ltf8 in CRAM v3 and later, itf8 in a CRAM v2 file, absent in CRAM v1
|
|
155
|
+
if (majorVersion >= 3) {
|
|
156
|
+
parser = parser.ltf8('recordCounter');
|
|
157
|
+
maxLength += 9;
|
|
158
|
+
}
|
|
159
|
+
else if (majorVersion === 2) {
|
|
160
|
+
parser = parser.itf8('recordCounter');
|
|
161
|
+
maxLength += 5;
|
|
162
|
+
}
|
|
163
|
+
parser = parser
|
|
164
|
+
.itf8('numBlocks')
|
|
165
|
+
.itf8('numContentIds')
|
|
166
|
+
.array('contentIds', {
|
|
167
|
+
type: singleItf8,
|
|
168
|
+
length: 'numContentIds',
|
|
169
|
+
});
|
|
170
|
+
maxLength += 5 * 2; // + numContentIds*5
|
|
171
|
+
// the md5 sum is missing in cram v1
|
|
172
|
+
if (majorVersion >= 2) {
|
|
173
|
+
parser = parser.array('md5', { type: 'uint8', length: 16 });
|
|
174
|
+
maxLength += 16;
|
|
175
|
+
}
|
|
176
|
+
const maxLengthFunc = numContentIds => maxLength + numContentIds * 5;
|
|
177
|
+
return { parser, maxLength: maxLengthFunc }; // : p, maxLength: numContentIds => 5 + 9 + 5 * 2 + 5 * numContentIds + 16 }
|
|
178
|
+
},
|
|
179
|
+
// assembles a section parser for the mapped slice header, with slight
|
|
180
|
+
// variations depending on the major version of the cram file
|
|
181
|
+
cramMappedSliceHeader(majorVersion) {
|
|
182
|
+
let parser = new Parser()
|
|
183
|
+
.itf8('refSeqId')
|
|
184
|
+
.itf8('refSeqStart')
|
|
185
|
+
.itf8('refSeqSpan')
|
|
186
|
+
.itf8('numRecords');
|
|
187
|
+
let maxLength = 5 * 4;
|
|
188
|
+
if (majorVersion >= 3) {
|
|
189
|
+
parser = parser.ltf8('recordCounter');
|
|
190
|
+
maxLength += 9;
|
|
191
|
+
}
|
|
192
|
+
else if (majorVersion === 2) {
|
|
193
|
+
parser = parser.itf8('recordCounter');
|
|
194
|
+
maxLength += 5;
|
|
195
|
+
}
|
|
196
|
+
parser = parser
|
|
197
|
+
.itf8('numBlocks')
|
|
198
|
+
.itf8('numContentIds')
|
|
199
|
+
.array('contentIds', {
|
|
200
|
+
type: singleItf8,
|
|
201
|
+
length: 'numContentIds',
|
|
202
|
+
})
|
|
203
|
+
.itf8('refBaseBlockId');
|
|
204
|
+
maxLength += 5 * 3;
|
|
205
|
+
// the md5 sum is missing in cram v1
|
|
206
|
+
if (majorVersion >= 2) {
|
|
207
|
+
parser = parser.array('md5', { type: 'uint8', length: 16 });
|
|
208
|
+
maxLength += 16;
|
|
209
|
+
}
|
|
210
|
+
const maxLengthFunc = numContentIds => maxLength + numContentIds * 5;
|
|
211
|
+
return { parser, maxLength: maxLengthFunc };
|
|
212
|
+
},
|
|
213
|
+
cramEncoding(majorVersion) {
|
|
214
|
+
const parser = new Parser()
|
|
215
|
+
.namely('cramEncoding')
|
|
216
|
+
.itf8('codecId')
|
|
217
|
+
.itf8('parametersBytes')
|
|
218
|
+
.choice('parameters', {
|
|
219
|
+
tag: 'codecId',
|
|
220
|
+
choices: {
|
|
221
|
+
0: new Parser(),
|
|
222
|
+
1: new Parser().itf8('blockContentId'),
|
|
223
|
+
2: new Parser().itf8('offset').itf8('M'),
|
|
224
|
+
// HUFFMAN_INT
|
|
225
|
+
3: Parser.start()
|
|
226
|
+
.itf8('numCodes')
|
|
227
|
+
.array('symbols', { length: 'numCodes', type: singleItf8 })
|
|
228
|
+
.itf8('numLengths')
|
|
229
|
+
.array('bitLengths', { length: 'numLengths', type: singleItf8 }),
|
|
230
|
+
4: Parser.start() // BYTE_ARRAY_LEN
|
|
231
|
+
.nest('lengthsEncoding', { type: 'cramEncoding' })
|
|
232
|
+
.nest('valuesEncoding', { type: 'cramEncoding' }),
|
|
233
|
+
// BYTE_ARRAY_STOP is a little different for CRAM v1
|
|
234
|
+
5: new Parser()
|
|
235
|
+
.uint8('stopByte')[majorVersion > 1 ? 'itf8' : 'int']('blockContentId'),
|
|
236
|
+
6: new Parser().itf8('offset').itf8('length'),
|
|
237
|
+
7: new Parser().itf8('offset').itf8('K'),
|
|
238
|
+
8: new Parser().itf8('offset').itf8('log2m'),
|
|
239
|
+
9: new Parser().itf8('offset'), // GAMMA
|
|
240
|
+
},
|
|
241
|
+
});
|
|
242
|
+
return { parser };
|
|
243
|
+
},
|
|
244
|
+
cramDataSeriesEncodingMap(majorVersion) {
|
|
245
|
+
return new Parser()
|
|
246
|
+
.itf8('mapSize')
|
|
247
|
+
.itf8('mapCount')
|
|
248
|
+
.array('ents', {
|
|
249
|
+
length: 'mapCount',
|
|
250
|
+
type: new Parser()
|
|
251
|
+
.string('key', { length: 2, stripNull: false })
|
|
252
|
+
.nest('value', { type: this.cramEncoding(majorVersion).parser }),
|
|
253
|
+
});
|
|
254
|
+
},
|
|
255
|
+
cramTagEncodingMap(majorVersion) {
|
|
256
|
+
return new Parser()
|
|
257
|
+
.itf8('mapSize')
|
|
258
|
+
.itf8('mapCount')
|
|
259
|
+
.array('ents', {
|
|
260
|
+
length: 'mapCount',
|
|
261
|
+
type: new Parser()
|
|
262
|
+
.itf8('key', {
|
|
263
|
+
formatter: /* istanbul ignore next */ /* istanbul ignore next */ integerRepresentation =>
|
|
264
|
+
/* istanbul ignore next */
|
|
265
|
+
String.fromCharCode((integerRepresentation >> 16) & 0xff) +
|
|
266
|
+
String.fromCharCode((integerRepresentation >> 8) & 0xff) +
|
|
267
|
+
String.fromCharCode(integerRepresentation & 0xff),
|
|
268
|
+
})
|
|
269
|
+
.nest('value', { type: this.cramEncoding(majorVersion).parser }),
|
|
270
|
+
});
|
|
271
|
+
},
|
|
272
|
+
cramCompressionHeader(majorVersion) {
|
|
273
|
+
let parser = new Parser();
|
|
274
|
+
// TODO: if we want to support CRAM v1, we will need to refactor
|
|
275
|
+
// compression header into 2 parts to parse the landmarks,
|
|
276
|
+
// like the container header
|
|
277
|
+
parser = parser
|
|
278
|
+
.nest('preservation', {
|
|
279
|
+
type: cramPreservationMap,
|
|
280
|
+
formatter: formatMap,
|
|
281
|
+
})
|
|
282
|
+
.nest('dataSeriesEncoding', {
|
|
283
|
+
type: this.cramDataSeriesEncodingMap(majorVersion),
|
|
284
|
+
formatter: formatMap,
|
|
285
|
+
})
|
|
286
|
+
.nest('tagEncoding', {
|
|
287
|
+
type: this.cramTagEncodingMap(majorVersion),
|
|
288
|
+
formatter: formatMap,
|
|
289
|
+
});
|
|
290
|
+
return { parser };
|
|
291
|
+
},
|
|
292
|
+
cramContainerHeader1(majorVersion) {
|
|
293
|
+
let parser = new Parser()
|
|
294
|
+
.int32('length') // byte size of the container data (blocks)
|
|
295
|
+
.itf8('refSeqId') // reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences
|
|
296
|
+
.itf8('refSeqStart') // the alignment start position or 0 for unmapped reads
|
|
297
|
+
.itf8('alignmentSpan') // the length of the alignment or 0 for unmapped reads
|
|
298
|
+
.itf8('numRecords'); // number of records in the container
|
|
299
|
+
let maxLength = 4 + 5 * 4;
|
|
300
|
+
if (majorVersion >= 3) {
|
|
301
|
+
parser = parser.ltf8('recordCounter'); // 1-based sequential index of records in the file/stream.
|
|
302
|
+
maxLength += 9;
|
|
303
|
+
}
|
|
304
|
+
else if (majorVersion === 2) {
|
|
305
|
+
parser = parser.itf8('recordCounter');
|
|
306
|
+
maxLength += 5;
|
|
307
|
+
}
|
|
308
|
+
if (majorVersion > 1) {
|
|
309
|
+
parser = parser.ltf8('numBases'); // number of read bases
|
|
310
|
+
maxLength += 9;
|
|
311
|
+
}
|
|
312
|
+
parser = parser
|
|
313
|
+
.itf8('numBlocks') // the number of blocks
|
|
314
|
+
.itf8('numLandmarks'); // the number of landmarks
|
|
315
|
+
maxLength += 5 + 5;
|
|
316
|
+
return { parser, maxLength };
|
|
317
|
+
},
|
|
318
|
+
cramContainerHeader2(majorVersion) {
|
|
319
|
+
let parser = new Parser()
|
|
320
|
+
.itf8('numLandmarks') // the number of landmarks
|
|
321
|
+
// Each integer value of this array is a byte offset
|
|
322
|
+
// into the blocks byte array. Landmarks are used for
|
|
323
|
+
// random access indexing.
|
|
324
|
+
.array('landmarks', {
|
|
325
|
+
type: new Parser().itf8(),
|
|
326
|
+
length: 'numLandmarks',
|
|
327
|
+
});
|
|
328
|
+
let crcLength = 0;
|
|
329
|
+
if (majorVersion >= 3) {
|
|
330
|
+
parser = parser.uint32('crc32');
|
|
331
|
+
crcLength = 4;
|
|
332
|
+
}
|
|
333
|
+
return {
|
|
334
|
+
parser,
|
|
335
|
+
maxLength: numLandmarks => 5 + numLandmarks * 5 + crcLength,
|
|
336
|
+
};
|
|
337
|
+
},
|
|
338
|
+
};
|
|
339
|
+
function getSectionParsers(majorVersion) {
|
|
340
|
+
const parsers = Object.assign({}, unversionedParsers);
|
|
341
|
+
Object.keys(versionedParsers).forEach(parserName => {
|
|
342
|
+
parsers[parserName] = versionedParsers[parserName](majorVersion);
|
|
343
|
+
});
|
|
344
|
+
return parsers;
|
|
345
|
+
}
|
|
346
|
+
export { cramFileDefinition, getSectionParsers };
|
|
347
|
+
//# sourceMappingURL=sectionParsers.js.map
|