@gmod/bam 1.1.8 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +2 -3
- package/dist/bai.d.ts +2 -0
- package/dist/bai.js +300 -548
- package/dist/bai.js.map +1 -0
- package/dist/bamFile.d.ts +4 -6
- package/dist/bamFile.js +675 -1203
- package/dist/bamFile.js.map +1 -0
- package/dist/chunk.js +34 -69
- package/dist/chunk.js.map +1 -0
- package/dist/constants.js +27 -35
- package/dist/constants.js.map +1 -0
- package/dist/csi.js +317 -515
- package/dist/csi.js.map +1 -0
- package/dist/errors.js +64 -120
- package/dist/errors.js.map +1 -0
- package/dist/htsget.js +275 -396
- package/dist/htsget.js.map +1 -0
- package/dist/index.js +16 -54
- package/dist/index.js.map +1 -0
- package/dist/indexFile.d.ts +1 -2
- package/dist/indexFile.js +77 -163
- package/dist/indexFile.js.map +1 -0
- package/dist/record.js +496 -707
- package/dist/record.js.map +1 -0
- package/dist/sam.js +16 -49
- package/dist/sam.js.map +1 -0
- package/dist/util.d.ts +0 -1
- package/dist/util.js +115 -126
- package/dist/util.js.map +1 -0
- package/dist/virtualOffset.js +44 -77
- package/dist/virtualOffset.js.map +1 -0
- package/esm/bai.d.ts +26 -0
- package/esm/bai.js +191 -0
- package/esm/bai.js.map +1 -0
- package/esm/bamFile.d.ts +77 -0
- package/esm/bamFile.js +388 -0
- package/esm/bamFile.js.map +1 -0
- package/esm/chunk.d.ts +18 -0
- package/esm/chunk.js +33 -0
- package/esm/chunk.js.map +1 -0
- package/esm/constants.d.ts +15 -0
- package/esm/constants.js +27 -0
- package/esm/constants.js.map +1 -0
- package/esm/csi.d.ts +35 -0
- package/esm/csi.js +209 -0
- package/esm/csi.js.map +1 -0
- package/esm/errors.d.ts +23 -0
- package/esm/errors.js +24 -0
- package/esm/errors.js.map +1 -0
- package/esm/htsget.d.ts +33 -0
- package/esm/htsget.js +100 -0
- package/esm/htsget.js.map +1 -0
- package/esm/index.d.ts +6 -0
- package/esm/index.js +7 -0
- package/esm/index.js.map +1 -0
- package/esm/indexFile.d.ts +27 -0
- package/esm/indexFile.js +27 -0
- package/esm/indexFile.js.map +1 -0
- package/esm/record.d.ts +88 -0
- package/esm/record.js +534 -0
- package/esm/record.js.map +1 -0
- package/esm/sam.d.ts +7 -0
- package/esm/sam.js +16 -0
- package/esm/sam.js.map +1 -0
- package/esm/util.d.ts +35 -0
- package/esm/util.js +92 -0
- package/esm/util.js.map +1 -0
- package/esm/virtualOffset.d.ts +10 -0
- package/esm/virtualOffset.js +37 -0
- package/esm/virtualOffset.js.map +1 -0
- package/package.json +21 -29
- package/dist/declare.d.js +0 -2
package/esm/bai.js
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import Long from 'long';
|
|
2
|
+
import { fromBytes } from './virtualOffset';
|
|
3
|
+
import Chunk from './chunk';
|
|
4
|
+
import IndexFile from './indexFile';
|
|
5
|
+
import { longToNumber, abortBreakPoint, optimizeChunks } from './util';
|
|
6
|
+
// Magic number expected at byte 0 of a BAI file: the bytes "BAI\1"
// interpreted as a little-endian unsigned 32-bit integer (0x01494142).
const BAI_MAGIC = 21578050; // BAI\1
|
|
7
|
+
/**
 * Snap `n` to the nearest multiple of `multiple` in the direction of zero.
 * @param {number} n - value to align
 * @param {number} multiple - alignment step
 * @returns {number} `n` minus its remainder modulo `multiple`
 */
function roundDown(n, multiple) {
  const remainder = n % multiple;
  return n - remainder;
}
|
|
10
|
+
/**
 * Return the first multiple of `multiple` that is strictly greater than the
 * aligned-down value of `n`. A value that is already an exact multiple is
 * still advanced by one full step.
 * @param {number} n - value to align
 * @param {number} multiple - alignment step
 * @returns {number} the aligned-down value of `n` plus `multiple`
 */
function roundUp(n, multiple) {
  const remainder = n % multiple;
  return n - remainder + multiple;
}
|
|
13
|
+
/**
 * Reader for BAI index files. Parses the per-reference binning index, linear
 * index and optional pseudo-bin statistics, and answers range queries against
 * them.
 */
export default class BAI extends IndexFile {
  /**
   * Decode the statistics stored in a pseudo-bin.
   *
   * Reads the unsigned 64-bit little-endian integer located 16 bytes after
   * `offset` and reports it as `lineCount`.
   * NOTE(review): per the BAI pseudo-bin layout this is presumably the number
   * of mapped reads for the reference - confirm against the SAM spec.
   *
   * @param {Buffer} bytes - the whole index file
   * @param {number} offset - offset of the pseudo-bin's chunk data
   * @returns {{lineCount: number}}
   */
  parsePseudoBin(bytes, offset) {
    const lineCount = longToNumber(Long.fromBytesLE(Array.prototype.slice.call(bytes, offset + 16, offset + 24), true));
    return { lineCount };
  }

  /**
   * Look up the `lineCount` statistic for one reference sequence.
   *
   * @param {number} refId - numeric reference sequence id
   * @param {object} [opts] - options forwarded to `parse`
   * @returns {Promise<number>} the count, or -1 when the reference is not in
   *   the index or has no statistics
   */
  async lineCount(refId, opts = {}) {
    const prom = await this.parse(opts);
    const index = prom.indices[refId];
    if (!index) {
      return -1;
    }
    const ret = index.stats || {};
    return ret.lineCount === undefined ? -1 : ret.lineCount;
  }

  /**
   * Read the whole index file, memoising the in-flight/resolved promise.
   * A failed read clears the memo so that a later call can retry.
   *
   * @param {object} [opts] - options forwarded to `filehandle.readFile`
   * @returns {Promise<Buffer>}
   */
  fetchBai(opts = {}) {
    if (!this.baiP) {
      this.baiP = this.filehandle.readFile(opts).catch(e => {
        this.baiP = undefined;
        throw e;
      });
    }
    return this.baiP;
  }

  /**
   * Fetch and parse the index.
   *
   * @param {object} [opts] - `opts.signal` is checked once per reference
   * @returns {Promise<object>} object with `bai`, `maxBlockSize`, `refCount`
   *   and `indices` (one `{ binIndex, linearIndex, stats }` per reference;
   *   `stats` is undefined when the reference has no pseudo-bin)
   * @throws {Error} when the magic number is wrong or a bin number exceeds
   *   the pseudo-bin number
   */
  async _parse(opts = {}) {
    const data = { bai: true, maxBlockSize: 1 << 16 };
    const bytes = await this.fetchBai();
    // check BAI magic numbers
    if (bytes.readUInt32LE(0) !== BAI_MAGIC) {
      throw new Error('Not a BAI file');
    }
    data.refCount = bytes.readInt32LE(4);
    const depth = 5;
    // highest regular bin number for a 6-level binning scheme (37449);
    // binLimit + 1 is the pseudo-bin that carries statistics
    const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7;
    // read the indexes for each reference sequence
    data.indices = new Array(data.refCount);
    let currOffset = 8;
    for (let i = 0; i < data.refCount; i += 1) {
      await abortBreakPoint(opts.signal);
      // the binning index
      const binCount = bytes.readInt32LE(currOffset);
      let stats;
      currOffset += 4;
      const binIndex = {};
      for (let j = 0; j < binCount; j += 1) {
        const bin = bytes.readUInt32LE(currOffset);
        currOffset += 4;
        if (bin === binLimit + 1) {
          // skip the chunk count, decode the statistics, then skip the
          // 32 bytes of pseudo-bin chunk data
          currOffset += 4;
          stats = this.parsePseudoBin(bytes, currOffset);
          currOffset += 32;
        }
        else if (bin > binLimit + 1) {
          throw new Error('bai index contains too many bins, please use CSI');
        }
        else {
          const chunkCount = bytes.readInt32LE(currOffset);
          currOffset += 4;
          const chunks = new Array(chunkCount);
          for (let k = 0; k < chunkCount; k += 1) {
            // each chunk is a pair of 8-byte virtual offsets
            const u = fromBytes(bytes, currOffset);
            const v = fromBytes(bytes, currOffset + 8);
            currOffset += 16;
            this._findFirstData(data, u);
            chunks[k] = new Chunk(u, v, bin);
          }
          binIndex[bin] = chunks;
        }
      }
      const linearCount = bytes.readInt32LE(currOffset);
      currOffset += 4;
      // as we're going through the linear index, figure out
      // the smallest virtual offset in the indexes, which
      // tells us where the BAM header ends
      const linearIndex = new Array(linearCount);
      for (let k = 0; k < linearCount; k += 1) {
        linearIndex[k] = fromBytes(bytes, currOffset);
        currOffset += 8;
        this._findFirstData(data, linearIndex[k]);
      }
      data.indices[i] = { binIndex, linearIndex, stats };
    }
    return data;
  }

  /**
   * Estimate coverage per 16384-base window from the linear index.
   *
   * Each window's score is the number of compressed bytes between consecutive
   * linear-index entries, scaled by `lineCount / totalSize`. When the
   * reference has no pseudo-bin statistics the line count is treated as 0
   * instead of throwing.
   *
   * @param {number} seqId - numeric reference sequence id
   * @param {number} [start] - range start; rounded down to a window boundary
   * @param {number} [end] - range end; rounded up to the next window boundary
   * @param {object} [opts] - options forwarded to `parse`
   * @returns {Promise<Array<{score: number, start: number, end: number}>>}
   *   empty when the reference is unknown or has no linear index
   * @throws {Error} when the (rounded) end lies beyond the linear index
   */
  async indexCov(seqId, start, end, opts = {}) {
    const v = 16384;
    const range = start !== undefined;
    const indexData = await this.parse(opts);
    const seqIdx = indexData.indices[seqId];
    if (!seqIdx) {
      return [];
    }
    // `stats` is undefined for references without a pseudo-bin; default it so
    // the scaling below cannot dereference undefined
    const { linearIndex = [], stats = { lineCount: 0 } } = seqIdx;
    if (!linearIndex.length) {
      return [];
    }
    const e = end !== undefined ? roundUp(end, v) : (linearIndex.length - 1) * v;
    const s = start !== undefined ? roundDown(start, v) : 0;
    let depths;
    if (range) {
      depths = new Array((e - s) / v);
    }
    else {
      depths = new Array(linearIndex.length - 1);
    }
    const totalSize = linearIndex[linearIndex.length - 1].blockPosition;
    if (e > (linearIndex.length - 1) * v) {
      throw new Error('query outside of range of linear index');
    }
    let currentPos = linearIndex[s / v].blockPosition;
    for (let i = s / v, j = 0; i < e / v; i++, j++) {
      depths[j] = {
        score: linearIndex[i + 1].blockPosition - currentPos,
        start: i * v,
        end: i * v + v,
      };
      currentPos = linearIndex[i + 1].blockPosition;
    }
    return depths.map(d => {
      return { ...d, score: (d.score * stats.lineCount) / totalSize };
    });
  }

  /**
   * calculate the list of bins that may overlap with region [beg,end) (zero-based half-open)
   * @param {number} beg - region start
   * @param {number} end - region end (exclusive)
   * @returns {Array<Array<number>>} one inclusive `[firstBin, lastBin]` pair
   *   per level of the binning scheme
   */
  reg2bins(beg, end) {
    end -= 1;
    return [
      [0, 0],
      [1 + (beg >> 26), 1 + (end >> 26)],
      [9 + (beg >> 23), 9 + (end >> 23)],
      [73 + (beg >> 20), 73 + (end >> 20)],
      [585 + (beg >> 17), 585 + (end >> 17)],
      [4681 + (beg >> 14), 4681 + (end >> 14)],
    ];
  }

  /**
   * Find the file chunks that may contain alignments overlapping a region.
   *
   * @param {number} refId - numeric reference sequence id
   * @param {number} min - region start; negative values are clamped to 0
   * @param {number} max - region end
   * @param {object} [opts] - options forwarded to `parse`
   * @returns {Promise<Chunk[]>} result of `optimizeChunks` over the candidate
   *   chunks and the lowest linear-index offset in the region; empty when
   *   the index or reference is missing
   */
  async blocksForRange(refId, min, max, opts = {}) {
    if (min < 0) {
      min = 0;
    }
    const indexData = await this.parse(opts);
    if (!indexData) {
      return [];
    }
    const ba = indexData.indices[refId];
    if (!ba) {
      return [];
    }
    const overlappingBins = this.reg2bins(min, max); // List of bin #s that overlap min, max
    const chunks = [];
    // Collect the chunks of every bin that overlaps the region
    for (const [start, end] of overlappingBins) {
      for (let bin = start; bin <= end; bin++) {
        if (ba.binIndex[bin]) {
          const binChunks = ba.binIndex[bin];
          for (let c = 0; c < binChunks.length; ++c) {
            chunks.push(new Chunk(binChunks[c].minv, binChunks[c].maxv, bin));
          }
        }
      }
    }
    // Use the linear index to find minimum file position of chunks that could contain alignments in the region
    const nintv = ba.linearIndex.length;
    let lowest = null;
    const minLin = Math.min(min >> 14, nintv - 1);
    const maxLin = Math.min(max >> 14, nintv - 1);
    for (let i = minLin; i <= maxLin; ++i) {
      const vp = ba.linearIndex[i];
      if (vp) {
        if (!lowest || vp.compareTo(lowest) < 0) {
          lowest = vp;
        }
      }
    }
    return optimizeChunks(chunks, lowest);
  }
}
|
|
191
|
+
//# sourceMappingURL=bai.js.map
|
package/esm/bai.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bai.js","sourceRoot":"","sources":["../src/bai.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAA;AACvB,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAC3C,OAAO,KAAK,MAAM,SAAS,CAAA;AAE3B,OAAO,SAAS,MAAM,aAAa,CAAA;AACnC,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,cAAc,EAAY,MAAM,QAAQ,CAAA;AAEhF,MAAM,SAAS,GAAG,QAAQ,CAAA,CAAC,QAAQ;AAEnC,SAAS,SAAS,CAAC,CAAS,EAAE,QAAgB;IAC5C,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAA;AAC3B,CAAC;AACD,SAAS,OAAO,CAAC,CAAS,EAAE,QAAgB;IAC1C,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC,GAAG,QAAQ,CAAA;AACtC,CAAC;AAED,MAAM,CAAC,OAAO,OAAO,GAAI,SAAQ,SAAS;IAGxC,cAAc,CAAC,KAAa,EAAE,MAAc;QAC1C,MAAM,SAAS,GAAG,YAAY,CAC5B,IAAI,CAAC,WAAW,CACd,KAAK,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,EAAE,MAAM,GAAG,EAAE,EAAE,MAAM,GAAG,EAAE,CAAC,EAC3D,IAAI,CACL,CACF,CAAA;QACD,OAAO,EAAE,SAAS,EAAE,CAAA;IACtB,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,KAAa,EAAE,OAAiB,EAAE;QAChD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;QACjC,IAAI,CAAC,KAAK,EAAE;YACV,OAAO,CAAC,CAAC,CAAA;SACV;QACD,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,EAAE,CAAA;QAC7B,OAAO,GAAG,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAA;IACzD,CAAC;IAED,QAAQ,CAAC,OAAiB,EAAE;QAC1B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YACd,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;gBACnD,IAAI,CAAC,IAAI,GAAG,SAAS,CAAA;gBACrB,MAAM,CAAC,CAAA;YACT,CAAC,CAAoB,CAAA;SACtB;QACD,OAAO,IAAI,CAAC,IAAI,CAAA;IAClB,CAAC;IAED,4BAA4B;IAC5B,KAAK,CAAC,MAAM,CAAC,OAAiB,EAAE;QAC9B,MAAM,IAAI,GAA2B,EAAE,GAAG,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,IAAI,EAAE,EAAE,CAAA;QACzE,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAA;QAEnC,0BAA0B;QAC1B,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE;YACvC,MAAM,IAAI,KAAK,CAAC,gBAAgB,CAAC,CAAA;SAClC;QAED,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,CAAA;QACpC,MAAM,KAAK,GAAG,CAAC,CAAA;QACf,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAEnD,+CA
A+C;QAC/C,IAAI,CAAC,OAAO,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QACvC,IAAI,UAAU,GAAG,CAAC,CAAA;QAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,EAAE;YACzC,MAAM,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YAElC,oBAAoB;YACpB,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,CAAC,UAAU,CAAC,CAAA;YAC9C,IAAI,KAAK,CAAA;YAET,UAAU,IAAI,CAAC,CAAA;YACf,MAAM,QAAQ,GAA+B,EAAE,CAAA;YAC/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,IAAI,CAAC,EAAE;gBACpC,MAAM,GAAG,GAAG,KAAK,CAAC,YAAY,CAAC,UAAU,CAAC,CAAA;gBAC1C,UAAU,IAAI,CAAC,CAAA;gBACf,IAAI,GAAG,KAAK,QAAQ,GAAG,CAAC,EAAE;oBACxB,UAAU,IAAI,CAAC,CAAA;oBACf,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,UAAU,CAAC,CAAA;oBAC9C,UAAU,IAAI,EAAE,CAAA;iBACjB;qBAAM,IAAI,GAAG,GAAG,QAAQ,GAAG,CAAC,EAAE;oBAC7B,MAAM,IAAI,KAAK,CAAC,kDAAkD,CAAC,CAAA;iBACpE;qBAAM;oBACL,MAAM,UAAU,GAAG,KAAK,CAAC,WAAW,CAAC,UAAU,CAAC,CAAA;oBAChD,UAAU,IAAI,CAAC,CAAA;oBACf,MAAM,MAAM,GAAG,IAAI,KAAK,CAAC,UAAU,CAAC,CAAA;oBACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,IAAI,CAAC,EAAE;wBACtC,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,EAAE,UAAU,CAAC,CAAA;wBACtC,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,EAAE,UAAU,GAAG,CAAC,CAAC,CAAA;wBAC1C,UAAU,IAAI,EAAE,CAAA;wBAChB,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC,CAAA;wBAC5B,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,CAAA;qBACjC;oBACD,QAAQ,CAAC,GAAG,CAAC,GAAG,MAAM,CAAA;iBACvB;aACF;YAED,MAAM,WAAW,GAAG,KAAK,CAAC,WAAW,CAAC,UAAU,CAAC,CAAA;YACjD,UAAU,IAAI,CAAC,CAAA;YACf,sDAAsD;YACtD,oDAAoD;YACpD,qCAAqC;YACrC,MAAM,WAAW,GAAG,IAAI,KAAK,CAAC,WAAW,CAAC,CAAA;YAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,IAAI,CAAC,EAAE;gBACvC,WAAW,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,KAAK,EAAE,UAAU,CAAC,CAAA;gBAC7C,UAAU,IAAI,CAAC,CAAA;gBACf,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,CAAA;aAC1C;YAED,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,WAAW,EAAE,KAAK,EAAE,CAAA;SACnD;QAED,OAAO,IAAI,CAAA;IACb,CAAC;IAED,KAAK,CAAC,QAAQ,CACZ,KAAa,EACb,KAAc,EACd,GAAY,EACZ,OAAiB,EAAE;QAEnB,MAAM,CAAC,GAAG,KAAK,CAAA;QACf,MAAM,KAAK,GAAG
,KAAK,KAAK,SAAS,CAAA;QACjC,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QACxC,MAAM,MAAM,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;QACvC,IAAI,CAAC,MAAM,EAAE;YACX,OAAO,EAAE,CAAA;SACV;QACD,MAAM,EAAE,WAAW,GAAG,EAAE,EAAE,KAAK,EAAE,GAAG,MAAM,CAAA;QAC1C,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE;YACvB,OAAO,EAAE,CAAA;SACV;QACD,MAAM,CAAC,GAAG,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAC5E,MAAM,CAAC,GAAG,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;QACvD,IAAI,MAAM,CAAA;QACV,IAAI,KAAK,EAAE;YACT,MAAM,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;SAChC;aAAM;YACL,MAAM,GAAG,IAAI,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;SAC3C;QACD,MAAM,SAAS,GAAG,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,aAAa,CAAA;QACnE,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,EAAE;YACpC,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAA;SAC1D;QACD,IAAI,UAAU,GAAG,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,CAAA;QACjD,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE;YAC9C,MAAM,CAAC,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,GAAG,UAAU;gBACpD,KAAK,EAAE,CAAC,GAAG,CAAC;gBACZ,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC;aACf,CAAA;YACD,UAAU,GAAG,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,CAAA;SAC9C;QACD,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;YACpB,OAAO,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,GAAG,SAAS,EAAE,CAAA;QACjE,CAAC,CAAC,CAAA;IACJ,CAAC;IAED;;;OAGG;IACH,QAAQ,CAAC,GAAW,EAAE,GAAW;QAC/B,GAAG,IAAI,CAAC,CAAA;QACR,OAAO;YACL,CAAC,CAAC,EAAE,CAAC,CAAC;YACN,CAAC,CAAC,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC;YAClC,CAAC,CAAC,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC;YAClC,CAAC,EAAE,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC;YACpC,CAAC,GAAG,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,
EAAE,GAAG,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC;YACtC,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,IAAI,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC;SACzC,CAAA;IACH,CAAC;IAED,KAAK,CAAC,cAAc,CAClB,KAAa,EACb,GAAW,EACX,GAAW,EACX,OAAiB,EAAE;QAEnB,IAAI,GAAG,GAAG,CAAC,EAAE;YACX,GAAG,GAAG,CAAC,CAAA;SACR;QAED,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QACxC,IAAI,CAAC,SAAS,EAAE;YACd,OAAO,EAAE,CAAA;SACV;QACD,MAAM,EAAE,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;QACnC,IAAI,CAAC,EAAE,EAAE;YACP,OAAO,EAAE,CAAA;SACV;QAED,MAAM,eAAe,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC,CAAA,CAAC,uCAAuC;QACvF,MAAM,MAAM,GAAY,EAAE,CAAA;QAE1B,sEAAsE;QACtE,KAAK,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,IAAI,eAAe,EAAE;YAC1C,KAAK,IAAI,GAAG,GAAG,KAAK,EAAE,GAAG,IAAI,GAAG,EAAE,GAAG,EAAE,EAAE;gBACvC,IAAI,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;oBACpB,MAAM,SAAS,GAAG,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAA;oBAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;wBACzC,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAA;qBAClE;iBACF;aACF;SACF;QAED,2GAA2G;QAC3G,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,MAAM,CAAA;QACnC,IAAI,MAAM,GAAG,IAAI,CAAA;QACjB,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,EAAE,EAAE,KAAK,GAAG,CAAC,CAAC,CAAA;QAC7C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,EAAE,EAAE,KAAK,GAAG,CAAC,CAAC,CAAA;QAC7C,KAAK,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,MAAM,EAAE,EAAE,CAAC,EAAE;YACrC,MAAM,EAAE,GAAG,EAAE,CAAC,WAAW,CAAC,CAAC,CAAC,CAAA;YAC5B,IAAI,EAAE,EAAE;gBACN,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;oBACvC,MAAM,GAAG,EAAE,CAAA;iBACZ;aACF;SACF;QAED,OAAO,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IACvC,CAAC;CACF"}
|
package/esm/bamFile.d.ts
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
import Chunk from './chunk';
|
|
3
|
+
import { GenericFilehandle } from 'generic-filehandle';
|
|
4
|
+
import BAMFeature from './record';
|
|
5
|
+
import { BamOpts, BaseOpts } from './util';
|
|
6
|
+
/** Magic number at the start of uncompressed BAM data: the bytes "BAM\1" as a little-endian 32-bit integer. */
export declare const BAM_MAGIC = 21840194;
|
|
7
|
+
/**
 * Reader for a BAM file together with its BAI or CSI index.
 */
export default class BamFile {
    /** Function applied to every reference sequence name read from the header. */
    private renameRefSeq;
    /** File handle for the BAM data. */
    private bam;
    /** Index reader (CSI or BAI). */
    private index;
    /** Maximum fetched size, in bytes, allowed for a single chunk. */
    private chunkSizeLimit;
    /** Maximum total fetched size, in bytes, allowed for one range query. */
    private fetchSizeLimit;
    /** Raw header text; populated by getHeader. */
    private header;
    /** Map from (renamed) reference name to numeric id; populated by getHeader. */
    protected chrToIndex: any;
    /** List of `{ refName, length }` by numeric id; populated by getHeader. */
    protected indexToChr: any;
    /** Value of the `yieldThreadTime` constructor option. */
    private yieldThreadTime;
    /**
     * One BAM source is required: `bamFilehandle`, `bamPath` or `bamUrl`.
     * When no index source is given, `<bamPath>.bai` or `<bamUrl>.bai` is used.
     *
     * @param {object} args
     * @param {FileHandle} [args.bamFilehandle]
     * @param {string} [args.bamPath]
     * @param {string} [args.bamUrl]
     * @param {string} [args.baiPath]
     * @param {FileHandle} [args.baiFilehandle]
     * @param {string} [args.baiUrl]
     * @param {string} [args.csiPath]
     * @param {FileHandle} [args.csiFilehandle]
     * @param {string} [args.csiUrl]
     * @param {number} [args.fetchSizeLimit] total bytes per query
     * @param {number} [args.chunkSizeLimit] bytes per chunk
     * @param {function} [args.renameRefSeqs] maps header reference names
     * @param {number} [args.yieldThreadTime]
     */
    constructor({ bamFilehandle, bamPath, bamUrl, baiPath, baiFilehandle, baiUrl, csiPath, csiFilehandle, csiUrl, fetchSizeLimit, chunkSizeLimit, yieldThreadTime, renameRefSeqs, }: {
        bamFilehandle?: GenericFilehandle;
        bamPath?: string;
        bamUrl?: string;
        baiPath?: string;
        baiFilehandle?: GenericFilehandle;
        baiUrl?: string;
        csiPath?: string;
        csiFilehandle?: GenericFilehandle;
        csiUrl?: string;
        fetchSizeLimit?: number;
        chunkSizeLimit?: number;
        renameRefSeqs?: (a: string) => string;
        yieldThreadTime?: number;
    });
    /** Read the BAM header, record the reference sequences, and return the parsed header lines. */
    getHeader(origOpts?: AbortSignal | BaseOpts): Promise<{
        tag: string;
        data: {
            tag: string;
            value: string;
        }[];
    }[]>;
    /** Read the header (via getHeader) and return its raw text. */
    getHeaderText(opts?: BaseOpts): Promise<any>;
    /**
     * Read the reference sequence list that follows the header text.
     * `start` is the offset of the list in the uncompressed data and
     * `refSeqBytes` the number of bytes to read from the file.
     */
    _readRefSeqs(start: number, refSeqBytes: number, opts?: BaseOpts): Promise<{
        chrToIndex: {
            [key: string]: number;
        };
        indexToChr: {
            refName: string;
            length: number;
        }[];
    }>;
    /** Collect every batch produced by streamRecordsForRange into one array. */
    getRecordsForRange(chr: string, min: number, max: number, opts?: BamOpts): Promise<BAMFeature[]>;
    /** Yield batches of records for a region; enforces chunkSizeLimit and fetchSizeLimit. */
    streamRecordsForRange(chr: string, min: number, max: number, opts?: BamOpts): AsyncGenerator<BAMFeature[], void, unknown>;
    /** Read the given chunks and yield, per chunk, the records on `chrId` that fall within the region. */
    _fetchChunkFeatures(chunks: Chunk[], chrId: number, min: number, max: number, opts: BamOpts): AsyncGenerator<BAMFeature[], void, unknown>;
    /** Fetch mates of reads whose name occurs only once within a batch and that are not already among the fetched records. */
    fetchPairs(chrId: number, featPromises: Promise<BAMFeature[]>[], opts: BamOpts): Promise<BAMFeature[]>;
    _readChunk({ chunk, opts }: {
        chunk: Chunk;
        opts: BaseOpts;
    }): Promise<{
        data: any;
        cpositions: any;
        dpositions: any;
        chunk: Chunk;
    }>;
    readBamFeatures(ba: Buffer, cpositions: number[], dpositions: number[], chunk: Chunk): Promise<BAMFeature[]>;
    hasRefSeq(seqName: string): Promise<boolean>;
    lineCount(seqName: string): Promise<number>;
    indexCov(seqName: string, start?: number, end?: number): Promise<{
        start: number;
        end: number;
        score: number;
    }[]>;
}
|
package/esm/bamFile.js
ADDED
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
import BAI from './bai';
|
|
2
|
+
import CSI from './csi';
|
|
3
|
+
import crc32 from 'buffer-crc32';
|
|
4
|
+
import { unzip, unzipChunkSlice } from '@gmod/bgzf-filehandle';
|
|
5
|
+
import entries from 'object.entries-ponyfill';
|
|
6
|
+
import { LocalFile, RemoteFile } from 'generic-filehandle';
|
|
7
|
+
import BAMFeature from './record';
|
|
8
|
+
import { parseHeaderText } from './sam';
|
|
9
|
+
import { abortBreakPoint, checkAbortSignal, timeout, makeOpts, } from './util';
|
|
10
|
+
// Magic number expected at byte 0 of the uncompressed BAM data: the bytes
// "BAM\1" interpreted as a little-endian 32-bit integer (0x014D4142).
export const BAM_MAGIC = 21840194;
// Extra bytes (64 KiB) added to read buffers when fetching the header.
// NOTE(review): presumably one maximum-size BGZF block - confirm.
const blockLen = 1 << 16;
|
|
12
|
+
export default class BamFile {
|
|
13
|
+
/**
|
|
14
|
+
* @param {object} args
|
|
15
|
+
* @param {string} [args.bamPath]
|
|
16
|
+
* @param {FileHandle} [args.bamFilehandle]
|
|
17
|
+
* @param {string} [args.baiPath]
|
|
18
|
+
* @param {FileHandle} [args.baiFilehandle]
|
|
19
|
+
*/
|
|
20
|
+
constructor({ bamFilehandle, bamPath, bamUrl, baiPath, baiFilehandle, baiUrl, csiPath, csiFilehandle, csiUrl, fetchSizeLimit, chunkSizeLimit, yieldThreadTime = 100, renameRefSeqs = n => n, }) {
|
|
21
|
+
this.renameRefSeq = renameRefSeqs;
|
|
22
|
+
if (bamFilehandle) {
|
|
23
|
+
this.bam = bamFilehandle;
|
|
24
|
+
}
|
|
25
|
+
else if (bamPath) {
|
|
26
|
+
this.bam = new LocalFile(bamPath);
|
|
27
|
+
}
|
|
28
|
+
else if (bamUrl) {
|
|
29
|
+
this.bam = new RemoteFile(bamUrl);
|
|
30
|
+
}
|
|
31
|
+
else {
|
|
32
|
+
throw new Error('unable to initialize bam');
|
|
33
|
+
}
|
|
34
|
+
if (csiFilehandle) {
|
|
35
|
+
this.index = new CSI({ filehandle: csiFilehandle });
|
|
36
|
+
}
|
|
37
|
+
else if (csiPath) {
|
|
38
|
+
this.index = new CSI({ filehandle: new LocalFile(csiPath) });
|
|
39
|
+
}
|
|
40
|
+
else if (csiUrl) {
|
|
41
|
+
this.index = new CSI({ filehandle: new RemoteFile(csiUrl) });
|
|
42
|
+
}
|
|
43
|
+
else if (baiFilehandle) {
|
|
44
|
+
this.index = new BAI({ filehandle: baiFilehandle });
|
|
45
|
+
}
|
|
46
|
+
else if (baiPath) {
|
|
47
|
+
this.index = new BAI({ filehandle: new LocalFile(baiPath) });
|
|
48
|
+
}
|
|
49
|
+
else if (baiUrl) {
|
|
50
|
+
this.index = new BAI({ filehandle: new RemoteFile(baiUrl) });
|
|
51
|
+
}
|
|
52
|
+
else if (bamPath) {
|
|
53
|
+
this.index = new BAI({ filehandle: new LocalFile(`${bamPath}.bai`) });
|
|
54
|
+
}
|
|
55
|
+
else if (bamUrl) {
|
|
56
|
+
this.index = new BAI({ filehandle: new RemoteFile(`${bamUrl}.bai`) });
|
|
57
|
+
}
|
|
58
|
+
else {
|
|
59
|
+
throw new Error('unable to infer index format');
|
|
60
|
+
}
|
|
61
|
+
this.fetchSizeLimit = fetchSizeLimit || 500000000; // 500MB
|
|
62
|
+
this.chunkSizeLimit = chunkSizeLimit || 300000000; // 300MB
|
|
63
|
+
this.yieldThreadTime = yieldThreadTime;
|
|
64
|
+
}
|
|
65
|
+
async getHeader(origOpts = {}) {
|
|
66
|
+
const opts = makeOpts(origOpts);
|
|
67
|
+
const indexData = await this.index.parse(opts);
|
|
68
|
+
const ret = indexData.firstDataLine
|
|
69
|
+
? indexData.firstDataLine.blockPosition + 65535
|
|
70
|
+
: undefined;
|
|
71
|
+
let buffer;
|
|
72
|
+
if (ret) {
|
|
73
|
+
const res = await this.bam.read(Buffer.alloc(ret + blockLen), 0, ret + blockLen, 0, opts);
|
|
74
|
+
const { bytesRead } = res;
|
|
75
|
+
({ buffer } = res);
|
|
76
|
+
if (!bytesRead) {
|
|
77
|
+
throw new Error('Error reading header');
|
|
78
|
+
}
|
|
79
|
+
if (bytesRead < ret) {
|
|
80
|
+
buffer = buffer.slice(0, bytesRead);
|
|
81
|
+
}
|
|
82
|
+
else {
|
|
83
|
+
buffer = buffer.slice(0, ret);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
else {
|
|
87
|
+
buffer = (await this.bam.readFile(opts));
|
|
88
|
+
}
|
|
89
|
+
const uncba = await unzip(buffer);
|
|
90
|
+
if (uncba.readInt32LE(0) !== BAM_MAGIC) {
|
|
91
|
+
throw new Error('Not a BAM file');
|
|
92
|
+
}
|
|
93
|
+
const headLen = uncba.readInt32LE(4);
|
|
94
|
+
this.header = uncba.toString('utf8', 8, 8 + headLen);
|
|
95
|
+
const { chrToIndex, indexToChr } = await this._readRefSeqs(headLen + 8, 65535, opts);
|
|
96
|
+
this.chrToIndex = chrToIndex;
|
|
97
|
+
this.indexToChr = indexToChr;
|
|
98
|
+
return parseHeaderText(this.header);
|
|
99
|
+
}
|
|
100
|
+
async getHeaderText(opts = {}) {
|
|
101
|
+
await this.getHeader(opts);
|
|
102
|
+
return this.header;
|
|
103
|
+
}
|
|
104
|
+
// the full length of the refseq block is not given in advance so this grabs a chunk and
|
|
105
|
+
// doubles it if all refseqs haven't been processed
|
|
106
|
+
async _readRefSeqs(start, refSeqBytes, opts = {}) {
|
|
107
|
+
if (start > refSeqBytes) {
|
|
108
|
+
return this._readRefSeqs(start, refSeqBytes * 2, opts);
|
|
109
|
+
}
|
|
110
|
+
const res = await this.bam.read(Buffer.alloc(refSeqBytes + blockLen), 0, refSeqBytes, 0, opts);
|
|
111
|
+
const { bytesRead } = res;
|
|
112
|
+
let { buffer } = res;
|
|
113
|
+
if (!bytesRead) {
|
|
114
|
+
throw new Error('Error reading refseqs from header');
|
|
115
|
+
}
|
|
116
|
+
if (bytesRead < refSeqBytes) {
|
|
117
|
+
buffer = buffer.slice(0, bytesRead);
|
|
118
|
+
}
|
|
119
|
+
else {
|
|
120
|
+
buffer = buffer.slice(0, refSeqBytes);
|
|
121
|
+
}
|
|
122
|
+
const uncba = await unzip(buffer);
|
|
123
|
+
const nRef = uncba.readInt32LE(start);
|
|
124
|
+
let p = start + 4;
|
|
125
|
+
const chrToIndex = {};
|
|
126
|
+
const indexToChr = [];
|
|
127
|
+
for (let i = 0; i < nRef; i += 1) {
|
|
128
|
+
await abortBreakPoint(opts.signal);
|
|
129
|
+
const lName = uncba.readInt32LE(p);
|
|
130
|
+
const refName = this.renameRefSeq(uncba.toString('utf8', p + 4, p + 4 + lName - 1));
|
|
131
|
+
const lRef = uncba.readInt32LE(p + lName + 4);
|
|
132
|
+
chrToIndex[refName] = i;
|
|
133
|
+
indexToChr.push({ refName, length: lRef });
|
|
134
|
+
p = p + 8 + lName;
|
|
135
|
+
if (p > uncba.length) {
|
|
136
|
+
console.warn(`BAM header is very big. Re-fetching ${refSeqBytes} bytes.`);
|
|
137
|
+
return this._readRefSeqs(start, refSeqBytes * 2, opts);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
return { chrToIndex, indexToChr };
|
|
141
|
+
}
|
|
142
|
+
async getRecordsForRange(chr, min, max, opts = {
|
|
143
|
+
viewAsPairs: false,
|
|
144
|
+
pairAcrossChr: false,
|
|
145
|
+
maxInsertSize: 200000,
|
|
146
|
+
}) {
|
|
147
|
+
let records = [];
|
|
148
|
+
for await (const chunk of this.streamRecordsForRange(chr, min, max, opts)) {
|
|
149
|
+
records = records.concat(chunk);
|
|
150
|
+
}
|
|
151
|
+
return records;
|
|
152
|
+
}
|
|
153
|
+
async *streamRecordsForRange(chr, min, max, opts = {
|
|
154
|
+
viewAsPairs: false,
|
|
155
|
+
pairAcrossChr: false,
|
|
156
|
+
maxInsertSize: 200000,
|
|
157
|
+
}) {
|
|
158
|
+
// todo regularize refseq names
|
|
159
|
+
opts.viewAsPairs = opts.viewAsPairs || false;
|
|
160
|
+
opts.pairAcrossChr = opts.pairAcrossChr || false;
|
|
161
|
+
opts.maxInsertSize =
|
|
162
|
+
opts.maxInsertSize !== undefined ? opts.maxInsertSize : 200000;
|
|
163
|
+
const chrId = this.chrToIndex && this.chrToIndex[chr];
|
|
164
|
+
let chunks;
|
|
165
|
+
if (!(chrId >= 0)) {
|
|
166
|
+
chunks = [];
|
|
167
|
+
}
|
|
168
|
+
else {
|
|
169
|
+
chunks = await this.index.blocksForRange(chrId, min - 1, max, opts);
|
|
170
|
+
if (!chunks) {
|
|
171
|
+
throw new Error('Error in index fetch');
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
for (let i = 0; i < chunks.length; i += 1) {
|
|
175
|
+
await abortBreakPoint(opts.signal);
|
|
176
|
+
const size = chunks[i].fetchedSize();
|
|
177
|
+
if (size > this.chunkSizeLimit) {
|
|
178
|
+
throw new Error(`Too many BAM features. BAM chunk size ${size} bytes exceeds chunkSizeLimit of ${this.chunkSizeLimit}`);
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
const totalSize = chunks
|
|
182
|
+
.map((s) => s.fetchedSize())
|
|
183
|
+
.reduce((a, b) => a + b, 0);
|
|
184
|
+
if (totalSize > this.fetchSizeLimit) {
|
|
185
|
+
throw new Error(`data size of ${totalSize.toLocaleString()} bytes exceeded fetch size limit of ${this.fetchSizeLimit.toLocaleString()} bytes`);
|
|
186
|
+
}
|
|
187
|
+
yield* this._fetchChunkFeatures(chunks, chrId, min, max, opts);
|
|
188
|
+
}
|
|
189
|
+
async *_fetchChunkFeatures(chunks, chrId, min, max, opts) {
|
|
190
|
+
const featPromises = [];
|
|
191
|
+
let done = false;
|
|
192
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
193
|
+
const { data, cpositions, dpositions, chunk } = await this._readChunk({
|
|
194
|
+
chunk: chunks[i],
|
|
195
|
+
opts,
|
|
196
|
+
});
|
|
197
|
+
const promise = this.readBamFeatures(data, cpositions, dpositions, chunk).then(records => {
|
|
198
|
+
const recs = [];
|
|
199
|
+
for (let i = 0; i < records.length; i += 1) {
|
|
200
|
+
const feature = records[i];
|
|
201
|
+
if (feature.seq_id() === chrId) {
|
|
202
|
+
if (feature.get('start') >= max) {
|
|
203
|
+
// past end of range, can stop iterating
|
|
204
|
+
done = true;
|
|
205
|
+
break;
|
|
206
|
+
}
|
|
207
|
+
else if (feature.get('end') >= min) {
|
|
208
|
+
// must be in range
|
|
209
|
+
recs.push(feature);
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
return recs;
|
|
214
|
+
});
|
|
215
|
+
featPromises.push(promise);
|
|
216
|
+
await promise;
|
|
217
|
+
if (done) {
|
|
218
|
+
break;
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
checkAbortSignal(opts.signal);
|
|
222
|
+
for (let i = 0; i < featPromises.length; i++) {
|
|
223
|
+
yield featPromises[i];
|
|
224
|
+
}
|
|
225
|
+
checkAbortSignal(opts.signal);
|
|
226
|
+
if (opts.viewAsPairs) {
|
|
227
|
+
yield this.fetchPairs(chrId, featPromises, opts);
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
async fetchPairs(chrId, featPromises, opts) {
|
|
231
|
+
const unmatedPairs = {};
|
|
232
|
+
const readIds = {};
|
|
233
|
+
await Promise.all(featPromises.map(async (f) => {
|
|
234
|
+
const ret = await f;
|
|
235
|
+
const readNames = {};
|
|
236
|
+
for (let i = 0; i < ret.length; i++) {
|
|
237
|
+
const name = ret[i].name();
|
|
238
|
+
const id = ret[i].id();
|
|
239
|
+
if (!readNames[name]) {
|
|
240
|
+
readNames[name] = 0;
|
|
241
|
+
}
|
|
242
|
+
readNames[name]++;
|
|
243
|
+
readIds[id] = 1;
|
|
244
|
+
}
|
|
245
|
+
entries(readNames).forEach(([k, v]) => {
|
|
246
|
+
if (v === 1) {
|
|
247
|
+
unmatedPairs[k] = true;
|
|
248
|
+
}
|
|
249
|
+
});
|
|
250
|
+
}));
|
|
251
|
+
const matePromises = [];
|
|
252
|
+
await Promise.all(featPromises.map(async (f) => {
|
|
253
|
+
const ret = await f;
|
|
254
|
+
for (let i = 0; i < ret.length; i++) {
|
|
255
|
+
const name = ret[i].name();
|
|
256
|
+
if (unmatedPairs[name] &&
|
|
257
|
+
(opts.pairAcrossChr ||
|
|
258
|
+
(ret[i]._next_refid() === chrId &&
|
|
259
|
+
Math.abs(ret[i].get('start') - ret[i]._next_pos()) <
|
|
260
|
+
(opts.maxInsertSize || 200000)))) {
|
|
261
|
+
matePromises.push(this.index.blocksForRange(ret[i]._next_refid(), ret[i]._next_pos(), ret[i]._next_pos() + 1, opts));
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}));
|
|
265
|
+
const mateBlocks = await Promise.all(matePromises);
|
|
266
|
+
let mateChunks = [];
|
|
267
|
+
for (let i = 0; i < mateBlocks.length; i++) {
|
|
268
|
+
mateChunks = mateChunks.concat(mateBlocks[i]);
|
|
269
|
+
}
|
|
270
|
+
// filter out duplicate chunks (the blocks are lists of chunks, blocks are concatenated, then filter dup chunks)
|
|
271
|
+
mateChunks = mateChunks
|
|
272
|
+
.sort()
|
|
273
|
+
.filter((item, pos, ary) => !pos || item.toString() !== ary[pos - 1].toString());
|
|
274
|
+
const mateTotalSize = mateChunks
|
|
275
|
+
.map(s => s.fetchedSize())
|
|
276
|
+
.reduce((a, b) => a + b, 0);
|
|
277
|
+
if (mateTotalSize > this.fetchSizeLimit) {
|
|
278
|
+
throw new Error(`data size of ${mateTotalSize.toLocaleString()} bytes exceeded fetch size limit of ${this.fetchSizeLimit.toLocaleString()} bytes`);
|
|
279
|
+
}
|
|
280
|
+
const mateFeatPromises = mateChunks.map(async (c) => {
|
|
281
|
+
const { data, cpositions, dpositions, chunk } = await this._readChunk({
|
|
282
|
+
chunk: c,
|
|
283
|
+
opts,
|
|
284
|
+
});
|
|
285
|
+
const feats = await this.readBamFeatures(data, cpositions, dpositions, chunk);
|
|
286
|
+
const mateRecs = [];
|
|
287
|
+
for (let i = 0; i < feats.length; i += 1) {
|
|
288
|
+
const feature = feats[i];
|
|
289
|
+
if (unmatedPairs[feature.get('name')] && !readIds[feature.id()]) {
|
|
290
|
+
mateRecs.push(feature);
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
return mateRecs;
|
|
294
|
+
});
|
|
295
|
+
const newMateFeats = await Promise.all(mateFeatPromises);
|
|
296
|
+
let featuresRet = [];
|
|
297
|
+
if (newMateFeats.length) {
|
|
298
|
+
const newMates = newMateFeats.reduce((result, current) => result.concat(current));
|
|
299
|
+
featuresRet = featuresRet.concat(newMates);
|
|
300
|
+
}
|
|
301
|
+
return featuresRet;
|
|
302
|
+
}
|
|
303
|
+
async _readChunk({ chunk, opts }) {
|
|
304
|
+
const { signal } = opts;
|
|
305
|
+
const bufsize = chunk.fetchedSize();
|
|
306
|
+
const res = await this.bam.read(Buffer.alloc(bufsize), 0, bufsize, chunk.minv.blockPosition, opts);
|
|
307
|
+
const { bytesRead } = res;
|
|
308
|
+
let { buffer } = res;
|
|
309
|
+
checkAbortSignal(signal);
|
|
310
|
+
if (bytesRead < bufsize) {
|
|
311
|
+
buffer = buffer.slice(0, bytesRead);
|
|
312
|
+
}
|
|
313
|
+
else {
|
|
314
|
+
buffer = buffer.slice(0, bufsize);
|
|
315
|
+
}
|
|
316
|
+
const { buffer: data, cpositions, dpositions, } = await unzipChunkSlice(buffer, chunk);
|
|
317
|
+
checkAbortSignal(signal);
|
|
318
|
+
return { data, cpositions, dpositions, chunk };
|
|
319
|
+
}
|
|
320
|
+
async readBamFeatures(ba, cpositions, dpositions, chunk) {
|
|
321
|
+
let blockStart = 0;
|
|
322
|
+
const sink = [];
|
|
323
|
+
let pos = 0;
|
|
324
|
+
let last = +Date.now();
|
|
325
|
+
while (blockStart + 4 < ba.length) {
|
|
326
|
+
const blockSize = ba.readInt32LE(blockStart);
|
|
327
|
+
const blockEnd = blockStart + 4 + blockSize - 1;
|
|
328
|
+
// increment position to the current decompressed status
|
|
329
|
+
if (dpositions) {
|
|
330
|
+
while (blockStart + chunk.minv.dataPosition >= dpositions[pos++]) { }
|
|
331
|
+
pos--;
|
|
332
|
+
}
|
|
333
|
+
// only try to read the feature if we have all the bytes for it
|
|
334
|
+
if (blockEnd < ba.length) {
|
|
335
|
+
const feature = new BAMFeature({
|
|
336
|
+
bytes: {
|
|
337
|
+
byteArray: ba,
|
|
338
|
+
start: blockStart,
|
|
339
|
+
end: blockEnd,
|
|
340
|
+
},
|
|
341
|
+
// the below results in an automatically calculated file-offset based ID
|
|
342
|
+
// if the info for that is available, otherwise crc32 of the features
|
|
343
|
+
//
|
|
344
|
+
// cpositions[pos] refers to actual file offset of a bgzip block boundaries
|
|
345
|
+
//
|
|
346
|
+
// we multiply by (1 <<8) in order to make sure each block has a "unique"
|
|
347
|
+
// address space so that data in that block could never overlap
|
|
348
|
+
//
|
|
349
|
+
// then the blockStart-dpositions is an uncompressed file offset from
|
|
350
|
+
// that bgzip block boundary, and since the cpositions are multiplied by
|
|
351
|
+
// (1 << 8) these uncompressed offsets get a unique space
|
|
352
|
+
//
|
|
353
|
+
// this has an extra chunk.minv.dataPosition added on because it blockStart
|
|
354
|
+
// starts at 0 instead of chunk.minv.dataPosition
|
|
355
|
+
//
|
|
356
|
+
// the +1 is just to avoid any possible uniqueId 0 but this does not realistically happen
|
|
357
|
+
fileOffset: cpositions
|
|
358
|
+
? cpositions[pos] * (1 << 8) +
|
|
359
|
+
(blockStart - dpositions[pos]) +
|
|
360
|
+
chunk.minv.dataPosition +
|
|
361
|
+
1
|
|
362
|
+
: crc32.signed(ba.slice(blockStart, blockEnd)),
|
|
363
|
+
});
|
|
364
|
+
sink.push(feature);
|
|
365
|
+
if (this.yieldThreadTime && +Date.now() - last > this.yieldThreadTime) {
|
|
366
|
+
await timeout(1);
|
|
367
|
+
last = +Date.now();
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
blockStart = blockEnd + 1;
|
|
371
|
+
}
|
|
372
|
+
return sink;
|
|
373
|
+
}
|
|
374
|
+
async hasRefSeq(seqName) {
|
|
375
|
+
const refId = this.chrToIndex && this.chrToIndex[seqName];
|
|
376
|
+
return this.index.hasRefSeq(refId);
|
|
377
|
+
}
|
|
378
|
+
async lineCount(seqName) {
|
|
379
|
+
const refId = this.chrToIndex && this.chrToIndex[seqName];
|
|
380
|
+
return this.index.lineCount(refId);
|
|
381
|
+
}
|
|
382
|
+
async indexCov(seqName, start, end) {
|
|
383
|
+
await this.index.parse();
|
|
384
|
+
const seqId = this.chrToIndex && this.chrToIndex[seqName];
|
|
385
|
+
return this.index.indexCov(seqId, start, end);
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
//# sourceMappingURL=bamFile.js.map
|