@gmod/bam 4.0.1 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +3 -7
- package/README.md +7 -11
- package/dist/bai.d.ts +1 -1
- package/dist/bai.js +151 -169
- package/dist/bai.js.map +1 -1
- package/dist/bamFile.d.ts +4 -5
- package/dist/bamFile.js +271 -350
- package/dist/bamFile.js.map +1 -1
- package/dist/chunk.d.ts +1 -1
- package/dist/chunk.js +5 -0
- package/dist/chunk.js.map +1 -1
- package/dist/csi.d.ts +1 -1
- package/dist/csi.js +140 -145
- package/dist/csi.js.map +1 -1
- package/dist/htsget.d.ts +1 -2
- package/dist/htsget.js +131 -161
- package/dist/htsget.js.map +1 -1
- package/dist/indexFile.d.ts +1 -1
- package/dist/indexFile.js +2 -0
- package/dist/indexFile.js.map +1 -1
- package/dist/nullIndex.js +2 -13
- package/dist/nullIndex.js.map +1 -1
- package/dist/record.d.ts +4 -4
- package/dist/record.js +43 -36
- package/dist/record.js.map +1 -1
- package/dist/util.d.ts +4 -2
- package/dist/util.js +25 -15
- package/dist/util.js.map +1 -1
- package/dist/virtualOffset.d.ts +1 -1
- package/dist/virtualOffset.js +2 -0
- package/dist/virtualOffset.js.map +1 -1
- package/esm/bai.d.ts +1 -1
- package/esm/bai.js +13 -13
- package/esm/bai.js.map +1 -1
- package/esm/bamFile.d.ts +4 -5
- package/esm/bamFile.js +49 -50
- package/esm/bamFile.js.map +1 -1
- package/esm/chunk.d.ts +1 -1
- package/esm/chunk.js +5 -0
- package/esm/chunk.js.map +1 -1
- package/esm/csi.d.ts +1 -1
- package/esm/csi.js +26 -28
- package/esm/csi.js.map +1 -1
- package/esm/htsget.d.ts +1 -2
- package/esm/htsget.js +21 -11
- package/esm/htsget.js.map +1 -1
- package/esm/indexFile.d.ts +1 -1
- package/esm/indexFile.js +2 -0
- package/esm/indexFile.js.map +1 -1
- package/esm/record.d.ts +4 -4
- package/esm/record.js +43 -36
- package/esm/record.js.map +1 -1
- package/esm/util.d.ts +4 -2
- package/esm/util.js +20 -1
- package/esm/util.js.map +1 -1
- package/esm/virtualOffset.d.ts +1 -1
- package/esm/virtualOffset.js +2 -0
- package/esm/virtualOffset.js.map +1 -1
- package/package.json +6 -6
- package/src/bai.ts +11 -8
- package/src/bamFile.ts +22 -41
- package/src/chunk.ts +1 -1
- package/src/csi.ts +22 -19
- package/src/htsget.ts +18 -9
- package/src/indexFile.ts +1 -1
- package/src/record.ts +43 -42
- package/src/util.ts +23 -3
- package/src/virtualOffset.ts +1 -1
package/dist/bamFile.js
CHANGED
|
@@ -1,47 +1,12 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
|
-
var __asyncValues = (this && this.__asyncValues) || function (o) {
|
|
12
|
-
if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
|
|
13
|
-
var m = o[Symbol.asyncIterator], i;
|
|
14
|
-
return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
|
|
15
|
-
function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
|
|
16
|
-
function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
|
|
17
|
-
};
|
|
18
|
-
var __await = (this && this.__await) || function (v) { return this instanceof __await ? (this.v = v, this) : new __await(v); }
|
|
19
|
-
var __asyncDelegator = (this && this.__asyncDelegator) || function (o) {
|
|
20
|
-
var i, p;
|
|
21
|
-
return i = {}, verb("next"), verb("throw", function (e) { throw e; }), verb("return"), i[Symbol.iterator] = function () { return this; }, i;
|
|
22
|
-
function verb(n, f) { i[n] = o[n] ? function (v) { return (p = !p) ? { value: __await(o[n](v)), done: false } : f ? f(v) : v; } : f; }
|
|
23
|
-
};
|
|
24
|
-
var __asyncGenerator = (this && this.__asyncGenerator) || function (thisArg, _arguments, generator) {
|
|
25
|
-
if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
|
|
26
|
-
var g = generator.apply(thisArg, _arguments || []), i, q = [];
|
|
27
|
-
return i = Object.create((typeof AsyncIterator === "function" ? AsyncIterator : Object).prototype), verb("next"), verb("throw"), verb("return", awaitReturn), i[Symbol.asyncIterator] = function () { return this; }, i;
|
|
28
|
-
function awaitReturn(f) { return function (v) { return Promise.resolve(v).then(f, reject); }; }
|
|
29
|
-
function verb(n, f) { if (g[n]) { i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; if (f) i[n] = f(i[n]); } }
|
|
30
|
-
function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }
|
|
31
|
-
function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }
|
|
32
|
-
function fulfill(value) { resume("next", value); }
|
|
33
|
-
function reject(value) { resume("throw", value); }
|
|
34
|
-
function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }
|
|
35
|
-
};
|
|
36
2
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
37
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
38
4
|
};
|
|
39
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
40
6
|
exports.BAM_MAGIC = void 0;
|
|
41
|
-
const buffer_1 = require("buffer");
|
|
42
7
|
const crc32_1 = __importDefault(require("crc/crc32"));
|
|
43
8
|
const bgzf_filehandle_1 = require("@gmod/bgzf-filehandle");
|
|
44
|
-
const
|
|
9
|
+
const generic_filehandle2_1 = require("generic-filehandle2");
|
|
45
10
|
const abortable_promise_cache_1 = __importDefault(require("@gmod/abortable-promise-cache"));
|
|
46
11
|
const quick_lru_1 = __importDefault(require("quick-lru"));
|
|
47
12
|
// locals
|
|
@@ -52,28 +17,12 @@ const sam_1 = require("./sam");
|
|
|
52
17
|
const util_1 = require("./util");
|
|
53
18
|
exports.BAM_MAGIC = 21840194;
|
|
54
19
|
const blockLen = 1 << 16;
|
|
55
|
-
function gen2array(gen) {
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
for (_a = true, gen_1 = __asyncValues(gen); gen_1_1 = yield gen_1.next(), _b = gen_1_1.done, !_b; _a = true) {
|
|
62
|
-
_d = gen_1_1.value;
|
|
63
|
-
_a = false;
|
|
64
|
-
const x = _d;
|
|
65
|
-
out = out.concat(x);
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
69
|
-
finally {
|
|
70
|
-
try {
|
|
71
|
-
if (!_a && !_b && (_c = gen_1.return)) yield _c.call(gen_1);
|
|
72
|
-
}
|
|
73
|
-
finally { if (e_1) throw e_1.error; }
|
|
74
|
-
}
|
|
75
|
-
return out;
|
|
76
|
-
});
|
|
20
|
+
async function gen2array(gen) {
|
|
21
|
+
let out = [];
|
|
22
|
+
for await (const x of gen) {
|
|
23
|
+
out = out.concat(x);
|
|
24
|
+
}
|
|
25
|
+
return out;
|
|
77
26
|
}
|
|
78
27
|
class NullFilehandle {
|
|
79
28
|
read() {
|
|
@@ -90,30 +39,38 @@ class NullFilehandle {
|
|
|
90
39
|
}
|
|
91
40
|
}
|
|
92
41
|
class BamFile {
|
|
42
|
+
renameRefSeq;
|
|
43
|
+
bam;
|
|
44
|
+
header;
|
|
45
|
+
chrToIndex;
|
|
46
|
+
indexToChr;
|
|
47
|
+
yieldThreadTime;
|
|
48
|
+
index;
|
|
49
|
+
htsget = false;
|
|
50
|
+
headerP;
|
|
51
|
+
featureCache = new abortable_promise_cache_1.default({
|
|
52
|
+
cache: new quick_lru_1.default({
|
|
53
|
+
maxSize: 50,
|
|
54
|
+
}),
|
|
55
|
+
fill: async (args, signal) => {
|
|
56
|
+
const { chunk, opts } = args;
|
|
57
|
+
const { data, cpositions, dpositions } = await this._readChunk({
|
|
58
|
+
chunk,
|
|
59
|
+
opts: { ...opts, signal },
|
|
60
|
+
});
|
|
61
|
+
return this.readBamFeatures(data, cpositions, dpositions, chunk);
|
|
62
|
+
},
|
|
63
|
+
});
|
|
93
64
|
constructor({ bamFilehandle, bamPath, bamUrl, baiPath, baiFilehandle, baiUrl, csiPath, csiFilehandle, csiUrl, htsget, yieldThreadTime = 100, renameRefSeqs = n => n, }) {
|
|
94
|
-
this.htsget = false;
|
|
95
|
-
this.featureCache = new abortable_promise_cache_1.default({
|
|
96
|
-
cache: new quick_lru_1.default({
|
|
97
|
-
maxSize: 50,
|
|
98
|
-
}),
|
|
99
|
-
fill: (args, signal) => __awaiter(this, void 0, void 0, function* () {
|
|
100
|
-
const { chunk, opts } = args;
|
|
101
|
-
const { data, cpositions, dpositions } = yield this._readChunk({
|
|
102
|
-
chunk,
|
|
103
|
-
opts: Object.assign(Object.assign({}, opts), { signal }),
|
|
104
|
-
});
|
|
105
|
-
return this.readBamFeatures(data, cpositions, dpositions, chunk);
|
|
106
|
-
}),
|
|
107
|
-
});
|
|
108
65
|
this.renameRefSeq = renameRefSeqs;
|
|
109
66
|
if (bamFilehandle) {
|
|
110
67
|
this.bam = bamFilehandle;
|
|
111
68
|
}
|
|
112
69
|
else if (bamPath) {
|
|
113
|
-
this.bam = new
|
|
70
|
+
this.bam = new generic_filehandle2_1.LocalFile(bamPath);
|
|
114
71
|
}
|
|
115
72
|
else if (bamUrl) {
|
|
116
|
-
this.bam = new
|
|
73
|
+
this.bam = new generic_filehandle2_1.RemoteFile(bamUrl);
|
|
117
74
|
}
|
|
118
75
|
else if (htsget) {
|
|
119
76
|
this.htsget = true;
|
|
@@ -126,25 +83,25 @@ class BamFile {
|
|
|
126
83
|
this.index = new csi_1.default({ filehandle: csiFilehandle });
|
|
127
84
|
}
|
|
128
85
|
else if (csiPath) {
|
|
129
|
-
this.index = new csi_1.default({ filehandle: new
|
|
86
|
+
this.index = new csi_1.default({ filehandle: new generic_filehandle2_1.LocalFile(csiPath) });
|
|
130
87
|
}
|
|
131
88
|
else if (csiUrl) {
|
|
132
|
-
this.index = new csi_1.default({ filehandle: new
|
|
89
|
+
this.index = new csi_1.default({ filehandle: new generic_filehandle2_1.RemoteFile(csiUrl) });
|
|
133
90
|
}
|
|
134
91
|
else if (baiFilehandle) {
|
|
135
92
|
this.index = new bai_1.default({ filehandle: baiFilehandle });
|
|
136
93
|
}
|
|
137
94
|
else if (baiPath) {
|
|
138
|
-
this.index = new bai_1.default({ filehandle: new
|
|
95
|
+
this.index = new bai_1.default({ filehandle: new generic_filehandle2_1.LocalFile(baiPath) });
|
|
139
96
|
}
|
|
140
97
|
else if (baiUrl) {
|
|
141
|
-
this.index = new bai_1.default({ filehandle: new
|
|
98
|
+
this.index = new bai_1.default({ filehandle: new generic_filehandle2_1.RemoteFile(baiUrl) });
|
|
142
99
|
}
|
|
143
100
|
else if (bamPath) {
|
|
144
|
-
this.index = new bai_1.default({ filehandle: new
|
|
101
|
+
this.index = new bai_1.default({ filehandle: new generic_filehandle2_1.LocalFile(`${bamPath}.bai`) });
|
|
145
102
|
}
|
|
146
103
|
else if (bamUrl) {
|
|
147
|
-
this.index = new bai_1.default({ filehandle: new
|
|
104
|
+
this.index = new bai_1.default({ filehandle: new generic_filehandle2_1.RemoteFile(`${bamUrl}.bai`) });
|
|
148
105
|
}
|
|
149
106
|
else if (htsget) {
|
|
150
107
|
this.htsget = true;
|
|
@@ -154,39 +111,35 @@ class BamFile {
|
|
|
154
111
|
}
|
|
155
112
|
this.yieldThreadTime = yieldThreadTime;
|
|
156
113
|
}
|
|
157
|
-
getHeaderPre(origOpts) {
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
this.chrToIndex = chrToIndex;
|
|
187
|
-
this.indexToChr = indexToChr;
|
|
188
|
-
return (0, sam_1.parseHeaderText)(this.header);
|
|
189
|
-
});
|
|
114
|
+
async getHeaderPre(origOpts) {
|
|
115
|
+
const opts = (0, util_1.makeOpts)(origOpts);
|
|
116
|
+
if (!this.index) {
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
119
|
+
const indexData = await this.index.parse(opts);
|
|
120
|
+
const ret = indexData.firstDataLine
|
|
121
|
+
? indexData.firstDataLine.blockPosition + 65535
|
|
122
|
+
: undefined;
|
|
123
|
+
let buffer;
|
|
124
|
+
if (ret) {
|
|
125
|
+
const s = ret + blockLen;
|
|
126
|
+
buffer = await this.bam.read(s, 0);
|
|
127
|
+
}
|
|
128
|
+
else {
|
|
129
|
+
buffer = await this.bam.readFile(opts);
|
|
130
|
+
}
|
|
131
|
+
const uncba = await (0, bgzf_filehandle_1.unzip)(buffer);
|
|
132
|
+
const dataView = new DataView(uncba.buffer);
|
|
133
|
+
if (dataView.getInt32(0, true) !== exports.BAM_MAGIC) {
|
|
134
|
+
throw new Error('Not a BAM file');
|
|
135
|
+
}
|
|
136
|
+
const headLen = dataView.getInt32(4, true);
|
|
137
|
+
const decoder = new TextDecoder('utf8');
|
|
138
|
+
this.header = decoder.decode(uncba.subarray(8, 8 + headLen));
|
|
139
|
+
const { chrToIndex, indexToChr } = await this._readRefSeqs(headLen + 8, 65535, opts);
|
|
140
|
+
this.chrToIndex = chrToIndex;
|
|
141
|
+
this.indexToChr = indexToChr;
|
|
142
|
+
return (0, sam_1.parseHeaderText)(this.header);
|
|
190
143
|
}
|
|
191
144
|
getHeader(opts) {
|
|
192
145
|
if (!this.headerP) {
|
|
@@ -197,271 +150,239 @@ class BamFile {
|
|
|
197
150
|
}
|
|
198
151
|
return this.headerP;
|
|
199
152
|
}
|
|
200
|
-
getHeaderText() {
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
return this.header;
|
|
204
|
-
});
|
|
153
|
+
async getHeaderText(opts = {}) {
|
|
154
|
+
await this.getHeader(opts);
|
|
155
|
+
return this.header;
|
|
205
156
|
}
|
|
206
157
|
// the full length of the refseq block is not given in advance so this grabs
|
|
207
158
|
// a chunk and doubles it if all refseqs haven't been processed
|
|
208
|
-
_readRefSeqs(start, refSeqBytes, opts) {
|
|
209
|
-
|
|
210
|
-
|
|
159
|
+
async _readRefSeqs(start, refSeqBytes, opts) {
|
|
160
|
+
if (start > refSeqBytes) {
|
|
161
|
+
return this._readRefSeqs(start, refSeqBytes * 2, opts);
|
|
162
|
+
}
|
|
163
|
+
// const size = refSeqBytes + blockLen <-- use this?
|
|
164
|
+
const buffer = await this.bam.read(refSeqBytes, 0, opts);
|
|
165
|
+
const uncba = await (0, bgzf_filehandle_1.unzip)(buffer);
|
|
166
|
+
const dataView = new DataView(uncba.buffer);
|
|
167
|
+
const nRef = dataView.getInt32(start, true);
|
|
168
|
+
let p = start + 4;
|
|
169
|
+
const chrToIndex = {};
|
|
170
|
+
const indexToChr = [];
|
|
171
|
+
const decoder = new TextDecoder('utf8');
|
|
172
|
+
for (let i = 0; i < nRef; i += 1) {
|
|
173
|
+
const lName = dataView.getInt32(p, true);
|
|
174
|
+
const refName = this.renameRefSeq(decoder.decode(uncba.subarray(p + 4, p + 4 + lName - 1)));
|
|
175
|
+
const lRef = dataView.getInt32(p + lName + 4, true);
|
|
176
|
+
chrToIndex[refName] = i;
|
|
177
|
+
indexToChr.push({ refName, length: lRef });
|
|
178
|
+
p = p + 8 + lName;
|
|
179
|
+
if (p > uncba.length) {
|
|
180
|
+
console.warn(`BAM header is very big. Re-fetching ${refSeqBytes} bytes.`);
|
|
211
181
|
return this._readRefSeqs(start, refSeqBytes * 2, opts);
|
|
212
182
|
}
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
if (!bytesRead) {
|
|
216
|
-
throw new Error('Error reading refseqs from header');
|
|
217
|
-
}
|
|
218
|
-
const uncba = yield (0, bgzf_filehandle_1.unzip)(buffer.subarray(0, Math.min(bytesRead, refSeqBytes)));
|
|
219
|
-
const nRef = uncba.readInt32LE(start);
|
|
220
|
-
let p = start + 4;
|
|
221
|
-
const chrToIndex = {};
|
|
222
|
-
const indexToChr = [];
|
|
223
|
-
for (let i = 0; i < nRef; i += 1) {
|
|
224
|
-
const lName = uncba.readInt32LE(p);
|
|
225
|
-
const refName = this.renameRefSeq(uncba.toString('utf8', p + 4, p + 4 + lName - 1));
|
|
226
|
-
const lRef = uncba.readInt32LE(p + lName + 4);
|
|
227
|
-
chrToIndex[refName] = i;
|
|
228
|
-
indexToChr.push({ refName, length: lRef });
|
|
229
|
-
p = p + 8 + lName;
|
|
230
|
-
if (p > uncba.length) {
|
|
231
|
-
console.warn(`BAM header is very big. Re-fetching ${refSeqBytes} bytes.`);
|
|
232
|
-
return this._readRefSeqs(start, refSeqBytes * 2, opts);
|
|
233
|
-
}
|
|
234
|
-
}
|
|
235
|
-
return { chrToIndex, indexToChr };
|
|
236
|
-
});
|
|
183
|
+
}
|
|
184
|
+
return { chrToIndex, indexToChr };
|
|
237
185
|
}
|
|
238
|
-
getRecordsForRange(chr, min, max, opts) {
|
|
239
|
-
return
|
|
240
|
-
return gen2array(this.streamRecordsForRange(chr, min, max, opts));
|
|
241
|
-
});
|
|
186
|
+
async getRecordsForRange(chr, min, max, opts) {
|
|
187
|
+
return gen2array(this.streamRecordsForRange(chr, min, max, opts));
|
|
242
188
|
}
|
|
243
|
-
streamRecordsForRange(chr, min, max, opts) {
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
yield __await(yield* __asyncDelegator(__asyncValues(this._fetchChunkFeatures(chunks, chrId, min, max, opts))));
|
|
254
|
-
}
|
|
255
|
-
});
|
|
189
|
+
async *streamRecordsForRange(chr, min, max, opts) {
|
|
190
|
+
await this.getHeader(opts);
|
|
191
|
+
const chrId = this.chrToIndex?.[chr];
|
|
192
|
+
if (chrId === undefined || !this.index) {
|
|
193
|
+
yield [];
|
|
194
|
+
}
|
|
195
|
+
else {
|
|
196
|
+
const chunks = await this.index.blocksForRange(chrId, min - 1, max, opts);
|
|
197
|
+
yield* this._fetchChunkFeatures(chunks, chrId, min, max, opts);
|
|
198
|
+
}
|
|
256
199
|
}
|
|
257
|
-
_fetchChunkFeatures(
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
if (feature.
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
recs.push(feature);
|
|
275
|
-
}
|
|
200
|
+
async *_fetchChunkFeatures(chunks, chrId, min, max, opts = {}) {
|
|
201
|
+
const { viewAsPairs } = opts;
|
|
202
|
+
const feats = [];
|
|
203
|
+
let done = false;
|
|
204
|
+
for (const chunk of chunks) {
|
|
205
|
+
const records = await this.featureCache.get(chunk.toString(), { chunk, opts }, opts.signal);
|
|
206
|
+
const recs = [];
|
|
207
|
+
for (const feature of records) {
|
|
208
|
+
if (feature.ref_id === chrId) {
|
|
209
|
+
if (feature.start >= max) {
|
|
210
|
+
// past end of range, can stop iterating
|
|
211
|
+
done = true;
|
|
212
|
+
break;
|
|
213
|
+
}
|
|
214
|
+
else if (feature.end >= min) {
|
|
215
|
+
// must be in range
|
|
216
|
+
recs.push(feature);
|
|
276
217
|
}
|
|
277
|
-
}
|
|
278
|
-
feats.push(recs);
|
|
279
|
-
yield yield __await(recs);
|
|
280
|
-
if (done) {
|
|
281
|
-
break;
|
|
282
218
|
}
|
|
283
219
|
}
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
220
|
+
feats.push(recs);
|
|
221
|
+
yield recs;
|
|
222
|
+
if (done) {
|
|
223
|
+
break;
|
|
287
224
|
}
|
|
288
|
-
}
|
|
225
|
+
}
|
|
226
|
+
(0, util_1.checkAbortSignal)(opts.signal);
|
|
227
|
+
if (viewAsPairs) {
|
|
228
|
+
yield this.fetchPairs(chrId, feats, opts);
|
|
229
|
+
}
|
|
289
230
|
}
|
|
290
|
-
fetchPairs(chrId, feats, opts) {
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
readNames[name] = 0;
|
|
302
|
-
}
|
|
303
|
-
readNames[name]++;
|
|
304
|
-
readIds[id] = 1;
|
|
231
|
+
async fetchPairs(chrId, feats, opts) {
|
|
232
|
+
const { pairAcrossChr, maxInsertSize = 200000 } = opts;
|
|
233
|
+
const unmatedPairs = {};
|
|
234
|
+
const readIds = {};
|
|
235
|
+
feats.map(ret => {
|
|
236
|
+
const readNames = {};
|
|
237
|
+
for (const element of ret) {
|
|
238
|
+
const name = element.name;
|
|
239
|
+
const id = element.id;
|
|
240
|
+
if (!readNames[name]) {
|
|
241
|
+
readNames[name] = 0;
|
|
305
242
|
}
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
243
|
+
readNames[name]++;
|
|
244
|
+
readIds[id] = 1;
|
|
245
|
+
}
|
|
246
|
+
for (const [k, v] of Object.entries(readNames)) {
|
|
247
|
+
if (v === 1) {
|
|
248
|
+
unmatedPairs[k] = true;
|
|
310
249
|
}
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
250
|
+
}
|
|
251
|
+
});
|
|
252
|
+
const matePromises = [];
|
|
253
|
+
feats.map(ret => {
|
|
254
|
+
for (const f of ret) {
|
|
255
|
+
const name = f.name;
|
|
256
|
+
const start = f.start;
|
|
257
|
+
const pnext = f.next_pos;
|
|
258
|
+
const rnext = f.next_refid;
|
|
259
|
+
if (this.index &&
|
|
260
|
+
unmatedPairs[name] &&
|
|
261
|
+
(pairAcrossChr ||
|
|
262
|
+
(rnext === chrId && Math.abs(start - pnext) < maxInsertSize))) {
|
|
263
|
+
matePromises.push(this.index.blocksForRange(rnext, pnext, pnext + 1, opts));
|
|
325
264
|
}
|
|
265
|
+
}
|
|
266
|
+
});
|
|
267
|
+
// filter out duplicate chunks (the blocks are lists of chunks, blocks are
|
|
268
|
+
// concatenated, then filter dup chunks)
|
|
269
|
+
const map = new Map();
|
|
270
|
+
const res = await Promise.all(matePromises);
|
|
271
|
+
for (const m of res.flat()) {
|
|
272
|
+
if (!map.has(m.toString())) {
|
|
273
|
+
map.set(m.toString(), m);
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
const mateFeatPromises = await Promise.all([...map.values()].map(async (c) => {
|
|
277
|
+
const { data, cpositions, dpositions, chunk } = await this._readChunk({
|
|
278
|
+
chunk: c,
|
|
279
|
+
opts,
|
|
326
280
|
});
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
for (const m of res.flat()) {
|
|
332
|
-
if (!map.has(m.toString())) {
|
|
333
|
-
map.set(m.toString(), m);
|
|
281
|
+
const mateRecs = [];
|
|
282
|
+
for (const feature of await this.readBamFeatures(data, cpositions, dpositions, chunk)) {
|
|
283
|
+
if (unmatedPairs[feature.name] && !readIds[feature.id]) {
|
|
284
|
+
mateRecs.push(feature);
|
|
334
285
|
}
|
|
335
286
|
}
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
opts,
|
|
340
|
-
});
|
|
341
|
-
const mateRecs = [];
|
|
342
|
-
for (const feature of yield this.readBamFeatures(data, cpositions, dpositions, chunk)) {
|
|
343
|
-
if (unmatedPairs[feature.name] && !readIds[feature.id]) {
|
|
344
|
-
mateRecs.push(feature);
|
|
345
|
-
}
|
|
346
|
-
}
|
|
347
|
-
return mateRecs;
|
|
348
|
-
})));
|
|
349
|
-
return mateFeatPromises.flat();
|
|
350
|
-
});
|
|
287
|
+
return mateRecs;
|
|
288
|
+
}));
|
|
289
|
+
return mateFeatPromises.flat();
|
|
351
290
|
}
|
|
352
|
-
_readRegion(
|
|
353
|
-
return
|
|
354
|
-
const { bytesRead, buffer } = yield this.bam.read(buffer_1.Buffer.alloc(size), 0, size, position, opts);
|
|
355
|
-
return buffer.subarray(0, Math.min(bytesRead, size));
|
|
356
|
-
});
|
|
291
|
+
async _readRegion(position, size, opts = {}) {
|
|
292
|
+
return this.bam.read(size, position, opts);
|
|
357
293
|
}
|
|
358
|
-
_readChunk(
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
return { data, cpositions, dpositions, chunk };
|
|
363
|
-
});
|
|
294
|
+
async _readChunk({ chunk, opts }) {
|
|
295
|
+
const buffer = await this._readRegion(chunk.minv.blockPosition, chunk.fetchedSize(), opts);
|
|
296
|
+
const { buffer: data, cpositions, dpositions, } = await (0, bgzf_filehandle_1.unzipChunkSlice)(buffer, chunk);
|
|
297
|
+
return { data, cpositions, dpositions, chunk };
|
|
364
298
|
}
|
|
365
|
-
readBamFeatures(ba, cpositions, dpositions, chunk) {
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
}
|
|
299
|
+
async readBamFeatures(ba, cpositions, dpositions, chunk) {
|
|
300
|
+
let blockStart = 0;
|
|
301
|
+
const sink = [];
|
|
302
|
+
let pos = 0;
|
|
303
|
+
let last = +Date.now();
|
|
304
|
+
const dataView = new DataView(ba.buffer);
|
|
305
|
+
while (blockStart + 4 < ba.length) {
|
|
306
|
+
const blockSize = dataView.getInt32(blockStart, true);
|
|
307
|
+
const blockEnd = blockStart + 4 + blockSize - 1;
|
|
308
|
+
// increment position to the current decompressed status
|
|
309
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
310
|
+
if (dpositions) {
|
|
311
|
+
while (blockStart + chunk.minv.dataPosition >= dpositions[pos++]) { }
|
|
312
|
+
pos--;
|
|
313
|
+
}
|
|
314
|
+
// only try to read the feature if we have all the bytes for it
|
|
315
|
+
if (blockEnd < ba.length) {
|
|
316
|
+
const feature = new record_1.default({
|
|
317
|
+
bytes: {
|
|
318
|
+
byteArray: ba,
|
|
319
|
+
start: blockStart,
|
|
320
|
+
end: blockEnd,
|
|
321
|
+
},
|
|
322
|
+
// the below results in an automatically calculated file-offset based
|
|
323
|
+
// ID if the info for that is available, otherwise crc32 of the
|
|
324
|
+
// features
|
|
325
|
+
//
|
|
326
|
+
// cpositions[pos] refers to actual file offset of a bgzip block
|
|
327
|
+
// boundaries
|
|
328
|
+
//
|
|
329
|
+
// we multiply by (1 <<8) in order to make sure each block has a
|
|
330
|
+
// "unique" address space so that data in that block could never
|
|
331
|
+
// overlap
|
|
332
|
+
//
|
|
333
|
+
// then the blockStart-dpositions is an uncompressed file offset from
|
|
334
|
+
// that bgzip block boundary, and since the cpositions are multiplied
|
|
335
|
+
// by (1 << 8) these uncompressed offsets get a unique space
|
|
336
|
+
//
|
|
337
|
+
// this has an extra chunk.minv.dataPosition added on because it
|
|
338
|
+
// blockStart starts at 0 instead of chunk.minv.dataPosition
|
|
339
|
+
//
|
|
340
|
+
// the +1 is just to avoid any possible uniqueId 0 but this does not
|
|
341
|
+
// realistically happen
|
|
342
|
+
fileOffset: cpositions.length > 0
|
|
343
|
+
? cpositions[pos] * (1 << 8) +
|
|
344
|
+
(blockStart - dpositions[pos]) +
|
|
345
|
+
chunk.minv.dataPosition +
|
|
346
|
+
1
|
|
347
|
+
: // must be slice, not subarray for buffer polyfill on web
|
|
348
|
+
// @ts-expect-error
|
|
349
|
+
crc32_1.default.signed(ba.subarray(blockStart, blockEnd)),
|
|
350
|
+
});
|
|
351
|
+
sink.push(feature);
|
|
352
|
+
if (this.yieldThreadTime && +Date.now() - last > this.yieldThreadTime) {
|
|
353
|
+
await (0, util_1.timeout)(1);
|
|
354
|
+
last = +Date.now();
|
|
422
355
|
}
|
|
423
|
-
blockStart = blockEnd + 1;
|
|
424
356
|
}
|
|
425
|
-
|
|
426
|
-
}
|
|
357
|
+
blockStart = blockEnd + 1;
|
|
358
|
+
}
|
|
359
|
+
return sink;
|
|
427
360
|
}
|
|
428
|
-
hasRefSeq(seqName) {
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
const seqId = (_a = this.chrToIndex) === null || _a === void 0 ? void 0 : _a[seqName];
|
|
432
|
-
return seqId === undefined ? false : (_b = this.index) === null || _b === void 0 ? void 0 : _b.hasRefSeq(seqId);
|
|
433
|
-
});
|
|
361
|
+
async hasRefSeq(seqName) {
|
|
362
|
+
const seqId = this.chrToIndex?.[seqName];
|
|
363
|
+
return seqId === undefined ? false : this.index?.hasRefSeq(seqId);
|
|
434
364
|
}
|
|
435
|
-
lineCount(seqName) {
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
const seqId = (_a = this.chrToIndex) === null || _a === void 0 ? void 0 : _a[seqName];
|
|
439
|
-
return seqId === undefined || !this.index ? 0 : this.index.lineCount(seqId);
|
|
440
|
-
});
|
|
365
|
+
async lineCount(seqName) {
|
|
366
|
+
const seqId = this.chrToIndex?.[seqName];
|
|
367
|
+
return seqId === undefined || !this.index ? 0 : this.index.lineCount(seqId);
|
|
441
368
|
}
|
|
442
|
-
indexCov(seqName, start, end) {
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
const seqId = (_a = this.chrToIndex) === null || _a === void 0 ? void 0 : _a[seqName];
|
|
450
|
-
return seqId === undefined ? [] : this.index.indexCov(seqId, start, end);
|
|
451
|
-
});
|
|
369
|
+
async indexCov(seqName, start, end) {
|
|
370
|
+
if (!this.index) {
|
|
371
|
+
return [];
|
|
372
|
+
}
|
|
373
|
+
await this.index.parse();
|
|
374
|
+
const seqId = this.chrToIndex?.[seqName];
|
|
375
|
+
return seqId === undefined ? [] : this.index.indexCov(seqId, start, end);
|
|
452
376
|
}
|
|
453
|
-
blocksForRange(seqName, start, end, opts) {
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
? []
|
|
463
|
-
: this.index.blocksForRange(seqId, start, end, opts);
|
|
464
|
-
});
|
|
377
|
+
async blocksForRange(seqName, start, end, opts) {
|
|
378
|
+
if (!this.index) {
|
|
379
|
+
return [];
|
|
380
|
+
}
|
|
381
|
+
await this.index.parse();
|
|
382
|
+
const seqId = this.chrToIndex?.[seqName];
|
|
383
|
+
return seqId === undefined
|
|
384
|
+
? []
|
|
385
|
+
: this.index.blocksForRange(seqId, start, end, opts);
|
|
465
386
|
}
|
|
466
387
|
}
|
|
467
388
|
exports.default = BamFile;
|