@gmod/bam 1.1.16 → 1.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bamFile.js CHANGED
@@ -1,15 +1,4 @@
1
1
  "use strict";
2
- var __assign = (this && this.__assign) || function () {
3
- __assign = Object.assign || function(t) {
4
- for (var s, i = 1, n = arguments.length; i < n; i++) {
5
- s = arguments[i];
6
- for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
7
- t[p] = s[p];
8
- }
9
- return t;
10
- };
11
- return __assign.apply(this, arguments);
12
- };
13
2
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
14
3
  function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
15
4
  return new (P || (P = Promise))(function (resolve, reject) {
@@ -19,33 +8,6 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
19
8
  step((generator = generator.apply(thisArg, _arguments || [])).next());
20
9
  });
21
10
  };
22
- var __generator = (this && this.__generator) || function (thisArg, body) {
23
- var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
24
- return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
25
- function verb(n) { return function (v) { return step([n, v]); }; }
26
- function step(op) {
27
- if (f) throw new TypeError("Generator is already executing.");
28
- while (_) try {
29
- if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
30
- if (y = 0, t) op = [op[0] & 2, t.value];
31
- switch (op[0]) {
32
- case 0: case 1: t = op; break;
33
- case 4: _.label++; return { value: op[1], done: false };
34
- case 5: _.label++; y = op[1]; op = [0]; continue;
35
- case 7: op = _.ops.pop(); _.trys.pop(); continue;
36
- default:
37
- if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
38
- if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
39
- if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
40
- if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
41
- if (t[2]) _.ops.pop();
42
- _.trys.pop(); continue;
43
- }
44
- op = body.call(thisArg, _);
45
- } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
46
- if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
47
- }
48
- };
49
11
  var __asyncValues = (this && this.__asyncValues) || function (o) {
50
12
  if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
51
13
  var m = o[Symbol.asyncIterator], i;
@@ -70,84 +32,57 @@ var __asyncGenerator = (this && this.__asyncGenerator) || function (thisArg, _ar
70
32
  function reject(value) { resume("throw", value); }
71
33
  function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }
72
34
  };
73
- var __values = (this && this.__values) || function(o) {
74
- var s = typeof Symbol === "function" && Symbol.iterator, m = s && o[s], i = 0;
75
- if (m) return m.call(o);
76
- if (o && typeof o.length === "number") return {
77
- next: function () {
78
- if (o && i >= o.length) o = void 0;
79
- return { value: o && o[i++], done: !o };
80
- }
81
- };
82
- throw new TypeError(s ? "Object is not iterable." : "Symbol.iterator is not defined.");
83
- };
84
35
  var __importDefault = (this && this.__importDefault) || function (mod) {
85
36
  return (mod && mod.__esModule) ? mod : { "default": mod };
86
37
  };
87
38
  Object.defineProperty(exports, "__esModule", { value: true });
88
39
  exports.BAM_MAGIC = void 0;
89
- var buffer_crc32_1 = __importDefault(require("buffer-crc32"));
90
- var bgzf_filehandle_1 = require("@gmod/bgzf-filehandle");
91
- var object_entries_ponyfill_1 = __importDefault(require("object.entries-ponyfill"));
92
- var generic_filehandle_1 = require("generic-filehandle");
93
- var abortable_promise_cache_1 = __importDefault(require("abortable-promise-cache"));
94
- var quick_lru_1 = __importDefault(require("quick-lru"));
40
+ const buffer_crc32_1 = __importDefault(require("buffer-crc32"));
41
+ const bgzf_filehandle_1 = require("@gmod/bgzf-filehandle");
42
+ const object_entries_ponyfill_1 = __importDefault(require("object.entries-ponyfill"));
43
+ const generic_filehandle_1 = require("generic-filehandle");
44
+ const abortable_promise_cache_1 = __importDefault(require("abortable-promise-cache"));
45
+ const quick_lru_1 = __importDefault(require("quick-lru"));
95
46
  //locals
96
- var bai_1 = __importDefault(require("./bai"));
97
- var csi_1 = __importDefault(require("./csi"));
98
- var record_1 = __importDefault(require("./record"));
99
- var sam_1 = require("./sam");
100
- var util_1 = require("./util");
47
+ const bai_1 = __importDefault(require("./bai"));
48
+ const csi_1 = __importDefault(require("./csi"));
49
+ const record_1 = __importDefault(require("./record"));
50
+ const sam_1 = require("./sam");
51
+ const util_1 = require("./util");
101
52
  exports.BAM_MAGIC = 21840194;
102
- var blockLen = 1 << 16;
53
+ const blockLen = 1 << 16;
103
54
  function flat(arr) {
104
- var _a;
105
- return (_a = []).concat.apply(_a, arr);
55
+ return [].concat(...arr);
106
56
  }
107
57
  function gen2array(gen) {
108
- var gen_1, gen_1_1;
109
- var e_1, _a;
110
- return __awaiter(this, void 0, void 0, function () {
111
- var out, x, e_1_1;
112
- return __generator(this, function (_b) {
113
- switch (_b.label) {
114
- case 0:
115
- out = [];
116
- _b.label = 1;
117
- case 1:
118
- _b.trys.push([1, 6, 7, 12]);
119
- gen_1 = __asyncValues(gen);
120
- _b.label = 2;
121
- case 2: return [4 /*yield*/, gen_1.next()];
122
- case 3:
123
- if (!(gen_1_1 = _b.sent(), !gen_1_1.done)) return [3 /*break*/, 5];
124
- x = gen_1_1.value;
58
+ var _a, gen_1, gen_1_1;
59
+ var _b, e_1, _c, _d;
60
+ return __awaiter(this, void 0, void 0, function* () {
61
+ const out = [];
62
+ try {
63
+ for (_a = true, gen_1 = __asyncValues(gen); gen_1_1 = yield gen_1.next(), _b = gen_1_1.done, !_b;) {
64
+ _d = gen_1_1.value;
65
+ _a = false;
66
+ try {
67
+ const x = _d;
125
68
  out.push(x);
126
- _b.label = 4;
127
- case 4: return [3 /*break*/, 2];
128
- case 5: return [3 /*break*/, 12];
129
- case 6:
130
- e_1_1 = _b.sent();
131
- e_1 = { error: e_1_1 };
132
- return [3 /*break*/, 12];
133
- case 7:
134
- _b.trys.push([7, , 10, 11]);
135
- if (!(gen_1_1 && !gen_1_1.done && (_a = gen_1.return))) return [3 /*break*/, 9];
136
- return [4 /*yield*/, _a.call(gen_1)];
137
- case 8:
138
- _b.sent();
139
- _b.label = 9;
140
- case 9: return [3 /*break*/, 11];
141
- case 10:
142
- if (e_1) throw e_1.error;
143
- return [7 /*endfinally*/];
144
- case 11: return [7 /*endfinally*/];
145
- case 12: return [2 /*return*/, out];
69
+ }
70
+ finally {
71
+ _a = true;
72
+ }
146
73
  }
147
- });
74
+ }
75
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
76
+ finally {
77
+ try {
78
+ if (!_a && !_b && (_c = gen_1.return)) yield _c.call(gen_1);
79
+ }
80
+ finally { if (e_1) throw e_1.error; }
81
+ }
82
+ return out;
148
83
  });
149
84
  }
150
- var BamFile = /** @class */ (function () {
85
+ class BamFile {
151
86
  /**
152
87
  * @param {object} args
153
88
  * @param {string} [args.bamPath]
@@ -155,35 +90,21 @@ var BamFile = /** @class */ (function () {
155
90
  * @param {string} [args.baiPath]
156
91
  * @param {FileHandle} [args.baiFilehandle]
157
92
  */
158
- function BamFile(_a) {
159
- var bamFilehandle = _a.bamFilehandle, bamPath = _a.bamPath, bamUrl = _a.bamUrl, baiPath = _a.baiPath, baiFilehandle = _a.baiFilehandle, baiUrl = _a.baiUrl, csiPath = _a.csiPath, csiFilehandle = _a.csiFilehandle, csiUrl = _a.csiUrl, fetchSizeLimit = _a.fetchSizeLimit, chunkSizeLimit = _a.chunkSizeLimit, _b = _a.yieldThreadTime, yieldThreadTime = _b === void 0 ? 100 : _b, _c = _a.renameRefSeqs, renameRefSeqs = _c === void 0 ? function (n) { return n; } : _c;
160
- var _this = this;
93
+ constructor({ bamFilehandle, bamPath, bamUrl, baiPath, baiFilehandle, baiUrl, csiPath, csiFilehandle, csiUrl, fetchSizeLimit, chunkSizeLimit, yieldThreadTime = 100, renameRefSeqs = n => n, }) {
161
94
  this.featureCache = new abortable_promise_cache_1.default({
162
95
  //@ts-ignore
163
96
  cache: new quick_lru_1.default({
164
97
  maxSize: 50,
165
98
  }),
166
99
  //@ts-ignore
167
- fill: function (_a, signal) {
168
- var chunk = _a.chunk, opts = _a.opts;
169
- return __awaiter(_this, void 0, void 0, function () {
170
- var _b, data, cpositions, dpositions, feats;
171
- return __generator(this, function (_c) {
172
- switch (_c.label) {
173
- case 0: return [4 /*yield*/, this._readChunk({
174
- chunk: chunk,
175
- opts: __assign(__assign({}, opts), { signal: signal }),
176
- })];
177
- case 1:
178
- _b = _c.sent(), data = _b.data, cpositions = _b.cpositions, dpositions = _b.dpositions;
179
- return [4 /*yield*/, this.readBamFeatures(data, cpositions, dpositions, chunk)];
180
- case 2:
181
- feats = _c.sent();
182
- return [2 /*return*/, feats];
183
- }
184
- });
100
+ fill: ({ chunk, opts }, signal) => __awaiter(this, void 0, void 0, function* () {
101
+ const { data, cpositions, dpositions } = yield this._readChunk({
102
+ chunk,
103
+ opts: Object.assign(Object.assign({}, opts), { signal }),
185
104
  });
186
- },
105
+ const feats = yield this.readBamFeatures(data, cpositions, dpositions, chunk);
106
+ return feats;
107
+ }),
187
108
  });
188
109
  this.renameRefSeq = renameRefSeqs;
189
110
  if (bamFilehandle) {
@@ -217,10 +138,10 @@ var BamFile = /** @class */ (function () {
217
138
  this.index = new bai_1.default({ filehandle: new generic_filehandle_1.RemoteFile(baiUrl) });
218
139
  }
219
140
  else if (bamPath) {
220
- this.index = new bai_1.default({ filehandle: new generic_filehandle_1.LocalFile("".concat(bamPath, ".bai")) });
141
+ this.index = new bai_1.default({ filehandle: new generic_filehandle_1.LocalFile(`${bamPath}.bai`) });
221
142
  }
222
143
  else if (bamUrl) {
223
- this.index = new bai_1.default({ filehandle: new generic_filehandle_1.RemoteFile("".concat(bamUrl, ".bai")) });
144
+ this.index = new bai_1.default({ filehandle: new generic_filehandle_1.RemoteFile(`${bamUrl}.bai`) });
224
145
  }
225
146
  else {
226
147
  throw new Error('unable to infer index format');
@@ -229,461 +150,315 @@ var BamFile = /** @class */ (function () {
229
150
  this.chunkSizeLimit = chunkSizeLimit || 300000000; // 300MB
230
151
  this.yieldThreadTime = yieldThreadTime;
231
152
  }
232
- BamFile.prototype.getHeader = function (origOpts) {
233
- if (origOpts === void 0) { origOpts = {}; }
234
- return __awaiter(this, void 0, void 0, function () {
235
- var opts, indexData, ret, buffer, res, bytesRead, uncba, headLen, _a, chrToIndex, indexToChr;
236
- return __generator(this, function (_b) {
237
- switch (_b.label) {
238
- case 0:
239
- opts = (0, util_1.makeOpts)(origOpts);
240
- return [4 /*yield*/, this.index.parse(opts)];
241
- case 1:
242
- indexData = _b.sent();
243
- ret = indexData.firstDataLine
244
- ? indexData.firstDataLine.blockPosition + 65535
245
- : undefined;
246
- if (!ret) return [3 /*break*/, 3];
247
- return [4 /*yield*/, this.bam.read(Buffer.alloc(ret + blockLen), 0, ret + blockLen, 0, opts)];
248
- case 2:
249
- res = _b.sent();
250
- bytesRead = res.bytesRead;
251
- (buffer = res.buffer);
252
- if (!bytesRead) {
253
- throw new Error('Error reading header');
254
- }
255
- if (bytesRead < ret) {
256
- buffer = buffer.subarray(0, bytesRead);
257
- }
258
- else {
259
- buffer = buffer.subarray(0, ret);
260
- }
261
- return [3 /*break*/, 5];
262
- case 3: return [4 /*yield*/, this.bam.readFile(opts)];
263
- case 4:
264
- buffer = (_b.sent());
265
- _b.label = 5;
266
- case 5: return [4 /*yield*/, (0, bgzf_filehandle_1.unzip)(buffer)];
267
- case 6:
268
- uncba = _b.sent();
269
- if (uncba.readInt32LE(0) !== exports.BAM_MAGIC) {
270
- throw new Error('Not a BAM file');
271
- }
272
- headLen = uncba.readInt32LE(4);
273
- this.header = uncba.toString('utf8', 8, 8 + headLen);
274
- return [4 /*yield*/, this._readRefSeqs(headLen + 8, 65535, opts)];
275
- case 7:
276
- _a = _b.sent(), chrToIndex = _a.chrToIndex, indexToChr = _a.indexToChr;
277
- this.chrToIndex = chrToIndex;
278
- this.indexToChr = indexToChr;
279
- return [2 /*return*/, (0, sam_1.parseHeaderText)(this.header)];
153
+ getHeader(origOpts = {}) {
154
+ return __awaiter(this, void 0, void 0, function* () {
155
+ const opts = (0, util_1.makeOpts)(origOpts);
156
+ const indexData = yield this.index.parse(opts);
157
+ const ret = indexData.firstDataLine
158
+ ? indexData.firstDataLine.blockPosition + 65535
159
+ : undefined;
160
+ let buffer;
161
+ if (ret) {
162
+ const res = yield this.bam.read(Buffer.alloc(ret + blockLen), 0, ret + blockLen, 0, opts);
163
+ const { bytesRead } = res;
164
+ ({ buffer } = res);
165
+ if (!bytesRead) {
166
+ throw new Error('Error reading header');
280
167
  }
281
- });
282
- });
283
- };
284
- BamFile.prototype.getHeaderText = function (opts) {
285
- if (opts === void 0) { opts = {}; }
286
- return __awaiter(this, void 0, void 0, function () {
287
- return __generator(this, function (_a) {
288
- switch (_a.label) {
289
- case 0: return [4 /*yield*/, this.getHeader(opts)];
290
- case 1:
291
- _a.sent();
292
- return [2 /*return*/, this.header];
168
+ if (bytesRead < ret) {
169
+ buffer = buffer.subarray(0, bytesRead);
293
170
  }
294
- });
171
+ else {
172
+ buffer = buffer.subarray(0, ret);
173
+ }
174
+ }
175
+ else {
176
+ buffer = (yield this.bam.readFile(opts));
177
+ }
178
+ const uncba = yield (0, bgzf_filehandle_1.unzip)(buffer);
179
+ if (uncba.readInt32LE(0) !== exports.BAM_MAGIC) {
180
+ throw new Error('Not a BAM file');
181
+ }
182
+ const headLen = uncba.readInt32LE(4);
183
+ this.header = uncba.toString('utf8', 8, 8 + headLen);
184
+ const { chrToIndex, indexToChr } = yield this._readRefSeqs(headLen + 8, 65535, opts);
185
+ this.chrToIndex = chrToIndex;
186
+ this.indexToChr = indexToChr;
187
+ return (0, sam_1.parseHeaderText)(this.header);
295
188
  });
296
- };
189
+ }
190
+ getHeaderText(opts = {}) {
191
+ return __awaiter(this, void 0, void 0, function* () {
192
+ yield this.getHeader(opts);
193
+ return this.header;
194
+ });
195
+ }
297
196
  // the full length of the refseq block is not given in advance so this grabs
298
197
  // a chunk and doubles it if all refseqs haven't been processed
299
- BamFile.prototype._readRefSeqs = function (start, refSeqBytes, opts) {
300
- if (opts === void 0) { opts = {}; }
301
- return __awaiter(this, void 0, void 0, function () {
302
- var size, _a, bytesRead, buffer, uncba, nRef, p, chrToIndex, indexToChr, i, lName, refName, lRef;
303
- return __generator(this, function (_b) {
304
- switch (_b.label) {
305
- case 0:
306
- if (start > refSeqBytes) {
307
- return [2 /*return*/, this._readRefSeqs(start, refSeqBytes * 2, opts)];
308
- }
309
- size = refSeqBytes + blockLen;
310
- return [4 /*yield*/, this.bam.read(Buffer.alloc(size), 0, refSeqBytes, 0, opts)];
311
- case 1:
312
- _a = _b.sent(), bytesRead = _a.bytesRead, buffer = _a.buffer;
313
- if (!bytesRead) {
314
- throw new Error('Error reading refseqs from header');
315
- }
316
- return [4 /*yield*/, (0, bgzf_filehandle_1.unzip)(buffer.subarray(0, Math.min(bytesRead, refSeqBytes)))];
317
- case 2:
318
- uncba = _b.sent();
319
- nRef = uncba.readInt32LE(start);
320
- p = start + 4;
321
- chrToIndex = {};
322
- indexToChr = [];
323
- for (i = 0; i < nRef; i += 1) {
324
- lName = uncba.readInt32LE(p);
325
- refName = this.renameRefSeq(uncba.toString('utf8', p + 4, p + 4 + lName - 1));
326
- lRef = uncba.readInt32LE(p + lName + 4);
327
- chrToIndex[refName] = i;
328
- indexToChr.push({ refName: refName, length: lRef });
329
- p = p + 8 + lName;
330
- if (p > uncba.length) {
331
- console.warn("BAM header is very big. Re-fetching ".concat(refSeqBytes, " bytes."));
332
- return [2 /*return*/, this._readRefSeqs(start, refSeqBytes * 2, opts)];
333
- }
334
- }
335
- return [2 /*return*/, { chrToIndex: chrToIndex, indexToChr: indexToChr }];
198
+ _readRefSeqs(start, refSeqBytes, opts = {}) {
199
+ return __awaiter(this, void 0, void 0, function* () {
200
+ if (start > refSeqBytes) {
201
+ return this._readRefSeqs(start, refSeqBytes * 2, opts);
202
+ }
203
+ const size = refSeqBytes + blockLen;
204
+ const { bytesRead, buffer } = yield this.bam.read(Buffer.alloc(size), 0, refSeqBytes, 0, opts);
205
+ if (!bytesRead) {
206
+ throw new Error('Error reading refseqs from header');
207
+ }
208
+ const uncba = yield (0, bgzf_filehandle_1.unzip)(buffer.subarray(0, Math.min(bytesRead, refSeqBytes)));
209
+ const nRef = uncba.readInt32LE(start);
210
+ let p = start + 4;
211
+ const chrToIndex = {};
212
+ const indexToChr = [];
213
+ for (let i = 0; i < nRef; i += 1) {
214
+ const lName = uncba.readInt32LE(p);
215
+ const refName = this.renameRefSeq(uncba.toString('utf8', p + 4, p + 4 + lName - 1));
216
+ const lRef = uncba.readInt32LE(p + lName + 4);
217
+ chrToIndex[refName] = i;
218
+ indexToChr.push({ refName, length: lRef });
219
+ p = p + 8 + lName;
220
+ if (p > uncba.length) {
221
+ console.warn(`BAM header is very big. Re-fetching ${refSeqBytes} bytes.`);
222
+ return this._readRefSeqs(start, refSeqBytes * 2, opts);
336
223
  }
337
- });
224
+ }
225
+ return { chrToIndex, indexToChr };
338
226
  });
339
- };
340
- BamFile.prototype.getRecordsForRange = function (chr, min, max, opts) {
341
- if (opts === void 0) { opts = {
342
- viewAsPairs: false,
343
- pairAcrossChr: false,
344
- maxInsertSize: 200000,
345
- }; }
346
- return __awaiter(this, void 0, void 0, function () {
347
- var _a;
348
- return __generator(this, function (_b) {
349
- switch (_b.label) {
350
- case 0:
351
- _a = flat;
352
- return [4 /*yield*/, gen2array(this.streamRecordsForRange(chr, min, max, opts))];
353
- case 1: return [2 /*return*/, _a.apply(void 0, [_b.sent()])];
354
- }
355
- });
227
+ }
228
+ getRecordsForRange(chr, min, max, opts = {
229
+ viewAsPairs: false,
230
+ pairAcrossChr: false,
231
+ maxInsertSize: 200000,
232
+ }) {
233
+ return __awaiter(this, void 0, void 0, function* () {
234
+ return flat(yield gen2array(this.streamRecordsForRange(chr, min, max, opts)));
356
235
  });
357
- };
358
- BamFile.prototype.streamRecordsForRange = function (chr, min, max, opts) {
359
- if (opts === void 0) { opts = {}; }
360
- return __asyncGenerator(this, arguments, function streamRecordsForRange_1() {
361
- var signal, chrId, chunks, i, size, totalSize;
362
- return __generator(this, function (_a) {
363
- switch (_a.label) {
364
- case 0:
365
- signal = opts.signal;
366
- chrId = this.chrToIndex && this.chrToIndex[chr];
367
- if (!!(chrId >= 0)) return [3 /*break*/, 1];
368
- chunks = [];
369
- return [3 /*break*/, 3];
370
- case 1: return [4 /*yield*/, __await(this.index.blocksForRange(chrId, min - 1, max, opts))];
371
- case 2:
372
- chunks = _a.sent();
373
- if (!chunks) {
374
- throw new Error('Error in index fetch');
375
- }
376
- _a.label = 3;
377
- case 3:
378
- i = 0;
379
- _a.label = 4;
380
- case 4:
381
- if (!(i < chunks.length)) return [3 /*break*/, 7];
382
- return [4 /*yield*/, __await((0, util_1.abortBreakPoint)(signal))];
383
- case 5:
384
- _a.sent();
385
- size = chunks[i].fetchedSize();
386
- if (size > this.chunkSizeLimit) {
387
- throw new Error("Too many BAM features. BAM chunk size ".concat(size, " bytes exceeds chunkSizeLimit of ").concat(this.chunkSizeLimit));
388
- }
389
- _a.label = 6;
390
- case 6:
391
- i += 1;
392
- return [3 /*break*/, 4];
393
- case 7:
394
- totalSize = chunks
395
- .map(function (s) { return s.fetchedSize(); })
396
- .reduce(function (a, b) { return a + b; }, 0);
397
- if (totalSize > this.fetchSizeLimit) {
398
- throw new Error("data size of ".concat(totalSize.toLocaleString(), " bytes exceeded fetch size limit of ").concat(this.fetchSizeLimit.toLocaleString(), " bytes"));
399
- }
400
- return [5 /*yield**/, __values(__asyncDelegator(__asyncValues(this._fetchChunkFeatures(chunks, chrId, min, max, opts))))];
401
- case 8: return [4 /*yield*/, __await.apply(void 0, [_a.sent()])];
402
- case 9:
403
- _a.sent();
404
- return [2 /*return*/];
236
+ }
237
+ streamRecordsForRange(chr, min, max, opts = {}) {
238
+ return __asyncGenerator(this, arguments, function* streamRecordsForRange_1() {
239
+ const { signal } = opts;
240
+ const chrId = this.chrToIndex && this.chrToIndex[chr];
241
+ let chunks;
242
+ if (!(chrId >= 0)) {
243
+ chunks = [];
244
+ }
245
+ else {
246
+ chunks = yield __await(this.index.blocksForRange(chrId, min - 1, max, opts));
247
+ if (!chunks) {
248
+ throw new Error('Error in index fetch');
405
249
  }
406
- });
250
+ }
251
+ for (let i = 0; i < chunks.length; i += 1) {
252
+ yield __await((0, util_1.abortBreakPoint)(signal));
253
+ const size = chunks[i].fetchedSize();
254
+ if (size > this.chunkSizeLimit) {
255
+ throw new Error(`Too many BAM features. BAM chunk size ${size} bytes exceeds chunkSizeLimit of ${this.chunkSizeLimit}`);
256
+ }
257
+ }
258
+ const totalSize = chunks
259
+ .map(s => s.fetchedSize())
260
+ .reduce((a, b) => a + b, 0);
261
+ if (totalSize > this.fetchSizeLimit) {
262
+ throw new Error(`data size of ${totalSize.toLocaleString()} bytes exceeded fetch size limit of ${this.fetchSizeLimit.toLocaleString()} bytes`);
263
+ }
264
+ yield __await(yield* __asyncDelegator(__asyncValues(this._fetchChunkFeatures(chunks, chrId, min, max, opts))));
407
265
  });
408
- };
409
- BamFile.prototype._fetchChunkFeatures = function (chunks, chrId, min, max, opts) {
410
- return __asyncGenerator(this, arguments, function _fetchChunkFeatures_1() {
411
- var _a, viewAsPairs, feats, done, i, c, records, recs, i_1, feature;
412
- return __generator(this, function (_b) {
413
- switch (_b.label) {
414
- case 0:
415
- _a = opts.viewAsPairs, viewAsPairs = _a === void 0 ? false : _a;
416
- feats = [];
417
- done = false;
418
- i = 0;
419
- _b.label = 1;
420
- case 1:
421
- if (!(i < chunks.length)) return [3 /*break*/, 6];
422
- c = chunks[i];
423
- return [4 /*yield*/, __await(this.featureCache.get(c.toString(), {
424
- chunk: c,
425
- opts: opts,
426
- }, opts.signal))];
427
- case 2:
428
- records = (_b.sent());
429
- recs = [];
430
- for (i_1 = 0; i_1 < records.length; i_1 += 1) {
431
- feature = records[i_1];
432
- if (feature.seq_id() === chrId) {
433
- if (feature.get('start') >= max) {
434
- // past end of range, can stop iterating
435
- done = true;
436
- break;
437
- }
438
- else if (feature.get('end') >= min) {
439
- // must be in range
440
- recs.push(feature);
441
- }
442
- }
266
+ }
267
+ _fetchChunkFeatures(chunks, chrId, min, max, opts) {
268
+ return __asyncGenerator(this, arguments, function* _fetchChunkFeatures_1() {
269
+ const { viewAsPairs = false } = opts;
270
+ const feats = [];
271
+ let done = false;
272
+ for (let i = 0; i < chunks.length; i++) {
273
+ const c = chunks[i];
274
+ const records = (yield __await(this.featureCache.get(c.toString(), {
275
+ chunk: c,
276
+ opts,
277
+ }, opts.signal)));
278
+ const recs = [];
279
+ for (let i = 0; i < records.length; i += 1) {
280
+ const feature = records[i];
281
+ if (feature.seq_id() === chrId) {
282
+ if (feature.get('start') >= max) {
283
+ // past end of range, can stop iterating
284
+ done = true;
285
+ break;
443
286
  }
444
- feats.push(recs);
445
- return [4 /*yield*/, __await(recs)];
446
- case 3: return [4 /*yield*/, _b.sent()];
447
- case 4:
448
- _b.sent();
449
- if (done) {
450
- return [3 /*break*/, 6];
287
+ else if (feature.get('end') >= min) {
288
+ // must be in range
289
+ recs.push(feature);
451
290
  }
452
- _b.label = 5;
453
- case 5:
454
- i++;
455
- return [3 /*break*/, 1];
456
- case 6:
457
- (0, util_1.checkAbortSignal)(opts.signal);
458
- if (!viewAsPairs) return [3 /*break*/, 9];
459
- return [4 /*yield*/, __await(this.fetchPairs(chrId, feats, opts))];
460
- case 7: return [4 /*yield*/, _b.sent()];
461
- case 8:
462
- _b.sent();
463
- _b.label = 9;
464
- case 9: return [2 /*return*/];
291
+ }
465
292
  }
466
- });
293
+ feats.push(recs);
294
+ yield yield __await(recs);
295
+ if (done) {
296
+ break;
297
+ }
298
+ }
299
+ (0, util_1.checkAbortSignal)(opts.signal);
300
+ if (viewAsPairs) {
301
+ yield yield __await(this.fetchPairs(chrId, feats, opts));
302
+ }
467
303
  });
468
- };
469
- BamFile.prototype.fetchPairs = function (chrId, feats, opts) {
470
- return __awaiter(this, void 0, void 0, function () {
471
- var _a, pairAcrossChr, _b, maxInsertSize, unmatedPairs, readIds, matePromises, mateChunks, _c, mateTotalSize, mateFeatPromises, _d;
472
- var _this = this;
473
- return __generator(this, function (_e) {
474
- switch (_e.label) {
475
- case 0:
476
- _a = opts.pairAcrossChr, pairAcrossChr = _a === void 0 ? false : _a, _b = opts.maxInsertSize, maxInsertSize = _b === void 0 ? 200000 : _b;
477
- unmatedPairs = {};
478
- readIds = {};
479
- feats.map(function (ret) {
480
- var readNames = {};
481
- for (var i = 0; i < ret.length; i++) {
482
- var name_1 = ret[i].name();
483
- var id = ret[i].id();
484
- if (!readNames[name_1]) {
485
- readNames[name_1] = 0;
486
- }
487
- readNames[name_1]++;
488
- readIds[id] = 1;
489
- }
490
- (0, object_entries_ponyfill_1.default)(readNames).forEach(function (_a) {
491
- var k = _a[0], v = _a[1];
492
- if (v === 1) {
493
- unmatedPairs[k] = true;
494
- }
495
- });
496
- });
497
- matePromises = [];
498
- feats.map(function (ret) {
499
- for (var i = 0; i < ret.length; i++) {
500
- var f = ret[i];
501
- var name_2 = f.name();
502
- var start = f.get('start');
503
- var pnext = f._next_pos();
504
- var rnext = f._next_refid();
505
- if (unmatedPairs[name_2] &&
506
- (pairAcrossChr ||
507
- (rnext === chrId && Math.abs(start - pnext) < maxInsertSize))) {
508
- matePromises.push(_this.index.blocksForRange(rnext, pnext, pnext + 1, opts));
509
- }
510
- }
511
- });
512
- _c = flat;
513
- return [4 /*yield*/, Promise.all(matePromises)];
514
- case 1:
515
- mateChunks = _c.apply(void 0, [_e.sent()])
516
- .sort()
517
- .filter(function (item, pos, ary) { return !pos || item.toString() !== ary[pos - 1].toString(); });
518
- mateTotalSize = mateChunks
519
- .map(function (s) { return s.fetchedSize(); })
520
- .reduce(function (a, b) { return a + b; }, 0);
521
- if (mateTotalSize > this.fetchSizeLimit) {
522
- throw new Error("data size of ".concat(mateTotalSize.toLocaleString(), " bytes exceeded fetch size limit of ").concat(this.fetchSizeLimit.toLocaleString(), " bytes"));
523
- }
524
- mateFeatPromises = mateChunks.map(function (c) { return __awaiter(_this, void 0, void 0, function () {
525
- var _a, data, cpositions, dpositions, chunk, feats, mateRecs, i, feature;
526
- return __generator(this, function (_b) {
527
- switch (_b.label) {
528
- case 0: return [4 /*yield*/, this._readChunk({
529
- chunk: c,
530
- opts: opts,
531
- })];
532
- case 1:
533
- _a = _b.sent(), data = _a.data, cpositions = _a.cpositions, dpositions = _a.dpositions, chunk = _a.chunk;
534
- return [4 /*yield*/, this.readBamFeatures(data, cpositions, dpositions, chunk)];
535
- case 2:
536
- feats = _b.sent();
537
- mateRecs = [];
538
- for (i = 0; i < feats.length; i += 1) {
539
- feature = feats[i];
540
- if (unmatedPairs[feature.get('name')] && !readIds[feature.id()]) {
541
- mateRecs.push(feature);
542
- }
543
- }
544
- return [2 /*return*/, mateRecs];
545
- }
546
- });
547
- }); });
548
- _d = flat;
549
- return [4 /*yield*/, Promise.all(mateFeatPromises)];
550
- case 2: return [2 /*return*/, _d.apply(void 0, [_e.sent()])];
304
+ }
305
+ fetchPairs(chrId, feats, opts) {
306
+ return __awaiter(this, void 0, void 0, function* () {
307
+ const { pairAcrossChr = false, maxInsertSize = 200000 } = opts;
308
+ const unmatedPairs = {};
309
+ const readIds = {};
310
+ feats.map(ret => {
311
+ const readNames = {};
312
+ for (let i = 0; i < ret.length; i++) {
313
+ const name = ret[i].name();
314
+ const id = ret[i].id();
315
+ if (!readNames[name]) {
316
+ readNames[name] = 0;
317
+ }
318
+ readNames[name]++;
319
+ readIds[id] = 1;
551
320
  }
321
+ (0, object_entries_ponyfill_1.default)(readNames).forEach(([k, v]) => {
322
+ if (v === 1) {
323
+ unmatedPairs[k] = true;
324
+ }
325
+ });
552
326
  });
553
- });
554
- };
555
- BamFile.prototype._readChunk = function (_a) {
556
- var chunk = _a.chunk, opts = _a.opts;
557
- return __awaiter(this, void 0, void 0, function () {
558
- var size, _b, buffer, bytesRead, _c, data, cpositions, dpositions;
559
- return __generator(this, function (_d) {
560
- switch (_d.label) {
561
- case 0:
562
- size = chunk.fetchedSize();
563
- return [4 /*yield*/, this.bam.read(Buffer.alloc(size), 0, size, chunk.minv.blockPosition, opts)];
564
- case 1:
565
- _b = _d.sent(), buffer = _b.buffer, bytesRead = _b.bytesRead;
566
- return [4 /*yield*/, (0, bgzf_filehandle_1.unzipChunkSlice)(buffer.subarray(0, Math.min(bytesRead, size)), chunk)];
567
- case 2:
568
- _c = _d.sent(), data = _c.buffer, cpositions = _c.cpositions, dpositions = _c.dpositions;
569
- return [2 /*return*/, { data: data, cpositions: cpositions, dpositions: dpositions, chunk: chunk }];
327
+ const matePromises = [];
328
+ feats.map(ret => {
329
+ for (let i = 0; i < ret.length; i++) {
330
+ const f = ret[i];
331
+ const name = f.name();
332
+ const start = f.get('start');
333
+ const pnext = f._next_pos();
334
+ const rnext = f._next_refid();
335
+ if (unmatedPairs[name] &&
336
+ (pairAcrossChr ||
337
+ (rnext === chrId && Math.abs(start - pnext) < maxInsertSize))) {
338
+ matePromises.push(this.index.blocksForRange(rnext, pnext, pnext + 1, opts));
339
+ }
570
340
  }
571
341
  });
342
+ // filter out duplicate chunks (the blocks are lists of chunks, blocks are
343
+ // concatenated, then filter dup chunks)
344
+ const mateChunks = flat(yield Promise.all(matePromises))
345
+ .sort()
346
+ .filter((item, pos, ary) => !pos || item.toString() !== ary[pos - 1].toString());
347
+ const mateTotalSize = mateChunks
348
+ .map(s => s.fetchedSize())
349
+ .reduce((a, b) => a + b, 0);
350
+ if (mateTotalSize > this.fetchSizeLimit) {
351
+ throw new Error(`data size of ${mateTotalSize.toLocaleString()} bytes exceeded fetch size limit of ${this.fetchSizeLimit.toLocaleString()} bytes`);
352
+ }
353
+ const mateFeatPromises = mateChunks.map((c) => __awaiter(this, void 0, void 0, function* () {
354
+ const { data, cpositions, dpositions, chunk } = yield this._readChunk({
355
+ chunk: c,
356
+ opts,
357
+ });
358
+ const feats = yield this.readBamFeatures(data, cpositions, dpositions, chunk);
359
+ const mateRecs = [];
360
+ for (let i = 0; i < feats.length; i += 1) {
361
+ const feature = feats[i];
362
+ if (unmatedPairs[feature.get('name')] && !readIds[feature.id()]) {
363
+ mateRecs.push(feature);
364
+ }
365
+ }
366
+ return mateRecs;
367
+ }));
368
+ return flat(yield Promise.all(mateFeatPromises));
572
369
  });
573
- };
574
- BamFile.prototype.readBamFeatures = function (ba, cpositions, dpositions, chunk) {
575
- return __awaiter(this, void 0, void 0, function () {
576
- var blockStart, sink, pos, last, blockSize, blockEnd, feature;
577
- return __generator(this, function (_a) {
578
- switch (_a.label) {
579
- case 0:
580
- blockStart = 0;
581
- sink = [];
582
- pos = 0;
583
- last = +Date.now();
584
- _a.label = 1;
585
- case 1:
586
- if (!(blockStart + 4 < ba.length)) return [3 /*break*/, 4];
587
- blockSize = ba.readInt32LE(blockStart);
588
- blockEnd = blockStart + 4 + blockSize - 1;
589
- // increment position to the current decompressed status
590
- if (dpositions) {
591
- while (blockStart + chunk.minv.dataPosition >= dpositions[pos++]) { }
592
- pos--;
593
- }
594
- if (!(blockEnd < ba.length)) return [3 /*break*/, 3];
595
- feature = new record_1.default({
596
- bytes: {
597
- byteArray: ba,
598
- start: blockStart,
599
- end: blockEnd,
600
- },
601
- // the below results in an automatically calculated file-offset based ID
602
- // if the info for that is available, otherwise crc32 of the features
603
- //
604
- // cpositions[pos] refers to actual file offset of a bgzip block boundaries
605
- //
606
- // we multiply by (1 <<8) in order to make sure each block has a "unique"
607
- // address space so that data in that block could never overlap
608
- //
609
- // then the blockStart-dpositions is an uncompressed file offset from
610
- // that bgzip block boundary, and since the cpositions are multiplied by
611
- // (1 << 8) these uncompressed offsets get a unique space
612
- //
613
- // this has an extra chunk.minv.dataPosition added on because it blockStart
614
- // starts at 0 instead of chunk.minv.dataPosition
615
- //
616
- // the +1 is just to avoid any possible uniqueId 0 but this does not realistically happen
617
- fileOffset: cpositions
618
- ? cpositions[pos] * (1 << 8) +
619
- (blockStart - dpositions[pos]) +
620
- chunk.minv.dataPosition +
621
- 1
622
- : // must be slice, not subarray for buffer polyfill on web
623
- buffer_crc32_1.default.signed(ba.slice(blockStart, blockEnd)),
624
- });
625
- sink.push(feature);
626
- if (!(this.yieldThreadTime && +Date.now() - last > this.yieldThreadTime)) return [3 /*break*/, 3];
627
- return [4 /*yield*/, (0, util_1.timeout)(1)];
628
- case 2:
629
- _a.sent();
370
+ }
371
+ _readChunk({ chunk, opts }) {
372
+ return __awaiter(this, void 0, void 0, function* () {
373
+ const size = chunk.fetchedSize();
374
+ const { buffer, bytesRead } = yield this.bam.read(Buffer.alloc(size), 0, size, chunk.minv.blockPosition, opts);
375
+ const { buffer: data, cpositions, dpositions, } = yield (0, bgzf_filehandle_1.unzipChunkSlice)(buffer.subarray(0, Math.min(bytesRead, size)), chunk);
376
+ return { data, cpositions, dpositions, chunk };
377
+ });
378
+ }
379
/**
 * Parse serialized BAM alignment records out of a decompressed chunk buffer.
 *
 * @param {Buffer} ba - decompressed BAM record data (read with readInt32LE, so
 *   assumed little-endian per the BAM spec — each record is prefixed by a
 *   4-byte block_size)
 * @param {number[]} cpositions - compressed (file) offsets of bgzip block
 *   boundaries within the chunk, or falsy if unavailable
 * @param {number[]} dpositions - corresponding decompressed offsets of those
 *   bgzip block boundaries
 * @param {object} chunk - the chunk this data came from; chunk.minv.dataPosition
 *   is the decompressed offset where this buffer starts
 * @returns {Promise<Array>} array of parsed record objects
 */
readBamFeatures(ba, cpositions, dpositions, chunk) {
    return __awaiter(this, void 0, void 0, function* () {
        let blockStart = 0;
        const sink = [];
        let pos = 0;
        // timestamp of the last cooperative yield (see yieldThreadTime below)
        let last = +Date.now();
        while (blockStart + 4 < ba.length) {
            const blockSize = ba.readInt32LE(blockStart);
            const blockEnd = blockStart + 4 + blockSize - 1;
            // increment position to the current decompressed status; the inner
            // while deliberately has an empty body — pos++ inside the condition
            // does the advancing, and the trailing pos-- undoes the overshoot
            if (dpositions) {
                while (blockStart + chunk.minv.dataPosition >= dpositions[pos++]) { }
                pos--;
            }
            // only try to read the feature if we have all the bytes for it
            if (blockEnd < ba.length) {
                const feature = new record_1.default({
                    bytes: {
                        byteArray: ba,
                        start: blockStart,
                        end: blockEnd,
                    },
                    // the below results in an automatically calculated file-offset based ID
                    // if the info for that is available, otherwise crc32 of the features
                    //
                    // cpositions[pos] refers to the actual file offset of a bgzip block boundary
                    //
                    // we multiply by (1 << 8) in order to make sure each block has a "unique"
                    // address space so that data in that block could never overlap
                    //
                    // then the blockStart-dpositions is an uncompressed file offset from
                    // that bgzip block boundary, and since the cpositions are multiplied by
                    // (1 << 8) these uncompressed offsets get a unique space
                    //
                    // this has an extra chunk.minv.dataPosition added on because blockStart
                    // starts at 0 instead of chunk.minv.dataPosition
                    //
                    // the +1 is just to avoid any possible uniqueId 0 but this does not
                    // realistically happen
                    fileOffset: cpositions
                        ? cpositions[pos] * (1 << 8) +
                            (blockStart - dpositions[pos]) +
                            chunk.minv.dataPosition +
                            1
                        : // must be slice, not subarray for buffer polyfill on web
                            buffer_crc32_1.default.signed(ba.slice(blockStart, blockEnd)),
                });
                sink.push(feature);
                // cooperatively yield to the event loop during long parses so the
                // thread is not blocked (opt-in via this.yieldThreadTime)
                if (this.yieldThreadTime && +Date.now() - last > this.yieldThreadTime) {
                    yield (0, util_1.timeout)(1);
                    last = +Date.now();
                }
            }
            blockStart = blockEnd + 1;
        }
        return sink;
    });
}
436
+ hasRefSeq(seqName) {
437
+ return __awaiter(this, void 0, void 0, function* () {
438
+ const refId = this.chrToIndex && this.chrToIndex[seqName];
439
+ return this.index.hasRefSeq(refId);
647
440
  });
648
- };
649
- BamFile.prototype.lineCount = function (seqName) {
650
- return __awaiter(this, void 0, void 0, function () {
651
- var refId;
652
- return __generator(this, function (_a) {
653
- refId = this.chrToIndex && this.chrToIndex[seqName];
654
- return [2 /*return*/, this.index.lineCount(refId)];
655
- });
441
+ }
442
+ lineCount(seqName) {
443
+ return __awaiter(this, void 0, void 0, function* () {
444
+ const refId = this.chrToIndex && this.chrToIndex[seqName];
445
+ return this.index.lineCount(refId);
656
446
  });
657
- };
658
- BamFile.prototype.indexCov = function (seqName, start, end) {
659
- return __awaiter(this, void 0, void 0, function () {
660
- var seqId;
661
- return __generator(this, function (_a) {
662
- switch (_a.label) {
663
- case 0: return [4 /*yield*/, this.index.parse()];
664
- case 1:
665
- _a.sent();
666
- seqId = this.chrToIndex && this.chrToIndex[seqName];
667
- return [2 /*return*/, this.index.indexCov(seqId, start, end)];
668
- }
669
- });
447
+ }
448
+ indexCov(seqName, start, end) {
449
+ return __awaiter(this, void 0, void 0, function* () {
450
+ yield this.index.parse();
451
+ const seqId = this.chrToIndex && this.chrToIndex[seqName];
452
+ return this.index.indexCov(seqId, start, end);
670
453
  });
671
- };
672
- BamFile.prototype.blocksForRange = function (seqName, start, end, opts) {
673
- return __awaiter(this, void 0, void 0, function () {
674
- var seqId;
675
- return __generator(this, function (_a) {
676
- switch (_a.label) {
677
- case 0: return [4 /*yield*/, this.index.parse()];
678
- case 1:
679
- _a.sent();
680
- seqId = this.chrToIndex && this.chrToIndex[seqName];
681
- return [2 /*return*/, this.index.blocksForRange(seqId, start, end, opts)];
682
- }
683
- });
454
+ }
455
+ blocksForRange(seqName, start, end, opts) {
456
+ return __awaiter(this, void 0, void 0, function* () {
457
+ yield this.index.parse();
458
+ const seqId = this.chrToIndex && this.chrToIndex[seqName];
459
+ return this.index.blocksForRange(seqId, start, end, opts);
684
460
  });
685
- };
686
- return BamFile;
687
- }());
461
+ }
462
+ }
688
463
  exports.default = BamFile;
689
464
  //# sourceMappingURL=bamFile.js.map