@gmod/bam 1.1.17 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +65 -25
- package/README.md +108 -57
- package/dist/bai.d.ts +34 -15
- package/dist/bai.js +180 -273
- package/dist/bai.js.map +1 -1
- package/dist/bamFile.d.ts +33 -27
- package/dist/bamFile.js +353 -572
- package/dist/bamFile.js.map +1 -1
- package/dist/chunk.d.ts +4 -8
- package/dist/chunk.js +13 -21
- package/dist/chunk.js.map +1 -1
- package/dist/csi.d.ts +74 -10
- package/dist/csi.js +157 -256
- package/dist/csi.js.map +1 -1
- package/dist/errors.js +12 -57
- package/dist/errors.js.map +1 -1
- package/dist/htsget.d.ts +5 -8
- package/dist/htsget.js +120 -209
- package/dist/htsget.js.map +1 -1
- package/dist/index.d.ts +5 -6
- package/dist/index.js +11 -11
- package/dist/index.js.map +1 -1
- package/dist/indexFile.d.ts +0 -6
- package/dist/indexFile.js +3 -77
- package/dist/indexFile.js.map +1 -1
- package/dist/nullIndex.d.ts +7 -0
- package/dist/nullIndex.js +33 -0
- package/dist/nullIndex.js.map +1 -0
- package/dist/record.d.ts +2 -2
- package/dist/record.js +200 -193
- package/dist/record.js.map +1 -1
- package/dist/sam.js +12 -10
- package/dist/sam.js.map +1 -1
- package/dist/util.d.ts +13 -1
- package/dist/util.js +55 -58
- package/dist/util.js.map +1 -1
- package/dist/virtualOffset.js +13 -20
- package/dist/virtualOffset.js.map +1 -1
- package/esm/bai.d.ts +34 -15
- package/esm/bai.js +86 -91
- package/esm/bai.js.map +1 -1
- package/esm/bamFile.d.ts +33 -27
- package/esm/bamFile.js +124 -120
- package/esm/bamFile.js.map +1 -1
- package/esm/chunk.d.ts +4 -8
- package/esm/chunk.js +2 -8
- package/esm/chunk.js.map +1 -1
- package/esm/csi.d.ts +74 -10
- package/esm/csi.js +85 -93
- package/esm/csi.js.map +1 -1
- package/esm/htsget.d.ts +5 -8
- package/esm/htsget.js +68 -43
- package/esm/htsget.js.map +1 -1
- package/esm/index.d.ts +5 -6
- package/esm/index.js +5 -6
- package/esm/index.js.map +1 -1
- package/esm/indexFile.d.ts +0 -6
- package/esm/indexFile.js +0 -22
- package/esm/indexFile.js.map +1 -1
- package/esm/nullIndex.d.ts +7 -0
- package/esm/nullIndex.js +16 -0
- package/esm/nullIndex.js.map +1 -0
- package/esm/record.d.ts +2 -2
- package/esm/record.js +34 -24
- package/esm/record.js.map +1 -1
- package/esm/sam.js +9 -7
- package/esm/sam.js.map +1 -1
- package/esm/util.d.ts +13 -1
- package/esm/util.js +40 -14
- package/esm/util.js.map +1 -1
- package/package.json +19 -20
- package/src/bai.ts +99 -102
- package/src/bamFile.ts +174 -198
- package/src/chunk.ts +6 -20
- package/src/csi.ts +102 -111
- package/src/htsget.ts +81 -61
- package/src/index.ts +5 -7
- package/src/indexFile.ts +0 -27
- package/src/nullIndex.ts +18 -0
- package/src/record.ts +34 -24
- package/src/sam.ts +9 -7
- package/src/util.ts +54 -13
- package/src/declare.d.ts +0 -2
package/src/bamFile.ts
CHANGED
|
@@ -1,80 +1,76 @@
|
|
|
1
|
+
import { Buffer } from 'buffer'
|
|
1
2
|
import crc32 from 'buffer-crc32'
|
|
2
3
|
import { unzip, unzipChunkSlice } from '@gmod/bgzf-filehandle'
|
|
3
|
-
import entries from 'object.entries-ponyfill'
|
|
4
4
|
import { LocalFile, RemoteFile, GenericFilehandle } from 'generic-filehandle'
|
|
5
5
|
import AbortablePromiseCache from 'abortable-promise-cache'
|
|
6
6
|
import QuickLRU from 'quick-lru'
|
|
7
|
-
|
|
7
|
+
|
|
8
|
+
// locals
|
|
8
9
|
import BAI from './bai'
|
|
9
10
|
import CSI from './csi'
|
|
10
11
|
import Chunk from './chunk'
|
|
11
12
|
import BAMFeature from './record'
|
|
12
|
-
import IndexFile from './indexFile'
|
|
13
13
|
import { parseHeaderText } from './sam'
|
|
14
|
-
import {
|
|
15
|
-
abortBreakPoint,
|
|
16
|
-
checkAbortSignal,
|
|
17
|
-
timeout,
|
|
18
|
-
makeOpts,
|
|
19
|
-
BamOpts,
|
|
20
|
-
BaseOpts,
|
|
21
|
-
} from './util'
|
|
14
|
+
import { checkAbortSignal, timeout, makeOpts, BamOpts, BaseOpts } from './util'
|
|
22
15
|
|
|
23
16
|
export const BAM_MAGIC = 21840194
|
|
24
17
|
|
|
25
18
|
const blockLen = 1 << 16
|
|
26
19
|
|
|
27
|
-
function
|
|
28
|
-
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
async function gen2array<T>(gen: AsyncIterable<T>): Promise<T[]> {
|
|
32
|
-
const out: T[] = []
|
|
20
|
+
async function gen2array<T>(gen: AsyncIterable<T[]>): Promise<T[]> {
|
|
21
|
+
let out: T[] = []
|
|
33
22
|
for await (const x of gen) {
|
|
34
|
-
out.
|
|
23
|
+
out = out.concat(x)
|
|
35
24
|
}
|
|
36
25
|
return out
|
|
37
26
|
}
|
|
38
27
|
|
|
28
|
+
interface Args {
|
|
29
|
+
chunk: Chunk
|
|
30
|
+
opts: BaseOpts
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
class NullFilehandle {
|
|
34
|
+
public read(): Promise<any> {
|
|
35
|
+
throw new Error('never called')
|
|
36
|
+
}
|
|
37
|
+
public stat(): Promise<any> {
|
|
38
|
+
throw new Error('never called')
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
public readFile(): Promise<any> {
|
|
42
|
+
throw new Error('never called')
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
public close(): Promise<any> {
|
|
46
|
+
throw new Error('never called')
|
|
47
|
+
}
|
|
48
|
+
}
|
|
39
49
|
export default class BamFile {
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
private featureCache = new AbortablePromiseCache({
|
|
51
|
-
//@ts-ignore
|
|
50
|
+
public renameRefSeq: (a: string) => string
|
|
51
|
+
public bam: GenericFilehandle
|
|
52
|
+
public header?: string
|
|
53
|
+
public chrToIndex?: Record<string, number>
|
|
54
|
+
public indexToChr?: { refName: string; length: number }[]
|
|
55
|
+
public yieldThreadTime: number
|
|
56
|
+
public index?: BAI | CSI
|
|
57
|
+
public htsget = false
|
|
58
|
+
public headerP?: ReturnType<BamFile['getHeaderPre']>
|
|
59
|
+
|
|
60
|
+
private featureCache = new AbortablePromiseCache<Args, BAMFeature[]>({
|
|
52
61
|
cache: new QuickLRU({
|
|
53
62
|
maxSize: 50,
|
|
54
63
|
}),
|
|
55
|
-
|
|
56
|
-
|
|
64
|
+
fill: async (args: Args, signal) => {
|
|
65
|
+
const { chunk, opts } = args
|
|
57
66
|
const { data, cpositions, dpositions } = await this._readChunk({
|
|
58
67
|
chunk,
|
|
59
68
|
opts: { ...opts, signal },
|
|
60
69
|
})
|
|
61
|
-
|
|
62
|
-
data,
|
|
63
|
-
cpositions,
|
|
64
|
-
dpositions,
|
|
65
|
-
chunk,
|
|
66
|
-
)
|
|
67
|
-
return feats
|
|
70
|
+
return this.readBamFeatures(data, cpositions, dpositions, chunk)
|
|
68
71
|
},
|
|
69
72
|
})
|
|
70
73
|
|
|
71
|
-
/**
|
|
72
|
-
* @param {object} args
|
|
73
|
-
* @param {string} [args.bamPath]
|
|
74
|
-
* @param {FileHandle} [args.bamFilehandle]
|
|
75
|
-
* @param {string} [args.baiPath]
|
|
76
|
-
* @param {FileHandle} [args.baiFilehandle]
|
|
77
|
-
*/
|
|
78
74
|
constructor({
|
|
79
75
|
bamFilehandle,
|
|
80
76
|
bamPath,
|
|
@@ -85,8 +81,7 @@ export default class BamFile {
|
|
|
85
81
|
csiPath,
|
|
86
82
|
csiFilehandle,
|
|
87
83
|
csiUrl,
|
|
88
|
-
|
|
89
|
-
chunkSizeLimit,
|
|
84
|
+
htsget,
|
|
90
85
|
yieldThreadTime = 100,
|
|
91
86
|
renameRefSeqs = n => n,
|
|
92
87
|
}: {
|
|
@@ -99,10 +94,9 @@ export default class BamFile {
|
|
|
99
94
|
csiPath?: string
|
|
100
95
|
csiFilehandle?: GenericFilehandle
|
|
101
96
|
csiUrl?: string
|
|
102
|
-
fetchSizeLimit?: number
|
|
103
|
-
chunkSizeLimit?: number
|
|
104
97
|
renameRefSeqs?: (a: string) => string
|
|
105
98
|
yieldThreadTime?: number
|
|
99
|
+
htsget?: boolean
|
|
106
100
|
}) {
|
|
107
101
|
this.renameRefSeq = renameRefSeqs
|
|
108
102
|
|
|
@@ -112,6 +106,9 @@ export default class BamFile {
|
|
|
112
106
|
this.bam = new LocalFile(bamPath)
|
|
113
107
|
} else if (bamUrl) {
|
|
114
108
|
this.bam = new RemoteFile(bamUrl)
|
|
109
|
+
} else if (htsget) {
|
|
110
|
+
this.htsget = true
|
|
111
|
+
this.bam = new NullFilehandle()
|
|
115
112
|
} else {
|
|
116
113
|
throw new Error('unable to initialize bam')
|
|
117
114
|
}
|
|
@@ -131,40 +128,31 @@ export default class BamFile {
|
|
|
131
128
|
this.index = new BAI({ filehandle: new LocalFile(`${bamPath}.bai`) })
|
|
132
129
|
} else if (bamUrl) {
|
|
133
130
|
this.index = new BAI({ filehandle: new RemoteFile(`${bamUrl}.bai`) })
|
|
131
|
+
} else if (htsget) {
|
|
132
|
+
this.htsget = true
|
|
134
133
|
} else {
|
|
135
134
|
throw new Error('unable to infer index format')
|
|
136
135
|
}
|
|
137
|
-
this.fetchSizeLimit = fetchSizeLimit || 500000000 // 500MB
|
|
138
|
-
this.chunkSizeLimit = chunkSizeLimit || 300000000 // 300MB
|
|
139
136
|
this.yieldThreadTime = yieldThreadTime
|
|
140
137
|
}
|
|
141
138
|
|
|
142
|
-
async
|
|
139
|
+
async getHeaderPre(origOpts?: BaseOpts) {
|
|
143
140
|
const opts = makeOpts(origOpts)
|
|
141
|
+
if (!this.index) {
|
|
142
|
+
return
|
|
143
|
+
}
|
|
144
144
|
const indexData = await this.index.parse(opts)
|
|
145
145
|
const ret = indexData.firstDataLine
|
|
146
146
|
? indexData.firstDataLine.blockPosition + 65535
|
|
147
147
|
: undefined
|
|
148
148
|
let buffer
|
|
149
149
|
if (ret) {
|
|
150
|
-
const
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
ret + blockLen,
|
|
154
|
-
0,
|
|
155
|
-
opts,
|
|
156
|
-
)
|
|
157
|
-
|
|
158
|
-
const { bytesRead } = res
|
|
159
|
-
;({ buffer } = res)
|
|
160
|
-
if (!bytesRead) {
|
|
150
|
+
const s = ret + blockLen
|
|
151
|
+
const res = await this.bam.read(Buffer.alloc(s), 0, s, 0, opts)
|
|
152
|
+
if (!res.bytesRead) {
|
|
161
153
|
throw new Error('Error reading header')
|
|
162
154
|
}
|
|
163
|
-
|
|
164
|
-
buffer = buffer.subarray(0, bytesRead)
|
|
165
|
-
} else {
|
|
166
|
-
buffer = buffer.subarray(0, ret)
|
|
167
|
-
}
|
|
155
|
+
buffer = res.buffer.subarray(0, Math.min(res.bytesRead, ret))
|
|
168
156
|
} else {
|
|
169
157
|
buffer = (await this.bam.readFile(opts)) as Buffer
|
|
170
158
|
}
|
|
@@ -188,6 +176,16 @@ export default class BamFile {
|
|
|
188
176
|
return parseHeaderText(this.header)
|
|
189
177
|
}
|
|
190
178
|
|
|
179
|
+
getHeader(opts?: BaseOpts) {
|
|
180
|
+
if (!this.headerP) {
|
|
181
|
+
this.headerP = this.getHeaderPre(opts).catch(e => {
|
|
182
|
+
this.headerP = undefined
|
|
183
|
+
throw e
|
|
184
|
+
})
|
|
185
|
+
}
|
|
186
|
+
return this.headerP
|
|
187
|
+
}
|
|
188
|
+
|
|
191
189
|
async getHeaderText(opts: BaseOpts = {}) {
|
|
192
190
|
await this.getHeader(opts)
|
|
193
191
|
return this.header
|
|
@@ -198,7 +196,7 @@ export default class BamFile {
|
|
|
198
196
|
async _readRefSeqs(
|
|
199
197
|
start: number,
|
|
200
198
|
refSeqBytes: number,
|
|
201
|
-
opts
|
|
199
|
+
opts?: BaseOpts,
|
|
202
200
|
): Promise<{
|
|
203
201
|
chrToIndex: { [key: string]: number }
|
|
204
202
|
indexToChr: { refName: string; length: number }[]
|
|
@@ -249,55 +247,25 @@ export default class BamFile {
|
|
|
249
247
|
chr: string,
|
|
250
248
|
min: number,
|
|
251
249
|
max: number,
|
|
252
|
-
opts
|
|
253
|
-
viewAsPairs: false,
|
|
254
|
-
pairAcrossChr: false,
|
|
255
|
-
maxInsertSize: 200000,
|
|
256
|
-
},
|
|
250
|
+
opts?: BamOpts,
|
|
257
251
|
) {
|
|
258
|
-
return
|
|
259
|
-
await gen2array(this.streamRecordsForRange(chr, min, max, opts)),
|
|
260
|
-
)
|
|
252
|
+
return gen2array(this.streamRecordsForRange(chr, min, max, opts))
|
|
261
253
|
}
|
|
262
254
|
|
|
263
255
|
async *streamRecordsForRange(
|
|
264
256
|
chr: string,
|
|
265
257
|
min: number,
|
|
266
258
|
max: number,
|
|
267
|
-
opts
|
|
259
|
+
opts?: BamOpts,
|
|
268
260
|
) {
|
|
269
|
-
|
|
270
|
-
const chrId = this.chrToIndex
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
chunks = []
|
|
261
|
+
await this.getHeader(opts)
|
|
262
|
+
const chrId = this.chrToIndex?.[chr]
|
|
263
|
+
if (chrId === undefined || !this.index) {
|
|
264
|
+
yield []
|
|
274
265
|
} else {
|
|
275
|
-
chunks = await this.index.blocksForRange(chrId, min - 1, max, opts)
|
|
276
|
-
|
|
277
|
-
if (!chunks) {
|
|
278
|
-
throw new Error('Error in index fetch')
|
|
279
|
-
}
|
|
280
|
-
}
|
|
281
|
-
|
|
282
|
-
for (let i = 0; i < chunks.length; i += 1) {
|
|
283
|
-
await abortBreakPoint(signal)
|
|
284
|
-
const size = chunks[i].fetchedSize()
|
|
285
|
-
if (size > this.chunkSizeLimit) {
|
|
286
|
-
throw new Error(
|
|
287
|
-
`Too many BAM features. BAM chunk size ${size} bytes exceeds chunkSizeLimit of ${this.chunkSizeLimit}`,
|
|
288
|
-
)
|
|
289
|
-
}
|
|
290
|
-
}
|
|
291
|
-
|
|
292
|
-
const totalSize = chunks
|
|
293
|
-
.map(s => s.fetchedSize())
|
|
294
|
-
.reduce((a, b) => a + b, 0)
|
|
295
|
-
if (totalSize > this.fetchSizeLimit) {
|
|
296
|
-
throw new Error(
|
|
297
|
-
`data size of ${totalSize.toLocaleString()} bytes exceeded fetch size limit of ${this.fetchSizeLimit.toLocaleString()} bytes`,
|
|
298
|
-
)
|
|
266
|
+
const chunks = await this.index.blocksForRange(chrId, min - 1, max, opts)
|
|
267
|
+
yield* this._fetchChunkFeatures(chunks, chrId, min, max, opts)
|
|
299
268
|
}
|
|
300
|
-
yield* this._fetchChunkFeatures(chunks, chrId, min, max, opts)
|
|
301
269
|
}
|
|
302
270
|
|
|
303
271
|
async *_fetchChunkFeatures(
|
|
@@ -305,26 +273,21 @@ export default class BamFile {
|
|
|
305
273
|
chrId: number,
|
|
306
274
|
min: number,
|
|
307
275
|
max: number,
|
|
308
|
-
opts: BamOpts,
|
|
276
|
+
opts: BamOpts = {},
|
|
309
277
|
) {
|
|
310
|
-
const { viewAsPairs
|
|
311
|
-
const feats = []
|
|
278
|
+
const { viewAsPairs } = opts
|
|
279
|
+
const feats = [] as BAMFeature[][]
|
|
312
280
|
let done = false
|
|
313
281
|
|
|
314
|
-
for (
|
|
315
|
-
const
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
{
|
|
319
|
-
chunk: c,
|
|
320
|
-
opts,
|
|
321
|
-
},
|
|
282
|
+
for (const chunk of chunks) {
|
|
283
|
+
const records = await this.featureCache.get(
|
|
284
|
+
chunk.toString(),
|
|
285
|
+
{ chunk, opts },
|
|
322
286
|
opts.signal,
|
|
323
|
-
)
|
|
287
|
+
)
|
|
324
288
|
|
|
325
|
-
const recs = []
|
|
326
|
-
for (
|
|
327
|
-
const feature = records[i]
|
|
289
|
+
const recs = [] as BAMFeature[]
|
|
290
|
+
for (const feature of records) {
|
|
328
291
|
if (feature.seq_id() === chrId) {
|
|
329
292
|
if (feature.get('start') >= max) {
|
|
330
293
|
// past end of range, can stop iterating
|
|
@@ -350,36 +313,36 @@ export default class BamFile {
|
|
|
350
313
|
}
|
|
351
314
|
|
|
352
315
|
async fetchPairs(chrId: number, feats: BAMFeature[][], opts: BamOpts) {
|
|
353
|
-
const { pairAcrossChr
|
|
316
|
+
const { pairAcrossChr, maxInsertSize = 200000 } = opts
|
|
354
317
|
const unmatedPairs: { [key: string]: boolean } = {}
|
|
355
318
|
const readIds: { [key: string]: number } = {}
|
|
356
319
|
feats.map(ret => {
|
|
357
320
|
const readNames: { [key: string]: number } = {}
|
|
358
|
-
for (
|
|
359
|
-
const name =
|
|
360
|
-
const id =
|
|
321
|
+
for (const element of ret) {
|
|
322
|
+
const name = element.name()
|
|
323
|
+
const id = element.id()
|
|
361
324
|
if (!readNames[name]) {
|
|
362
325
|
readNames[name] = 0
|
|
363
326
|
}
|
|
364
327
|
readNames[name]++
|
|
365
328
|
readIds[id] = 1
|
|
366
329
|
}
|
|
367
|
-
|
|
330
|
+
for (const [k, v] of Object.entries(readNames)) {
|
|
368
331
|
if (v === 1) {
|
|
369
332
|
unmatedPairs[k] = true
|
|
370
333
|
}
|
|
371
|
-
}
|
|
334
|
+
}
|
|
372
335
|
})
|
|
373
336
|
|
|
374
337
|
const matePromises: Promise<Chunk[]>[] = []
|
|
375
338
|
feats.map(ret => {
|
|
376
|
-
for (
|
|
377
|
-
const f = ret[i]
|
|
339
|
+
for (const f of ret) {
|
|
378
340
|
const name = f.name()
|
|
379
341
|
const start = f.get('start')
|
|
380
342
|
const pnext = f._next_pos()
|
|
381
343
|
const rnext = f._next_refid()
|
|
382
344
|
if (
|
|
345
|
+
this.index &&
|
|
383
346
|
unmatedPairs[name] &&
|
|
384
347
|
(pairAcrossChr ||
|
|
385
348
|
(rnext === chrId && Math.abs(start - pnext) < maxInsertSize))
|
|
@@ -393,50 +356,53 @@ export default class BamFile {
|
|
|
393
356
|
|
|
394
357
|
// filter out duplicate chunks (the blocks are lists of chunks, blocks are
|
|
395
358
|
// concatenated, then filter dup chunks)
|
|
396
|
-
const
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
const mateTotalSize = mateChunks
|
|
403
|
-
.map(s => s.fetchedSize())
|
|
404
|
-
.reduce((a, b) => a + b, 0)
|
|
405
|
-
if (mateTotalSize > this.fetchSizeLimit) {
|
|
406
|
-
throw new Error(
|
|
407
|
-
`data size of ${mateTotalSize.toLocaleString()} bytes exceeded fetch size limit of ${this.fetchSizeLimit.toLocaleString()} bytes`,
|
|
408
|
-
)
|
|
359
|
+
const map = new Map<string, Chunk>()
|
|
360
|
+
const res = await Promise.all(matePromises)
|
|
361
|
+
for (const m of res.flat()) {
|
|
362
|
+
if (!map.has(m.toString())) {
|
|
363
|
+
map.set(m.toString(), m)
|
|
364
|
+
}
|
|
409
365
|
}
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
366
|
+
|
|
367
|
+
const mateFeatPromises = await Promise.all(
|
|
368
|
+
[...map.values()].map(async c => {
|
|
369
|
+
const { data, cpositions, dpositions, chunk } = await this._readChunk({
|
|
370
|
+
chunk: c,
|
|
371
|
+
opts,
|
|
372
|
+
})
|
|
373
|
+
const mateRecs = [] as BAMFeature[]
|
|
374
|
+
for (const feature of await this.readBamFeatures(
|
|
375
|
+
data,
|
|
376
|
+
cpositions,
|
|
377
|
+
dpositions,
|
|
378
|
+
chunk,
|
|
379
|
+
)) {
|
|
380
|
+
if (unmatedPairs[feature.get('name')] && !readIds[feature.id()]) {
|
|
381
|
+
mateRecs.push(feature)
|
|
382
|
+
}
|
|
426
383
|
}
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
return flat(
|
|
384
|
+
return mateRecs
|
|
385
|
+
}),
|
|
386
|
+
)
|
|
387
|
+
return mateFeatPromises.flat()
|
|
431
388
|
}
|
|
432
389
|
|
|
433
|
-
async
|
|
434
|
-
const
|
|
435
|
-
const { buffer, bytesRead } = await this.bam.read(
|
|
390
|
+
async _readRegion(position: number, size: number, opts: BaseOpts = {}) {
|
|
391
|
+
const { bytesRead, buffer } = await this.bam.read(
|
|
436
392
|
Buffer.alloc(size),
|
|
437
393
|
0,
|
|
438
394
|
size,
|
|
395
|
+
position,
|
|
396
|
+
opts,
|
|
397
|
+
)
|
|
398
|
+
|
|
399
|
+
return buffer.subarray(0, Math.min(bytesRead, size))
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
async _readChunk({ chunk, opts }: { chunk: Chunk; opts: BaseOpts }) {
|
|
403
|
+
const buffer = await this._readRegion(
|
|
439
404
|
chunk.minv.blockPosition,
|
|
405
|
+
chunk.fetchedSize(),
|
|
440
406
|
opts,
|
|
441
407
|
)
|
|
442
408
|
|
|
@@ -444,10 +410,7 @@ export default class BamFile {
|
|
|
444
410
|
buffer: data,
|
|
445
411
|
cpositions,
|
|
446
412
|
dpositions,
|
|
447
|
-
} = await unzipChunkSlice(
|
|
448
|
-
buffer.subarray(0, Math.min(bytesRead, size)),
|
|
449
|
-
chunk,
|
|
450
|
-
)
|
|
413
|
+
} = await unzipChunkSlice(buffer, chunk)
|
|
451
414
|
return { data, cpositions, dpositions, chunk }
|
|
452
415
|
}
|
|
453
416
|
|
|
@@ -458,7 +421,7 @@ export default class BamFile {
|
|
|
458
421
|
chunk: Chunk,
|
|
459
422
|
) {
|
|
460
423
|
let blockStart = 0
|
|
461
|
-
const sink = []
|
|
424
|
+
const sink = [] as BAMFeature[]
|
|
462
425
|
let pos = 0
|
|
463
426
|
let last = +Date.now()
|
|
464
427
|
|
|
@@ -480,29 +443,34 @@ export default class BamFile {
|
|
|
480
443
|
start: blockStart,
|
|
481
444
|
end: blockEnd,
|
|
482
445
|
},
|
|
483
|
-
// the below results in an automatically calculated file-offset based
|
|
484
|
-
// if the info for that is available, otherwise crc32 of the
|
|
446
|
+
// the below results in an automatically calculated file-offset based
|
|
447
|
+
// ID if the info for that is available, otherwise crc32 of the
|
|
448
|
+
// features
|
|
485
449
|
//
|
|
486
|
-
// cpositions[pos] refers to actual file offset of a bgzip block
|
|
450
|
+
// cpositions[pos] refers to actual file offset of a bgzip block
|
|
451
|
+
// boundaries
|
|
487
452
|
//
|
|
488
|
-
// we multiply by (1 <<8) in order to make sure each block has a
|
|
489
|
-
// address space so that data in that block could never
|
|
453
|
+
// we multiply by (1 <<8) in order to make sure each block has a
|
|
454
|
+
// "unique" address space so that data in that block could never
|
|
455
|
+
// overlap
|
|
490
456
|
//
|
|
491
457
|
// then the blockStart-dpositions is an uncompressed file offset from
|
|
492
|
-
// that bgzip block boundary, and since the cpositions are multiplied
|
|
493
|
-
// (1 << 8) these uncompressed offsets get a unique space
|
|
458
|
+
// that bgzip block boundary, and since the cpositions are multiplied
|
|
459
|
+
// by (1 << 8) these uncompressed offsets get a unique space
|
|
494
460
|
//
|
|
495
|
-
// this has an extra chunk.minv.dataPosition added on because it
|
|
496
|
-
// starts at 0 instead of chunk.minv.dataPosition
|
|
461
|
+
// this has an extra chunk.minv.dataPosition added on because it
|
|
462
|
+
// blockStart starts at 0 instead of chunk.minv.dataPosition
|
|
497
463
|
//
|
|
498
|
-
// the +1 is just to avoid any possible uniqueId 0 but this does not
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
464
|
+
// the +1 is just to avoid any possible uniqueId 0 but this does not
|
|
465
|
+
// realistically happen
|
|
466
|
+
fileOffset:
|
|
467
|
+
cpositions.length > 0
|
|
468
|
+
? cpositions[pos] * (1 << 8) +
|
|
469
|
+
(blockStart - dpositions[pos]) +
|
|
470
|
+
chunk.minv.dataPosition +
|
|
471
|
+
1
|
|
472
|
+
: // must be slice, not subarray for buffer polyfill on web
|
|
473
|
+
crc32.signed(ba.slice(blockStart, blockEnd)),
|
|
506
474
|
})
|
|
507
475
|
|
|
508
476
|
sink.push(feature)
|
|
@@ -518,19 +486,22 @@ export default class BamFile {
|
|
|
518
486
|
}
|
|
519
487
|
|
|
520
488
|
async hasRefSeq(seqName: string) {
|
|
521
|
-
const
|
|
522
|
-
return this.index
|
|
489
|
+
const seqId = this.chrToIndex?.[seqName]
|
|
490
|
+
return seqId === undefined ? false : this.index?.hasRefSeq(seqId)
|
|
523
491
|
}
|
|
524
492
|
|
|
525
493
|
async lineCount(seqName: string) {
|
|
526
|
-
const
|
|
527
|
-
return this.index.lineCount(
|
|
494
|
+
const seqId = this.chrToIndex?.[seqName]
|
|
495
|
+
return seqId === undefined || !this.index ? 0 : this.index.lineCount(seqId)
|
|
528
496
|
}
|
|
529
497
|
|
|
530
498
|
async indexCov(seqName: string, start?: number, end?: number) {
|
|
499
|
+
if (!this.index) {
|
|
500
|
+
return []
|
|
501
|
+
}
|
|
531
502
|
await this.index.parse()
|
|
532
|
-
const seqId = this.chrToIndex
|
|
533
|
-
return this.index.indexCov(seqId, start, end)
|
|
503
|
+
const seqId = this.chrToIndex?.[seqName]
|
|
504
|
+
return seqId === undefined ? [] : this.index.indexCov(seqId, start, end)
|
|
534
505
|
}
|
|
535
506
|
|
|
536
507
|
async blocksForRange(
|
|
@@ -539,8 +510,13 @@ export default class BamFile {
|
|
|
539
510
|
end: number,
|
|
540
511
|
opts?: BaseOpts,
|
|
541
512
|
) {
|
|
513
|
+
if (!this.index) {
|
|
514
|
+
return []
|
|
515
|
+
}
|
|
542
516
|
await this.index.parse()
|
|
543
|
-
const seqId = this.chrToIndex
|
|
544
|
-
return
|
|
517
|
+
const seqId = this.chrToIndex?.[seqName]
|
|
518
|
+
return seqId === undefined
|
|
519
|
+
? []
|
|
520
|
+
: this.index.blocksForRange(seqId, start, end, opts)
|
|
545
521
|
}
|
|
546
522
|
}
|
package/src/chunk.ts
CHANGED
|
@@ -2,28 +2,14 @@ import VirtualOffset from './virtualOffset'
|
|
|
2
2
|
|
|
3
3
|
// little class representing a chunk in the index
|
|
4
4
|
export default class Chunk {
|
|
5
|
-
public
|
|
6
|
-
public maxv: VirtualOffset
|
|
7
|
-
public bin: number
|
|
8
|
-
public _fetchedSize?: number
|
|
5
|
+
public buffer?: Buffer
|
|
9
6
|
|
|
10
|
-
/**
|
|
11
|
-
* @param {VirtualOffset} minv
|
|
12
|
-
* @param {VirtualOffset} maxv
|
|
13
|
-
* @param {number} bin
|
|
14
|
-
* @param {number} [fetchedSize]
|
|
15
|
-
*/
|
|
16
7
|
constructor(
|
|
17
|
-
minv: VirtualOffset,
|
|
18
|
-
maxv: VirtualOffset,
|
|
19
|
-
bin: number,
|
|
20
|
-
|
|
21
|
-
) {
|
|
22
|
-
this.minv = minv
|
|
23
|
-
this.maxv = maxv
|
|
24
|
-
this.bin = bin
|
|
25
|
-
this._fetchedSize = fetchedSize
|
|
26
|
-
}
|
|
8
|
+
public minv: VirtualOffset,
|
|
9
|
+
public maxv: VirtualOffset,
|
|
10
|
+
public bin: number,
|
|
11
|
+
public _fetchedSize?: number,
|
|
12
|
+
) {}
|
|
27
13
|
|
|
28
14
|
toUniqueString() {
|
|
29
15
|
return `${this.minv}..${this.maxv} (bin ${
|