@gmod/bam 1.1.18 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +61 -25
- package/README.md +95 -57
- package/dist/bai.d.ts +34 -15
- package/dist/bai.js +87 -91
- package/dist/bai.js.map +1 -1
- package/dist/bamFile.d.ts +33 -27
- package/dist/bamFile.js +127 -121
- package/dist/bamFile.js.map +1 -1
- package/dist/chunk.d.ts +4 -8
- package/dist/chunk.js +2 -8
- package/dist/chunk.js.map +1 -1
- package/dist/csi.d.ts +74 -10
- package/dist/csi.js +78 -90
- package/dist/csi.js.map +1 -1
- package/dist/htsget.d.ts +5 -8
- package/dist/htsget.js +72 -47
- package/dist/htsget.js.map +1 -1
- package/dist/index.d.ts +5 -6
- package/dist/index.js +11 -11
- package/dist/index.js.map +1 -1
- package/dist/indexFile.d.ts +0 -6
- package/dist/indexFile.js +0 -35
- package/dist/indexFile.js.map +1 -1
- package/dist/nullIndex.d.ts +7 -0
- package/dist/nullIndex.js +33 -0
- package/dist/nullIndex.js.map +1 -0
- package/dist/record.d.ts +2 -2
- package/dist/record.js +34 -24
- package/dist/record.js.map +1 -1
- package/dist/sam.js +9 -7
- package/dist/sam.js.map +1 -1
- package/dist/util.d.ts +13 -1
- package/dist/util.js +47 -15
- package/dist/util.js.map +1 -1
- package/esm/bai.d.ts +34 -15
- package/esm/bai.js +86 -91
- package/esm/bai.js.map +1 -1
- package/esm/bamFile.d.ts +33 -27
- package/esm/bamFile.js +124 -120
- package/esm/bamFile.js.map +1 -1
- package/esm/chunk.d.ts +4 -8
- package/esm/chunk.js +2 -8
- package/esm/chunk.js.map +1 -1
- package/esm/csi.d.ts +74 -10
- package/esm/csi.js +85 -93
- package/esm/csi.js.map +1 -1
- package/esm/htsget.d.ts +5 -8
- package/esm/htsget.js +68 -43
- package/esm/htsget.js.map +1 -1
- package/esm/index.d.ts +5 -6
- package/esm/index.js +5 -6
- package/esm/index.js.map +1 -1
- package/esm/indexFile.d.ts +0 -6
- package/esm/indexFile.js +0 -22
- package/esm/indexFile.js.map +1 -1
- package/esm/nullIndex.d.ts +7 -0
- package/esm/nullIndex.js +16 -0
- package/esm/nullIndex.js.map +1 -0
- package/esm/record.d.ts +2 -2
- package/esm/record.js +34 -24
- package/esm/record.js.map +1 -1
- package/esm/sam.js +9 -7
- package/esm/sam.js.map +1 -1
- package/esm/util.d.ts +13 -1
- package/esm/util.js +40 -14
- package/esm/util.js.map +1 -1
- package/package.json +16 -17
- package/src/bai.ts +99 -102
- package/src/bamFile.ts +174 -198
- package/src/chunk.ts +6 -20
- package/src/csi.ts +102 -111
- package/src/htsget.ts +81 -61
- package/src/index.ts +5 -7
- package/src/indexFile.ts +0 -27
- package/src/nullIndex.ts +18 -0
- package/src/record.ts +34 -24
- package/src/sam.ts +9 -7
- package/src/util.ts +54 -13
- package/src/declare.d.ts +0 -2
package/src/bai.ts
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
import
|
|
2
|
-
import { fromBytes } from './virtualOffset'
|
|
1
|
+
import VirtualOffset, { fromBytes } from './virtualOffset'
|
|
3
2
|
import Chunk from './chunk'
|
|
4
3
|
|
|
4
|
+
import { optimizeChunks, parsePseudoBin, findFirstData, BaseOpts } from './util'
|
|
5
5
|
import IndexFile from './indexFile'
|
|
6
|
-
import { longToNumber, optimizeChunks, BaseOpts } from './util'
|
|
7
6
|
|
|
8
7
|
const BAI_MAGIC = 21578050 // BAI\1
|
|
9
8
|
|
|
@@ -14,103 +13,106 @@ function roundUp(n: number, multiple: number) {
|
|
|
14
13
|
return n - (n % multiple) + multiple
|
|
15
14
|
}
|
|
16
15
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
}
|
|
16
|
+
function reg2bins(beg: number, end: number) {
|
|
17
|
+
end -= 1
|
|
18
|
+
return [
|
|
19
|
+
[0, 0],
|
|
20
|
+
[1 + (beg >> 26), 1 + (end >> 26)],
|
|
21
|
+
[9 + (beg >> 23), 9 + (end >> 23)],
|
|
22
|
+
[73 + (beg >> 20), 73 + (end >> 20)],
|
|
23
|
+
[585 + (beg >> 17), 585 + (end >> 17)],
|
|
24
|
+
[4681 + (beg >> 14), 4681 + (end >> 14)],
|
|
25
|
+
]
|
|
26
|
+
}
|
|
29
27
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
const index = prom.indices[refId]
|
|
33
|
-
if (!index) {
|
|
34
|
-
return -1
|
|
35
|
-
}
|
|
36
|
-
const ret = index.stats || {}
|
|
37
|
-
return ret.lineCount === undefined ? -1 : ret.lineCount
|
|
38
|
-
}
|
|
28
|
+
export default class BAI extends IndexFile {
|
|
29
|
+
public setupP?: ReturnType<BAI['_parse']>
|
|
39
30
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
this.baiP = undefined
|
|
44
|
-
throw e
|
|
45
|
-
}) as Promise<Buffer>
|
|
46
|
-
}
|
|
47
|
-
return this.baiP
|
|
31
|
+
async lineCount(refId: number, opts?: BaseOpts) {
|
|
32
|
+
const indexData = await this.parse(opts)
|
|
33
|
+
return indexData.indices[refId]?.stats?.lineCount || 0
|
|
48
34
|
}
|
|
49
35
|
|
|
50
36
|
// fetch and parse the index
|
|
51
|
-
async _parse() {
|
|
52
|
-
const
|
|
53
|
-
const bytes = await this.fetchBai()
|
|
37
|
+
async _parse(opts?: BaseOpts) {
|
|
38
|
+
const bytes = (await this.filehandle.readFile(opts)) as Buffer
|
|
54
39
|
|
|
55
40
|
// check BAI magic numbers
|
|
56
41
|
if (bytes.readUInt32LE(0) !== BAI_MAGIC) {
|
|
57
42
|
throw new Error('Not a BAI file')
|
|
58
43
|
}
|
|
59
44
|
|
|
60
|
-
|
|
45
|
+
const refCount = bytes.readInt32LE(4)
|
|
61
46
|
const depth = 5
|
|
62
47
|
const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7
|
|
63
48
|
|
|
64
49
|
// read the indexes for each reference sequence
|
|
65
|
-
|
|
66
|
-
let
|
|
67
|
-
|
|
50
|
+
let curr = 8
|
|
51
|
+
let firstDataLine: VirtualOffset | undefined
|
|
52
|
+
|
|
53
|
+
type BinIndex = { [key: string]: Chunk[] }
|
|
54
|
+
type LinearIndex = VirtualOffset[]
|
|
55
|
+
const indices = new Array<{
|
|
56
|
+
binIndex: BinIndex
|
|
57
|
+
linearIndex: LinearIndex
|
|
58
|
+
stats?: { lineCount: number }
|
|
59
|
+
}>(refCount)
|
|
60
|
+
for (let i = 0; i < refCount; i++) {
|
|
68
61
|
// the binning index
|
|
69
|
-
const binCount = bytes.readInt32LE(
|
|
62
|
+
const binCount = bytes.readInt32LE(curr)
|
|
70
63
|
let stats
|
|
71
64
|
|
|
72
|
-
|
|
65
|
+
curr += 4
|
|
73
66
|
const binIndex: { [key: number]: Chunk[] } = {}
|
|
67
|
+
|
|
74
68
|
for (let j = 0; j < binCount; j += 1) {
|
|
75
|
-
const bin = bytes.readUInt32LE(
|
|
76
|
-
|
|
69
|
+
const bin = bytes.readUInt32LE(curr)
|
|
70
|
+
curr += 4
|
|
77
71
|
if (bin === binLimit + 1) {
|
|
78
|
-
|
|
79
|
-
stats =
|
|
80
|
-
|
|
72
|
+
curr += 4
|
|
73
|
+
stats = parsePseudoBin(bytes, curr + 16)
|
|
74
|
+
curr += 32
|
|
81
75
|
} else if (bin > binLimit + 1) {
|
|
82
76
|
throw new Error('bai index contains too many bins, please use CSI')
|
|
83
77
|
} else {
|
|
84
|
-
const chunkCount = bytes.readInt32LE(
|
|
85
|
-
|
|
86
|
-
const chunks = new Array(chunkCount)
|
|
87
|
-
for (let k = 0; k < chunkCount; k
|
|
88
|
-
const u = fromBytes(bytes,
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
78
|
+
const chunkCount = bytes.readInt32LE(curr)
|
|
79
|
+
curr += 4
|
|
80
|
+
const chunks = new Array<Chunk>(chunkCount)
|
|
81
|
+
for (let k = 0; k < chunkCount; k++) {
|
|
82
|
+
const u = fromBytes(bytes, curr)
|
|
83
|
+
curr += 8
|
|
84
|
+
const v = fromBytes(bytes, curr)
|
|
85
|
+
curr += 8
|
|
86
|
+
firstDataLine = findFirstData(firstDataLine, u)
|
|
92
87
|
chunks[k] = new Chunk(u, v, bin)
|
|
93
88
|
}
|
|
94
89
|
binIndex[bin] = chunks
|
|
95
90
|
}
|
|
96
91
|
}
|
|
97
92
|
|
|
98
|
-
const linearCount = bytes.readInt32LE(
|
|
99
|
-
|
|
100
|
-
// as we're going through the linear index, figure out
|
|
101
|
-
//
|
|
102
|
-
//
|
|
103
|
-
const linearIndex = new Array(linearCount)
|
|
104
|
-
for (let
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
93
|
+
const linearCount = bytes.readInt32LE(curr)
|
|
94
|
+
curr += 4
|
|
95
|
+
// as we're going through the linear index, figure out the smallest
|
|
96
|
+
// virtual offset in the indexes, which tells us where the BAM header
|
|
97
|
+
// ends
|
|
98
|
+
const linearIndex = new Array<VirtualOffset>(linearCount)
|
|
99
|
+
for (let j = 0; j < linearCount; j++) {
|
|
100
|
+
const offset = fromBytes(bytes, curr)
|
|
101
|
+
curr += 8
|
|
102
|
+
firstDataLine = findFirstData(firstDataLine, offset)
|
|
103
|
+
linearIndex[j] = offset
|
|
108
104
|
}
|
|
109
105
|
|
|
110
|
-
|
|
106
|
+
indices[i] = { binIndex, linearIndex, stats }
|
|
111
107
|
}
|
|
112
108
|
|
|
113
|
-
return
|
|
109
|
+
return {
|
|
110
|
+
bai: true,
|
|
111
|
+
firstDataLine,
|
|
112
|
+
maxBlockSize: 1 << 16,
|
|
113
|
+
indices,
|
|
114
|
+
refCount,
|
|
115
|
+
}
|
|
114
116
|
}
|
|
115
117
|
|
|
116
118
|
async indexCov(
|
|
@@ -127,17 +129,14 @@ export default class BAI extends IndexFile {
|
|
|
127
129
|
return []
|
|
128
130
|
}
|
|
129
131
|
const { linearIndex = [], stats } = seqIdx
|
|
130
|
-
if (
|
|
132
|
+
if (linearIndex.length === 0) {
|
|
131
133
|
return []
|
|
132
134
|
}
|
|
133
|
-
const e = end
|
|
134
|
-
const s = start
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
} else {
|
|
139
|
-
depths = new Array(linearIndex.length - 1)
|
|
140
|
-
}
|
|
135
|
+
const e = end === undefined ? (linearIndex.length - 1) * v : roundUp(end, v)
|
|
136
|
+
const s = start === undefined ? 0 : roundDown(start, v)
|
|
137
|
+
const depths = range
|
|
138
|
+
? new Array((e - s) / v)
|
|
139
|
+
: new Array(linearIndex.length - 1)
|
|
141
140
|
const totalSize = linearIndex[linearIndex.length - 1].blockPosition
|
|
142
141
|
if (e > (linearIndex.length - 1) * v) {
|
|
143
142
|
throw new Error('query outside of range of linear index')
|
|
@@ -151,25 +150,10 @@ export default class BAI extends IndexFile {
|
|
|
151
150
|
}
|
|
152
151
|
currentPos = linearIndex[i + 1].blockPosition
|
|
153
152
|
}
|
|
154
|
-
return depths.map(d => {
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
/**
|
|
160
|
-
* calculate the list of bins that may overlap with region [beg,end) (zero-based half-open)
|
|
161
|
-
* @returns {Array[number]}
|
|
162
|
-
*/
|
|
163
|
-
reg2bins(beg: number, end: number) {
|
|
164
|
-
end -= 1
|
|
165
|
-
return [
|
|
166
|
-
[0, 0],
|
|
167
|
-
[1 + (beg >> 26), 1 + (end >> 26)],
|
|
168
|
-
[9 + (beg >> 23), 9 + (end >> 23)],
|
|
169
|
-
[73 + (beg >> 20), 73 + (end >> 20)],
|
|
170
|
-
[585 + (beg >> 17), 585 + (end >> 17)],
|
|
171
|
-
[4681 + (beg >> 14), 4681 + (end >> 14)],
|
|
172
|
-
]
|
|
153
|
+
return depths.map(d => ({
|
|
154
|
+
...d,
|
|
155
|
+
score: (d.score * (stats?.lineCount || 0)) / totalSize,
|
|
156
|
+
}))
|
|
173
157
|
}
|
|
174
158
|
|
|
175
159
|
async blocksForRange(
|
|
@@ -192,7 +176,7 @@ export default class BAI extends IndexFile {
|
|
|
192
176
|
}
|
|
193
177
|
|
|
194
178
|
// List of bin #s that overlap min, max
|
|
195
|
-
const overlappingBins =
|
|
179
|
+
const overlappingBins = reg2bins(min, max)
|
|
196
180
|
const chunks: Chunk[] = []
|
|
197
181
|
|
|
198
182
|
// Find chunks in overlapping bins. Leaf bins (< 4681) are not pruned
|
|
@@ -200,8 +184,8 @@ export default class BAI extends IndexFile {
|
|
|
200
184
|
for (let bin = start; bin <= end; bin++) {
|
|
201
185
|
if (ba.binIndex[bin]) {
|
|
202
186
|
const binChunks = ba.binIndex[bin]
|
|
203
|
-
for (
|
|
204
|
-
chunks.push(
|
|
187
|
+
for (const binChunk of binChunks) {
|
|
188
|
+
chunks.push(binChunk)
|
|
205
189
|
}
|
|
206
190
|
}
|
|
207
191
|
}
|
|
@@ -210,18 +194,31 @@ export default class BAI extends IndexFile {
|
|
|
210
194
|
// Use the linear index to find minimum file position of chunks that could
|
|
211
195
|
// contain alignments in the region
|
|
212
196
|
const nintv = ba.linearIndex.length
|
|
213
|
-
let lowest
|
|
197
|
+
let lowest: VirtualOffset | undefined
|
|
214
198
|
const minLin = Math.min(min >> 14, nintv - 1)
|
|
215
199
|
const maxLin = Math.min(max >> 14, nintv - 1)
|
|
216
200
|
for (let i = minLin; i <= maxLin; ++i) {
|
|
217
201
|
const vp = ba.linearIndex[i]
|
|
218
|
-
if (vp) {
|
|
219
|
-
|
|
220
|
-
lowest = vp
|
|
221
|
-
}
|
|
202
|
+
if (vp && (!lowest || vp.compareTo(lowest) < 0)) {
|
|
203
|
+
lowest = vp
|
|
222
204
|
}
|
|
223
205
|
}
|
|
224
206
|
|
|
225
207
|
return optimizeChunks(chunks, lowest)
|
|
226
208
|
}
|
|
209
|
+
|
|
210
|
+
async parse(opts: BaseOpts = {}) {
|
|
211
|
+
if (!this.setupP) {
|
|
212
|
+
this.setupP = this._parse(opts).catch(e => {
|
|
213
|
+
this.setupP = undefined
|
|
214
|
+
throw e
|
|
215
|
+
})
|
|
216
|
+
}
|
|
217
|
+
return this.setupP
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
async hasRefSeq(seqId: number, opts: BaseOpts = {}) {
|
|
221
|
+
const header = await this.parse(opts)
|
|
222
|
+
return !!header.indices[seqId]?.binIndex
|
|
223
|
+
}
|
|
227
224
|
}
|