@gmod/bam 1.1.15 → 1.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/package.json +3 -2
- package/src/bai.ts +227 -0
- package/src/bamFile.ts +546 -0
- package/src/chunk.ts +52 -0
- package/src/constants.ts +26 -0
- package/src/csi.ts +246 -0
- package/src/declare.d.ts +2 -0
- package/src/errors.ts +22 -0
- package/src/htsget.ts +138 -0
- package/src/index.ts +7 -0
- package/src/indexFile.ts +63 -0
- package/src/record.ts +610 -0
- package/src/sam.ts +15 -0
- package/src/util.ts +116 -0
- package/src/virtualOffset.ts +47 -0
package/CHANGELOG.md
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gmod/bam",
|
|
3
|
-
"version": "1.1.15",
|
|
3
|
+
"version": "1.1.16",
|
|
4
4
|
"description": "Parser for BAM and BAM index (bai) files",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": "GMOD/bam-js",
|
|
@@ -16,7 +16,8 @@
|
|
|
16
16
|
},
|
|
17
17
|
"files": [
|
|
18
18
|
"dist",
|
|
19
|
-
"esm"
|
|
19
|
+
"esm",
|
|
20
|
+
"src"
|
|
20
21
|
],
|
|
21
22
|
"scripts": {
|
|
22
23
|
"test": "jest",
|
package/src/bai.ts
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import Long from 'long'
|
|
2
|
+
import { fromBytes } from './virtualOffset'
|
|
3
|
+
import Chunk from './chunk'
|
|
4
|
+
|
|
5
|
+
import IndexFile from './indexFile'
|
|
6
|
+
import { longToNumber, optimizeChunks, BaseOpts } from './util'
|
|
7
|
+
|
|
8
|
+
// Magic number at the start of every BAI file: the bytes 'B' 'A' 'I' 0x01
// read as a little-endian unsigned 32-bit integer (decimal 21578050).
const BAI_MAGIC = 0x01494142
|
|
9
|
+
|
|
10
|
+
/**
 * Strip the remainder of `n` with respect to `multiple`, yielding the
 * multiple of `multiple` at or below `n` for non-negative input.
 */
function roundDown(n: number, multiple: number): number {
  const remainder = n % multiple
  return n - remainder
}
|
|
13
|
+
/**
 * Round `n` up to the nearest multiple of `multiple`.
 *
 * A value that already is an exact multiple is returned unchanged; the
 * previous implementation always added one extra `multiple` in that case,
 * which over-extended ranges ending exactly on a boundary.
 *
 * @param n - value to round (expected to be non-negative)
 * @param multiple - step size to round to
 * @returns the smallest multiple of `multiple` that is >= `n`
 */
function roundUp(n: number, multiple: number): number {
  const remainder = n % multiple
  return remainder === 0 ? n : n - remainder + multiple
}
|
|
16
|
+
|
|
17
|
+
/**
 * Reader for BAI-format BAM index files.
 *
 * The raw index bytes are fetched once through `this.filehandle` and parsed
 * into per-reference binning and linear indexes, which are then used to
 * answer line-count, coverage-estimate and block-lookup queries.
 */
export default class BAI extends IndexFile {
  // Pending or completed read of the raw index bytes. Reset to undefined
  // when the read fails so that a later call can retry.
  baiP?: Promise<Buffer>

  /**
   * Decode the metadata pseudo-bin.
   *
   * @param bytes - the whole index file
   * @param offset - position of the pseudo-bin's chunk data (just past its
   *   chunk count)
   * @returns the unsigned 64-bit little-endian counter stored 16 bytes into
   *   the chunk data, exposed as `lineCount`
   */
  parsePseudoBin(bytes: Buffer, offset: number) {
    const lineCount = longToNumber(
      Long.fromBytesLE(
        Array.prototype.slice.call(bytes, offset + 16, offset + 24),
        true,
      ),
    )
    return { lineCount }
  }

  /**
   * Number of lines recorded in the index for a reference sequence.
   *
   * @param refId - numeric reference sequence id
   * @param opts - options forwarded to `parse`
   * @returns the recorded count, or -1 when the reference is not in the
   *   index or the index carries no pseudo-bin statistics for it
   */
  async lineCount(refId: number, opts: BaseOpts = {}) {
    const prom = await this.parse(opts)
    const index = prom.indices[refId]
    if (!index) {
      return -1
    }
    const ret = index.stats || {}
    return ret.lineCount === undefined ? -1 : ret.lineCount
  }

  /**
   * Read the whole index file, sharing one read between callers.
   *
   * @param opts - options passed to `filehandle.readFile`; only the options
   *   of the call that actually starts the read are used
   * @returns a promise for the raw index bytes
   */
  fetchBai(opts: BaseOpts = {}) {
    if (!this.baiP) {
      this.baiP = this.filehandle.readFile(opts).catch(e => {
        // do not cache failures: let the next caller start a fresh read
        this.baiP = undefined
        throw e
      }) as Promise<Buffer>
    }
    return this.baiP
  }

  /**
   * Fetch and parse the index.
   *
   * @param opts - options forwarded to `fetchBai` so that they reach the
   *   underlying file read
   * @returns an object with `bai`, `maxBlockSize`, `refCount` and, per
   *   reference, `indices[i] = { binIndex, linearIndex, stats }`
   * @throws Error if the magic number is wrong or a bin number exceeds what
   *   the BAI binning scheme can hold
   */
  async _parse(opts: BaseOpts = {}) {
    const data: { [key: string]: any } = { bai: true, maxBlockSize: 1 << 16 }
    const bytes = await this.fetchBai(opts)

    // check BAI magic numbers
    if (bytes.readUInt32LE(0) !== BAI_MAGIC) {
      throw new Error('Not a BAI file')
    }

    data.refCount = bytes.readInt32LE(4)
    const depth = 5
    // highest regular bin number + 1 for a 6-level scheme: (8^6 - 1) / 7 = 37449
    const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7

    // read the indexes for each reference sequence
    data.indices = new Array(data.refCount)
    let currOffset = 8
    for (let i = 0; i < data.refCount; i += 1) {
      // the binning index
      const binCount = bytes.readInt32LE(currOffset)
      let stats

      currOffset += 4
      const binIndex: { [key: number]: Chunk[] } = {}
      for (let j = 0; j < binCount; j += 1) {
        const bin = bytes.readUInt32LE(currOffset)
        currOffset += 4
        if (bin === binLimit + 1) {
          // metadata pseudo-bin: skip its chunk count, then its 32 bytes of
          // chunk data, which carry statistics rather than real chunks
          currOffset += 4
          stats = this.parsePseudoBin(bytes, currOffset)
          currOffset += 32
        } else if (bin > binLimit + 1) {
          throw new Error('bai index contains too many bins, please use CSI')
        } else {
          const chunkCount = bytes.readInt32LE(currOffset)
          currOffset += 4
          const chunks = new Array(chunkCount)
          for (let k = 0; k < chunkCount; k += 1) {
            // each chunk is a pair of 8-byte virtual offsets (start, end)
            const u = fromBytes(bytes, currOffset)
            const v = fromBytes(bytes, currOffset + 8)
            currOffset += 16
            this._findFirstData(data, u)
            chunks[k] = new Chunk(u, v, bin)
          }
          binIndex[bin] = chunks
        }
      }

      const linearCount = bytes.readInt32LE(currOffset)
      currOffset += 4
      // as we're going through the linear index, figure out
      // the smallest virtual offset in the indexes, which
      // tells us where the BAM header ends
      const linearIndex = new Array(linearCount)
      for (let k = 0; k < linearCount; k += 1) {
        linearIndex[k] = fromBytes(bytes, currOffset)
        currOffset += 8
        this._findFirstData(data, linearIndex[k])
      }

      data.indices[i] = { binIndex, linearIndex, stats }
    }

    return data
  }

  /**
   * Estimate coverage in 16384-base windows from the linear index alone.
   *
   * Each window's score is the file-offset distance between consecutive
   * linear index entries, scaled by `lineCount / totalSize` where
   * `totalSize` is the block position of the last linear index entry.
   *
   * @param seqId - numeric reference sequence id
   * @param start - optional range start; rounded down to a window boundary
   * @param end - optional range end; rounded up to a window boundary
   * @param opts - options forwarded to `parse`
   * @returns one `{ start, end, score }` entry per window; empty when the
   *   reference is unknown or has no linear index. When the index has no
   *   line-count statistics for the reference, every score is 0.
   * @throws Error if the requested end lies beyond the linear index
   */
  async indexCov(
    seqId: number,
    start?: number,
    end?: number,
    opts: BaseOpts = {},
  ): Promise<{ start: number; end: number; score: number }[]> {
    const v = 16384
    const indexData = await this.parse(opts)
    const seqIdx = indexData.indices[seqId]
    if (!seqIdx) {
      return []
    }
    const { linearIndex = [], stats } = seqIdx
    if (!linearIndex.length) {
      return []
    }
    const e = end !== undefined ? roundUp(end, v) : (linearIndex.length - 1) * v
    const s = start !== undefined ? roundDown(start, v) : 0

    // Always size the result from the window range actually walked below.
    // With neither bound given this equals linearIndex.length - 1; sizing it
    // that way when only `end` was given used to leave holes in the result.
    const depths: { start: number; end: number; score: number }[] = new Array(
      (e - s) / v,
    )
    const totalSize = linearIndex[linearIndex.length - 1].blockPosition
    if (e > (linearIndex.length - 1) * v) {
      throw new Error('query outside of range of linear index')
    }
    let currentPos = linearIndex[s / v].blockPosition
    for (let i = s / v, j = 0; i < e / v; i++, j++) {
      depths[j] = {
        score: linearIndex[i + 1].blockPosition - currentPos,
        start: i * v,
        end: i * v + v,
      }
      currentPos = linearIndex[i + 1].blockPosition
    }

    // `stats` is undefined when the index has no metadata pseudo-bin for this
    // reference; fall back to 0 instead of crashing on `stats.lineCount`.
    const lineCount =
      stats && stats.lineCount !== undefined ? stats.lineCount : 0
    return depths.map(d => {
      return { ...d, score: (d.score * lineCount) / totalSize }
    })
  }

  /**
   * calculate the list of bins that may overlap with region [beg,end) (zero-based half-open)
   * @returns one inclusive `[firstBin, lastBin]` pair per binning level,
   *   ordered from the single top-level bin down to the 16 kb bins
   */
  reg2bins(beg: number, end: number) {
    // make the end coordinate inclusive
    end -= 1
    return [
      [0, 0],
      [1 + (beg >> 26), 1 + (end >> 26)],
      [9 + (beg >> 23), 9 + (end >> 23)],
      [73 + (beg >> 20), 73 + (end >> 20)],
      [585 + (beg >> 17), 585 + (end >> 17)],
      [4681 + (beg >> 14), 4681 + (end >> 14)],
    ]
  }

  /**
   * Find the file chunks that may contain records overlapping a region.
   *
   * @param refId - numeric reference sequence id
   * @param min - region start; negative values are clamped to 0
   * @param max - region end
   * @param opts - options forwarded to `parse`
   * @returns the result of `optimizeChunks` over every chunk in an
   *   overlapping bin, or an empty array when the reference is not indexed
   */
  async blocksForRange(
    refId: number,
    min: number,
    max: number,
    opts: BaseOpts = {},
  ) {
    if (min < 0) {
      min = 0
    }

    const indexData = await this.parse(opts)
    if (!indexData) {
      return []
    }
    const ba = indexData.indices[refId]
    if (!ba) {
      return []
    }

    // List of bin #s that overlap min, max
    const overlappingBins = this.reg2bins(min, max)
    const chunks: Chunk[] = []

    // Collect copies of the chunks of every bin, at every level, that
    // overlaps the region
    for (const [start, end] of overlappingBins) {
      for (let bin = start; bin <= end; bin++) {
        if (ba.binIndex[bin]) {
          const binChunks = ba.binIndex[bin]
          for (let c = 0; c < binChunks.length; ++c) {
            chunks.push(new Chunk(binChunks[c].minv, binChunks[c].maxv, bin))
          }
        }
      }
    }

    // Use the linear index to find minimum file position of chunks that could
    // contain alignments in the region
    const nintv = ba.linearIndex.length
    let lowest = null
    // clamp both window indexes to the last linear index entry
    const minLin = Math.min(min >> 14, nintv - 1)
    const maxLin = Math.min(max >> 14, nintv - 1)
    for (let i = minLin; i <= maxLin; ++i) {
      const vp = ba.linearIndex[i]
      if (vp) {
        if (!lowest || vp.compareTo(lowest) < 0) {
          lowest = vp
        }
      }
    }

    return optimizeChunks(chunks, lowest)
  }
}
|