@gmod/bam 1.1.15 → 1.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## [1.1.16](https://github.com/GMOD/bam-js/compare/v1.1.15...v1.1.16) (2022-03-30)
2
+
3
+
4
+
5
+ - Add src directory for better source maps
6
+
1
7
  ## [1.1.15](https://github.com/GMOD/bam-js/compare/v1.1.14...v1.1.15) (2022-03-18)
2
8
 
3
9
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@gmod/bam",
3
- "version": "1.1.15",
3
+ "version": "1.1.16",
4
4
  "description": "Parser for BAM and BAM index (bai) files",
5
5
  "license": "MIT",
6
6
  "repository": "GMOD/bam-js",
@@ -16,7 +16,8 @@
16
16
  },
17
17
  "files": [
18
18
  "dist",
19
- "esm"
19
+ "esm",
20
+ "src"
20
21
  ],
21
22
  "scripts": {
22
23
  "test": "jest",
package/src/bai.ts ADDED
@@ -0,0 +1,227 @@
1
+ import Long from 'long'
2
+ import { fromBytes } from './virtualOffset'
3
+ import Chunk from './chunk'
4
+
5
+ import IndexFile from './indexFile'
6
+ import { longToNumber, optimizeChunks, BaseOpts } from './util'
7
+
8
+ // The value _parse compares against the first four bytes of the index, read
+ // as a little-endian unsigned 32-bit integer: the ASCII bytes 'B','A','I'
+ // followed by 0x01.
+ const BAI_MAGIC = 21578050 // BAI\1
9
+
10
/**
 * Snap `n` down onto the grid defined by `multiple`.
 *
 * A value that is already on the grid is returned unchanged. The remainder
 * is taken with `%`, so its sign follows `n`.
 *
 * @param n - value to snap
 * @param multiple - grid spacing
 * @returns `n` minus its remainder modulo `multiple`
 */
function roundDown(n: number, multiple: number) {
  const remainder = n % multiple
  return n - remainder
}
13
/**
 * Snap `n` onto the next grid line strictly above its floored position.
 *
 * Note that a value already sitting on the grid is still advanced by one
 * full step (e.g. 16384 with a step of 16384 yields 32768); this is not a
 * conventional ceiling.
 *
 * @param n - value to snap
 * @param multiple - grid spacing
 * @returns `n` floored to the grid, plus one `multiple`
 */
function roundUp(n: number, multiple: number) {
  const floored = n - (n % multiple)
  return floored + multiple
}
16
+
17
/**
 * Reader for BAM index (`.bai`) files.
 *
 * The whole index file is read once through `filehandle.readFile`, decoded by
 * `_parse` into per-reference binning and linear indexes, and then queried by
 * `lineCount`, `indexCov` and `blocksForRange`.
 */
export default class BAI extends IndexFile {
  // Memoized read of the raw index bytes; reset to undefined if the read fails.
  baiP?: Promise<Buffer>

  /**
   * Decode the statistics stored in a pseudo-bin.
   *
   * Reads the eight bytes at `offset + 16 .. offset + 24` as an unsigned
   * little-endian 64-bit integer. In the BAI layout described by the SAM/BAM
   * specification this field is the mapped-read count of the reference,
   * given that `offset` points just past the pseudo-bin's chunk count (which
   * is how `_parse` calls it).
   *
   * @param bytes - the raw index file contents
   * @param offset - position of the first pseudo-bin chunk
   * @returns an object holding the decoded `lineCount`
   */
  parsePseudoBin(bytes: Buffer, offset: number) {
    const lineCount = longToNumber(
      Long.fromBytesLE(
        Array.prototype.slice.call(bytes, offset + 16, offset + 24),
        true,
      ),
    )
    return { lineCount }
  }

  /**
   * Look up the `lineCount` statistic recorded for a reference sequence.
   *
   * @param refId - numeric reference sequence id
   * @param opts - forwarded to `parse`
   * @returns the recorded count, or -1 when the reference has no index entry
   *   or the entry carries no `lineCount`
   */
  async lineCount(refId: number, opts: BaseOpts = {}) {
    const prom = await this.parse(opts)
    const index = prom.indices[refId]
    if (!index) {
      return -1
    }
    const ret = index.stats || {}
    return ret.lineCount === undefined ? -1 : ret.lineCount
  }

  /**
   * Read the entire index file, sharing one read between all callers.
   *
   * The promise is cached in `baiP`. If the read rejects, the cache is cleared
   * before the error is rethrown so that a later call starts a fresh read.
   * `opts` only takes effect on the call that actually starts the read.
   *
   * @param opts - forwarded to `filehandle.readFile`
   * @returns promise for the raw index bytes
   */
  fetchBai(opts: BaseOpts = {}) {
    if (!this.baiP) {
      this.baiP = this.filehandle.readFile(opts).catch(e => {
        this.baiP = undefined
        throw e
      }) as Promise<Buffer>
    }
    return this.baiP
  }

  /**
   * Fetch and parse the index.
   *
   * Produces `{ bai, maxBlockSize, refCount, indices }` where `indices[i]` is
   * `{ binIndex, linearIndex, stats }` for reference `i`. `stats` stays
   * undefined for references whose index has no pseudo-bin.
   *
   * NOTE(review): this method takes no options, so nothing is forwarded to
   * `fetchBai` from here — confirm that is intended.
   *
   * @throws Error when the magic number is wrong or a bin number exceeds what
   *   the BAI binning scheme allows
   */
  async _parse() {
    const data: { [key: string]: any } = { bai: true, maxBlockSize: 1 << 16 }
    const bytes = await this.fetchBai()

    // check BAI magic numbers
    if (bytes.readUInt32LE(0) !== BAI_MAGIC) {
      throw new Error('Not a BAI file')
    }

    data.refCount = bytes.readInt32LE(4)
    const depth = 5
    // highest regular bin number: (8^(depth+1) - 1) / 7 = 37449
    const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7

    // read the indexes for each reference sequence
    data.indices = new Array(data.refCount)
    let currOffset = 8
    for (let i = 0; i < data.refCount; i += 1) {
      // the binning index
      const binCount = bytes.readInt32LE(currOffset)
      let stats

      currOffset += 4
      const binIndex: { [key: number]: Chunk[] } = {}
      for (let j = 0; j < binCount; j += 1) {
        const bin = bytes.readUInt32LE(currOffset)
        currOffset += 4
        if (bin === binLimit + 1) {
          // pseudo-bin: skip its 4-byte chunk count, decode the stats, then
          // skip the 32 bytes of its two chunks
          currOffset += 4
          stats = this.parsePseudoBin(bytes, currOffset)
          currOffset += 32
        } else if (bin > binLimit + 1) {
          throw new Error('bai index contains too many bins, please use CSI')
        } else {
          const chunkCount = bytes.readInt32LE(currOffset)
          currOffset += 4
          const chunks = new Array(chunkCount)
          for (let k = 0; k < chunkCount; k += 1) {
            // each chunk is a pair of 8-byte virtual offsets
            const u = fromBytes(bytes, currOffset)
            const v = fromBytes(bytes, currOffset + 8)
            currOffset += 16
            this._findFirstData(data, u)
            chunks[k] = new Chunk(u, v, bin)
          }
          binIndex[bin] = chunks
        }
      }

      const linearCount = bytes.readInt32LE(currOffset)
      currOffset += 4
      // as we're going through the linear index, figure out
      // the smallest virtual offset in the indexes, which
      // tells us where the BAM header ends
      const linearIndex = new Array(linearCount)
      for (let k = 0; k < linearCount; k += 1) {
        linearIndex[k] = fromBytes(bytes, currOffset)
        currOffset += 8
        this._findFirstData(data, linearIndex[k])
      }

      data.indices[i] = { binIndex, linearIndex, stats }
    }

    return data
  }

  /**
   * Estimate coverage per 16384-base window from the linear index alone.
   *
   * For each window the difference in `blockPosition` between consecutive
   * linear-index entries is scaled by `lineCount / totalSize`, where
   * `totalSize` is the `blockPosition` of the last linear-index entry.
   *
   * `start` is rounded down and `end` rounded up to window boundaries; a
   * negative `start` is clamped to 0. When the reference has no recorded
   * `lineCount` (no pseudo-bin in the index) every score is 0 instead of the
   * call failing with a TypeError.
   *
   * @param seqId - numeric reference sequence id
   * @param start - optional start of the queried range
   * @param end - optional end of the queried range
   * @param opts - forwarded to `parse`
   * @returns one `{ start, end, score }` entry per window; empty when the
   *   reference is not indexed, has an empty linear index, or the rounded
   *   range is empty
   * @throws Error when the rounded `end` lies beyond the linear index
   */
  async indexCov(
    seqId: number,
    start?: number,
    end?: number,
    opts: BaseOpts = {},
  ): Promise<{ start: number; end: number; score: number }[]> {
    const v = 16384
    const indexData = await this.parse(opts)
    const seqIdx = indexData.indices[seqId]
    if (!seqIdx) {
      return []
    }
    const { linearIndex = [], stats } = seqIdx
    if (!linearIndex.length) {
      return []
    }

    // n linear-index entries delimit n - 1 windows
    const lastBoundary = (linearIndex.length - 1) * v
    const e = end !== undefined ? roundUp(end, v) : lastBoundary
    const s = start !== undefined ? Math.max(0, roundDown(start, v)) : 0

    // validate before touching the index so bad input gets this error rather
    // than an incidental RangeError/TypeError further down
    if (e > lastBoundary) {
      throw new Error('query outside of range of linear index')
    }
    if (s >= e) {
      return []
    }

    // stats is only present when the index contained a pseudo-bin
    const lineCount =
      stats && stats.lineCount !== undefined ? stats.lineCount : 0
    const totalSize = linearIndex[linearIndex.length - 1].blockPosition

    const depths: { start: number; end: number; score: number }[] = []
    let currentPos = linearIndex[s / v].blockPosition
    for (let i = s / v; i < e / v; i++) {
      const nextPos = linearIndex[i + 1].blockPosition
      depths.push({
        start: i * v,
        end: i * v + v,
        score: ((nextPos - currentPos) * lineCount) / totalSize,
      })
      currentPos = nextPos
    }
    return depths
  }

  /**
   * Calculate the bins that may overlap region [beg,end) (zero-based
   * half-open), one range per level of the binning scheme.
   *
   * @param beg - region start, inclusive
   * @param end - region end, exclusive
   * @returns six `[firstBin, lastBin]` pairs (both inclusive), ordered from
   *   the single root bin to the 16 kb (`>> 14`) bins
   */
  reg2bins(beg: number, end: number) {
    // convert the exclusive end into the last covered position
    end -= 1
    return [
      [0, 0],
      [1 + (beg >> 26), 1 + (end >> 26)],
      [9 + (beg >> 23), 9 + (end >> 23)],
      [73 + (beg >> 20), 73 + (end >> 20)],
      [585 + (beg >> 17), 585 + (end >> 17)],
      [4681 + (beg >> 14), 4681 + (end >> 14)],
    ]
  }

  /**
   * Find the file chunks that may hold alignments overlapping a region.
   *
   * Gathers the chunks of every bin returned by `reg2bins`, finds the lowest
   * virtual offset among the linear-index entries covering the region, and
   * hands both to `optimizeChunks`.
   *
   * @param refId - numeric reference sequence id
   * @param min - region start; negative values are clamped to 0
   * @param max - region end
   * @param opts - forwarded to `parse`
   * @returns the result of `optimizeChunks`, or an empty array when there is
   *   no index data for `refId`
   */
  async blocksForRange(
    refId: number,
    min: number,
    max: number,
    opts: BaseOpts = {},
  ) {
    if (min < 0) {
      min = 0
    }

    const indexData = await this.parse(opts)
    if (!indexData) {
      return []
    }
    const ba = indexData.indices[refId]
    if (!ba) {
      return []
    }

    // List of bin #s that overlap min, max
    const overlappingBins = this.reg2bins(min, max)
    const chunks: Chunk[] = []

    // Collect the chunks of every overlapping bin at every level; nothing is
    // pruned at this stage
    for (const [start, end] of overlappingBins) {
      for (let bin = start; bin <= end; bin++) {
        if (ba.binIndex[bin]) {
          const binChunks = ba.binIndex[bin]
          for (let c = 0; c < binChunks.length; ++c) {
            chunks.push(new Chunk(binChunks[c].minv, binChunks[c].maxv, bin))
          }
        }
      }
    }

    // Use the linear index to find minimum file position of chunks that could
    // contain alignments in the region
    const nintv = ba.linearIndex.length
    let lowest = null
    // clamp both ends to the last available linear-index entry
    const minLin = Math.min(min >> 14, nintv - 1)
    const maxLin = Math.min(max >> 14, nintv - 1)
    for (let i = minLin; i <= maxLin; ++i) {
      const vp = ba.linearIndex[i]
      if (vp) {
        if (!lowest || vp.compareTo(lowest) < 0) {
          lowest = vp
        }
      }
    }

    return optimizeChunks(chunks, lowest)
  }
}