@gmod/bam 1.1.18 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/CHANGELOG.md +61 -25
  2. package/README.md +95 -57
  3. package/dist/bai.d.ts +34 -15
  4. package/dist/bai.js +87 -91
  5. package/dist/bai.js.map +1 -1
  6. package/dist/bamFile.d.ts +33 -27
  7. package/dist/bamFile.js +127 -121
  8. package/dist/bamFile.js.map +1 -1
  9. package/dist/chunk.d.ts +4 -8
  10. package/dist/chunk.js +2 -8
  11. package/dist/chunk.js.map +1 -1
  12. package/dist/csi.d.ts +74 -10
  13. package/dist/csi.js +78 -90
  14. package/dist/csi.js.map +1 -1
  15. package/dist/htsget.d.ts +5 -8
  16. package/dist/htsget.js +72 -47
  17. package/dist/htsget.js.map +1 -1
  18. package/dist/index.d.ts +5 -6
  19. package/dist/index.js +11 -11
  20. package/dist/index.js.map +1 -1
  21. package/dist/indexFile.d.ts +0 -6
  22. package/dist/indexFile.js +0 -35
  23. package/dist/indexFile.js.map +1 -1
  24. package/dist/nullIndex.d.ts +7 -0
  25. package/dist/nullIndex.js +33 -0
  26. package/dist/nullIndex.js.map +1 -0
  27. package/dist/record.d.ts +2 -2
  28. package/dist/record.js +34 -24
  29. package/dist/record.js.map +1 -1
  30. package/dist/sam.js +9 -7
  31. package/dist/sam.js.map +1 -1
  32. package/dist/util.d.ts +13 -1
  33. package/dist/util.js +47 -15
  34. package/dist/util.js.map +1 -1
  35. package/esm/bai.d.ts +34 -15
  36. package/esm/bai.js +86 -91
  37. package/esm/bai.js.map +1 -1
  38. package/esm/bamFile.d.ts +33 -27
  39. package/esm/bamFile.js +124 -120
  40. package/esm/bamFile.js.map +1 -1
  41. package/esm/chunk.d.ts +4 -8
  42. package/esm/chunk.js +2 -8
  43. package/esm/chunk.js.map +1 -1
  44. package/esm/csi.d.ts +74 -10
  45. package/esm/csi.js +85 -93
  46. package/esm/csi.js.map +1 -1
  47. package/esm/htsget.d.ts +5 -8
  48. package/esm/htsget.js +68 -43
  49. package/esm/htsget.js.map +1 -1
  50. package/esm/index.d.ts +5 -6
  51. package/esm/index.js +5 -6
  52. package/esm/index.js.map +1 -1
  53. package/esm/indexFile.d.ts +0 -6
  54. package/esm/indexFile.js +0 -22
  55. package/esm/indexFile.js.map +1 -1
  56. package/esm/nullIndex.d.ts +7 -0
  57. package/esm/nullIndex.js +16 -0
  58. package/esm/nullIndex.js.map +1 -0
  59. package/esm/record.d.ts +2 -2
  60. package/esm/record.js +34 -24
  61. package/esm/record.js.map +1 -1
  62. package/esm/sam.js +9 -7
  63. package/esm/sam.js.map +1 -1
  64. package/esm/util.d.ts +13 -1
  65. package/esm/util.js +40 -14
  66. package/esm/util.js.map +1 -1
  67. package/package.json +16 -17
  68. package/src/bai.ts +99 -102
  69. package/src/bamFile.ts +174 -198
  70. package/src/chunk.ts +6 -20
  71. package/src/csi.ts +102 -111
  72. package/src/htsget.ts +81 -61
  73. package/src/index.ts +5 -7
  74. package/src/indexFile.ts +0 -27
  75. package/src/nullIndex.ts +18 -0
  76. package/src/record.ts +34 -24
  77. package/src/sam.ts +9 -7
  78. package/src/util.ts +54 -13
  79. package/src/declare.d.ts +0 -2
package/src/bai.ts CHANGED
@@ -1,9 +1,8 @@
1
- import Long from 'long'
2
- import { fromBytes } from './virtualOffset'
1
+ import VirtualOffset, { fromBytes } from './virtualOffset'
3
2
  import Chunk from './chunk'
4
3
 
4
+ import { optimizeChunks, parsePseudoBin, findFirstData, BaseOpts } from './util'
5
5
  import IndexFile from './indexFile'
6
- import { longToNumber, optimizeChunks, BaseOpts } from './util'
7
6
 
8
7
  const BAI_MAGIC = 21578050 // BAI\1
9
8
 
@@ -14,103 +13,106 @@ function roundUp(n: number, multiple: number) {
14
13
  return n - (n % multiple) + multiple
15
14
  }
16
15
 
17
- export default class BAI extends IndexFile {
18
- baiP?: Promise<Buffer>
19
-
20
- parsePseudoBin(bytes: Buffer, offset: number) {
21
- const lineCount = longToNumber(
22
- Long.fromBytesLE(
23
- Array.prototype.slice.call(bytes, offset + 16, offset + 24),
24
- true,
25
- ),
26
- )
27
- return { lineCount }
28
- }
16
+ function reg2bins(beg: number, end: number) {
17
+ end -= 1
18
+ return [
19
+ [0, 0],
20
+ [1 + (beg >> 26), 1 + (end >> 26)],
21
+ [9 + (beg >> 23), 9 + (end >> 23)],
22
+ [73 + (beg >> 20), 73 + (end >> 20)],
23
+ [585 + (beg >> 17), 585 + (end >> 17)],
24
+ [4681 + (beg >> 14), 4681 + (end >> 14)],
25
+ ]
26
+ }
29
27
 
30
- async lineCount(refId: number, opts: BaseOpts = {}) {
31
- const prom = await this.parse(opts)
32
- const index = prom.indices[refId]
33
- if (!index) {
34
- return -1
35
- }
36
- const ret = index.stats || {}
37
- return ret.lineCount === undefined ? -1 : ret.lineCount
38
- }
28
+ export default class BAI extends IndexFile {
29
+ public setupP?: ReturnType<BAI['_parse']>
39
30
 
40
- fetchBai(opts: BaseOpts = {}) {
41
- if (!this.baiP) {
42
- this.baiP = this.filehandle.readFile(opts).catch(e => {
43
- this.baiP = undefined
44
- throw e
45
- }) as Promise<Buffer>
46
- }
47
- return this.baiP
31
+ async lineCount(refId: number, opts?: BaseOpts) {
32
+ const indexData = await this.parse(opts)
33
+ return indexData.indices[refId]?.stats?.lineCount || 0
48
34
  }
49
35
 
50
36
  // fetch and parse the index
51
- async _parse() {
52
- const data: { [key: string]: any } = { bai: true, maxBlockSize: 1 << 16 }
53
- const bytes = await this.fetchBai()
37
+ async _parse(opts?: BaseOpts) {
38
+ const bytes = (await this.filehandle.readFile(opts)) as Buffer
54
39
 
55
40
  // check BAI magic numbers
56
41
  if (bytes.readUInt32LE(0) !== BAI_MAGIC) {
57
42
  throw new Error('Not a BAI file')
58
43
  }
59
44
 
60
- data.refCount = bytes.readInt32LE(4)
45
+ const refCount = bytes.readInt32LE(4)
61
46
  const depth = 5
62
47
  const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7
63
48
 
64
49
  // read the indexes for each reference sequence
65
- data.indices = new Array(data.refCount)
66
- let currOffset = 8
67
- for (let i = 0; i < data.refCount; i += 1) {
50
+ let curr = 8
51
+ let firstDataLine: VirtualOffset | undefined
52
+
53
+ type BinIndex = { [key: string]: Chunk[] }
54
+ type LinearIndex = VirtualOffset[]
55
+ const indices = new Array<{
56
+ binIndex: BinIndex
57
+ linearIndex: LinearIndex
58
+ stats?: { lineCount: number }
59
+ }>(refCount)
60
+ for (let i = 0; i < refCount; i++) {
68
61
  // the binning index
69
- const binCount = bytes.readInt32LE(currOffset)
62
+ const binCount = bytes.readInt32LE(curr)
70
63
  let stats
71
64
 
72
- currOffset += 4
65
+ curr += 4
73
66
  const binIndex: { [key: number]: Chunk[] } = {}
67
+
74
68
  for (let j = 0; j < binCount; j += 1) {
75
- const bin = bytes.readUInt32LE(currOffset)
76
- currOffset += 4
69
+ const bin = bytes.readUInt32LE(curr)
70
+ curr += 4
77
71
  if (bin === binLimit + 1) {
78
- currOffset += 4
79
- stats = this.parsePseudoBin(bytes, currOffset)
80
- currOffset += 32
72
+ curr += 4
73
+ stats = parsePseudoBin(bytes, curr + 16)
74
+ curr += 32
81
75
  } else if (bin > binLimit + 1) {
82
76
  throw new Error('bai index contains too many bins, please use CSI')
83
77
  } else {
84
- const chunkCount = bytes.readInt32LE(currOffset)
85
- currOffset += 4
86
- const chunks = new Array(chunkCount)
87
- for (let k = 0; k < chunkCount; k += 1) {
88
- const u = fromBytes(bytes, currOffset)
89
- const v = fromBytes(bytes, currOffset + 8)
90
- currOffset += 16
91
- this._findFirstData(data, u)
78
+ const chunkCount = bytes.readInt32LE(curr)
79
+ curr += 4
80
+ const chunks = new Array<Chunk>(chunkCount)
81
+ for (let k = 0; k < chunkCount; k++) {
82
+ const u = fromBytes(bytes, curr)
83
+ curr += 8
84
+ const v = fromBytes(bytes, curr)
85
+ curr += 8
86
+ firstDataLine = findFirstData(firstDataLine, u)
92
87
  chunks[k] = new Chunk(u, v, bin)
93
88
  }
94
89
  binIndex[bin] = chunks
95
90
  }
96
91
  }
97
92
 
98
- const linearCount = bytes.readInt32LE(currOffset)
99
- currOffset += 4
100
- // as we're going through the linear index, figure out
101
- // the smallest virtual offset in the indexes, which
102
- // tells us where the BAM header ends
103
- const linearIndex = new Array(linearCount)
104
- for (let k = 0; k < linearCount; k += 1) {
105
- linearIndex[k] = fromBytes(bytes, currOffset)
106
- currOffset += 8
107
- this._findFirstData(data, linearIndex[k])
93
+ const linearCount = bytes.readInt32LE(curr)
94
+ curr += 4
95
+ // as we're going through the linear index, figure out the smallest
96
+ // virtual offset in the indexes, which tells us where the BAM header
97
+ // ends
98
+ const linearIndex = new Array<VirtualOffset>(linearCount)
99
+ for (let j = 0; j < linearCount; j++) {
100
+ const offset = fromBytes(bytes, curr)
101
+ curr += 8
102
+ firstDataLine = findFirstData(firstDataLine, offset)
103
+ linearIndex[j] = offset
108
104
  }
109
105
 
110
- data.indices[i] = { binIndex, linearIndex, stats }
106
+ indices[i] = { binIndex, linearIndex, stats }
111
107
  }
112
108
 
113
- return data
109
+ return {
110
+ bai: true,
111
+ firstDataLine,
112
+ maxBlockSize: 1 << 16,
113
+ indices,
114
+ refCount,
115
+ }
114
116
  }
115
117
 
116
118
  async indexCov(
@@ -127,17 +129,14 @@ export default class BAI extends IndexFile {
127
129
  return []
128
130
  }
129
131
  const { linearIndex = [], stats } = seqIdx
130
- if (!linearIndex.length) {
132
+ if (linearIndex.length === 0) {
131
133
  return []
132
134
  }
133
- const e = end !== undefined ? roundUp(end, v) : (linearIndex.length - 1) * v
134
- const s = start !== undefined ? roundDown(start, v) : 0
135
- let depths
136
- if (range) {
137
- depths = new Array((e - s) / v)
138
- } else {
139
- depths = new Array(linearIndex.length - 1)
140
- }
135
+ const e = end === undefined ? (linearIndex.length - 1) * v : roundUp(end, v)
136
+ const s = start === undefined ? 0 : roundDown(start, v)
137
+ const depths = range
138
+ ? new Array((e - s) / v)
139
+ : new Array(linearIndex.length - 1)
141
140
  const totalSize = linearIndex[linearIndex.length - 1].blockPosition
142
141
  if (e > (linearIndex.length - 1) * v) {
143
142
  throw new Error('query outside of range of linear index')
@@ -151,25 +150,10 @@ export default class BAI extends IndexFile {
151
150
  }
152
151
  currentPos = linearIndex[i + 1].blockPosition
153
152
  }
154
- return depths.map(d => {
155
- return { ...d, score: (d.score * stats.lineCount) / totalSize }
156
- })
157
- }
158
-
159
- /**
160
- * calculate the list of bins that may overlap with region [beg,end) (zero-based half-open)
161
- * @returns {Array[number]}
162
- */
163
- reg2bins(beg: number, end: number) {
164
- end -= 1
165
- return [
166
- [0, 0],
167
- [1 + (beg >> 26), 1 + (end >> 26)],
168
- [9 + (beg >> 23), 9 + (end >> 23)],
169
- [73 + (beg >> 20), 73 + (end >> 20)],
170
- [585 + (beg >> 17), 585 + (end >> 17)],
171
- [4681 + (beg >> 14), 4681 + (end >> 14)],
172
- ]
153
+ return depths.map(d => ({
154
+ ...d,
155
+ score: (d.score * (stats?.lineCount || 0)) / totalSize,
156
+ }))
173
157
  }
174
158
 
175
159
  async blocksForRange(
@@ -192,7 +176,7 @@ export default class BAI extends IndexFile {
192
176
  }
193
177
 
194
178
  // List of bin #s that overlap min, max
195
- const overlappingBins = this.reg2bins(min, max)
179
+ const overlappingBins = reg2bins(min, max)
196
180
  const chunks: Chunk[] = []
197
181
 
198
182
  // Find chunks in overlapping bins. Leaf bins (< 4681) are not pruned
@@ -200,8 +184,8 @@ export default class BAI extends IndexFile {
200
184
  for (let bin = start; bin <= end; bin++) {
201
185
  if (ba.binIndex[bin]) {
202
186
  const binChunks = ba.binIndex[bin]
203
- for (let c = 0; c < binChunks.length; ++c) {
204
- chunks.push(new Chunk(binChunks[c].minv, binChunks[c].maxv, bin))
187
+ for (const binChunk of binChunks) {
188
+ chunks.push(binChunk)
205
189
  }
206
190
  }
207
191
  }
@@ -210,18 +194,31 @@ export default class BAI extends IndexFile {
210
194
  // Use the linear index to find minimum file position of chunks that could
211
195
  // contain alignments in the region
212
196
  const nintv = ba.linearIndex.length
213
- let lowest = null
197
+ let lowest: VirtualOffset | undefined
214
198
  const minLin = Math.min(min >> 14, nintv - 1)
215
199
  const maxLin = Math.min(max >> 14, nintv - 1)
216
200
  for (let i = minLin; i <= maxLin; ++i) {
217
201
  const vp = ba.linearIndex[i]
218
- if (vp) {
219
- if (!lowest || vp.compareTo(lowest) < 0) {
220
- lowest = vp
221
- }
202
+ if (vp && (!lowest || vp.compareTo(lowest) < 0)) {
203
+ lowest = vp
222
204
  }
223
205
  }
224
206
 
225
207
  return optimizeChunks(chunks, lowest)
226
208
  }
209
+
210
+ async parse(opts: BaseOpts = {}) {
211
+ if (!this.setupP) {
212
+ this.setupP = this._parse(opts).catch(e => {
213
+ this.setupP = undefined
214
+ throw e
215
+ })
216
+ }
217
+ return this.setupP
218
+ }
219
+
220
+ async hasRefSeq(seqId: number, opts: BaseOpts = {}) {
221
+ const header = await this.parse(opts)
222
+ return !!header.indices[seqId]?.binIndex
223
+ }
227
224
  }