@gmod/bbi 1.0.35 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/blockView.ts CHANGED
@@ -1,12 +1,13 @@
1
- /* eslint no-bitwise: ["error", { "allow": ["|"] }] */
2
1
  import { Observer } from 'rxjs'
3
- import { Parser } from '@gmod/binary-parser'
2
+ import { Parser } from 'binary-parser'
4
3
  import AbortablePromiseCache from 'abortable-promise-cache'
5
4
  import { GenericFilehandle } from 'generic-filehandle'
6
- import { unzip } from './unzip'
7
5
  import QuickLRU from 'quick-lru'
8
- import { Feature } from './bbi'
6
+
7
+ // locals
9
8
  import Range from './range'
9
+ import { unzip } from './unzip'
10
+ import { Feature } from './bbi'
10
11
  import { groupBlocks, checkAbortSignal } from './util'
11
12
 
12
13
  interface CoordRequest {
@@ -30,16 +31,6 @@ interface ReadData {
30
31
  length: number
31
32
  }
32
33
 
33
- interface SummaryBlock {
34
- chromId: number
35
- start: number
36
- end: number
37
- validCnt: number
38
- minScore: number
39
- maxScore: number
40
- sumData: number
41
- sumSqData: number
42
- }
43
34
  interface Options {
44
35
  signal?: AbortSignal
45
36
  request?: CoordRequest
@@ -49,7 +40,11 @@ const BIG_WIG_TYPE_GRAPH = 1
49
40
  const BIG_WIG_TYPE_VSTEP = 2
50
41
  const BIG_WIG_TYPE_FSTEP = 3
51
42
 
52
- function getParsers(isBigEndian: boolean): any {
43
+ function coordFilter(s1: number, e1: number, s2: number, e2: number): boolean {
44
+ return s1 < e2 && e1 >= s2
45
+ }
46
+
47
+ function getParsers(isBigEndian: boolean) {
53
48
  const le = isBigEndian ? 'big' : 'little'
54
49
  const summaryParser = new Parser()
55
50
  .endianess(le)
@@ -57,10 +52,11 @@ function getParsers(isBigEndian: boolean): any {
57
52
  .uint32('start')
58
53
  .uint32('end')
59
54
  .uint32('validCnt')
60
- .float('minScore')
61
- .float('maxScore')
62
- .float('sumData')
63
- .float('sumSqData')
55
+ .floatle('minScore')
56
+ .floatle('maxScore')
57
+ .floatle('sumData')
58
+ .floatle('sumSqData')
59
+ .saveOffset('offset')
64
60
 
65
61
  const leafParser = new Parser()
66
62
  .endianess(le)
@@ -70,24 +66,28 @@ function getParsers(isBigEndian: boolean): any {
70
66
  .choice({
71
67
  tag: 'isLeaf',
72
68
  choices: {
73
- 1: new Parser().array('blocksToFetch', {
69
+ 1: new Parser().endianess(le).array('blocksToFetch', {
74
70
  length: 'cnt',
75
71
  type: new Parser()
72
+ .endianess(le)
76
73
  .uint32('startChrom')
77
74
  .uint32('startBase')
78
75
  .uint32('endChrom')
79
76
  .uint32('endBase')
80
77
  .uint64('blockOffset')
81
- .uint64('blockSize'),
78
+ .uint64('blockSize')
79
+ .saveOffset('offset'),
82
80
  }),
83
81
  0: new Parser().array('recurOffsets', {
84
82
  length: 'cnt',
85
83
  type: new Parser()
84
+ .endianess(le)
86
85
  .uint32('startChrom')
87
86
  .uint32('startBase')
88
87
  .uint32('endChrom')
89
88
  .uint32('endBase')
90
- .uint64('blockOffset'),
89
+ .uint64('blockOffset')
90
+ .saveOffset('offset'),
91
91
  }),
92
92
  },
93
93
  })
@@ -99,6 +99,7 @@ function getParsers(isBigEndian: boolean): any {
99
99
  .string('rest', {
100
100
  zeroTerminated: true,
101
101
  })
102
+ .saveOffset('offset')
102
103
 
103
104
  const bigWigParser = new Parser()
104
105
  .endianess(le)
@@ -115,15 +116,19 @@ function getParsers(isBigEndian: boolean): any {
115
116
  choices: {
116
117
  [BIG_WIG_TYPE_FSTEP]: new Parser().array('items', {
117
118
  length: 'itemCount',
118
- type: new Parser().float('score'),
119
+ type: new Parser().floatle('score'),
119
120
  }),
120
121
  [BIG_WIG_TYPE_VSTEP]: new Parser().array('items', {
121
122
  length: 'itemCount',
122
- type: new Parser().int32('start').float('score'),
123
+ type: new Parser().endianess(le).int32('start').floatle('score'),
123
124
  }),
124
125
  [BIG_WIG_TYPE_GRAPH]: new Parser().array('items', {
125
126
  length: 'itemCount',
126
- type: new Parser().int32('start').int32('end').float('score'),
127
+ type: new Parser()
128
+ .endianess(le)
129
+ .int32('start')
130
+ .int32('end')
131
+ .floatle('score'),
127
132
  }),
128
133
  },
129
134
  })
@@ -144,70 +149,40 @@ function getParsers(isBigEndian: boolean): any {
144
149
  */
145
150
 
146
151
  export class BlockView {
147
- private cirTreeOffset: number
148
-
149
- private cirTreeLength: number
150
-
151
- private bbi: GenericFilehandle
152
-
153
- private isCompressed: boolean
154
-
155
- private isBigEndian: boolean
156
-
157
- private refsByName: any
158
-
159
- private blockType: string
160
-
161
152
  private cirTreePromise?: Promise<{ bytesRead: number; buffer: Buffer }>
162
153
 
163
154
  private featureCache = new AbortablePromiseCache({
164
155
  cache: new QuickLRU({ maxSize: 1000 }),
165
156
 
166
157
  fill: async (requestData: ReadData, signal: AbortSignal) => {
167
- const { length, offset } = requestData
168
- const { buffer } = await this.bbi.read(
169
- Buffer.alloc(length),
170
- 0,
171
- length,
172
- offset,
173
- { signal },
174
- )
158
+ const len = Number(requestData.length)
159
+ const off = Number(requestData.offset)
160
+ const { buffer } = await this.bbi.read(Buffer.alloc(len), 0, len, off, {
161
+ signal,
162
+ })
175
163
  return buffer
176
164
  },
177
165
  })
178
166
 
179
- private leafParser: any
180
-
181
- private bigWigParser: any
167
+ private leafParser: ReturnType<typeof getParsers>['leafParser']
182
168
 
183
- private bigBedParser: any
184
-
185
- private summaryParser: any
169
+ private bigBedParser: ReturnType<typeof getParsers>['bigBedParser']
186
170
 
187
171
  public constructor(
188
- bbi: GenericFilehandle,
189
- refsByName: any,
190
- cirTreeOffset: number,
191
- cirTreeLength: number,
192
- isBigEndian: boolean,
193
- isCompressed: boolean,
194
- blockType: string,
172
+ private bbi: GenericFilehandle,
173
+ private refsByName: any,
174
+ private cirTreeOffset: number,
175
+ private isBigEndian: boolean,
176
+ private isCompressed: boolean,
177
+ private blockType: string,
195
178
  ) {
196
179
  if (!(cirTreeOffset >= 0)) {
197
180
  throw new Error('invalid cirTreeOffset!')
198
181
  }
199
- if (!(cirTreeLength > 0)) {
200
- throw new Error('invalid cirTreeLength!')
201
- }
202
182
 
203
- this.cirTreeOffset = cirTreeOffset
204
- this.cirTreeLength = cirTreeLength
205
- this.isCompressed = isCompressed
206
- this.refsByName = refsByName
207
- this.isBigEndian = isBigEndian
208
- this.bbi = bbi
209
- this.blockType = blockType
210
- Object.assign(this, getParsers(isBigEndian))
183
+ const parsers = getParsers(isBigEndian)
184
+ this.leafParser = parsers.leafParser
185
+ this.bigBedParser = parsers.bigBedParser
211
186
  }
212
187
 
213
188
  public async readWigData(
@@ -219,16 +194,14 @@ export class BlockView {
219
194
  ) {
220
195
  try {
221
196
  const { refsByName, bbi, cirTreeOffset, isBigEndian } = this
222
- const { signal } = opts
223
197
  const chrId = refsByName[chrName]
224
198
  if (chrId === undefined) {
225
199
  observer.complete()
226
200
  }
227
201
  const request = { chrId, start, end }
228
202
  if (!this.cirTreePromise) {
229
- this.cirTreePromise = bbi.read(Buffer.alloc(48), 0, 48, cirTreeOffset, {
230
- signal,
231
- })
203
+ const off = Number(cirTreeOffset)
204
+ this.cirTreePromise = bbi.read(Buffer.alloc(48), 0, 48, off, opts)
232
205
  }
233
206
  const { buffer } = await this.cirTreePromise
234
207
  const cirBlockSize = isBigEndian
@@ -243,21 +216,23 @@ export class BlockView {
243
216
  level: number,
244
217
  ) => {
245
218
  try {
246
- const data = cirBlockData.slice(offset)
219
+ const data = cirBlockData.subarray(offset)
247
220
 
248
- const p = this.leafParser.parse(data).result
221
+ const p = this.leafParser.parse(data)
249
222
  if (p.blocksToFetch) {
250
223
  blocksToFetch = blocksToFetch.concat(
251
- p.blocksToFetch.filter(filterFeats).map((l: any): any => ({
252
- offset: l.blockOffset,
253
- length: l.blockSize,
254
- })),
224
+ p.blocksToFetch
225
+ .filter(filterFeats)
226
+ .map((l: { blockOffset: bigint; blockSize: bigint }) => ({
227
+ offset: l.blockOffset,
228
+ length: l.blockSize,
229
+ })),
255
230
  )
256
231
  }
257
232
  if (p.recurOffsets) {
258
233
  const recurOffsets = p.recurOffsets
259
234
  .filter(filterFeats)
260
- .map((l: any): any => l.blockOffset)
235
+ .map((l: { blockOffset: bigint }) => Number(l.blockOffset))
261
236
  if (recurOffsets.length > 0) {
262
237
  cirFobRecur(recurOffsets, level + 1)
263
238
  }
@@ -275,14 +250,18 @@ export class BlockView {
275
250
  )
276
251
  }
277
252
 
278
- const cirFobStartFetch = async (off: any, fr: any, level: number) => {
253
+ const cirFobStartFetch = async (
254
+ off: number[],
255
+ fr: Range,
256
+ level: number,
257
+ ) => {
279
258
  try {
280
259
  const length = fr.max() - fr.min()
281
260
  const offset = fr.min()
282
- const resultBuffer = await this.featureCache.get(
261
+ const resultBuffer: Buffer = await this.featureCache.get(
283
262
  `${length}_${offset}`,
284
263
  { length, offset },
285
- signal,
264
+ opts.signal,
286
265
  )
287
266
  for (let i = 0; i < off.length; i += 1) {
288
267
  if (fr.contains(off[i])) {
@@ -297,11 +276,11 @@ export class BlockView {
297
276
  observer.error(e)
298
277
  }
299
278
  }
300
- const cirFobRecur = (offset: any, level: number) => {
279
+ const cirFobRecur = (offset: number[], level: number) => {
301
280
  try {
302
281
  outstanding += offset.length
303
282
 
304
- const maxCirBlockSpan = 4 + cirBlockSize * 32 // Upper bound on size, based on a completely full leaf node.
283
+ const maxCirBlockSpan = 4 + Number(cirBlockSize) * 32 // Upper bound on size, based on a completely full leaf node.
305
284
  let spans = new Range(offset[0], offset[0] + maxCirBlockSpan)
306
285
  for (let i = 1; i < offset.length; i += 1) {
307
286
  const blockSpan = new Range(offset[i], offset[i] + maxCirBlockSpan)
@@ -313,41 +292,64 @@ export class BlockView {
313
292
  }
314
293
  }
315
294
 
316
- return cirFobRecur([cirTreeOffset + 48], 1)
295
+ return cirFobRecur([Number(cirTreeOffset) + 48], 1)
317
296
  } catch (e) {
318
297
  observer.error(e)
319
298
  }
320
299
  }
321
300
 
322
301
  private parseSummaryBlock(
323
- data: Buffer,
302
+ buffer: Buffer,
324
303
  startOffset: number,
325
304
  request?: CoordRequest,
326
305
  ) {
327
- const features = [] as SummaryBlock[]
328
- let currOffset = startOffset
329
- while (currOffset < data.byteLength) {
330
- const res = this.summaryParser.parse(data.slice(currOffset))
331
- features.push(res.result)
332
- currOffset += res.offset
333
- }
334
- let items = features
335
- if (request) {
336
- items = items.filter(elt => elt.chromId === request.chrId)
337
- }
338
- const feats = items.map(
339
- (elt: SummaryBlock): Feature => ({
340
- start: elt.start,
341
- end: elt.end,
342
- maxScore: elt.maxScore,
343
- minScore: elt.minScore,
344
- score: elt.sumData / (elt.validCnt || 1),
345
- summary: true,
346
- }),
306
+ const features = [] as any[]
307
+ let offset = startOffset
308
+
309
+ const dataView = new DataView(
310
+ buffer.buffer,
311
+ buffer.byteOffset,
312
+ buffer.length,
347
313
  )
348
- return request
349
- ? feats.filter(f => BlockView.coordFilter(f, request))
350
- : feats
314
+ while (offset < buffer.byteLength) {
315
+ // this was extracted from looking at the runtime code generated by
316
+ // binary-parser
317
+ const chromId = dataView.getUint32(offset, true)
318
+ offset += 4
319
+ const start = dataView.getUint32(offset, true)
320
+ offset += 4
321
+ const end = dataView.getUint32(offset, true)
322
+ offset += 4
323
+ const validCnt = dataView.getUint32(offset, true)
324
+ offset += 4
325
+ const minScore = dataView.getFloat32(offset, true)
326
+ offset += 4
327
+ const maxScore = dataView.getFloat32(offset, true)
328
+ offset += 4
329
+ const sumData = dataView.getFloat32(offset, true)
330
+ offset += 4
331
+ // unused
332
+ // const sumSqData = dataView.getFloat32(offset, true)
333
+ offset += 4
334
+
335
+ if (
336
+ request
337
+ ? chromId === request.chrId &&
338
+ coordFilter(start, end, request.start, request.end)
339
+ : true
340
+ ) {
341
+ features.push({
342
+ start,
343
+ end,
344
+ maxScore,
345
+ minScore,
346
+ summary: true,
347
+ score: sumData / (validCnt || 1),
348
+ })
349
+ }
350
+ }
351
+
352
+ return features
351
353
  }
352
354
 
353
355
  private parseBigBedBlock(
@@ -359,56 +361,89 @@ export class BlockView {
359
361
  const items = [] as Feature[]
360
362
  let currOffset = startOffset
361
363
  while (currOffset < data.byteLength) {
362
- const res = this.bigBedParser.parse(data.slice(currOffset))
363
- res.result.uniqueId = `bb-${offset + currOffset}`
364
- items.push(res.result)
364
+ const res = this.bigBedParser.parse(data.subarray(currOffset))
365
+ items.push({ ...res, uniqueId: `bb-${offset + currOffset}` })
365
366
  currOffset += res.offset
366
367
  }
367
368
 
368
369
  return request
369
- ? items.filter((f: any) => BlockView.coordFilter(f, request))
370
+ ? items.filter((f: any) =>
371
+ coordFilter(f.start, f.end, request.start, request.end),
372
+ )
370
373
  : items
371
374
  }
372
375
 
373
376
  private parseBigWigBlock(
374
- bytes: Buffer,
377
+ buffer: Buffer,
375
378
  startOffset: number,
376
379
  request?: CoordRequest,
377
- ): Feature[] {
378
- const data = bytes.slice(startOffset)
379
- const results = this.bigWigParser.parse(data).result
380
- const { items, itemSpan, itemStep, blockStart, blockType } = results
381
- if (blockType === BIG_WIG_TYPE_FSTEP) {
382
- for (let i = 0; i < items.length; i++) {
383
- items[i].start = blockStart + i * itemStep
384
- items[i].end = blockStart + i * itemStep + itemSpan
385
- }
386
- } else if (blockType === BIG_WIG_TYPE_VSTEP) {
387
- for (let i = 0; i < items.length; i++) {
388
- items[i].end = items[i].start + itemSpan
389
- }
380
+ ) {
381
+ const b = buffer.subarray(startOffset)
382
+
383
+ const dataView = new DataView(b.buffer, b.byteOffset, b.length)
384
+ let offset = 0
385
+ offset += 4
386
+ const blockStart = dataView.getInt32(offset, true)
387
+ offset += 8
388
+ const itemStep = dataView.getUint32(offset, true)
389
+ offset += 4
390
+ const itemSpan = dataView.getUint32(offset, true)
391
+ offset += 4
392
+ const blockType = dataView.getUint8(offset)
393
+ offset += 2
394
+ const itemCount = dataView.getUint16(offset, true)
395
+ offset += 2
396
+ const items = new Array(itemCount)
397
+ switch (blockType) {
398
+ case 1:
399
+ for (let i = 0; i < itemCount; i++) {
400
+ const start = dataView.getInt32(offset, true)
401
+ offset += 4
402
+ const end = dataView.getInt32(offset, true)
403
+ offset += 4
404
+ const score = dataView.getFloat32(offset, true)
405
+ offset += 4
406
+ items[i] = { start, end, score }
407
+ }
408
+ break
409
+ case 2:
410
+ for (let i = 0; i < itemCount; i++) {
411
+ const start = dataView.getInt32(offset, true)
412
+ offset += 4
413
+ const score = dataView.getFloat32(offset, true)
414
+ offset += 4
415
+ items[i] = { score, start, end: start + itemSpan }
416
+ }
417
+ break
418
+ case 3:
419
+ for (let i = 0; i < itemCount; i++) {
420
+ const score = dataView.getFloat32(offset, true)
421
+ offset += 4
422
+ const start = blockStart + i * itemStep
423
+ items[i] = { score, start, end: start + itemSpan }
424
+ }
425
+ break
390
426
  }
427
+
391
428
  return request
392
- ? items.filter((f: any) => BlockView.coordFilter(f, request))
429
+ ? items.filter((f: any) =>
430
+ coordFilter(f.start, f.end, request.start, request.end),
431
+ )
393
432
  : items
394
433
  }
395
434
 
396
- private static coordFilter(f: Feature, range: CoordRequest): boolean {
397
- return f.start < range.end && f.end >= range.start
398
- }
399
-
400
435
  public async readFeatures(
401
436
  observer: Observer<Feature[]>,
402
- blocks: any,
437
+ blocks: { offset: bigint; length: bigint }[],
403
438
  opts: Options = {},
404
- ): Promise<void> {
439
+ ) {
405
440
  try {
406
441
  const { blockType, isCompressed } = this
407
442
  const { signal, request } = opts
408
443
  const blockGroupsToFetch = groupBlocks(blocks)
409
444
  checkAbortSignal(signal)
410
445
  await Promise.all(
411
- blockGroupsToFetch.map(async (blockGroup: any) => {
446
+ blockGroupsToFetch.map(async blockGroup => {
412
447
  checkAbortSignal(signal)
413
448
  const { length, offset } = blockGroup
414
449
  const data = await this.featureCache.get(
@@ -416,12 +451,12 @@ export class BlockView {
416
451
  blockGroup,
417
452
  signal,
418
453
  )
419
- blockGroup.blocks.forEach((block: any) => {
454
+ blockGroup.blocks.forEach(block => {
420
455
  checkAbortSignal(signal)
421
- let blockOffset = block.offset - blockGroup.offset
456
+ let blockOffset = Number(block.offset) - Number(blockGroup.offset)
422
457
  let resultData = data
423
458
  if (isCompressed) {
424
- resultData = unzip(data.slice(blockOffset))
459
+ resultData = unzip(data.subarray(blockOffset))
425
460
  blockOffset = 0
426
461
  }
427
462
  checkAbortSignal(signal)
@@ -442,8 +477,7 @@ export class BlockView {
442
477
  this.parseBigBedBlock(
443
478
  resultData,
444
479
  blockOffset,
445
- // eslint-disable-next-line no-bitwise
446
- block.offset * (1 << 8),
480
+ Number(block.offset) * (1 << 8),
447
481
  request,
448
482
  ),
449
483
  )
package/src/declare.d.ts CHANGED
@@ -1,4 +1,2 @@
1
- declare module 'es6-promisify'
2
1
  declare module 'abortable-promise-cache'
3
- declare module '@gmod/binary-parser'
4
2
  declare module '@gmod/bed'
package/src/unzip-pako.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { inflate } from 'pako'
1
+ import { inflateRaw } from 'pako'
2
2
 
3
3
  export function unzip(input: Buffer) {
4
- return Buffer.from(inflate(input))
4
+ return inflateRaw(input.subarray(2))
5
5
  }
package/src/util.ts CHANGED
@@ -9,15 +9,24 @@ export class AbortError extends Error {
9
9
  }
10
10
  // sort blocks by file offset and
11
11
  // group blocks that are within 2KB of eachother
12
- export function groupBlocks(blocks: any[]): any[] {
13
- blocks.sort((b0, b1) => (b0.offset | 0) - (b1.offset | 0))
12
+ export function groupBlocks(blocks: { offset: bigint; length: bigint }[]) {
13
+ blocks.sort((b0, b1) => Number(b0.offset) - Number(b1.offset))
14
14
 
15
15
  const blockGroups = []
16
16
  let lastBlock
17
17
  let lastBlockEnd
18
18
  for (let i = 0; i < blocks.length; i += 1) {
19
- if (lastBlock && blocks[i].offset - lastBlockEnd <= 2000) {
20
- lastBlock.length += blocks[i].length - lastBlockEnd + blocks[i].offset
19
+ if (
20
+ lastBlock &&
21
+ lastBlockEnd &&
22
+ Number(blocks[i].offset) - lastBlockEnd <= 2000
23
+ ) {
24
+ lastBlock.length = BigInt(
25
+ Number(lastBlock.length) +
26
+ Number(blocks[i].length) -
27
+ lastBlockEnd +
28
+ Number(blocks[i].offset),
29
+ )
21
30
  lastBlock.blocks.push(blocks[i])
22
31
  } else {
23
32
  blockGroups.push(
@@ -28,7 +37,7 @@ export function groupBlocks(blocks: any[]): any[] {
28
37
  }),
29
38
  )
30
39
  }
31
- lastBlockEnd = lastBlock.offset + lastBlock.length
40
+ lastBlockEnd = Number(lastBlock.offset) + Number(lastBlock.length)
32
41
  }
33
42
 
34
43
  return blockGroups