@gmod/bbi 1.0.32 → 1.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,462 @@
1
+ /* eslint no-bitwise: ["error", { "allow": ["|"] }] */
2
+ import { Observer } from 'rxjs'
3
+ import { Parser } from '@gmod/binary-parser'
4
+ import AbortablePromiseCache from 'abortable-promise-cache'
5
+ import { GenericFilehandle } from 'generic-filehandle'
6
+ import { unzip } from './unzip'
7
+ import QuickLRU from 'quick-lru'
8
+ import { Feature } from './bbi'
9
+ import Range from './range'
10
+ import { groupBlocks, checkAbortSignal } from './util'
11
+
12
/** Query window handed to the block parsers: numeric chromosome id plus coordinates. */
interface CoordRequest {
  chrId: number
  start: number
  end: number
}

/**
 * Bounding box of one index-tree entry. Only the four chrom/base bounds are
 * read when deciding whether an entry overlaps the query; the statistic
 * fields are declared here but not read in this file.
 */
interface DataBlock {
  startChrom: number
  startBase: number
  endChrom: number
  endBase: number
  validCnt: number
  minVal: number
  maxVal: number
  sumData: number
  sumSqData: number
}

/** Byte range of the underlying file to read: `length` bytes starting at `offset`. */
interface ReadData {
  offset: number
  length: number
}

/** One record as decoded by the summary parser. */
interface SummaryBlock {
  chromId: number
  start: number
  end: number
  validCnt: number
  minScore: number
  maxScore: number
  sumData: number
  sumSqData: number
}

/** Per-call options: an optional abort signal and an optional coordinate filter. */
interface Options {
  signal?: AbortSignal
  request?: CoordRequest
}
47
+
48
// Values of the `blockType` byte in a bigWig data block header. They select
// which item layout follows: GRAPH items carry start/end/score, VSTEP items
// carry start/score, FSTEP items carry only a score.
const BIG_WIG_TYPE_GRAPH = 1
const BIG_WIG_TYPE_VSTEP = 2
const BIG_WIG_TYPE_FSTEP = 3

/**
 * Builds the binary parsers used by BlockView for a file of the given byte
 * order.
 *
 * @param isBigEndian - true when the file was written big-endian
 * @returns an object holding `bigWigParser`, `bigBedParser`, `summaryParser`
 *   and `leafParser`
 */
function getParsers(isBigEndian: boolean): any {
  const byteOrder = isBigEndian ? 'big' : 'little'

  // Bounding-box prefix shared by leaf and non-leaf index entries.
  const nodeBounds = (): any =>
    new Parser()
      .uint32('startChrom')
      .uint32('startBase')
      .uint32('endChrom')
      .uint32('endBase')

  // Wraps a per-item record parser into the `items` array of a bigWig block.
  const wigItems = (record: any): any =>
    new Parser().array('items', {
      length: 'itemCount',
      type: record,
    })

  const summaryParser = new Parser()
    .endianess(byteOrder)
    .uint32('chromId')
    .uint32('start')
    .uint32('end')
    .uint32('validCnt')
    .float('minScore')
    .float('maxScore')
    .float('sumData')
    .float('sumSqData')

  // Leaf entries point at a data block (offset + size); non-leaf entries
  // point at a child node (offset only).
  const leafEntry = nodeBounds()
    .uint64('blockOffset')
    .uint64('blockSize')
  const childEntry = nodeBounds().uint64('blockOffset')

  const leafParser = new Parser()
    .endianess(byteOrder)
    .uint8('isLeaf')
    .skip(1)
    .uint16('cnt')
    .choice({
      tag: 'isLeaf',
      choices: {
        1: new Parser().array('blocksToFetch', {
          length: 'cnt',
          type: leafEntry,
        }),
        0: new Parser().array('recurOffsets', {
          length: 'cnt',
          type: childEntry,
        }),
      },
    })

  const bigBedParser = new Parser()
    .endianess(byteOrder)
    .uint32('chromId')
    .int32('start')
    .int32('end')
    .string('rest', {
      zeroTerminated: true,
    })

  const bigWigParser = new Parser()
    .endianess(byteOrder)
    .skip(4)
    .int32('blockStart')
    .skip(4)
    .uint32('itemStep')
    .uint32('itemSpan')
    .uint8('blockType')
    .skip(1)
    .uint16('itemCount')
    .choice({
      tag: 'blockType',
      choices: {
        [BIG_WIG_TYPE_FSTEP]: wigItems(new Parser().float('score')),
        [BIG_WIG_TYPE_VSTEP]: wigItems(new Parser().int32('start').float('score')),
        [BIG_WIG_TYPE_GRAPH]: wigItems(
          new Parser()
            .int32('start')
            .int32('end')
            .float('score'),
        ),
      },
    })

  return { bigWigParser, bigBedParser, summaryParser, leafParser }
}
137
+
138
/**
 * View into a subset of the data in a BigWig/BigBed file. It walks the index
 * tree stored at `cirTreeOffset` to find the data blocks overlapping a query,
 * then reads, optionally decompresses, and parses those blocks, pushing the
 * resulting features to an observer.
 *
 * Adapted by Robert Buels and Colin Diesh from bigwig.js in the Dalliance Genome
 * Explorer by Thomas Down.
 */
export class BlockView {
  private cirTreeOffset: number

  private cirTreeLength: number

  private bbi: GenericFilehandle

  private isCompressed: boolean

  private isBigEndian: boolean

  private refsByName: any

  private blockType: string

  // Cached read of the 48-byte index header, shared by all queries on this view.
  private cirTreePromise?: Promise<{ bytesRead: number; buffer: Buffer }>

  // Cache of raw file reads, keyed by `${length}_${offset}`.
  private featureCache = new AbortablePromiseCache({
    cache: new QuickLRU({ maxSize: 1000 }),

    fill: async (requestData: ReadData, signal: AbortSignal) => {
      const { length, offset } = requestData
      const { buffer } = await this.bbi.read(
        Buffer.alloc(length),
        0,
        length,
        offset,
        { signal },
      )
      return buffer
    },
  })

  // The four parsers below are installed by Object.assign in the constructor.
  private leafParser: any

  private bigWigParser: any

  private bigBedParser: any

  private summaryParser: any

  /**
   * @param bbi - handle of the file to read from
   * @param refsByName - map from chromosome name to numeric chromosome id
   * @param cirTreeOffset - file offset of the index; must be >= 0
   * @param cirTreeLength - length of the index; must be > 0
   * @param isBigEndian - byte order of the file
   * @param isCompressed - whether data blocks must be inflated before parsing
   * @param blockType - 'summary', 'bigwig' or 'bigbed'; selects the block parser
   * @throws Error when cirTreeOffset or cirTreeLength is invalid (including NaN)
   */
  public constructor(
    bbi: GenericFilehandle,
    refsByName: any,
    cirTreeOffset: number,
    cirTreeLength: number,
    isBigEndian: boolean,
    isCompressed: boolean,
    blockType: string,
  ) {
    // Written as negated comparisons so that NaN/undefined are rejected too.
    if (!(cirTreeOffset >= 0)) {
      throw new Error('invalid cirTreeOffset!')
    }
    if (!(cirTreeLength > 0)) {
      throw new Error('invalid cirTreeLength!')
    }

    this.cirTreeOffset = cirTreeOffset
    this.cirTreeLength = cirTreeLength
    this.isCompressed = isCompressed
    this.refsByName = refsByName
    this.isBigEndian = isBigEndian
    this.bbi = bbi
    this.blockType = blockType
    Object.assign(this, getParsers(isBigEndian))
  }

  /**
   * Finds every data block overlapping `chrName:start-end` by walking the
   * index tree, then hands the block list to `readFeatures`, which emits the
   * parsed features on `observer` and completes it.
   *
   * An unknown chromosome name completes the observer immediately without
   * emitting anything. All failures are reported through `observer.error`;
   * the returned promise itself does not reject.
   *
   * @param chrName - chromosome name, looked up in `refsByName`
   * @param start - query start coordinate
   * @param end - query end coordinate
   * @param observer - receives feature batches, completion and errors
   * @param opts - options; `signal` aborts pending reads
   */
  public async readWigData(
    chrName: string,
    start: number,
    end: number,
    observer: Observer<Feature[]>,
    opts: Options,
  ): Promise<void> {
    try {
      const { refsByName, bbi, cirTreeOffset, isBigEndian } = this
      const { signal } = opts
      const chrId = refsByName[chrName]
      if (chrId === undefined) {
        // Nothing can match: finish here. Falling through would still walk
        // the index and complete the observer a second time.
        observer.complete()
        return
      }
      const request = { chrId, start, end }
      if (!this.cirTreePromise) {
        const headerRead = bbi.read(Buffer.alloc(48), 0, 48, cirTreeOffset, {
          signal,
        })
        this.cirTreePromise = headerRead
        // A failed or aborted header read must not stay cached, otherwise
        // every later query on this view would fail with the same error.
        headerRead.catch(() => {
          if (this.cirTreePromise === headerRead) {
            this.cirTreePromise = undefined
          }
        })
      }
      const { buffer } = await this.cirTreePromise
      const cirBlockSize = isBigEndian
        ? buffer.readUInt32BE(4)
        : buffer.readUInt32LE(4)
      let blocksToFetch: any[] = []
      // Number of index nodes requested but not yet parsed; when it drops to
      // zero the walk is finished and the data blocks can be read.
      let outstanding = 0

      // True when an index entry's bounding box overlaps the query.
      const filterFeats = (b: DataBlock) => {
        const { startChrom, startBase, endChrom, endBase } = b
        return (
          (startChrom < chrId || (startChrom === chrId && startBase <= end)) &&
          (endChrom > chrId || (endChrom === chrId && endBase >= start))
        )
      }

      // Parses one index node: collects overlapping leaf entries and recurses
      // into overlapping child nodes.
      const cirFobRecur2 = (
        cirBlockData: Buffer,
        offset: number,
        level: number,
      ) => {
        try {
          const data = cirBlockData.slice(offset)

          const p = this.leafParser.parse(data).result
          if (p.blocksToFetch) {
            blocksToFetch = blocksToFetch.concat(
              p.blocksToFetch.filter(filterFeats).map((l: any): any => ({
                offset: l.blockOffset,
                length: l.blockSize,
              })),
            )
          }
          if (p.recurOffsets) {
            const recurOffsets = p.recurOffsets
              .filter(filterFeats)
              .map((l: any): any => l.blockOffset)
            if (recurOffsets.length > 0) {
              cirFobRecur(recurOffsets, level + 1)
            }
          }
        } catch (e) {
          observer.error(e)
        }
      }

      // Reads one merged byte range and parses every requested node in it.
      const cirFobStartFetch = async (off: any, fr: any, level: number) => {
        try {
          const length = fr.max() - fr.min()
          const offset = fr.min()
          const resultBuffer = await this.featureCache.get(
            `${length}_${offset}`,
            { length, offset },
            signal,
          )
          for (let i = 0; i < off.length; i += 1) {
            if (fr.contains(off[i])) {
              cirFobRecur2(resultBuffer, off[i] - offset, level)
              outstanding -= 1
              if (outstanding === 0) {
                this.readFeatures(observer, blocksToFetch, { ...opts, request })
              }
            }
          }
        } catch (e) {
          observer.error(e)
        }
      }

      // Schedules reads for a set of node offsets, merging nearby nodes into
      // shared byte ranges.
      const cirFobRecur = (offset: any, level: number) => {
        try {
          outstanding += offset.length

          const maxCirBlockSpan = 4 + cirBlockSize * 32 // Upper bound on size, based on a completely full leaf node.
          let spans = new Range(offset[0], offset[0] + maxCirBlockSpan)
          for (let i = 1; i < offset.length; i += 1) {
            const blockSpan = new Range(offset[i], offset[i] + maxCirBlockSpan)
            spans = spans.union(blockSpan)
          }
          spans.getRanges().map(fr => cirFobStartFetch(offset, fr, level))
        } catch (e) {
          observer.error(e)
        }
      }

      // The root node follows directly after the 48-byte header.
      cirFobRecur([cirTreeOffset + 48], 1)
    } catch (e) {
      observer.error(e)
    }
  }

  /**
   * Parses consecutive summary records from `data`, starting at
   * `startOffset`, into features with `summary: true`. The score is
   * `sumData / validCnt`, with a zero `validCnt` treated as 1.
   *
   * @param request - when given, keeps only records on `request.chrId` that
   *   pass `coordFilter`
   */
  private parseSummaryBlock(
    data: Buffer,
    startOffset: number,
    request?: CoordRequest,
  ) {
    const features = [] as SummaryBlock[]
    let currOffset = startOffset
    while (currOffset < data.byteLength) {
      const res = this.summaryParser.parse(data.slice(currOffset))
      features.push(res.result)
      currOffset += res.offset
    }
    let items = features
    if (request) {
      items = items.filter(elt => elt.chromId === request.chrId)
    }
    const feats = items.map(
      (elt: SummaryBlock): Feature => ({
        start: elt.start,
        end: elt.end,
        maxScore: elt.maxScore,
        minScore: elt.minScore,
        score: elt.sumData / (elt.validCnt || 1),
        summary: true,
      }),
    )
    return request
      ? feats.filter(f => BlockView.coordFilter(f, request))
      : feats
  }

  /**
   * Parses consecutive bigBed records from `data`, starting at `startOffset`.
   * Each record gets a `uniqueId` of `bb-` followed by `offset` plus its
   * position within `data`.
   *
   * @param offset - base value used for building unique ids
   * @param request - when given, keeps only records passing `coordFilter`
   */
  private parseBigBedBlock(
    data: Buffer,
    startOffset: number,
    offset: number,
    request?: CoordRequest,
  ) {
    const items = [] as Feature[]
    let currOffset = startOffset
    while (currOffset < data.byteLength) {
      const res = this.bigBedParser.parse(data.slice(currOffset))
      res.result.uniqueId = `bb-${offset + currOffset}`
      items.push(res.result)
      currOffset += res.offset
    }

    return request
      ? items.filter((f: any) => BlockView.coordFilter(f, request))
      : items
  }

  /**
   * Parses one bigWig data block found at `startOffset` in `bytes`. Item
   * coordinates that the block type leaves implicit are filled in: start and
   * end for fixed-step blocks, end for variable-step blocks.
   *
   * @param request - when given, keeps only items passing `coordFilter`
   */
  private parseBigWigBlock(
    bytes: Buffer,
    startOffset: number,
    request?: CoordRequest,
  ): Feature[] {
    const data = bytes.slice(startOffset)
    const results = this.bigWigParser.parse(data).result
    const { items, itemSpan, itemStep, blockStart, blockType } = results
    if (blockType === BIG_WIG_TYPE_FSTEP) {
      for (let i = 0; i < items.length; i++) {
        items[i].start = blockStart + i * itemStep
        items[i].end = blockStart + i * itemStep + itemSpan
      }
    } else if (blockType === BIG_WIG_TYPE_VSTEP) {
      for (let i = 0; i < items.length; i++) {
        items[i].end = items[i].start + itemSpan
      }
    }
    return request
      ? items.filter((f: any) => BlockView.coordFilter(f, request))
      : items
  }

  /** True when the feature starts before the range end and ends at or after its start. */
  private static coordFilter(f: Feature, range: CoordRequest): boolean {
    return f.start < range.end && f.end >= range.start
  }

  /**
   * Reads the given data blocks, grouped into larger reads by `groupBlocks`,
   * and emits one `observer.next` batch per block, parsed according to
   * `blockType`. Completes the observer once every group is processed; any
   * failure, including an abort, is reported through `observer.error`.
   *
   * @param observer - receives feature batches, completion and errors
   * @param blocks - `{ offset, length }` descriptors of the blocks to read
   * @param opts - `signal` aborts the work; `request` filters emitted features
   */
  public async readFeatures(
    observer: Observer<Feature[]>,
    blocks: any,
    opts: Options = {},
  ): Promise<void> {
    try {
      const { blockType, isCompressed } = this
      const { signal, request } = opts
      const blockGroupsToFetch = groupBlocks(blocks)
      checkAbortSignal(signal)
      await Promise.all(
        blockGroupsToFetch.map(async (blockGroup: any) => {
          checkAbortSignal(signal)
          const { length, offset } = blockGroup
          const data = await this.featureCache.get(
            `${length}_${offset}`,
            blockGroup,
            signal,
          )
          blockGroup.blocks.forEach((block: any) => {
            checkAbortSignal(signal)
            // Position of this block inside the group's buffer.
            let blockOffset = block.offset - blockGroup.offset
            let resultData = data
            if (isCompressed) {
              // Inflated output starts at the block itself, so offset resets.
              resultData = unzip(data.slice(blockOffset))
              blockOffset = 0
            }
            checkAbortSignal(signal)

            switch (blockType) {
              case 'summary':
                observer.next(
                  this.parseSummaryBlock(resultData, blockOffset, request),
                )
                break
              case 'bigwig':
                observer.next(
                  this.parseBigWigBlock(resultData, blockOffset, request),
                )
                break
              case 'bigbed':
                observer.next(
                  this.parseBigBedBlock(
                    resultData,
                    blockOffset,
                    // Scale the block's file offset by 256 to form the base
                    // of the per-record unique ids.
                    // eslint-disable-next-line no-bitwise
                    block.offset * (1 << 8),
                    request,
                  ),
                )
                break
              default:
                console.warn(`Don't know what to do with ${blockType}`)
            }
          })
        }),
      )
      observer.complete()
    } catch (e) {
      observer.error(e)
    }
  }
}
@@ -0,0 +1,4 @@
1
+ declare module 'es6-promisify'
2
+ declare module 'abortable-promise-cache'
3
+ declare module '@gmod/binary-parser'
4
+ declare module '@gmod/bed'
package/src/index.ts ADDED
@@ -0,0 +1,3 @@
1
+ export { BigWig } from './bigwig'
2
+ export { BigBed } from './bigbed'
3
+ export { Feature, Header, RequestOptions } from './bbi'
package/src/range.ts ADDED
@@ -0,0 +1,142 @@
1
+ /* eslint prefer-rest-params:0, no-nested-ternary:0 */
2
+
3
/**
 * A set of one or more closed integer intervals `[min, max]`, kept as a list
 * of plain `{ min, max }` records in `ranges`.
 *
 * Adapted from a combination of Range and _Compound in the
 * Dalliance Genome Explorer, (c) Thomas Down 2006-2010.
 */
export default class Range {
  public ranges: any

  /**
   * Accepted call shapes:
   *  - `new Range(min, max)`: a single interval
   *  - `new Range(list)`: an array (or index-keyed object) of Range
   *    instances and/or `{ min, max }` records
   *  - `new Range(item)`: a single Range instance or `{ min, max }` record
   */
  public constructor(arg1: any, arg2?: any) {
    if (arguments.length === 2) {
      this.ranges = [{ min: arg1, max: arg2 }]
    } else if (0 in arg1) {
      // Must be a real array: the other methods rely on `length` and `map`.
      const list: any[] = Array.isArray(arg1) ? arg1 : Object.values(arg1)
      this.ranges = Range.toIntervals(list)
    } else {
      this.ranges = Range.toIntervals([arg1])
    }
  }

  // Flattens Range instances and `{ min, max }` records into plain records,
  // so `ranges` never holds objects whose `min`/`max` are methods.
  private static toIntervals(items: any[]): { min: number; max: number }[] {
    const out: { min: number; max: number }[] = []
    for (const item of items) {
      if (item instanceof Range) {
        for (const r of item.ranges) {
          out.push({ min: r.min, max: r.max })
        }
      } else {
        out.push(item)
      }
    }
    return out
  }

  /** Lower bound of the first interval. */
  public min(): number {
    return this.ranges[0].min
  }

  /** Upper bound of the last interval. */
  public max(): number {
    return this.ranges[this.ranges.length - 1].max
  }

  /** True when `pos` lies inside any interval, bounds included. */
  public contains(pos: number): boolean {
    return this.ranges.some(
      (r: { min: number; max: number }) => r.min <= pos && r.max >= pos,
    )
  }

  /** True when the range consists of exactly one interval, i.e. has no gaps. */
  public isContiguous(): boolean {
    return this.ranges.length === 1
  }

  /** Returns each interval as its own single-interval Range. */
  public getRanges(): Range[] {
    return this.ranges.map(
      (r: { min: number; max: number }) => new Range(r.min, r.max),
    )
  }

  /** Formats as `[min-max]` per interval, comma separated. */
  public toString(): string {
    return this.ranges
      .map((r: { min: number; max: number }) => `[${r.min}-${r.max}]`)
      .join(',')
  }

  /**
   * Returns the union of this range and `s1`. Intervals that overlap or are
   * directly adjacent (next.min <= current.max + 1) are merged.
   */
  public union(s1: Range): Range {
    const ranges = this.getRanges()
      .concat(s1.getRanges())
      .sort((a, b) => this.rangeOrder(a, b))
    const oranges: Range[] = []
    let current = ranges[0]

    for (let i = 1; i < ranges.length; i += 1) {
      const nxt = ranges[i]
      if (nxt.min() > current.max() + 1) {
        oranges.push(current)
        current = nxt
      } else if (nxt.max() > current.max()) {
        current = new Range(current.min(), nxt.max())
      }
    }
    oranges.push(current)

    if (oranges.length === 1) {
      return oranges[0]
    }
    return new Range(oranges)
  }

  /**
   * Returns the intersection of this range and `arg`.
   *
   * @throws Error when the two ranges do not overlap at all
   */
  public intersection(arg: Range): Range {
    const r0 = this.getRanges()
    const r1 = arg.getRanges()
    let i0 = 0
    let i1 = 0
    const or: Range[] = []

    // Two-pointer walk over both interval lists.
    while (i0 < r0.length && i1 < r1.length) {
      const s0 = r0[i0]
      const s1 = r1[i1]
      const lapMin = Math.max(s0.min(), s1.min())
      const lapMax = Math.min(s0.max(), s1.max())
      if (lapMax >= lapMin) {
        or.push(new Range(lapMin, lapMax))
      }
      // Advance whichever interval ends first.
      if (s0.max() > s1.max()) {
        i1 += 1
      } else {
        i0 += 1
      }
    }

    if (or.length === 0) {
      throw new Error('found range of length 0')
    }
    if (or.length === 1) {
      return or[0]
    }
    return new Range(or)
  }

  /** Total number of integer positions covered (each interval counts max - min + 1). */
  public coverage(): number {
    let tot = 0
    for (const r of this.getRanges()) {
      tot += r.max() - r.min() + 1
    }
    return tot
  }

  /**
   * Comparator ordering ranges by `min()`, then by `max()`. When called with
   * a single argument, compares `this` against that argument.
   *
   * @returns -1, 0 or 1
   */
  public rangeOrder(tmpa: Range, tmpb: Range): number {
    let a = tmpa
    let b = tmpb
    if (arguments.length < 2) {
      b = a
      // eslint-disable-next-line @typescript-eslint/no-this-alias
      a = this
    }

    if (a.min() < b.min()) {
      return -1
    }
    if (a.min() > b.min()) {
      return 1
    }
    if (a.max() < b.max()) {
      return -1
    }
    if (a.max() > b.max()) {
      return 1
    }
    return 0
  }
}
@@ -0,0 +1,5 @@
1
+ import { inflate } from 'pako'
2
+
3
/**
 * Inflates a compressed buffer with pako and returns the output as a Buffer.
 *
 * @param input - compressed bytes
 */
export function unzip(input: Buffer) {
  const inflated = inflate(input)
  return Buffer.from(inflated)
}
package/src/unzip.ts ADDED
@@ -0,0 +1,2 @@
1
+ import { inflateSync } from 'zlib'
2
+ export { inflateSync as unzip }
package/src/util.ts ADDED
@@ -0,0 +1,74 @@
1
+ /* eslint no-bitwise: ["error", { "allow": ["|"] }] */
2
/** Error carrying `code === 'ERR_ABORTED'`, used to report an aborted operation. */
export class AbortError extends Error {
  public code: string = 'ERR_ABORTED'

  public constructor(message: string) {
    super(message)
  }
}
10
/**
 * Sorts blocks by file offset and merges blocks whose gap to the previous
 * group is at most 2000 bytes, so that nearby blocks share one read.
 *
 * Note: `blocks` is sorted in place.
 *
 * @param blocks - descriptors with numeric `offset` and `length`
 * @returns groups of the form `{ blocks, offset, length }`, each spanning
 *   from its first block's offset to the furthest end of its member blocks
 */
export function groupBlocks(blocks: any[]): any[] {
  // Plain subtraction rather than `| 0`: the bitwise form truncates to 32
  // bits and mis-orders offsets beyond 2 GiB.
  blocks.sort((b0, b1) => b0.offset - b1.offset)

  const blockGroups: any[] = []
  let lastBlock: any
  let lastBlockEnd = 0
  for (const block of blocks) {
    if (lastBlock && block.offset - lastBlockEnd <= 2000) {
      // Only ever grow the group: a block lying entirely inside the current
      // span must not shrink it.
      const blockEnd = block.offset + block.length
      if (blockEnd > lastBlockEnd) {
        lastBlock.length = blockEnd - lastBlock.offset
      }
      lastBlock.blocks.push(block)
    } else {
      lastBlock = {
        blocks: [block],
        length: block.length,
        offset: block.offset,
      }
      blockGroups.push(lastBlock)
    }
    lastBlockEnd = lastBlock.offset + lastBlock.length
  }

  return blockGroups
}
36
+
37
/**
 * Throws if the given AbortSignal has been aborted.
 *
 * Where `DOMException` exists, the thrown value is a DOMException named
 * `AbortError`; otherwise it is an `AbortError` whose `code` is
 * `ERR_ABORTED`. Passing `undefined`, or a signal that is not aborted, does
 * nothing.
 *
 * @param signal - an AbortSignal, or anything with an `aborted` attribute
 */
export function checkAbortSignal(signal?: AbortSignal): void {
  if (!signal || !signal.aborted) {
    return
  }

  if (typeof DOMException === 'undefined') {
    const err = new AbortError('aborted')
    err.code = 'ERR_ABORTED'
    throw err
  }
  throw new DOMException('aborted', 'AbortError')
}
64
+
65
/**
 * Yields once by awaiting a resolved promise, then runs `checkAbortSignal`.
 * Await this inside an otherwise synchronous loop to give it a point at
 * which a received abort signal can interrupt the work.
 *
 * @param signal - optional abort signal to check after yielding
 */
export async function abortBreakPoint(signal?: AbortSignal): Promise<void> {
  const yielded = Promise.resolve()
  await yielded
  checkAbortSignal(signal)
}