@gmod/bbi 1.0.32 → 1.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/bbi.ts ADDED
@@ -0,0 +1,380 @@
1
+ import { Parser } from '@gmod/binary-parser'
2
+ import { LocalFile, RemoteFile, GenericFilehandle } from 'generic-filehandle'
3
+ import { Observable, Observer } from 'rxjs'
4
+ import { reduce } from 'rxjs/operators'
5
+ import AbortablePromiseCache from 'abortable-promise-cache'
6
+ import QuickLRU from 'quick-lru'
7
+ import { BlockView } from './blockView'
8
+
9
+ const BIG_WIG_MAGIC = -2003829722
10
+ const BIG_BED_MAGIC = -2021002517
11
+
12
+ export interface Feature {
13
+ start: number
14
+ end: number
15
+ score: number
16
+ rest?: string // for bigbed line
17
+ minScore?: number // for summary line
18
+ maxScore?: number // for summary line
19
+ summary?: boolean // is summary line
20
+ uniqueId?: string // for bigbed contains uniqueId calculated from file offset
21
+ field?: number // used in bigbed searching
22
+ }
23
+ interface Statistics {
24
+ scoreSum: number
25
+ basesCovered: number
26
+ scoreSumSquares: number
27
+ }
28
+
29
+ interface RefInfo {
30
+ name: string
31
+ id: number
32
+ length: number
33
+ }
34
+ export interface Header { // main header fields merged with the chromosome lookup tables
35
+ autoSql: string // text sliced from the header buffer at asOffset; only assigned when asOffset is non-zero
36
+ totalSummary: Statistics // parsed at totalSummaryOffset; only assigned when that offset is non-zero
37
+ zoomLevels: any // array of { reductionLevel, reserved, dataOffset, indexOffset } records
38
+ unzoomedIndexOffset: number
39
+ unzoomedDataOffset: number
40
+ definedFieldCount: number
41
+ uncompressBufSize: number // views receive the boolean uncompressBufSize > 0
42
+ chromTreeOffset: number
43
+ fileSize: number // NOTE(review): headerParser in this file never produces this field - confirm where it is set
44
+ extHeaderOffset: number // name index offset, used in bigbed
45
+ isBigEndian: boolean // true when the magic number only matches as a big-endian int32
46
+ fileType: string // 'bigbed' or 'bigwig', derived from the magic number
47
+ refsByName: { [key: string]: number } // renamed reference name -> refId
48
+ refsByNumber: { [key: number]: RefInfo } // refId -> record carrying the original (un-renamed) name
49
+ }
50
+
51
+ /* get the compiled parsers for different sections of a bigwig or bigbed file
52
+ *
53
+ * @param isBE - is big endian, typically false
54
+ * @return an object holding the chromTreeParser, totalSummaryParser, headerParser and isLeafNode parsers
55
+ */
56
+ function getParsers(isBE: boolean): any {
57
+ const le = isBE ? 'big' : 'little' // despite the name, holds whichever byte order was requested
58
+ const headerParser = new Parser() // 64 fixed bytes, then numZoomLevels records of 24 bytes each
59
+ .endianess(le)
60
+ .int32('magic')
61
+ .uint16('version')
62
+ .uint16('numZoomLevels')
63
+ .uint64('chromTreeOffset')
64
+ .uint64('unzoomedDataOffset')
65
+ .uint64('unzoomedIndexOffset')
66
+ .uint16('fieldCount')
67
+ .uint16('definedFieldCount')
68
+ .uint64('asOffset') // autoSql offset, used in bigbed
69
+ .uint64('totalSummaryOffset')
70
+ .uint32('uncompressBufSize')
71
+ .uint64('extHeaderOffset') // name index offset, used in bigbed
72
+ .array('zoomLevels', {
73
+ length: 'numZoomLevels',
74
+ type: new Parser() // NOTE(review): no endianess() call on this nested parser - confirm it follows the parent's byte order
75
+ .uint32('reductionLevel')
76
+ .uint32('reserved')
77
+ .uint64('dataOffset')
78
+ .uint64('indexOffset'),
79
+ })
80
+
81
+ const totalSummaryParser = new Parser() // 40 bytes: one uint64 followed by four doubles
82
+ .endianess(le)
83
+ .uint64('basesCovered')
84
+ .double('scoreMin')
85
+ .double('scoreMax')
86
+ .double('scoreSum')
87
+ .double('scoreSumSquares')
88
+
89
+ const chromTreeParser = new Parser() // parses the first 24 bytes of the chromosome tree header
90
+ .endianess(le)
91
+ .uint32('magic')
92
+ .uint32('blockSize')
93
+ .uint32('keySize')
94
+ .uint32('valSize')
95
+ .uint64('itemCount')
96
+
97
+ const isLeafNode = new Parser() // 4-byte node header: leaf flag, one skipped byte, item count
98
+ .endianess(le)
99
+ .uint8('isLeafNode')
100
+ .skip(1)
101
+ .uint16('cnt')
102
+
103
+ return {
104
+ chromTreeParser,
105
+ totalSummaryParser,
106
+ headerParser,
107
+ isLeafNode,
108
+ }
109
+ }
110
+
111
+ export interface RequestOptions {
112
+ signal?: AbortSignal
113
+ headers?: Record<string, string>
114
+ [key: string]: unknown
115
+ }
116
+
117
+ export abstract class BBI {
118
+ protected bbi: GenericFilehandle
119
+
120
+ protected headerCache = new AbortablePromiseCache({
121
+ cache: new QuickLRU({ maxSize: 1 }),
122
+ fill: async (params: any, signal?: AbortSignal) => {
123
+ return this._getHeader({ ...params, signal })
124
+ },
125
+ })
126
+
127
+ protected renameRefSeqs: (a: string) => string
128
+
129
+ /* fetch and parse header information from a bigwig or bigbed file
130
+ * @param opts - request options, or a bare AbortSignal (detected via its 'aborted' property) which is wrapped as { signal }
131
+ * @return the headerCache entry keyed on the JSON-serialized options, filled through _getHeader
132
+ */
133
+ public getHeader(opts: RequestOptions | AbortSignal = {}) {
134
+ const options = 'aborted' in opts ? { signal: opts } : opts
135
+ return this.headerCache.get(
136
+ JSON.stringify(options), // cache key
137
+ options, // handed to the cache's fill function
138
+ options.signal,
139
+ )
140
+ }
141
+
142
+ /* the data source is picked in the order filehandle, url, path; an Error('no file given') is thrown when none is set
143
+ * @param filehandle - a filehandle from generic-filehandle or implementing something similar to the node10 fs.promises API
144
+ * @param path - a Local file path as a string
145
+ * @param url - a URL string
146
+ * @param renameRefSeqs - an optional method to rename the internal reference sequences using a mapping function; defaults to the identity function
147
+ */
148
+ public constructor(
149
+ options: {
150
+ filehandle?: GenericFilehandle
151
+ path?: string
152
+ url?: string
153
+ renameRefSeqs?: (a: string) => string
154
+ } = {},
155
+ ) {
156
+ const { filehandle, renameRefSeqs, path, url } = options
157
+ this.renameRefSeqs = renameRefSeqs || ((s: string): string => s)
158
+ if (filehandle) { // an explicit filehandle wins over url and path
159
+ this.bbi = filehandle
160
+ } else if (url) {
161
+ this.bbi = new RemoteFile(url)
162
+ } else if (path) {
163
+ this.bbi = new LocalFile(path)
164
+ } else {
165
+ throw new Error('no file given')
166
+ }
167
+ }
168
+
169
+ private async _getHeader(opts: RequestOptions) { // headerCache fill: main header first, then the chromosome tree
170
+ const header = await this._getMainHeader(opts)
171
+ const chroms = await this._readChromTree(header, opts) // needs the offsets and byte order from the main header
172
+ return { ...header, ...chroms } // header fields plus refsByName / refsByNumber
173
+ }
174
+
175
+ /* read and parse the main header from the start of the file, doubling requestSize and retrying
176
+ * until the autoSql offset and the whole 40-byte totalSummary record lie inside the fetched buffer
177
+ * @return the parsed header plus isBigEndian and fileType (the chromosome tree is read separately) */
178
+ private async _getMainHeader(
179
+ opts: RequestOptions, // forwarded to the filehandle read
180
+ requestSize = 2000, // number of bytes fetched from offset 0
181
+ ): Promise<Header> {
182
+ const { buffer } = await this.bbi.read(
183
+ Buffer.alloc(requestSize),
184
+ 0,
185
+ requestSize,
186
+ 0,
187
+ opts,
188
+ )
189
+ const isBigEndian = this._isBigEndian(buffer)
190
+ const ret = getParsers(isBigEndian)
191
+ const header = ret.headerParser.parse(buffer).result
192
+ header.fileType = header.magic === BIG_BED_MAGIC ? 'bigbed' : 'bigwig'
193
+ if (
194
+ header.asOffset > requestSize ||
195
+ header.totalSummaryOffset > requestSize - 40 // one uint64 + four doubles must fit entirely
196
+ ) {
197
+ return this._getMainHeader(opts, requestSize * 2)
198
+ }
199
+ if (header.asOffset) {
200
+ header.autoSql = buffer // NUL-terminated text starting at asOffset
201
+ .slice(header.asOffset, buffer.indexOf(0, header.asOffset))
202
+ .toString('utf8')
203
+ }
204
+ if (header.totalSummaryOffset) {
205
+ const tail = buffer.slice(header.totalSummaryOffset)
206
+ header.totalSummary = ret.totalSummaryParser.parse(tail).result
207
+ }
208
+ return { ...header, isBigEndian }
209
+ }
210
+
211
+ private _isBigEndian(buffer: Buffer): boolean { // detect the byte order from the first 4 bytes of the file
212
+ let ret = buffer.readInt32LE(0)
213
+ if (ret === BIG_WIG_MAGIC || ret === BIG_BED_MAGIC) {
214
+ return false // magic matches when read little-endian
215
+ }
216
+ ret = buffer.readInt32BE(0)
217
+ if (ret === BIG_WIG_MAGIC || ret === BIG_BED_MAGIC) {
218
+ return true // magic only matches when read big-endian
219
+ }
220
+ throw new Error('not a BigWig/BigBed file') // neither byte order yields a known magic
221
+ }
222
+
223
+ // read the chromosome tree into the refsByName / refsByNumber lookup tables. todo: add progress if long running
224
+ private async _readChromTree(header: Header, opts: { signal?: AbortSignal }) {
225
+ const isBE = header.isBigEndian
226
+ const le = isBE ? 'big' : 'little'
227
+ const refsByNumber: {
228
+ [key: number]: { name: string; id: number; length: number }
229
+ } = []
230
+ const refsByName: { [key: string]: number } = {}
231
+ const { chromTreeOffset } = header
232
+ let { unzoomedDataOffset } = header
233
+
234
+ while (unzoomedDataOffset % 4 !== 0) { // round the end offset up to a multiple of 4
235
+ unzoomedDataOffset += 1
236
+ }
237
+
238
+ const { buffer: data } = await this.bbi.read( // one read covering chromTreeOffset up to the rounded unzoomedDataOffset
239
+ Buffer.alloc(unzoomedDataOffset - chromTreeOffset),
240
+ 0,
241
+ unzoomedDataOffset - chromTreeOffset,
242
+ chromTreeOffset,
243
+ opts,
244
+ )
245
+
246
+ const p = getParsers(isBE)
247
+ const { keySize } = p.chromTreeParser.parse(data).result
248
+ const leafNodeParser = new Parser() // leaf item: keySize-byte name, then refId and refSize
249
+ .endianess(le)
250
+ .string('key', { stripNull: true, length: keySize })
251
+ .uint32('refId')
252
+ .uint32('refSize')
253
+ const nonleafNodeParser = new Parser() // index item: the key is skipped, only the child offset is kept
254
+ .endianess(le)
255
+ .skip(keySize)
256
+ .uint64('childOffset')
257
+ const rootNodeOffset = 32 // position of the first node within data
258
+ const bptReadNode = async (currentOffset: number): Promise<void> => { // recursive walk over the nodes held in data
259
+ let offset = currentOffset
260
+ if (offset >= data.length) {
261
+ throw new Error('reading beyond end of buffer')
262
+ }
263
+ const ret = p.isLeafNode.parse(data.slice(offset))
264
+ const { isLeafNode, cnt } = ret.result
265
+ offset += ret.offset // step over the node header
266
+ if (isLeafNode) {
267
+ for (let n = 0; n < cnt; n += 1) {
268
+ const leafRet = leafNodeParser.parse(data.slice(offset))
269
+ offset += leafRet.offset
270
+ const { key, refId, refSize } = leafRet.result
271
+ const refRec = { name: key, id: refId, length: refSize } // keeps the original name
272
+ refsByName[this.renameRefSeqs(key)] = refId // indexed under the renamed name
273
+ refsByNumber[refId] = refRec
274
+ }
275
+ } else {
276
+ // parse index node
277
+ const nextNodes = []
278
+ for (let n = 0; n < cnt; n += 1) {
279
+ const nonleafRet = nonleafNodeParser.parse(data.slice(offset))
280
+ let { childOffset } = nonleafRet.result
281
+ offset += nonleafRet.offset
282
+ childOffset -= chromTreeOffset // file offset -> offset within data
283
+ nextNodes.push(bptReadNode(childOffset))
284
+ }
285
+ await Promise.all(nextNodes)
286
+ }
287
+ }
288
+ await bptReadNode(rootNodeOffset)
289
+ return {
290
+ refsByName,
291
+ refsByNumber,
292
+ }
293
+ }
294
+
295
+ /*
296
+ * fetches the "unzoomed" view of the bigwig data. this is the default for bigbed
297
+ * @param opts - request options forwarded to getHeader
298
+ */
299
+ protected async getUnzoomedView(opts: RequestOptions): Promise<BlockView> {
300
+ const {
301
+ unzoomedIndexOffset,
302
+ zoomLevels,
303
+ refsByName,
304
+ uncompressBufSize,
305
+ isBigEndian,
306
+ fileType,
307
+ } = await this.getHeader(opts)
308
+ const nzl = zoomLevels[0]
309
+ const cirLen = nzl ? nzl.dataOffset - unzoomedIndexOffset : 4000 // span up to the first zoom level's data, or 4000 when there are no zoom levels
310
+ return new BlockView(
311
+ this.bbi,
312
+ refsByName,
313
+ unzoomedIndexOffset,
314
+ cirLen,
315
+ isBigEndian,
316
+ uncompressBufSize > 0,
317
+ fileType, // 'bigbed' or 'bigwig' as set by _getMainHeader
318
+ )
319
+ }
320
+
321
+ /*
322
+ * abstract method - get the view for a given scale; getFeatureStream calls it with 1 / basesPerSpan, opts.scale, or 1
323
+ */
324
+ protected abstract getView(
325
+ scale: number,
326
+ opts: RequestOptions,
327
+ ): Promise<BlockView>
328
+
329
+ /**
330
+ * Gets features from a BigWig file as an Observable of Feature arrays
331
+ *
332
+ * @param refName - The chromosome name, passed through renameRefSeqs before the lookup
333
+ * @param start - The start of a region
334
+ * @param end - The end of a region
335
+ * @param opts - An object containing basesPerSpan (converted to a scale of 1 / basesPerSpan) or scale used to infer the zoomLevel to use; basesPerSpan wins when both are set, and the scale falls back to 1
336
+ */
337
+ public async getFeatureStream(
338
+ refName: string,
339
+ start: number,
340
+ end: number,
341
+ opts: RequestOptions & { scale?: number; basesPerSpan?: number } = {
342
+ scale: 1,
343
+ },
344
+ ): Promise<Observable<Feature[]>> {
345
+ await this.getHeader(opts)
346
+ const chrName = this.renameRefSeqs(refName)
347
+ let view: BlockView
348
+
349
+ if (opts.basesPerSpan) {
350
+ view = await this.getView(1 / opts.basesPerSpan, opts)
351
+ } else if (opts.scale) {
352
+ view = await this.getView(opts.scale, opts)
353
+ } else {
354
+ view = await this.getView(1, opts) // neither option set (or both falsy)
355
+ }
356
+
357
+ if (!view) {
358
+ throw new Error('unable to get block view for data')
359
+ }
360
+ return new Observable((observer: Observer<Feature[]>): void => { // the read is started when the Observable is subscribed to
361
+ view.readWigData(chrName, start, end, observer, opts)
362
+ })
363
+ }
364
+
365
+ public async getFeatures( // same arguments as getFeatureStream, but collects the stream into a single array
366
+ refName: string,
367
+ start: number,
368
+ end: number,
369
+ opts: RequestOptions & { scale?: number; basesPerSpan?: number } = {
370
+ scale: 1,
371
+ },
372
+ ): Promise<Feature[]> {
373
+ const ob = await this.getFeatureStream(refName, start, end, opts)
374
+
375
+ const ret = await ob
376
+ .pipe(reduce((acc, curr) => acc.concat(curr))) // concatenate every emitted chunk
377
+ .toPromise()
378
+ return ret || [] // nothing emitted -> empty array
379
+ }
380
+ }
package/src/bigbed.ts ADDED
@@ -0,0 +1,239 @@
1
+ import { Parser } from '@gmod/binary-parser'
2
+ import { Observable, Observer, merge } from 'rxjs'
3
+ import { map, reduce } from 'rxjs/operators'
4
+ import AbortablePromiseCache from 'abortable-promise-cache'
5
+ import QuickLRU from 'quick-lru'
6
+
7
+ import { BBI, Feature, RequestOptions } from './bbi'
8
+ import { BlockView } from './blockView'
9
+
10
+ interface Loc {
11
+ key: string
12
+ offset: number
13
+ length: number
14
+ field?: number
15
+ }
16
+
17
+ interface Index {
18
+ type: number
19
+ fieldcount: number
20
+ offset: number
21
+ field: number
22
+ }
23
+
24
+ export function filterUndef<T>(ts: (T | undefined)[]): T[] { // drop the undefined entries and narrow the element type
25
+ return ts.filter((t: T | undefined): t is T => !!t) // truthiness test: other falsy values (0, '', null, false) are dropped too
26
+ }
27
+
28
+ export class BigBed extends BBI {
29
+ public readIndicesCache = new AbortablePromiseCache({
30
+ cache: new QuickLRU({ maxSize: 1 }),
31
+ fill: async (args: any, signal?: AbortSignal) => {
32
+ return this._readIndices({ ...args, signal })
33
+ },
34
+ })
35
+
36
+ public constructor(opts?: any) { // forwards the options object unchanged to the BBI constructor
37
+ super(opts)
38
+ }
39
+
40
+ public readIndices(opts: AbortSignal | RequestOptions = {}) { // cached front end for _readIndices
41
+ const options = 'aborted' in opts ? { signal: opts } : opts // a bare AbortSignal is wrapped as { signal }
42
+ return this.readIndicesCache.get(
43
+ JSON.stringify(options), // cache key
44
+ options,
45
+ options.signal,
46
+ )
47
+ }
48
+
49
+ /*
50
+ * retrieve unzoomed view for any scale
51
+ * @param scale - unused
52
+ * @param opts - request options (may carry an AbortSignal), passed on to getUnzoomedView
53
+ * @return promise for a BlockView
54
+ */
55
+ protected async getView(
56
+ scale: number,
57
+ opts: RequestOptions,
58
+ ): Promise<BlockView> {
59
+ return this.getUnzoomedView(opts)
60
+ }
61
+
62
+ /* parse the bigbed extraIndex fields
63
+ * @param opts - request options, forwarded to getHeader and to every filehandle read so that an abort signal is honoured
64
+ * @return a Promise for an array of Index data structure since there can be multiple extraIndexes in a bigbed, see bedToBigBed documentation */
65
+ private async _readIndices(opts: RequestOptions): Promise<Index[]> {
66
+ const { extHeaderOffset, isBigEndian } = await this.getHeader(opts)
67
+ if (!extHeaderOffset) { // offset 0 means the file has no extension header, hence no extra indexes
68
+ return []
69
+ }
70
+ const { buffer: data } = await this.bbi.read(
71
+ Buffer.alloc(64),
72
+ 0,
73
+ 64,
74
+ extHeaderOffset,
75
+ opts,
76
+ )
77
+ const le = isBigEndian ? 'big' : 'little'
78
+ const ret = new Parser() // extension header: size, number of extra indexes, offset of the index list
79
+ .endianess(le)
80
+ .uint16('size')
81
+ .uint16('count')
82
+ .uint64('offset')
83
+ .parse(data).result
84
+ const { count, offset } = ret
85
+ // no extra index is defined if count==0
86
+ if (count === 0) {
87
+ return []
88
+ }
89
+
90
+ const blocklen = 20 // bytes per extra index record
91
+ const len = blocklen * count
92
+ const { buffer } = await this.bbi.read(Buffer.alloc(len), 0, len, offset, opts)
93
+ const extParser = new Parser()
94
+ .endianess(le)
95
+ .int16('type')
96
+ .int16('fieldcount')
97
+ .uint64('offset')
98
+ .skip(4)
99
+ .int16('field')
100
+ const indices = []
101
+ for (let i = 0; i < count; i += 1) {
102
+ indices.push(extParser.parse(buffer.slice(i * blocklen)).result)
103
+ }
104
+ return indices
105
+ }
106
+
107
+ /*
108
+ * perform a search in the bigbed extraIndex to find which blocks in the bigbed data to look for the
109
+ * actual feature data
110
+ *
111
+ * @param name - the name to search for
112
+ * @param opts - a RequestOptions argument with optional signal
113
+ * @return a Promise for an array of bigbed block Loc entries, at most one per extraIndex
114
+ */
115
+ private async searchExtraIndexBlocks(
116
+ name: string,
117
+ opts: RequestOptions = {},
118
+ ): Promise<Loc[]> {
119
+ const { isBigEndian } = await this.getHeader(opts)
120
+ const indices = await this.readIndices(opts)
121
+ if (!indices.length) {
122
+ return []
123
+ }
124
+ const locs = indices.map(async (index: any): Promise<Loc | undefined> => { // one lookup per extraIndex, all started together
125
+ const { offset, field } = index
126
+ const { buffer: data } = await this.bbi.read( // 32 bytes of tree header at the index offset
127
+ Buffer.alloc(32),
128
+ 0,
129
+ 32,
130
+ offset,
131
+ opts,
132
+ )
133
+ const p = new Parser()
134
+ .endianess(isBigEndian ? 'big' : 'little')
135
+ .int32('magic')
136
+ .int32('blockSize')
137
+ .int32('keySize')
138
+ .int32('valSize')
139
+ .uint64('itemCount')
140
+
141
+ const { blockSize, keySize, valSize } = p.parse(data).result
142
+ const bpt = new Parser() // node parser: 4-byte node header, then cnt items whose layout depends on nodeType
143
+ .endianess(isBigEndian ? 'big' : 'little')
144
+ .int8('nodeType')
145
+ .skip(1)
146
+ .int16('cnt')
147
+ .choice({
148
+ tag: 'nodeType',
149
+ choices: {
150
+ 0: new Parser().array('leafkeys', { // nodeType 0: key + child offset; these entries are descended into below, the name notwithstanding
151
+ length: 'cnt',
152
+ type: new Parser()
153
+ .string('key', { length: keySize, stripNull: true })
154
+ .uint64('offset'),
155
+ }),
156
+ 1: new Parser().array('keys', { // nodeType 1: key + offset/length; these entries are matched against name below
157
+ length: 'cnt',
158
+ type: new Parser()
159
+ .string('key', { length: keySize, stripNull: true })
160
+ .uint64('offset')
161
+ .uint32('length')
162
+ .uint32('reserved'),
163
+ }),
164
+ },
165
+ })
166
+
167
+ const bptReadNode = async (
168
+ nodeOffset: number,
169
+ ): Promise<Loc | undefined> => {
170
+ const len = 4 + blockSize * (keySize + valSize) // node header plus blockSize items
171
+ const { buffer } = await this.bbi.read(
172
+ Buffer.alloc(len),
173
+ 0,
174
+ len,
175
+ nodeOffset,
176
+ opts,
177
+ )
178
+ const node = bpt.parse(buffer).result
179
+ if (node.leafkeys) {
180
+ let lastOffset
181
+ for (let i = 0; i < node.leafkeys.length; i += 1) {
182
+ const { key } = node.leafkeys[i]
183
+ if (name.localeCompare(key) < 0 && lastOffset) { // NOTE(review): localeCompare is locale-aware - confirm it agrees with the key order stored in the file
184
+ return bptReadNode(lastOffset) // name sorts before this key: descend into the previous child
185
+ }
186
+ lastOffset = node.leafkeys[i].offset
187
+ }
188
+ return bptReadNode(lastOffset) // NOTE(review): lastOffset stays undefined when the node holds no entries
189
+ }
190
+ for (let i = 0; i < node.keys.length; i += 1) {
191
+ if (node.keys[i].key === name) {
192
+ return { ...node.keys[i], field } // exact match: the parsed entry plus this index's field number
193
+ }
194
+ }
195
+
196
+ return undefined
197
+ }
198
+ const rootNodeOffset = 32 // the first node is read 32 bytes past the index offset
199
+ return bptReadNode(offset + rootNodeOffset)
200
+ })
201
+ return filterUndef(await Promise.all(locs)) // indexes without a match resolve to undefined and are dropped
202
+ }
203
+
204
+ /* retrieve the features from the bigbed data that were found through the lookup of the extraIndex
205
+ * note that there can be multiple extraIndex, see the BigBed specification and the -extraIndex argument to bedToBigBed;
206
+ * the hits from every matching index are combined into the returned array
207
+ * @param name - the name to search for
208
+ * @param opts - a RequestOptions argument with optional signal
209
+ * @return a Promise for an array of Feature */
210
+ public async searchExtraIndex(
211
+ name: string,
212
+ opts: RequestOptions = {},
213
+ ): Promise<Feature[]> {
214
+ const blocks = await this.searchExtraIndexBlocks(name, opts)
215
+ if (!blocks.length) {
216
+ return []
217
+ }
218
+ const view = await this.getUnzoomedView(opts)
219
+ const res = blocks.map(block => {
220
+ return new Observable((observer: Observer<Feature[]>) => {
221
+ view.readFeatures(observer, [block], opts)
222
+ }).pipe(
223
+ reduce((acc, curr) => acc.concat(curr)),
224
+ map(x => {
225
+ for (let i = 0; i < x.length; i += 1) {
226
+ x[i].field = block.field // remember which index field produced this hit
227
+ }
228
+ return x
229
+ }),
230
+ )
231
+ })
232
+ const ret = await merge(...res) // toPromise() keeps only the last emission, so fold all blocks first
233
+ .pipe(reduce((acc, curr) => acc.concat(curr), [] as Feature[]))
234
+ .toPromise()
235
+ return (ret || []).filter((f: any) => {
236
+ return f.rest.split('\t')[f.field - 3] === name // keep exact matches on the indexed column only
237
+ })
238
+ }
239
+ }
package/src/bigwig.ts ADDED
@@ -0,0 +1,46 @@
1
+ import { BlockView } from './blockView'
2
+ import { BBI, RequestOptions } from './bbi'
3
+
4
+ export class BigWig extends BBI {
5
+ /**
6
+ * Retrieves a BlockView of a specific zoomLevel
7
+ *
8
+ * @param scale - scale of the requested view; basesPerPx is computed as 1 / scale
9
+ * @param opts - request options forwarded to getHeader and getUnzoomedView
10
+ * @return scanning the zoom levels from the last index down, a 'summary' BlockView for the first one whose
11
+ * reductionLevel is at most 2 * basesPerPx, or the unzoomed view when none qualifies
12
+ */
13
+ protected async getView(
14
+ scale: number,
15
+ opts: RequestOptions,
16
+ ): Promise<BlockView> {
17
+ const { zoomLevels, refsByName, fileSize, isBigEndian, uncompressBufSize } =
18
+ await this.getHeader(opts)
19
+ const basesPerPx = 1 / scale
20
+ let maxLevel = zoomLevels.length // one past the last zoom level; the "zh &&" test below skips that slot
21
+ if (!fileSize) {
22
+ // if we don't know the file size, we can't fetch the highest zoom level :-(
23
+ maxLevel -= 1 // NOTE(review): this still leaves the last zoom level in range, whose indexLength below is then not a usable number - confirm intent
24
+ }
25
+
26
+ for (let i = maxLevel; i >= 0; i -= 1) {
27
+ const zh = zoomLevels[i]
28
+ if (zh && zh.reductionLevel <= 2 * basesPerPx) {
29
+ const indexLength =
30
+ i < zoomLevels.length - 1
31
+ ? zoomLevels[i + 1].dataOffset - zh.indexOffset // index runs up to the next zoom level's data
32
+ : fileSize - 4 - zh.indexOffset // last zoom level: derived from fileSize
33
+ return new BlockView(
34
+ this.bbi,
35
+ refsByName,
36
+ zh.indexOffset,
37
+ indexLength,
38
+ isBigEndian,
39
+ uncompressBufSize > 0,
40
+ 'summary',
41
+ )
42
+ }
43
+ }
44
+ return this.getUnzoomedView(opts) // no zoom level is fine-grained enough for this scale
45
+ }
46
+ }