@gmod/bam 1.1.17 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +65 -25
- package/README.md +108 -57
- package/dist/bai.d.ts +34 -15
- package/dist/bai.js +180 -273
- package/dist/bai.js.map +1 -1
- package/dist/bamFile.d.ts +33 -27
- package/dist/bamFile.js +353 -572
- package/dist/bamFile.js.map +1 -1
- package/dist/chunk.d.ts +4 -8
- package/dist/chunk.js +13 -21
- package/dist/chunk.js.map +1 -1
- package/dist/csi.d.ts +74 -10
- package/dist/csi.js +157 -256
- package/dist/csi.js.map +1 -1
- package/dist/errors.js +12 -57
- package/dist/errors.js.map +1 -1
- package/dist/htsget.d.ts +5 -8
- package/dist/htsget.js +120 -209
- package/dist/htsget.js.map +1 -1
- package/dist/index.d.ts +5 -6
- package/dist/index.js +11 -11
- package/dist/index.js.map +1 -1
- package/dist/indexFile.d.ts +0 -6
- package/dist/indexFile.js +3 -77
- package/dist/indexFile.js.map +1 -1
- package/dist/nullIndex.d.ts +7 -0
- package/dist/nullIndex.js +33 -0
- package/dist/nullIndex.js.map +1 -0
- package/dist/record.d.ts +2 -2
- package/dist/record.js +200 -193
- package/dist/record.js.map +1 -1
- package/dist/sam.js +12 -10
- package/dist/sam.js.map +1 -1
- package/dist/util.d.ts +13 -1
- package/dist/util.js +55 -58
- package/dist/util.js.map +1 -1
- package/dist/virtualOffset.js +13 -20
- package/dist/virtualOffset.js.map +1 -1
- package/esm/bai.d.ts +34 -15
- package/esm/bai.js +86 -91
- package/esm/bai.js.map +1 -1
- package/esm/bamFile.d.ts +33 -27
- package/esm/bamFile.js +124 -120
- package/esm/bamFile.js.map +1 -1
- package/esm/chunk.d.ts +4 -8
- package/esm/chunk.js +2 -8
- package/esm/chunk.js.map +1 -1
- package/esm/csi.d.ts +74 -10
- package/esm/csi.js +85 -93
- package/esm/csi.js.map +1 -1
- package/esm/htsget.d.ts +5 -8
- package/esm/htsget.js +68 -43
- package/esm/htsget.js.map +1 -1
- package/esm/index.d.ts +5 -6
- package/esm/index.js +5 -6
- package/esm/index.js.map +1 -1
- package/esm/indexFile.d.ts +0 -6
- package/esm/indexFile.js +0 -22
- package/esm/indexFile.js.map +1 -1
- package/esm/nullIndex.d.ts +7 -0
- package/esm/nullIndex.js +16 -0
- package/esm/nullIndex.js.map +1 -0
- package/esm/record.d.ts +2 -2
- package/esm/record.js +34 -24
- package/esm/record.js.map +1 -1
- package/esm/sam.js +9 -7
- package/esm/sam.js.map +1 -1
- package/esm/util.d.ts +13 -1
- package/esm/util.js +40 -14
- package/esm/util.js.map +1 -1
- package/package.json +19 -20
- package/src/bai.ts +99 -102
- package/src/bamFile.ts +174 -198
- package/src/chunk.ts +6 -20
- package/src/csi.ts +102 -111
- package/src/htsget.ts +81 -61
- package/src/index.ts +5 -7
- package/src/indexFile.ts +0 -27
- package/src/nullIndex.ts +18 -0
- package/src/record.ts +34 -24
- package/src/sam.ts +9 -7
- package/src/util.ts +54 -13
- package/src/declare.d.ts +0 -2
package/src/csi.ts
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
|
-
import Long from 'long'
|
|
2
1
|
import { unzip } from '@gmod/bgzf-filehandle'
|
|
3
2
|
import VirtualOffset, { fromBytes } from './virtualOffset'
|
|
4
3
|
import Chunk from './chunk'
|
|
5
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
optimizeChunks,
|
|
6
|
+
findFirstData,
|
|
7
|
+
parsePseudoBin,
|
|
8
|
+
parseNameBytes,
|
|
9
|
+
BaseOpts,
|
|
10
|
+
} from './util'
|
|
6
11
|
|
|
7
12
|
import IndexFile from './indexFile'
|
|
8
13
|
|
|
@@ -17,102 +22,69 @@ function rshift(num: number, bits: number) {
|
|
|
17
22
|
}
|
|
18
23
|
|
|
19
24
|
export default class CSI extends IndexFile {
|
|
20
|
-
private maxBinNumber
|
|
21
|
-
private depth
|
|
22
|
-
private minShift
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
async lineCount(refId: number): Promise<number> {
|
|
30
|
-
const indexData = await this.parse()
|
|
31
|
-
if (!indexData) {
|
|
32
|
-
return -1
|
|
33
|
-
}
|
|
34
|
-
const idx = indexData.indices[refId]
|
|
35
|
-
if (!idx) {
|
|
36
|
-
return -1
|
|
37
|
-
}
|
|
38
|
-
const { stats } = indexData.indices[refId]
|
|
39
|
-
if (stats) {
|
|
40
|
-
return stats.lineCount
|
|
41
|
-
}
|
|
42
|
-
return -1
|
|
25
|
+
private maxBinNumber = 0
|
|
26
|
+
private depth = 0
|
|
27
|
+
private minShift = 0
|
|
28
|
+
|
|
29
|
+
public setupP?: ReturnType<CSI['_parse']>
|
|
30
|
+
|
|
31
|
+
async lineCount(refId: number, opts?: BaseOpts) {
|
|
32
|
+
const indexData = await this.parse(opts)
|
|
33
|
+
return indexData.indices[refId]?.stats?.lineCount || 0
|
|
43
34
|
}
|
|
44
35
|
|
|
45
36
|
async indexCov() {
|
|
46
37
|
return []
|
|
47
38
|
}
|
|
48
39
|
|
|
49
|
-
parseAuxData(bytes: Buffer, offset: number
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
const data: { [key: string]: any } = {}
|
|
55
|
-
data.formatFlags = bytes.readInt32LE(offset)
|
|
56
|
-
data.coordinateType =
|
|
57
|
-
data.formatFlags & 0x10000 ? 'zero-based-half-open' : '1-based-closed'
|
|
58
|
-
data.format = (
|
|
40
|
+
parseAuxData(bytes: Buffer, offset: number) {
|
|
41
|
+
const formatFlags = bytes.readInt32LE(offset)
|
|
42
|
+
const coordinateType =
|
|
43
|
+
formatFlags & 0x10000 ? 'zero-based-half-open' : '1-based-closed'
|
|
44
|
+
const format = (
|
|
59
45
|
{ 0: 'generic', 1: 'SAM', 2: 'VCF' } as {
|
|
60
46
|
[key: number]: string
|
|
61
47
|
}
|
|
62
|
-
)[
|
|
63
|
-
if (!
|
|
64
|
-
throw new Error(`invalid Tabix preset format flags ${
|
|
48
|
+
)[formatFlags & 0xf]
|
|
49
|
+
if (!format) {
|
|
50
|
+
throw new Error(`invalid Tabix preset format flags ${formatFlags}`)
|
|
65
51
|
}
|
|
66
|
-
|
|
52
|
+
const columnNumbers = {
|
|
67
53
|
ref: bytes.readInt32LE(offset + 4),
|
|
68
54
|
start: bytes.readInt32LE(offset + 8),
|
|
69
55
|
end: bytes.readInt32LE(offset + 12),
|
|
70
56
|
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
57
|
+
const metaValue = bytes.readInt32LE(offset + 16)
|
|
58
|
+
const metaChar = metaValue ? String.fromCharCode(metaValue) : ''
|
|
59
|
+
const skipLines = bytes.readInt32LE(offset + 20)
|
|
74
60
|
const nameSectionLength = bytes.readInt32LE(offset + 24)
|
|
75
61
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
62
|
+
return {
|
|
63
|
+
columnNumbers,
|
|
64
|
+
coordinateType,
|
|
65
|
+
metaValue,
|
|
66
|
+
metaChar,
|
|
67
|
+
skipLines,
|
|
68
|
+
format,
|
|
69
|
+
formatFlags,
|
|
70
|
+
...parseNameBytes(
|
|
79
71
|
bytes.subarray(offset + 28, offset + 28 + nameSectionLength),
|
|
72
|
+
this.renameRefSeq,
|
|
80
73
|
),
|
|
81
|
-
)
|
|
82
|
-
return data
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
_parseNameBytes(namesBytes: Buffer) {
|
|
86
|
-
let currRefId = 0
|
|
87
|
-
let currNameStart = 0
|
|
88
|
-
const refIdToName = []
|
|
89
|
-
const refNameToId: { [key: string]: number } = {}
|
|
90
|
-
for (let i = 0; i < namesBytes.length; i += 1) {
|
|
91
|
-
if (!namesBytes[i]) {
|
|
92
|
-
if (currNameStart < i) {
|
|
93
|
-
let refName = namesBytes.toString('utf8', currNameStart, i)
|
|
94
|
-
refName = this.renameRefSeq(refName)
|
|
95
|
-
refIdToName[currRefId] = refName
|
|
96
|
-
refNameToId[refName] = currRefId
|
|
97
|
-
}
|
|
98
|
-
currNameStart = i + 1
|
|
99
|
-
currRefId += 1
|
|
100
|
-
}
|
|
101
74
|
}
|
|
102
|
-
return { refNameToId, refIdToName }
|
|
103
75
|
}
|
|
104
76
|
|
|
105
77
|
// fetch and parse the index
|
|
106
78
|
async _parse(opts: { signal?: AbortSignal }) {
|
|
107
|
-
const
|
|
108
|
-
const buffer = (await this.filehandle.readFile(opts)) as Buffer
|
|
79
|
+
const buffer = await this.filehandle.readFile(opts)
|
|
109
80
|
const bytes = await unzip(buffer)
|
|
110
81
|
|
|
82
|
+
let csiVersion
|
|
111
83
|
// check CSI magic numbers
|
|
112
84
|
if (bytes.readUInt32LE(0) === CSI1_MAGIC) {
|
|
113
|
-
|
|
85
|
+
csiVersion = 1
|
|
114
86
|
} else if (bytes.readUInt32LE(0) === CSI2_MAGIC) {
|
|
115
|
-
|
|
87
|
+
csiVersion = 2
|
|
116
88
|
} else {
|
|
117
89
|
throw new Error('Not a CSI file')
|
|
118
90
|
// TODO: do we need to support big-endian CSI files?
|
|
@@ -122,59 +94,60 @@ export default class CSI extends IndexFile {
|
|
|
122
94
|
this.depth = bytes.readInt32LE(8)
|
|
123
95
|
this.maxBinNumber = ((1 << ((this.depth + 1) * 3)) - 1) / 7
|
|
124
96
|
const auxLength = bytes.readInt32LE(12)
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
97
|
+
const aux = auxLength >= 30 ? this.parseAuxData(bytes, 16) : undefined
|
|
98
|
+
const refCount = bytes.readInt32LE(16 + auxLength)
|
|
99
|
+
|
|
100
|
+
type BinIndex = { [key: string]: Chunk[] }
|
|
129
101
|
|
|
130
102
|
// read the indexes for each reference sequence
|
|
131
|
-
|
|
132
|
-
let
|
|
133
|
-
|
|
134
|
-
|
|
103
|
+
let curr = 16 + auxLength + 4
|
|
104
|
+
let firstDataLine: VirtualOffset | undefined
|
|
105
|
+
const indices = new Array<{
|
|
106
|
+
binIndex: BinIndex
|
|
107
|
+
stats?: { lineCount: number }
|
|
108
|
+
}>(refCount)
|
|
109
|
+
for (let i = 0; i < refCount; i++) {
|
|
135
110
|
// the binning index
|
|
136
|
-
const binCount = bytes.readInt32LE(
|
|
137
|
-
|
|
111
|
+
const binCount = bytes.readInt32LE(curr)
|
|
112
|
+
curr += 4
|
|
138
113
|
const binIndex: { [key: string]: Chunk[] } = {}
|
|
139
114
|
let stats // < provided by parsing a pseudo-bin, if present
|
|
140
|
-
for (let j = 0; j < binCount; j
|
|
141
|
-
const bin = bytes.readUInt32LE(
|
|
115
|
+
for (let j = 0; j < binCount; j++) {
|
|
116
|
+
const bin = bytes.readUInt32LE(curr)
|
|
117
|
+
curr += 4
|
|
142
118
|
if (bin > this.maxBinNumber) {
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
stats = this.parsePseudoBin(bytes, currOffset + 4)
|
|
146
|
-
currOffset += 4 + 8 + 4 + 16 + 16
|
|
119
|
+
stats = parsePseudoBin(bytes, curr + 28)
|
|
120
|
+
curr += 28 + 16
|
|
147
121
|
} else {
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
const chunkCount = bytes.readInt32LE(
|
|
151
|
-
|
|
152
|
-
const chunks = new Array(chunkCount)
|
|
122
|
+
firstDataLine = findFirstData(firstDataLine, fromBytes(bytes, curr))
|
|
123
|
+
curr += 8
|
|
124
|
+
const chunkCount = bytes.readInt32LE(curr)
|
|
125
|
+
curr += 4
|
|
126
|
+
const chunks = new Array<Chunk>(chunkCount)
|
|
153
127
|
for (let k = 0; k < chunkCount; k += 1) {
|
|
154
|
-
const u = fromBytes(bytes,
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
128
|
+
const u = fromBytes(bytes, curr)
|
|
129
|
+
curr += 8
|
|
130
|
+
const v = fromBytes(bytes, curr)
|
|
131
|
+
curr += 8
|
|
132
|
+
firstDataLine = findFirstData(firstDataLine, u)
|
|
158
133
|
chunks[k] = new Chunk(u, v, bin)
|
|
159
134
|
}
|
|
160
135
|
binIndex[bin] = chunks
|
|
161
136
|
}
|
|
162
137
|
}
|
|
163
138
|
|
|
164
|
-
|
|
139
|
+
indices[i] = { binIndex, stats }
|
|
165
140
|
}
|
|
166
141
|
|
|
167
|
-
return
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
)
|
|
177
|
-
return { lineCount }
|
|
142
|
+
return {
|
|
143
|
+
csiVersion,
|
|
144
|
+
firstDataLine,
|
|
145
|
+
indices,
|
|
146
|
+
refCount,
|
|
147
|
+
csi: true,
|
|
148
|
+
maxBlockSize: 1 << 16,
|
|
149
|
+
...aux,
|
|
150
|
+
}
|
|
178
151
|
}
|
|
179
152
|
|
|
180
153
|
async blocksForRange(
|
|
@@ -192,17 +165,20 @@ export default class CSI extends IndexFile {
|
|
|
192
165
|
if (!ba) {
|
|
193
166
|
return []
|
|
194
167
|
}
|
|
168
|
+
const overlappingBins = this.reg2bins(min, max)
|
|
195
169
|
|
|
196
|
-
|
|
197
|
-
|
|
170
|
+
if (overlappingBins.length === 0) {
|
|
171
|
+
return []
|
|
172
|
+
}
|
|
198
173
|
|
|
174
|
+
const chunks = []
|
|
199
175
|
// Find chunks in overlapping bins. Leaf bins (< 4681) are not pruned
|
|
200
176
|
for (const [start, end] of overlappingBins) {
|
|
201
177
|
for (let bin = start; bin <= end; bin++) {
|
|
202
178
|
if (ba.binIndex[bin]) {
|
|
203
179
|
const binChunks = ba.binIndex[bin]
|
|
204
|
-
for (
|
|
205
|
-
chunks.push(
|
|
180
|
+
for (const c of binChunks) {
|
|
181
|
+
chunks.push(c)
|
|
206
182
|
}
|
|
207
183
|
}
|
|
208
184
|
}
|
|
@@ -240,4 +216,19 @@ export default class CSI extends IndexFile {
|
|
|
240
216
|
}
|
|
241
217
|
return bins
|
|
242
218
|
}
|
|
219
|
+
|
|
220
|
+
async parse(opts: BaseOpts = {}) {
|
|
221
|
+
if (!this.setupP) {
|
|
222
|
+
this.setupP = this._parse(opts).catch(e => {
|
|
223
|
+
this.setupP = undefined
|
|
224
|
+
throw e
|
|
225
|
+
})
|
|
226
|
+
}
|
|
227
|
+
return this.setupP
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
async hasRefSeq(seqId: number, opts: BaseOpts = {}) {
|
|
231
|
+
const header = await this.parse(opts)
|
|
232
|
+
return !!header.indices[seqId]?.binIndex
|
|
233
|
+
}
|
|
243
234
|
}
|
package/src/htsget.ts
CHANGED
|
@@ -1,36 +1,33 @@
|
|
|
1
|
+
import { unzip } from '@gmod/bgzf-filehandle'
|
|
2
|
+
import { Buffer } from 'buffer'
|
|
1
3
|
import { BaseOpts, BamOpts } from './util'
|
|
2
4
|
import BamFile, { BAM_MAGIC } from './bamFile'
|
|
3
|
-
import 'cross-fetch/polyfill'
|
|
4
5
|
import Chunk from './chunk'
|
|
5
|
-
import { unzip } from '@gmod/bgzf-filehandle'
|
|
6
6
|
import { parseHeaderText } from './sam'
|
|
7
7
|
|
|
8
|
-
interface HeaderLine {
|
|
9
|
-
tag: string
|
|
10
|
-
value: string
|
|
11
|
-
}
|
|
12
|
-
|
|
13
8
|
interface HtsgetChunk {
|
|
14
9
|
url: string
|
|
15
10
|
headers?: Record<string, string>
|
|
16
11
|
}
|
|
17
|
-
async function concat(arr:
|
|
12
|
+
async function concat(arr: HtsgetChunk[], opts?: Record<string, any>) {
|
|
18
13
|
const res = await Promise.all(
|
|
19
|
-
arr.map(async
|
|
14
|
+
arr.map(async chunk => {
|
|
20
15
|
const { url, headers } = chunk
|
|
21
16
|
if (url.startsWith('data:')) {
|
|
22
17
|
return Buffer.from(url.split(',')[1], 'base64')
|
|
23
18
|
} else {
|
|
24
19
|
//remove referer header, it is not even allowed to be specified
|
|
25
|
-
|
|
20
|
+
// @ts-expect-error
|
|
26
21
|
//eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
27
22
|
const { referer, ...rest } = headers
|
|
28
23
|
const res = await fetch(url, {
|
|
29
24
|
...opts,
|
|
30
|
-
headers: { ...opts
|
|
25
|
+
headers: { ...opts?.headers, ...rest },
|
|
31
26
|
})
|
|
32
27
|
if (!res.ok) {
|
|
33
|
-
throw new Error(
|
|
28
|
+
throw new Error(
|
|
29
|
+
`HTTP ${res.status} fetching ${url}: ${await res.text()}`,
|
|
30
|
+
)
|
|
34
31
|
}
|
|
35
32
|
return Buffer.from(await res.arrayBuffer())
|
|
36
33
|
}
|
|
@@ -46,8 +43,7 @@ export default class HtsgetFile extends BamFile {
|
|
|
46
43
|
private trackId: string
|
|
47
44
|
|
|
48
45
|
constructor(args: { trackId: string; baseUrl: string }) {
|
|
49
|
-
|
|
50
|
-
super({ bamFilehandle: '?', baiFilehandle: '?' })
|
|
46
|
+
super({ htsget: true })
|
|
51
47
|
this.baseUrl = args.baseUrl
|
|
52
48
|
this.trackId = args.trackId
|
|
53
49
|
}
|
|
@@ -56,54 +52,76 @@ export default class HtsgetFile extends BamFile {
|
|
|
56
52
|
chr: string,
|
|
57
53
|
min: number,
|
|
58
54
|
max: number,
|
|
59
|
-
opts
|
|
60
|
-
viewAsPairs: false,
|
|
61
|
-
pairAcrossChr: false,
|
|
62
|
-
maxInsertSize: 200000,
|
|
63
|
-
},
|
|
55
|
+
opts?: BamOpts,
|
|
64
56
|
) {
|
|
65
57
|
const base = `${this.baseUrl}/${this.trackId}`
|
|
66
58
|
const url = `${base}?referenceName=${chr}&start=${min}&end=${max}&format=BAM`
|
|
67
|
-
const chrId = this.chrToIndex
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
},
|
|
80
|
-
}
|
|
59
|
+
const chrId = this.chrToIndex?.[chr]
|
|
60
|
+
if (chrId === undefined) {
|
|
61
|
+
yield []
|
|
62
|
+
} else {
|
|
63
|
+
const result = await fetch(url, { ...opts })
|
|
64
|
+
if (!result.ok) {
|
|
65
|
+
throw new Error(
|
|
66
|
+
`HTTP ${result.status} fetching ${url}: ${await result.text()}`,
|
|
67
|
+
)
|
|
68
|
+
}
|
|
69
|
+
const data = await result.json()
|
|
70
|
+
const uncba = await concat(data.htsget.urls.slice(1), opts)
|
|
81
71
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
72
|
+
yield* this._fetchChunkFeatures(
|
|
73
|
+
[
|
|
74
|
+
// fake stuff to pretend to be a Chunk
|
|
75
|
+
{
|
|
76
|
+
buffer: uncba,
|
|
77
|
+
_fetchedSize: undefined,
|
|
78
|
+
bin: 0,
|
|
79
|
+
compareTo() {
|
|
80
|
+
return 0
|
|
81
|
+
},
|
|
82
|
+
toUniqueString() {
|
|
83
|
+
return `${chr}_${min}_${max}`
|
|
84
|
+
},
|
|
85
|
+
fetchedSize() {
|
|
86
|
+
return 0
|
|
87
|
+
},
|
|
88
|
+
minv: {
|
|
89
|
+
dataPosition: 0,
|
|
90
|
+
blockPosition: 0,
|
|
91
|
+
compareTo: () => 0,
|
|
92
|
+
},
|
|
93
|
+
maxv: {
|
|
94
|
+
dataPosition: Number.MAX_SAFE_INTEGER,
|
|
95
|
+
blockPosition: 0,
|
|
96
|
+
compareTo: () => 0,
|
|
97
|
+
},
|
|
98
|
+
toString() {
|
|
99
|
+
return `${chr}_${min}_${max}`
|
|
100
|
+
},
|
|
101
|
+
},
|
|
102
|
+
],
|
|
103
|
+
chrId,
|
|
104
|
+
min,
|
|
105
|
+
max,
|
|
106
|
+
opts,
|
|
107
|
+
)
|
|
108
|
+
}
|
|
90
109
|
}
|
|
91
110
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
const { chunk } = params
|
|
98
|
-
const { buffer, chunk: c2 } = chunk
|
|
99
|
-
return { data: buffer, cpositions: null, dpositions: null, chunk: c2 }
|
|
111
|
+
async _readChunk({ chunk }: { chunk: Chunk; opts: BaseOpts }) {
|
|
112
|
+
if (!chunk.buffer) {
|
|
113
|
+
throw new Error('expected chunk.buffer in htsget')
|
|
114
|
+
}
|
|
115
|
+
return { data: chunk.buffer, cpositions: [], dpositions: [], chunk }
|
|
100
116
|
}
|
|
101
117
|
|
|
102
118
|
async getHeader(opts: BaseOpts = {}) {
|
|
103
119
|
const url = `${this.baseUrl}/${this.trackId}?referenceName=na&class=header`
|
|
104
120
|
const result = await fetch(url, opts)
|
|
105
121
|
if (!result.ok) {
|
|
106
|
-
throw new Error(
|
|
122
|
+
throw new Error(
|
|
123
|
+
`HTTP ${result.status} fetching ${url}: ${await result.text()}`,
|
|
124
|
+
)
|
|
107
125
|
}
|
|
108
126
|
const data = await result.json()
|
|
109
127
|
const uncba = await concat(data.htsget.urls, opts)
|
|
@@ -112,25 +130,27 @@ export default class HtsgetFile extends BamFile {
|
|
|
112
130
|
throw new Error('Not a BAM file')
|
|
113
131
|
}
|
|
114
132
|
const headLen = uncba.readInt32LE(4)
|
|
115
|
-
|
|
116
133
|
const headerText = uncba.toString('utf8', 8, 8 + headLen)
|
|
117
134
|
const samHeader = parseHeaderText(headerText)
|
|
118
135
|
|
|
119
136
|
// use the @SQ lines in the header to figure out the
|
|
120
137
|
// mapping between ref ID numbers and names
|
|
121
|
-
const idToName: string[] = []
|
|
138
|
+
const idToName: { refName: string; length: number }[] = []
|
|
122
139
|
const nameToId: Record<string, number> = {}
|
|
123
|
-
const sqLines = samHeader.filter(
|
|
124
|
-
|
|
125
|
-
|
|
140
|
+
const sqLines = samHeader.filter(l => l.tag === 'SQ')
|
|
141
|
+
for (const [refId, sqLine] of sqLines.entries()) {
|
|
142
|
+
let refName = ''
|
|
143
|
+
let length = 0
|
|
144
|
+
for (const item of sqLine.data) {
|
|
126
145
|
if (item.tag === 'SN') {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
idToName[refId] = refName
|
|
146
|
+
refName = item.value
|
|
147
|
+
} else if (item.tag === 'LN') {
|
|
148
|
+
length = +item.value
|
|
131
149
|
}
|
|
132
|
-
}
|
|
133
|
-
|
|
150
|
+
}
|
|
151
|
+
nameToId[refName] = refId
|
|
152
|
+
idToName[refId] = { refName, length }
|
|
153
|
+
}
|
|
134
154
|
this.chrToIndex = nameToId
|
|
135
155
|
this.indexToChr = idToName
|
|
136
156
|
return samHeader
|
package/src/index.ts
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
export { BAI, CSI, BamFile, BamRecord, HtsgetFile }
|
|
1
|
+
export { default as BAI } from './bai'
|
|
2
|
+
export { default as BamFile } from './bamFile'
|
|
3
|
+
export { default as CSI } from './csi'
|
|
4
|
+
export { default as BamRecord } from './record'
|
|
5
|
+
export { default as HtsgetFile } from './htsget'
|
package/src/indexFile.ts
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
import { GenericFilehandle } from 'generic-filehandle'
|
|
2
|
-
import VirtualOffset from './virtualOffset'
|
|
3
2
|
import Chunk from './chunk'
|
|
4
3
|
import { BaseOpts } from './util'
|
|
5
4
|
|
|
6
5
|
export default abstract class IndexFile {
|
|
7
6
|
public filehandle: GenericFilehandle
|
|
8
7
|
public renameRefSeq: (s: string) => string
|
|
9
|
-
public setupP?: Promise<any>
|
|
10
8
|
|
|
11
9
|
/**
|
|
12
10
|
* @param {filehandle} filehandle
|
|
@@ -23,7 +21,6 @@ export default abstract class IndexFile {
|
|
|
23
21
|
this.renameRefSeq = renameRefSeq
|
|
24
22
|
}
|
|
25
23
|
public abstract lineCount(refId: number): Promise<number>
|
|
26
|
-
protected abstract _parse(opts?: BaseOpts): Promise<any>
|
|
27
24
|
public abstract indexCov(
|
|
28
25
|
refId: number,
|
|
29
26
|
start?: number,
|
|
@@ -36,28 +33,4 @@ export default abstract class IndexFile {
|
|
|
36
33
|
end: number,
|
|
37
34
|
opts?: BaseOpts,
|
|
38
35
|
): Promise<Chunk[]>
|
|
39
|
-
|
|
40
|
-
_findFirstData(data: any, virtualOffset: VirtualOffset) {
|
|
41
|
-
const currentFdl = data.firstDataLine
|
|
42
|
-
if (currentFdl) {
|
|
43
|
-
data.firstDataLine =
|
|
44
|
-
currentFdl.compareTo(virtualOffset) > 0 ? virtualOffset : currentFdl
|
|
45
|
-
} else {
|
|
46
|
-
data.firstDataLine = virtualOffset
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
async parse(opts: BaseOpts = {}) {
|
|
51
|
-
if (!this.setupP) {
|
|
52
|
-
this.setupP = this._parse(opts).catch(e => {
|
|
53
|
-
this.setupP = undefined
|
|
54
|
-
throw e
|
|
55
|
-
})
|
|
56
|
-
}
|
|
57
|
-
return this.setupP
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
async hasRefSeq(seqId: number, opts: BaseOpts = {}) {
|
|
61
|
-
return !!((await this.parse(opts)).indices[seqId] || {}).binIndex
|
|
62
|
-
}
|
|
63
36
|
}
|
package/src/nullIndex.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import IndexFile from './indexFile'
|
|
2
|
+
|
|
3
|
+
export default class NullIndex extends IndexFile {
|
|
4
|
+
public lineCount(): Promise<any> {
|
|
5
|
+
throw new Error('never called')
|
|
6
|
+
}
|
|
7
|
+
protected _parse(): Promise<any> {
|
|
8
|
+
throw new Error('never called')
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
public async indexCov(): Promise<any> {
|
|
12
|
+
throw new Error('never called')
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
public blocksForRange(): Promise<any> {
|
|
16
|
+
throw new Error('never called')
|
|
17
|
+
}
|
|
18
|
+
}
|