@jbrowse/plugin-gff3 1.5.0 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Gff3Adapter/Gff3Adapter.d.ts +6 -9
- package/dist/plugin-gff3.cjs.development.js +258 -126
- package/dist/plugin-gff3.cjs.development.js.map +1 -1
- package/dist/plugin-gff3.cjs.production.min.js +1 -1
- package/dist/plugin-gff3.cjs.production.min.js.map +1 -1
- package/dist/plugin-gff3.esm.js +258 -126
- package/dist/plugin-gff3.esm.js.map +1 -1
- package/package.json +7 -6
- package/src/Gff3Adapter/Gff3Adapter.test.ts +3 -0
- package/src/Gff3Adapter/Gff3Adapter.ts +85 -74
- package/src/Gff3TabixAdapter/Gff3TabixAdapter.ts +19 -14
- package/src/index.ts +55 -1
- package/dist/util.d.ts +0 -14
- package/src/declare.d.ts +0 -1
- package/src/util.ts +0 -12
|
@@ -1,100 +1,102 @@
|
|
|
1
|
-
import { Instance } from 'mobx-state-tree'
|
|
2
1
|
import {
|
|
3
2
|
BaseFeatureDataAdapter,
|
|
4
3
|
BaseOptions,
|
|
5
4
|
} from '@jbrowse/core/data_adapters/BaseAdapter'
|
|
6
|
-
import { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache'
|
|
7
|
-
import PluginManager from '@jbrowse/core/PluginManager'
|
|
8
|
-
import { readConfObject } from '@jbrowse/core/configuration'
|
|
9
5
|
import { NoAssemblyRegion } from '@jbrowse/core/util/types'
|
|
6
|
+
import { readConfObject } from '@jbrowse/core/configuration'
|
|
10
7
|
import { openLocation } from '@jbrowse/core/util/io'
|
|
11
8
|
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
|
|
12
9
|
import IntervalTree from '@flatten-js/interval-tree'
|
|
13
10
|
import SimpleFeature, { Feature } from '@jbrowse/core/util/simpleFeature'
|
|
11
|
+
import { unzip } from '@gmod/bgzf-filehandle'
|
|
14
12
|
|
|
15
|
-
import gff from '@gmod/gff'
|
|
16
|
-
import { GenericFilehandle } from 'generic-filehandle'
|
|
13
|
+
import gff, { GFF3FeatureLineWithRefs } from '@gmod/gff'
|
|
17
14
|
|
|
18
|
-
|
|
19
|
-
|
|
15
|
+
function isGzip(buf: Buffer) {
|
|
16
|
+
return buf[0] === 31 && buf[1] === 139 && buf[2] === 8
|
|
17
|
+
}
|
|
20
18
|
|
|
21
19
|
export default class extends BaseFeatureDataAdapter {
|
|
22
|
-
protected gffFeatures?: Promise<
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
20
|
+
protected gffFeatures?: Promise<{
|
|
21
|
+
header: string
|
|
22
|
+
intervalTree: Record<string, IntervalTree>
|
|
23
|
+
}>
|
|
24
|
+
|
|
25
|
+
private async loadDataP() {
|
|
26
|
+
const buffer = await openLocation(
|
|
27
|
+
readConfObject(this.config, 'gffLocation'),
|
|
28
|
+
this.pluginManager,
|
|
29
|
+
).readFile()
|
|
30
|
+
const buf = isGzip(buffer) ? await unzip(buffer) : buffer
|
|
31
|
+
// The maximum string length in Chrome is about 512MB, so larger data cannot be decoded
|
|
32
|
+
if (buf.length > 536_870_888) {
|
|
33
|
+
throw new Error('Data exceeds maximum string length (512MB)')
|
|
34
|
+
}
|
|
35
|
+
const data = new TextDecoder('utf8', { fatal: true }).decode(buf)
|
|
36
|
+
const lines = data.split('\n')
|
|
37
|
+
const headerLines = []
|
|
38
|
+
for (let i = 0; i < lines.length && lines[i].startsWith('#'); i++) {
|
|
39
|
+
headerLines.push(lines[i])
|
|
40
|
+
}
|
|
41
|
+
const header = headerLines.join('\n')
|
|
42
|
+
|
|
43
|
+
const feats = gff.parseStringSync(data, {
|
|
44
|
+
parseFeatures: true,
|
|
45
|
+
parseComments: false,
|
|
46
|
+
parseDirectives: false,
|
|
47
|
+
parseSequences: false,
|
|
48
|
+
})
|
|
49
|
+
|
|
50
|
+
const intervalTree = feats
|
|
51
|
+
.flat()
|
|
52
|
+
.map(
|
|
53
|
+
(f, i) =>
|
|
54
|
+
new SimpleFeature({
|
|
55
|
+
data: this.featureData(f),
|
|
56
|
+
id: `${this.id}-offset-${i}`,
|
|
57
|
+
}),
|
|
58
|
+
)
|
|
59
|
+
.reduce((acc, obj) => {
|
|
60
|
+
const key = obj.get('refName')
|
|
61
|
+
if (!acc[key]) {
|
|
62
|
+
acc[key] = new IntervalTree()
|
|
63
|
+
}
|
|
64
|
+
acc[key].insert([obj.get('start'), obj.get('end')], obj)
|
|
65
|
+
return acc
|
|
66
|
+
}, {} as Record<string, IntervalTree>)
|
|
67
|
+
|
|
68
|
+
return { header, intervalTree }
|
|
38
69
|
}
|
|
39
70
|
|
|
40
71
|
private async loadData() {
|
|
41
|
-
const { size } = await this.filehandle.stat()
|
|
42
|
-
// Add a warning to avoid crashing the browser, recommend indexing
|
|
43
|
-
if (size > 500_000_000) {
|
|
44
|
-
throw new Error('This file is too large. Consider using Gff3TabixAdapter')
|
|
45
|
-
}
|
|
46
72
|
if (!this.gffFeatures) {
|
|
47
|
-
this.gffFeatures = this.
|
|
48
|
-
.
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
parseFeatures: true,
|
|
52
|
-
parseComments: false,
|
|
53
|
-
parseDirectives: false,
|
|
54
|
-
parseSequences: false,
|
|
55
|
-
}) as FeatureLoc[][]
|
|
56
|
-
|
|
57
|
-
return gffFeatures
|
|
58
|
-
.flat()
|
|
59
|
-
.map(
|
|
60
|
-
(f, i) =>
|
|
61
|
-
new SimpleFeature({
|
|
62
|
-
data: this.featureData(f),
|
|
63
|
-
id: `${this.id}-offset-${i}`,
|
|
64
|
-
}),
|
|
65
|
-
)
|
|
66
|
-
.reduce((acc: Record<string, IntervalTree>, obj: SimpleFeature) => {
|
|
67
|
-
const key = obj.get('refName')
|
|
68
|
-
if (!acc[key]) {
|
|
69
|
-
acc[key] = new IntervalTree()
|
|
70
|
-
}
|
|
71
|
-
acc[key].insert([obj.get('start'), obj.get('end')], obj)
|
|
72
|
-
return acc
|
|
73
|
-
}, {})
|
|
74
|
-
})
|
|
75
|
-
.catch(e => {
|
|
76
|
-
this.gffFeatures = undefined
|
|
77
|
-
throw e
|
|
78
|
-
})
|
|
73
|
+
this.gffFeatures = this.loadDataP().catch(e => {
|
|
74
|
+
this.gffFeatures = undefined
|
|
75
|
+
throw e
|
|
76
|
+
})
|
|
79
77
|
}
|
|
80
78
|
|
|
81
79
|
return this.gffFeatures
|
|
82
80
|
}
|
|
83
81
|
|
|
84
82
|
public async getRefNames(opts: BaseOptions = {}) {
|
|
85
|
-
const
|
|
86
|
-
return Object.keys(
|
|
83
|
+
const { intervalTree } = await this.loadData()
|
|
84
|
+
return Object.keys(intervalTree)
|
|
87
85
|
}
|
|
86
|
+
|
|
87
|
+
public async getHeader() {
|
|
88
|
+
const { header } = await this.loadData()
|
|
89
|
+
return header
|
|
90
|
+
}
|
|
91
|
+
|
|
88
92
|
public getFeatures(query: NoAssemblyRegion, opts: BaseOptions = {}) {
|
|
89
93
|
return ObservableCreate<Feature>(async observer => {
|
|
90
94
|
try {
|
|
91
95
|
const { start, end, refName } = query
|
|
92
|
-
const
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
observer.next(f)
|
|
97
|
-
})
|
|
96
|
+
const { intervalTree } = await this.loadData()
|
|
97
|
+
intervalTree[refName]
|
|
98
|
+
?.search([start, end])
|
|
99
|
+
.forEach(f => observer.next(f))
|
|
98
100
|
observer.complete()
|
|
99
101
|
} catch (e) {
|
|
100
102
|
observer.error(e)
|
|
@@ -102,10 +104,18 @@ export default class extends BaseFeatureDataAdapter {
|
|
|
102
104
|
}, opts.signal)
|
|
103
105
|
}
|
|
104
106
|
|
|
105
|
-
private featureData(data:
|
|
107
|
+
private featureData(data: GFF3FeatureLineWithRefs) {
|
|
106
108
|
const f: Record<string, unknown> = { ...data }
|
|
107
109
|
;(f.start as number) -= 1 // convert to interbase
|
|
108
|
-
|
|
110
|
+
if (data.strand === '+') {
|
|
111
|
+
f.strand = 1
|
|
112
|
+
} else if (data.strand === '-') {
|
|
113
|
+
f.strand = -1
|
|
114
|
+
} else if (data.strand === '.') {
|
|
115
|
+
f.strand = 0
|
|
116
|
+
} else {
|
|
117
|
+
f.strand = undefined
|
|
118
|
+
}
|
|
109
119
|
f.phase = Number(data.phase)
|
|
110
120
|
f.refName = data.seq_id
|
|
111
121
|
if (data.score === null) {
|
|
@@ -124,15 +134,16 @@ export default class extends BaseFeatureDataAdapter {
|
|
|
124
134
|
'phase',
|
|
125
135
|
'strand',
|
|
126
136
|
]
|
|
127
|
-
|
|
137
|
+
const dataAttributes = data.attributes || {}
|
|
138
|
+
Object.keys(dataAttributes).forEach(a => {
|
|
128
139
|
let b = a.toLowerCase()
|
|
129
140
|
if (defaultFields.includes(b)) {
|
|
130
141
|
// add "suffix" to tag name if it already exists
|
|
131
142
|
// reproduces behavior of NCList
|
|
132
143
|
b += '2'
|
|
133
144
|
}
|
|
134
|
-
if (
|
|
135
|
-
let attr =
|
|
145
|
+
if (dataAttributes[a] !== null) {
|
|
146
|
+
let attr: string | string[] | undefined = dataAttributes[a]
|
|
136
147
|
if (Array.isArray(attr) && attr.length === 1) {
|
|
137
148
|
;[attr] = attr
|
|
138
149
|
}
|
|
@@ -9,7 +9,7 @@ import { openLocation } from '@jbrowse/core/util/io'
|
|
|
9
9
|
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
|
|
10
10
|
import SimpleFeature, { Feature } from '@jbrowse/core/util/simpleFeature'
|
|
11
11
|
import { TabixIndexedFile } from '@gmod/tabix'
|
|
12
|
-
import gff from '@gmod/gff'
|
|
12
|
+
import gff, { GFF3Feature, GFF3FeatureLineWithRefs } from '@gmod/gff'
|
|
13
13
|
import { Observer } from 'rxjs'
|
|
14
14
|
|
|
15
15
|
import { Instance } from 'mobx-state-tree'
|
|
@@ -17,7 +17,6 @@ import { readConfObject } from '@jbrowse/core/configuration'
|
|
|
17
17
|
import MyConfigSchema from './configSchema'
|
|
18
18
|
import PluginManager from '@jbrowse/core/PluginManager'
|
|
19
19
|
import { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache'
|
|
20
|
-
import { FeatureLoc } from '../util'
|
|
21
20
|
|
|
22
21
|
interface LineFeature {
|
|
23
22
|
start: number
|
|
@@ -110,12 +109,8 @@ export default class extends BaseFeatureDataAdapter {
|
|
|
110
109
|
}
|
|
111
110
|
})
|
|
112
111
|
if (maxEnd > query.end || minStart < query.start) {
|
|
113
|
-
// console.log(
|
|
114
|
-
// `redispatching ${query.start}-${query.end} => ${minStart}-${maxEnd}`,
|
|
115
|
-
// )
|
|
116
112
|
// make a new feature callback to only return top-level features
|
|
117
113
|
// in the original query range
|
|
118
|
-
|
|
119
114
|
this.getFeaturesHelper(
|
|
120
115
|
{ ...query, start: minStart, end: maxEnd },
|
|
121
116
|
opts,
|
|
@@ -146,7 +141,7 @@ export default class extends BaseFeatureDataAdapter {
|
|
|
146
141
|
parseComments: false,
|
|
147
142
|
parseDirectives: false,
|
|
148
143
|
parseSequences: false,
|
|
149
|
-
})
|
|
144
|
+
})
|
|
150
145
|
|
|
151
146
|
features.forEach(featureLocs =>
|
|
152
147
|
this.formatFeatures(featureLocs).forEach(f => {
|
|
@@ -184,20 +179,29 @@ export default class extends BaseFeatureDataAdapter {
|
|
|
184
179
|
}
|
|
185
180
|
}
|
|
186
181
|
|
|
187
|
-
private formatFeatures(featureLocs:
|
|
182
|
+
private formatFeatures(featureLocs: GFF3Feature) {
|
|
188
183
|
return featureLocs.map(
|
|
189
184
|
featureLoc =>
|
|
190
185
|
new SimpleFeature({
|
|
191
186
|
data: this.featureData(featureLoc),
|
|
192
|
-
|
|
187
|
+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
188
|
+
id: `${this.id}-offset-${featureLoc.attributes!._lineHash![0]}`,
|
|
193
189
|
}),
|
|
194
190
|
)
|
|
195
191
|
}
|
|
196
192
|
|
|
197
|
-
private featureData(data:
|
|
193
|
+
private featureData(data: GFF3FeatureLineWithRefs) {
|
|
198
194
|
const f: Record<string, unknown> = { ...data }
|
|
199
195
|
;(f.start as number) -= 1 // convert to interbase
|
|
200
|
-
|
|
196
|
+
if (data.strand === '+') {
|
|
197
|
+
f.strand = 1
|
|
198
|
+
} else if (data.strand === '-') {
|
|
199
|
+
f.strand = -1
|
|
200
|
+
} else if (data.strand === '.') {
|
|
201
|
+
f.strand = 0
|
|
202
|
+
} else {
|
|
203
|
+
f.strand = undefined
|
|
204
|
+
}
|
|
201
205
|
f.phase = Number(data.phase)
|
|
202
206
|
f.refName = data.seq_id
|
|
203
207
|
if (data.score === null) {
|
|
@@ -216,15 +220,16 @@ export default class extends BaseFeatureDataAdapter {
|
|
|
216
220
|
'phase',
|
|
217
221
|
'strand',
|
|
218
222
|
]
|
|
219
|
-
|
|
223
|
+
const dataAttributes = data.attributes || {}
|
|
224
|
+
Object.keys(dataAttributes).forEach(a => {
|
|
220
225
|
let b = a.toLowerCase()
|
|
221
226
|
if (defaultFields.includes(b)) {
|
|
222
227
|
// add "suffix" to tag name if it already exists
|
|
223
228
|
// reproduces behavior of NCList
|
|
224
229
|
b += '2'
|
|
225
230
|
}
|
|
226
|
-
if (
|
|
227
|
-
let attr =
|
|
231
|
+
if (dataAttributes[a] !== null) {
|
|
232
|
+
let attr: string | string[] | undefined = dataAttributes[a]
|
|
228
233
|
if (Array.isArray(attr) && attr.length === 1) {
|
|
229
234
|
;[attr] = attr
|
|
230
235
|
}
|
package/src/index.ts
CHANGED
|
@@ -2,6 +2,13 @@ import AdapterType from '@jbrowse/core/pluggableElementTypes/AdapterType'
|
|
|
2
2
|
import PluginManager from '@jbrowse/core/PluginManager'
|
|
3
3
|
import Plugin from '@jbrowse/core/Plugin'
|
|
4
4
|
import { configSchema as gff3TabixAdapterConfigSchema } from './Gff3TabixAdapter'
|
|
5
|
+
import { FileLocation } from '@jbrowse/core/util/types'
|
|
6
|
+
import {
|
|
7
|
+
makeIndex,
|
|
8
|
+
makeIndexType,
|
|
9
|
+
AdapterGuesser,
|
|
10
|
+
getFileName,
|
|
11
|
+
} from '@jbrowse/core/util/tracks'
|
|
5
12
|
import { configSchema as gff3AdapterConfigSchema } from './Gff3Adapter'
|
|
6
13
|
|
|
7
14
|
export default class extends Plugin {
|
|
@@ -17,7 +24,33 @@ export default class extends Plugin {
|
|
|
17
24
|
import('./Gff3TabixAdapter/Gff3TabixAdapter').then(r => r.default),
|
|
18
25
|
}),
|
|
19
26
|
)
|
|
20
|
-
|
|
27
|
+
pluginManager.addToExtensionPoint(
|
|
28
|
+
'Core-guessAdapterForLocation',
|
|
29
|
+
(adapterGuesser: AdapterGuesser) => {
|
|
30
|
+
return (
|
|
31
|
+
file: FileLocation,
|
|
32
|
+
index?: FileLocation,
|
|
33
|
+
adapterHint?: string,
|
|
34
|
+
) => {
|
|
35
|
+
const regexGuess = /\.gff3?\.b?gz$/i
|
|
36
|
+
const adapterName = 'Gff3TabixAdapter'
|
|
37
|
+
const fileName = getFileName(file)
|
|
38
|
+
const indexName = index && getFileName(index)
|
|
39
|
+
if (regexGuess.test(fileName) || adapterHint === adapterName) {
|
|
40
|
+
return {
|
|
41
|
+
type: adapterName,
|
|
42
|
+
bamLocation: file,
|
|
43
|
+
gffGzLocation: file,
|
|
44
|
+
index: {
|
|
45
|
+
location: index || makeIndex(file, '.tbi'),
|
|
46
|
+
indexType: makeIndexType(indexName, 'CSI', 'TBI'),
|
|
47
|
+
},
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
return adapterGuesser(file, index, adapterHint)
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
)
|
|
21
54
|
pluginManager.addAdapterType(
|
|
22
55
|
() =>
|
|
23
56
|
new AdapterType({
|
|
@@ -27,5 +60,26 @@ export default class extends Plugin {
|
|
|
27
60
|
import('./Gff3Adapter/Gff3Adapter').then(r => r.default),
|
|
28
61
|
}),
|
|
29
62
|
)
|
|
63
|
+
pluginManager.addToExtensionPoint(
|
|
64
|
+
'Core-guessAdapterForLocation',
|
|
65
|
+
(adapterGuesser: AdapterGuesser) => {
|
|
66
|
+
return (
|
|
67
|
+
file: FileLocation,
|
|
68
|
+
index?: FileLocation,
|
|
69
|
+
adapterHint?: string,
|
|
70
|
+
) => {
|
|
71
|
+
const regexGuess = /\.gff3?$/i
|
|
72
|
+
const adapterName = 'Gff3Adapter'
|
|
73
|
+
const fileName = getFileName(file)
|
|
74
|
+
if (regexGuess.test(fileName) || adapterHint === adapterName) {
|
|
75
|
+
return {
|
|
76
|
+
type: adapterName,
|
|
77
|
+
gffLocation: file,
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
return adapterGuesser(file, index, adapterHint)
|
|
81
|
+
}
|
|
82
|
+
},
|
|
83
|
+
)
|
|
30
84
|
}
|
|
31
85
|
}
|
package/dist/util.d.ts
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
export declare type Strand = '+' | '-' | '.' | '?';
|
|
2
|
-
export interface FeatureLoc {
|
|
3
|
-
[key: string]: unknown;
|
|
4
|
-
start: number;
|
|
5
|
-
end: number;
|
|
6
|
-
strand: Strand;
|
|
7
|
-
seq_id: string;
|
|
8
|
-
child_features: FeatureLoc[][];
|
|
9
|
-
data: unknown;
|
|
10
|
-
derived_features: unknown;
|
|
11
|
-
attributes: {
|
|
12
|
-
[key: string]: unknown[];
|
|
13
|
-
};
|
|
14
|
-
}
|
package/src/declare.d.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
declare module '@gmod/gff'
|
package/src/util.ts
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
export type Strand = '+' | '-' | '.' | '?'
|
|
2
|
-
export interface FeatureLoc {
|
|
3
|
-
[key: string]: unknown
|
|
4
|
-
start: number
|
|
5
|
-
end: number
|
|
6
|
-
strand: Strand
|
|
7
|
-
seq_id: string
|
|
8
|
-
child_features: FeatureLoc[][]
|
|
9
|
-
data: unknown
|
|
10
|
-
derived_features: unknown
|
|
11
|
-
attributes: { [key: string]: unknown[] }
|
|
12
|
-
}
|