@jbrowse/plugin-gff3 1.5.0 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,100 +1,102 @@
1
- import { Instance } from 'mobx-state-tree'
2
1
  import {
3
2
  BaseFeatureDataAdapter,
4
3
  BaseOptions,
5
4
  } from '@jbrowse/core/data_adapters/BaseAdapter'
6
- import { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache'
7
- import PluginManager from '@jbrowse/core/PluginManager'
8
- import { readConfObject } from '@jbrowse/core/configuration'
9
5
  import { NoAssemblyRegion } from '@jbrowse/core/util/types'
6
+ import { readConfObject } from '@jbrowse/core/configuration'
10
7
  import { openLocation } from '@jbrowse/core/util/io'
11
8
  import { ObservableCreate } from '@jbrowse/core/util/rxjs'
12
9
  import IntervalTree from '@flatten-js/interval-tree'
13
10
  import SimpleFeature, { Feature } from '@jbrowse/core/util/simpleFeature'
11
+ import { unzip } from '@gmod/bgzf-filehandle'
14
12
 
15
- import gff from '@gmod/gff'
16
- import { GenericFilehandle } from 'generic-filehandle'
13
+ import gff, { GFF3FeatureLineWithRefs } from '@gmod/gff'
17
14
 
18
- import MyConfigSchema from './configSchema'
19
- import { FeatureLoc } from '../util'
15
+ function isGzip(buf: Buffer) {
16
+ return buf[0] === 31 && buf[1] === 139 && buf[2] === 8
17
+ }
20
18
 
21
19
  export default class extends BaseFeatureDataAdapter {
22
- protected gffFeatures?: Promise<Record<string, IntervalTree>>
23
-
24
- protected uri: string
25
-
26
- protected filehandle: GenericFilehandle
27
-
28
- public constructor(
29
- config: Instance<typeof MyConfigSchema>,
30
- getSubAdapter?: getSubAdapterType,
31
- pluginManager?: PluginManager,
32
- ) {
33
- super(config, getSubAdapter, pluginManager)
34
- const gffLocation = readConfObject(config, 'gffLocation')
35
- const { uri } = gffLocation
36
- this.uri = uri
37
- this.filehandle = openLocation(gffLocation, this.pluginManager)
20
+ protected gffFeatures?: Promise<{
21
+ header: string
22
+ intervalTree: Record<string, IntervalTree>
23
+ }>
24
+
25
+ private async loadDataP() {
26
+ const buffer = await openLocation(
27
+ readConfObject(this.config, 'gffLocation'),
28
+ this.pluginManager,
29
+ ).readFile()
30
+ const buf = isGzip(buffer) ? await unzip(buffer) : buffer
31
+ // 512MB max chrome string length is 512MB
32
+ if (buf.length > 536_870_888) {
33
+ throw new Error('Data exceeds maximum string length (512MB)')
34
+ }
35
+ const data = new TextDecoder('utf8', { fatal: true }).decode(buf)
36
+ const lines = data.split('\n')
37
+ const headerLines = []
38
+ for (let i = 0; i < lines.length && lines[i].startsWith('#'); i++) {
39
+ headerLines.push(lines[i])
40
+ }
41
+ const header = headerLines.join('\n')
42
+
43
+ const feats = gff.parseStringSync(data, {
44
+ parseFeatures: true,
45
+ parseComments: false,
46
+ parseDirectives: false,
47
+ parseSequences: false,
48
+ })
49
+
50
+ const intervalTree = feats
51
+ .flat()
52
+ .map(
53
+ (f, i) =>
54
+ new SimpleFeature({
55
+ data: this.featureData(f),
56
+ id: `${this.id}-offset-${i}`,
57
+ }),
58
+ )
59
+ .reduce((acc, obj) => {
60
+ const key = obj.get('refName')
61
+ if (!acc[key]) {
62
+ acc[key] = new IntervalTree()
63
+ }
64
+ acc[key].insert([obj.get('start'), obj.get('end')], obj)
65
+ return acc
66
+ }, {} as Record<string, IntervalTree>)
67
+
68
+ return { header, intervalTree }
38
69
  }
39
70
 
40
71
  private async loadData() {
41
- const { size } = await this.filehandle.stat()
42
- // Add a warning to avoid crashing the browser, recommend indexing
43
- if (size > 500_000_000) {
44
- throw new Error('This file is too large. Consider using Gff3TabixAdapter')
45
- }
46
72
  if (!this.gffFeatures) {
47
- this.gffFeatures = this.filehandle
48
- .readFile('utf8')
49
- .then(data => {
50
- const gffFeatures = gff.parseStringSync(data, {
51
- parseFeatures: true,
52
- parseComments: false,
53
- parseDirectives: false,
54
- parseSequences: false,
55
- }) as FeatureLoc[][]
56
-
57
- return gffFeatures
58
- .flat()
59
- .map(
60
- (f, i) =>
61
- new SimpleFeature({
62
- data: this.featureData(f),
63
- id: `${this.id}-offset-${i}`,
64
- }),
65
- )
66
- .reduce((acc: Record<string, IntervalTree>, obj: SimpleFeature) => {
67
- const key = obj.get('refName')
68
- if (!acc[key]) {
69
- acc[key] = new IntervalTree()
70
- }
71
- acc[key].insert([obj.get('start'), obj.get('end')], obj)
72
- return acc
73
- }, {})
74
- })
75
- .catch(e => {
76
- this.gffFeatures = undefined
77
- throw e
78
- })
73
+ this.gffFeatures = this.loadDataP().catch(e => {
74
+ this.gffFeatures = undefined
75
+ throw e
76
+ })
79
77
  }
80
78
 
81
79
  return this.gffFeatures
82
80
  }
83
81
 
84
82
  public async getRefNames(opts: BaseOptions = {}) {
85
- const gffFeatures = await this.loadData()
86
- return Object.keys(gffFeatures)
83
+ const { intervalTree } = await this.loadData()
84
+ return Object.keys(intervalTree)
87
85
  }
86
+
87
+ public async getHeader() {
88
+ const { header } = await this.loadData()
89
+ return header
90
+ }
91
+
88
92
  public getFeatures(query: NoAssemblyRegion, opts: BaseOptions = {}) {
89
93
  return ObservableCreate<Feature>(async observer => {
90
94
  try {
91
95
  const { start, end, refName } = query
92
- const gffFeatures = await this.loadData()
93
- const tree = gffFeatures[refName]
94
- const feats = tree.search([start, end])
95
- feats.forEach(f => {
96
- observer.next(f)
97
- })
96
+ const { intervalTree } = await this.loadData()
97
+ intervalTree[refName]
98
+ ?.search([start, end])
99
+ .forEach(f => observer.next(f))
98
100
  observer.complete()
99
101
  } catch (e) {
100
102
  observer.error(e)
@@ -102,10 +104,18 @@ export default class extends BaseFeatureDataAdapter {
102
104
  }, opts.signal)
103
105
  }
104
106
 
105
- private featureData(data: FeatureLoc) {
107
+ private featureData(data: GFF3FeatureLineWithRefs) {
106
108
  const f: Record<string, unknown> = { ...data }
107
109
  ;(f.start as number) -= 1 // convert to interbase
108
- f.strand = { '+': 1, '-': -1, '.': 0, '?': undefined }[data.strand] // convert strand
110
+ if (data.strand === '+') {
111
+ f.strand = 1
112
+ } else if (data.strand === '-') {
113
+ f.strand = -1
114
+ } else if (data.strand === '.') {
115
+ f.strand = 0
116
+ } else {
117
+ f.strand = undefined
118
+ }
109
119
  f.phase = Number(data.phase)
110
120
  f.refName = data.seq_id
111
121
  if (data.score === null) {
@@ -124,15 +134,16 @@ export default class extends BaseFeatureDataAdapter {
124
134
  'phase',
125
135
  'strand',
126
136
  ]
127
- Object.keys(data.attributes).forEach(a => {
137
+ const dataAttributes = data.attributes || {}
138
+ Object.keys(dataAttributes).forEach(a => {
128
139
  let b = a.toLowerCase()
129
140
  if (defaultFields.includes(b)) {
130
141
  // add "suffix" to tag name if it already exists
131
142
  // reproduces behavior of NCList
132
143
  b += '2'
133
144
  }
134
- if (data.attributes[a] !== null) {
135
- let attr = data.attributes[a]
145
+ if (dataAttributes[a] !== null) {
146
+ let attr: string | string[] | undefined = dataAttributes[a]
136
147
  if (Array.isArray(attr) && attr.length === 1) {
137
148
  ;[attr] = attr
138
149
  }
@@ -9,7 +9,7 @@ import { openLocation } from '@jbrowse/core/util/io'
9
9
  import { ObservableCreate } from '@jbrowse/core/util/rxjs'
10
10
  import SimpleFeature, { Feature } from '@jbrowse/core/util/simpleFeature'
11
11
  import { TabixIndexedFile } from '@gmod/tabix'
12
- import gff from '@gmod/gff'
12
+ import gff, { GFF3Feature, GFF3FeatureLineWithRefs } from '@gmod/gff'
13
13
  import { Observer } from 'rxjs'
14
14
 
15
15
  import { Instance } from 'mobx-state-tree'
@@ -17,7 +17,6 @@ import { readConfObject } from '@jbrowse/core/configuration'
17
17
  import MyConfigSchema from './configSchema'
18
18
  import PluginManager from '@jbrowse/core/PluginManager'
19
19
  import { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache'
20
- import { FeatureLoc } from '../util'
21
20
 
22
21
  interface LineFeature {
23
22
  start: number
@@ -110,12 +109,8 @@ export default class extends BaseFeatureDataAdapter {
110
109
  }
111
110
  })
112
111
  if (maxEnd > query.end || minStart < query.start) {
113
- // console.log(
114
- // `redispatching ${query.start}-${query.end} => ${minStart}-${maxEnd}`,
115
- // )
116
112
  // make a new feature callback to only return top-level features
117
113
  // in the original query range
118
-
119
114
  this.getFeaturesHelper(
120
115
  { ...query, start: minStart, end: maxEnd },
121
116
  opts,
@@ -146,7 +141,7 @@ export default class extends BaseFeatureDataAdapter {
146
141
  parseComments: false,
147
142
  parseDirectives: false,
148
143
  parseSequences: false,
149
- }) as FeatureLoc[][]
144
+ })
150
145
 
151
146
  features.forEach(featureLocs =>
152
147
  this.formatFeatures(featureLocs).forEach(f => {
@@ -184,20 +179,29 @@ export default class extends BaseFeatureDataAdapter {
184
179
  }
185
180
  }
186
181
 
187
- private formatFeatures(featureLocs: FeatureLoc[]) {
182
+ private formatFeatures(featureLocs: GFF3Feature) {
188
183
  return featureLocs.map(
189
184
  featureLoc =>
190
185
  new SimpleFeature({
191
186
  data: this.featureData(featureLoc),
192
- id: `${this.id}-offset-${featureLoc.attributes._lineHash[0]}`,
187
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
188
+ id: `${this.id}-offset-${featureLoc.attributes!._lineHash![0]}`,
193
189
  }),
194
190
  )
195
191
  }
196
192
 
197
- private featureData(data: FeatureLoc) {
193
+ private featureData(data: GFF3FeatureLineWithRefs) {
198
194
  const f: Record<string, unknown> = { ...data }
199
195
  ;(f.start as number) -= 1 // convert to interbase
200
- f.strand = { '+': 1, '-': -1, '.': 0, '?': undefined }[data.strand] // convert strand
196
+ if (data.strand === '+') {
197
+ f.strand = 1
198
+ } else if (data.strand === '-') {
199
+ f.strand = -1
200
+ } else if (data.strand === '.') {
201
+ f.strand = 0
202
+ } else {
203
+ f.strand = undefined
204
+ }
201
205
  f.phase = Number(data.phase)
202
206
  f.refName = data.seq_id
203
207
  if (data.score === null) {
@@ -216,15 +220,16 @@ export default class extends BaseFeatureDataAdapter {
216
220
  'phase',
217
221
  'strand',
218
222
  ]
219
- Object.keys(data.attributes).forEach(a => {
223
+ const dataAttributes = data.attributes || {}
224
+ Object.keys(dataAttributes).forEach(a => {
220
225
  let b = a.toLowerCase()
221
226
  if (defaultFields.includes(b)) {
222
227
  // add "suffix" to tag name if it already exists
223
228
  // reproduces behavior of NCList
224
229
  b += '2'
225
230
  }
226
- if (data.attributes[a] !== null) {
227
- let attr = data.attributes[a]
231
+ if (dataAttributes[a] !== null) {
232
+ let attr: string | string[] | undefined = dataAttributes[a]
228
233
  if (Array.isArray(attr) && attr.length === 1) {
229
234
  ;[attr] = attr
230
235
  }
package/src/index.ts CHANGED
@@ -2,6 +2,13 @@ import AdapterType from '@jbrowse/core/pluggableElementTypes/AdapterType'
2
2
  import PluginManager from '@jbrowse/core/PluginManager'
3
3
  import Plugin from '@jbrowse/core/Plugin'
4
4
  import { configSchema as gff3TabixAdapterConfigSchema } from './Gff3TabixAdapter'
5
+ import { FileLocation } from '@jbrowse/core/util/types'
6
+ import {
7
+ makeIndex,
8
+ makeIndexType,
9
+ AdapterGuesser,
10
+ getFileName,
11
+ } from '@jbrowse/core/util/tracks'
5
12
  import { configSchema as gff3AdapterConfigSchema } from './Gff3Adapter'
6
13
 
7
14
  export default class extends Plugin {
@@ -17,7 +24,33 @@ export default class extends Plugin {
17
24
  import('./Gff3TabixAdapter/Gff3TabixAdapter').then(r => r.default),
18
25
  }),
19
26
  )
20
-
27
+ pluginManager.addToExtensionPoint(
28
+ 'Core-guessAdapterForLocation',
29
+ (adapterGuesser: AdapterGuesser) => {
30
+ return (
31
+ file: FileLocation,
32
+ index?: FileLocation,
33
+ adapterHint?: string,
34
+ ) => {
35
+ const regexGuess = /\.gff3?\.b?gz$/i
36
+ const adapterName = 'Gff3TabixAdapter'
37
+ const fileName = getFileName(file)
38
+ const indexName = index && getFileName(index)
39
+ if (regexGuess.test(fileName) || adapterHint === adapterName) {
40
+ return {
41
+ type: adapterName,
42
+ bamLocation: file,
43
+ gffGzLocation: file,
44
+ index: {
45
+ location: index || makeIndex(file, '.tbi'),
46
+ indexType: makeIndexType(indexName, 'CSI', 'TBI'),
47
+ },
48
+ }
49
+ }
50
+ return adapterGuesser(file, index, adapterHint)
51
+ }
52
+ },
53
+ )
21
54
  pluginManager.addAdapterType(
22
55
  () =>
23
56
  new AdapterType({
@@ -27,5 +60,26 @@ export default class extends Plugin {
27
60
  import('./Gff3Adapter/Gff3Adapter').then(r => r.default),
28
61
  }),
29
62
  )
63
+ pluginManager.addToExtensionPoint(
64
+ 'Core-guessAdapterForLocation',
65
+ (adapterGuesser: AdapterGuesser) => {
66
+ return (
67
+ file: FileLocation,
68
+ index?: FileLocation,
69
+ adapterHint?: string,
70
+ ) => {
71
+ const regexGuess = /\.gff3?$/i
72
+ const adapterName = 'Gff3Adapter'
73
+ const fileName = getFileName(file)
74
+ if (regexGuess.test(fileName) || adapterHint === adapterName) {
75
+ return {
76
+ type: adapterName,
77
+ gffLocation: file,
78
+ }
79
+ }
80
+ return adapterGuesser(file, index, adapterHint)
81
+ }
82
+ },
83
+ )
30
84
  }
31
85
  }
package/dist/util.d.ts DELETED
@@ -1,14 +0,0 @@
1
- export declare type Strand = '+' | '-' | '.' | '?';
2
- export interface FeatureLoc {
3
- [key: string]: unknown;
4
- start: number;
5
- end: number;
6
- strand: Strand;
7
- seq_id: string;
8
- child_features: FeatureLoc[][];
9
- data: unknown;
10
- derived_features: unknown;
11
- attributes: {
12
- [key: string]: unknown[];
13
- };
14
- }
package/src/declare.d.ts DELETED
@@ -1 +0,0 @@
1
- declare module '@gmod/gff'
package/src/util.ts DELETED
@@ -1,12 +0,0 @@
1
- export type Strand = '+' | '-' | '.' | '?'
2
- export interface FeatureLoc {
3
- [key: string]: unknown
4
- start: number
5
- end: number
6
- strand: Strand
7
- seq_id: string
8
- child_features: FeatureLoc[][]
9
- data: unknown
10
- derived_features: unknown
11
- attributes: { [key: string]: unknown[] }
12
- }