@jbrowse/plugin-gff3 2.13.1 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Gff3Adapter/Gff3Adapter.d.ts +4 -3
- package/dist/Gff3Adapter/Gff3Adapter.js +42 -101
- package/dist/Gff3TabixAdapter/Gff3TabixAdapter.d.ts +0 -1
- package/dist/Gff3TabixAdapter/Gff3TabixAdapter.js +20 -78
- package/dist/GuessGff3/index.js +1 -1
- package/dist/featureData.d.ts +2 -0
- package/dist/featureData.js +64 -0
- package/esm/Gff3Adapter/Gff3Adapter.d.ts +4 -3
- package/esm/Gff3Adapter/Gff3Adapter.js +42 -101
- package/esm/Gff3TabixAdapter/Gff3TabixAdapter.d.ts +0 -1
- package/esm/Gff3TabixAdapter/Gff3TabixAdapter.js +20 -78
- package/esm/GuessGff3/index.js +1 -1
- package/esm/featureData.d.ts +2 -0
- package/esm/featureData.js +61 -0
- package/package.json +3 -3
|
@@ -2,17 +2,18 @@ import { BaseFeatureDataAdapter, BaseOptions } from '@jbrowse/core/data_adapters
|
|
|
2
2
|
import { NoAssemblyRegion } from '@jbrowse/core/util/types';
|
|
3
3
|
import IntervalTree from '@flatten-js/interval-tree';
|
|
4
4
|
import { Feature } from '@jbrowse/core/util/simpleFeature';
|
|
5
|
+
type StatusCallback = (arg: string) => void;
|
|
5
6
|
export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
6
7
|
calculatedIntervalTreeMap: Record<string, IntervalTree>;
|
|
7
|
-
|
|
8
|
+
gffFeatures?: Promise<{
|
|
8
9
|
header: string;
|
|
9
|
-
intervalTreeMap: Record<string, (
|
|
10
|
+
intervalTreeMap: Record<string, (sc?: StatusCallback) => IntervalTree>;
|
|
10
11
|
}>;
|
|
11
12
|
private loadDataP;
|
|
12
13
|
private loadData;
|
|
13
14
|
getRefNames(opts?: BaseOptions): Promise<string[]>;
|
|
14
15
|
getHeader(opts?: BaseOptions): Promise<string>;
|
|
15
16
|
getFeatures(query: NoAssemblyRegion, opts?: BaseOptions): import("rxjs").Observable<Feature>;
|
|
16
|
-
private featureData;
|
|
17
17
|
freeResources(): void;
|
|
18
18
|
}
|
|
19
|
+
export {};
|
|
@@ -10,10 +10,8 @@ const interval_tree_1 = __importDefault(require("@flatten-js/interval-tree"));
|
|
|
10
10
|
const simpleFeature_1 = __importDefault(require("@jbrowse/core/util/simpleFeature"));
|
|
11
11
|
const bgzf_filehandle_1 = require("@gmod/bgzf-filehandle");
|
|
12
12
|
const gff_1 = __importDefault(require("@gmod/gff"));
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
}
|
|
16
|
-
const decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
|
|
13
|
+
const util_1 = require("@jbrowse/core/util");
|
|
14
|
+
const featureData_1 = require("../featureData");
|
|
17
15
|
class Gff3Adapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
18
16
|
constructor() {
|
|
19
17
|
super(...arguments);
|
|
@@ -21,12 +19,14 @@ class Gff3Adapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
|
21
19
|
}
|
|
22
20
|
async loadDataP(opts) {
|
|
23
21
|
const { statusCallback = () => { } } = opts || {};
|
|
24
|
-
const
|
|
25
|
-
const
|
|
26
|
-
|
|
22
|
+
const buf = (await (0, io_1.openLocation)(this.getConf('gffLocation'), this.pluginManager).readFile(opts));
|
|
23
|
+
const buffer = (0, util_1.isGzip)(buf)
|
|
24
|
+
? await (0, util_1.updateStatus)('Unzipping', statusCallback, () => (0, bgzf_filehandle_1.unzip)(buf))
|
|
25
|
+
: buf;
|
|
27
26
|
const headerLines = [];
|
|
28
27
|
const featureMap = {};
|
|
29
28
|
let blockStart = 0;
|
|
29
|
+
const decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
|
|
30
30
|
let i = 0;
|
|
31
31
|
while (blockStart < buffer.length) {
|
|
32
32
|
const n = buffer.indexOf('\n', blockStart);
|
|
@@ -46,7 +46,7 @@ class Gff3Adapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
|
46
46
|
if (!featureMap[refName]) {
|
|
47
47
|
featureMap[refName] = '';
|
|
48
48
|
}
|
|
49
|
-
featureMap[refName] += line
|
|
49
|
+
featureMap[refName] += `${line}\n`;
|
|
50
50
|
}
|
|
51
51
|
}
|
|
52
52
|
if (i++ % 10000 === 0) {
|
|
@@ -54,38 +54,39 @@ class Gff3Adapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
|
54
54
|
}
|
|
55
55
|
blockStart = n + 1;
|
|
56
56
|
}
|
|
57
|
-
const intervalTreeMap = Object.fromEntries(Object.entries(featureMap).map(([refName, lines]) =>
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
(
|
|
61
|
-
sc === null || sc === void 0 ? void 0 : sc(
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
57
|
+
const intervalTreeMap = Object.fromEntries(Object.entries(featureMap).map(([refName, lines]) => [
|
|
58
|
+
refName,
|
|
59
|
+
(sc) => {
|
|
60
|
+
if (!this.calculatedIntervalTreeMap[refName]) {
|
|
61
|
+
sc === null || sc === void 0 ? void 0 : sc('Parsing GFF data');
|
|
62
|
+
const intervalTree = new interval_tree_1.default();
|
|
63
|
+
gff_1.default
|
|
64
|
+
.parseStringSync(lines, {
|
|
65
|
+
parseFeatures: true,
|
|
66
|
+
parseComments: false,
|
|
67
|
+
parseDirectives: false,
|
|
68
|
+
parseSequences: false,
|
|
69
|
+
disableDerivesFromReferences: true,
|
|
70
|
+
})
|
|
71
|
+
.flat()
|
|
72
|
+
.map((f, i) => new simpleFeature_1.default({
|
|
73
|
+
data: (0, featureData_1.featureData)(f),
|
|
74
|
+
id: `${this.id}-${refName}-${i}`,
|
|
75
|
+
}))
|
|
76
|
+
.forEach(obj => intervalTree.insert([obj.get('start'), obj.get('end')], obj));
|
|
77
|
+
this.calculatedIntervalTreeMap[refName] = intervalTree;
|
|
78
|
+
}
|
|
79
|
+
return this.calculatedIntervalTreeMap[refName];
|
|
80
|
+
},
|
|
81
|
+
]));
|
|
82
|
+
return {
|
|
83
|
+
header: headerLines.join('\n'),
|
|
84
|
+
intervalTreeMap,
|
|
85
|
+
};
|
|
85
86
|
}
|
|
86
87
|
async loadData(opts) {
|
|
87
88
|
if (!this.gffFeatures) {
|
|
88
|
-
this.gffFeatures = this.loadDataP(opts).catch(e => {
|
|
89
|
+
this.gffFeatures = this.loadDataP(opts).catch((e) => {
|
|
89
90
|
this.gffFeatures = undefined;
|
|
90
91
|
throw e;
|
|
91
92
|
});
|
|
@@ -102,11 +103,13 @@ class Gff3Adapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
|
102
103
|
}
|
|
103
104
|
getFeatures(query, opts = {}) {
|
|
104
105
|
return (0, rxjs_1.ObservableCreate)(async (observer) => {
|
|
105
|
-
var _a
|
|
106
|
+
var _a;
|
|
106
107
|
try {
|
|
107
108
|
const { start, end, refName } = query;
|
|
108
109
|
const { intervalTreeMap } = await this.loadData(opts);
|
|
109
|
-
(
|
|
110
|
+
(_a = intervalTreeMap[refName]) === null || _a === void 0 ? void 0 : _a.call(intervalTreeMap, opts.statusCallback).search([start, end]).forEach(f => {
|
|
111
|
+
observer.next(f);
|
|
112
|
+
});
|
|
110
113
|
observer.complete();
|
|
111
114
|
}
|
|
112
115
|
catch (e) {
|
|
@@ -114,68 +117,6 @@ class Gff3Adapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
|
114
117
|
}
|
|
115
118
|
}, opts.signal);
|
|
116
119
|
}
|
|
117
|
-
featureData(data) {
|
|
118
|
-
const f = { ...data };
|
|
119
|
-
f.start -= 1; // convert to interbase
|
|
120
|
-
if (data.strand === '+') {
|
|
121
|
-
f.strand = 1;
|
|
122
|
-
}
|
|
123
|
-
else if (data.strand === '-') {
|
|
124
|
-
f.strand = -1;
|
|
125
|
-
}
|
|
126
|
-
else if (data.strand === '.') {
|
|
127
|
-
f.strand = 0;
|
|
128
|
-
}
|
|
129
|
-
else {
|
|
130
|
-
f.strand = undefined;
|
|
131
|
-
}
|
|
132
|
-
f.phase = Number(data.phase);
|
|
133
|
-
f.refName = data.seq_id;
|
|
134
|
-
if (data.score === null) {
|
|
135
|
-
delete f.score;
|
|
136
|
-
}
|
|
137
|
-
if (data.phase === null) {
|
|
138
|
-
delete f.score;
|
|
139
|
-
}
|
|
140
|
-
const defaultFields = new Set([
|
|
141
|
-
'start',
|
|
142
|
-
'end',
|
|
143
|
-
'seq_id',
|
|
144
|
-
'score',
|
|
145
|
-
'type',
|
|
146
|
-
'source',
|
|
147
|
-
'phase',
|
|
148
|
-
'strand',
|
|
149
|
-
]);
|
|
150
|
-
const dataAttributes = data.attributes || {};
|
|
151
|
-
for (const a of Object.keys(dataAttributes)) {
|
|
152
|
-
let b = a.toLowerCase();
|
|
153
|
-
if (defaultFields.has(b)) {
|
|
154
|
-
// add "suffix" to tag name if it already exists
|
|
155
|
-
// reproduces behavior of NCList
|
|
156
|
-
b += '2';
|
|
157
|
-
}
|
|
158
|
-
if (dataAttributes[a] !== null) {
|
|
159
|
-
let attr = dataAttributes[a];
|
|
160
|
-
if (Array.isArray(attr) && attr.length === 1) {
|
|
161
|
-
;
|
|
162
|
-
[attr] = attr;
|
|
163
|
-
}
|
|
164
|
-
f[b] = attr;
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
f.refName = f.seq_id;
|
|
168
|
-
// the SimpleFeature constructor takes care of recursively inflating subfeatures
|
|
169
|
-
if (data.child_features && data.child_features.length > 0) {
|
|
170
|
-
f.subfeatures = data.child_features.flatMap(childLocs => childLocs.map(childLoc => this.featureData(childLoc)));
|
|
171
|
-
}
|
|
172
|
-
delete f.child_features;
|
|
173
|
-
delete f.data;
|
|
174
|
-
// delete f.derived_features
|
|
175
|
-
delete f.attributes;
|
|
176
|
-
delete f.seq_id;
|
|
177
|
-
return f;
|
|
178
|
-
}
|
|
179
120
|
freeResources( /* { region } */) { }
|
|
180
121
|
}
|
|
181
122
|
exports.default = Gff3Adapter;
|
|
@@ -3,7 +3,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
/* eslint-disable no-underscore-dangle */
|
|
7
6
|
const BaseAdapter_1 = require("@jbrowse/core/data_adapters/BaseAdapter");
|
|
8
7
|
const range_1 = require("@jbrowse/core/util/range");
|
|
9
8
|
const io_1 = require("@jbrowse/core/util/io");
|
|
@@ -12,6 +11,7 @@ const simpleFeature_1 = __importDefault(require("@jbrowse/core/util/simpleFeatur
|
|
|
12
11
|
const tabix_1 = require("@gmod/tabix");
|
|
13
12
|
const gff_1 = __importDefault(require("@gmod/gff"));
|
|
14
13
|
const configuration_1 = require("@jbrowse/core/configuration");
|
|
14
|
+
const featureData_1 = require("../featureData");
|
|
15
15
|
class Gff3TabixAdapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
16
16
|
constructor(config, getSubAdapter, pluginManager) {
|
|
17
17
|
super(config, getSubAdapter, pluginManager);
|
|
@@ -44,19 +44,19 @@ class Gff3TabixAdapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
|
44
44
|
await this.getFeaturesHelper(query, opts, metadata, observer, true);
|
|
45
45
|
}, opts.signal);
|
|
46
46
|
}
|
|
47
|
-
async getFeaturesHelper(query, opts
|
|
47
|
+
async getFeaturesHelper(query, opts, metadata, observer, allowRedispatch, originalQuery = query) {
|
|
48
48
|
try {
|
|
49
49
|
const lines = [];
|
|
50
50
|
await this.gff.getLines(query.refName, query.start, query.end, (line, fileOffset) => {
|
|
51
51
|
lines.push(this.parseLine(metadata.columnNumbers, line, fileOffset));
|
|
52
52
|
});
|
|
53
53
|
if (allowRedispatch && lines.length) {
|
|
54
|
-
let minStart =
|
|
55
|
-
let maxEnd =
|
|
54
|
+
let minStart = Number.POSITIVE_INFINITY;
|
|
55
|
+
let maxEnd = Number.NEGATIVE_INFINITY;
|
|
56
56
|
lines.forEach(line => {
|
|
57
57
|
const featureType = line.fields[2];
|
|
58
|
-
// only expand redispatch range if feature is not a "dontRedispatch"
|
|
59
|
-
// skips large regions like chromosome,region
|
|
58
|
+
// only expand redispatch range if feature is not a "dontRedispatch"
|
|
59
|
+
// type skips large regions like chromosome,region
|
|
60
60
|
if (!this.dontRedispatch.includes(featureType)) {
|
|
61
61
|
const start = line.start - 1; // gff is 1-based
|
|
62
62
|
if (start < minStart) {
|
|
@@ -94,11 +94,13 @@ class Gff3TabixAdapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
|
94
94
|
parseSequences: false,
|
|
95
95
|
disableDerivesFromReferences: true,
|
|
96
96
|
});
|
|
97
|
-
features.forEach(featureLocs =>
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
97
|
+
features.forEach(featureLocs => {
|
|
98
|
+
this.formatFeatures(featureLocs).forEach(f => {
|
|
99
|
+
if ((0, range_1.doesIntersect2)(f.get('start'), f.get('end'), originalQuery.start, originalQuery.end)) {
|
|
100
|
+
observer.next(f);
|
|
101
|
+
}
|
|
102
|
+
});
|
|
103
|
+
});
|
|
102
104
|
observer.complete();
|
|
103
105
|
}
|
|
104
106
|
catch (e) {
|
|
@@ -116,73 +118,13 @@ class Gff3TabixAdapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
|
116
118
|
};
|
|
117
119
|
}
|
|
118
120
|
formatFeatures(featureLocs) {
|
|
119
|
-
return featureLocs.map(featureLoc =>
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
f.start -= 1; // convert to interbase
|
|
127
|
-
if (data.strand === '+') {
|
|
128
|
-
f.strand = 1;
|
|
129
|
-
}
|
|
130
|
-
else if (data.strand === '-') {
|
|
131
|
-
f.strand = -1;
|
|
132
|
-
}
|
|
133
|
-
else if (data.strand === '.') {
|
|
134
|
-
f.strand = 0;
|
|
135
|
-
}
|
|
136
|
-
else {
|
|
137
|
-
f.strand = undefined;
|
|
138
|
-
}
|
|
139
|
-
f.phase = Number(data.phase);
|
|
140
|
-
f.refName = data.seq_id;
|
|
141
|
-
if (data.score === null) {
|
|
142
|
-
delete f.score;
|
|
143
|
-
}
|
|
144
|
-
if (data.phase === null) {
|
|
145
|
-
delete f.score;
|
|
146
|
-
}
|
|
147
|
-
const defaultFields = new Set([
|
|
148
|
-
'start',
|
|
149
|
-
'end',
|
|
150
|
-
'seq_id',
|
|
151
|
-
'score',
|
|
152
|
-
'type',
|
|
153
|
-
'source',
|
|
154
|
-
'phase',
|
|
155
|
-
'strand',
|
|
156
|
-
]);
|
|
157
|
-
const dataAttributes = data.attributes || {};
|
|
158
|
-
for (const a of Object.keys(dataAttributes)) {
|
|
159
|
-
let b = a.toLowerCase();
|
|
160
|
-
if (defaultFields.has(b)) {
|
|
161
|
-
// add "suffix" to tag name if it already exists
|
|
162
|
-
// reproduces behavior of NCList
|
|
163
|
-
b += '2';
|
|
164
|
-
}
|
|
165
|
-
if (dataAttributes[a] !== null) {
|
|
166
|
-
let attr = dataAttributes[a];
|
|
167
|
-
if (Array.isArray(attr) && attr.length === 1) {
|
|
168
|
-
;
|
|
169
|
-
[attr] = attr;
|
|
170
|
-
}
|
|
171
|
-
f[b] = attr;
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
f.refName = f.seq_id;
|
|
175
|
-
// the SimpleFeature constructor takes care of recursively inflating subfeatures
|
|
176
|
-
if (data.child_features && data.child_features.length > 0) {
|
|
177
|
-
f.subfeatures = data.child_features.flatMap(childLocs => childLocs.map(childLoc => this.featureData(childLoc)));
|
|
178
|
-
}
|
|
179
|
-
delete f.child_features;
|
|
180
|
-
delete f.data;
|
|
181
|
-
// delete f.derived_features
|
|
182
|
-
delete f._linehash;
|
|
183
|
-
delete f.attributes;
|
|
184
|
-
delete f.seq_id;
|
|
185
|
-
return f;
|
|
121
|
+
return featureLocs.map(featureLoc => {
|
|
122
|
+
var _a, _b;
|
|
123
|
+
return new simpleFeature_1.default({
|
|
124
|
+
data: (0, featureData_1.featureData)(featureLoc),
|
|
125
|
+
id: `${this.id}-offset-${(_b = (_a = featureLoc.attributes) === null || _a === void 0 ? void 0 : _a._lineHash) === null || _b === void 0 ? void 0 : _b[0]}`,
|
|
126
|
+
});
|
|
127
|
+
});
|
|
186
128
|
}
|
|
187
129
|
freeResources( /* { region } */) { }
|
|
188
130
|
}
|
package/dist/GuessGff3/index.js
CHANGED
|
@@ -35,7 +35,7 @@ function GuessGff3F(pluginManager) {
|
|
|
35
35
|
if (regexGuess.test(fileName) && !adapterHint) {
|
|
36
36
|
return obj;
|
|
37
37
|
}
|
|
38
|
-
|
|
38
|
+
if (adapterHint === adapterName) {
|
|
39
39
|
return obj;
|
|
40
40
|
}
|
|
41
41
|
return adapterGuesser(file, index, adapterHint);
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.featureData = featureData;
|
|
4
|
+
function featureData(data) {
|
|
5
|
+
const f = { ...data };
|
|
6
|
+
f.start -= 1; // convert to interbase
|
|
7
|
+
if (data.strand === '+') {
|
|
8
|
+
f.strand = 1;
|
|
9
|
+
}
|
|
10
|
+
else if (data.strand === '-') {
|
|
11
|
+
f.strand = -1;
|
|
12
|
+
}
|
|
13
|
+
else if (data.strand === '.') {
|
|
14
|
+
f.strand = 0;
|
|
15
|
+
}
|
|
16
|
+
else {
|
|
17
|
+
f.strand = undefined;
|
|
18
|
+
}
|
|
19
|
+
f.phase = data.phase === null ? undefined : Number(data.phase);
|
|
20
|
+
f.refName = data.seq_id;
|
|
21
|
+
if (data.score === null) {
|
|
22
|
+
f.score = undefined;
|
|
23
|
+
}
|
|
24
|
+
const defaultFields = new Set([
|
|
25
|
+
'start',
|
|
26
|
+
'end',
|
|
27
|
+
'seq_id',
|
|
28
|
+
'score',
|
|
29
|
+
'type',
|
|
30
|
+
'source',
|
|
31
|
+
'phase',
|
|
32
|
+
'strand',
|
|
33
|
+
]);
|
|
34
|
+
const dataAttributes = data.attributes || {};
|
|
35
|
+
for (const a of Object.keys(dataAttributes)) {
|
|
36
|
+
let b = a.toLowerCase();
|
|
37
|
+
if (defaultFields.has(b)) {
|
|
38
|
+
// add "suffix" to tag name if it already exists
|
|
39
|
+
// reproduces behavior of NCList
|
|
40
|
+
b += '2';
|
|
41
|
+
}
|
|
42
|
+
if (dataAttributes[a]) {
|
|
43
|
+
let attr = dataAttributes[a];
|
|
44
|
+
if (Array.isArray(attr) && attr.length === 1) {
|
|
45
|
+
;
|
|
46
|
+
[attr] = attr;
|
|
47
|
+
}
|
|
48
|
+
f[b] = attr;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
f.refName = f.seq_id;
|
|
52
|
+
// the SimpleFeature constructor takes care of recursively inflating
|
|
53
|
+
// subfeatures
|
|
54
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
55
|
+
if (data.child_features && data.child_features.length > 0) {
|
|
56
|
+
f.subfeatures = data.child_features.flatMap(childLocs => childLocs.map(childLoc => featureData(childLoc)));
|
|
57
|
+
}
|
|
58
|
+
f.child_features = undefined;
|
|
59
|
+
f.data = undefined;
|
|
60
|
+
// delete f.derived_features
|
|
61
|
+
f.attributes = undefined;
|
|
62
|
+
f.seq_id = undefined;
|
|
63
|
+
return f;
|
|
64
|
+
}
|
|
@@ -2,17 +2,18 @@ import { BaseFeatureDataAdapter, BaseOptions } from '@jbrowse/core/data_adapters
|
|
|
2
2
|
import { NoAssemblyRegion } from '@jbrowse/core/util/types';
|
|
3
3
|
import IntervalTree from '@flatten-js/interval-tree';
|
|
4
4
|
import { Feature } from '@jbrowse/core/util/simpleFeature';
|
|
5
|
+
type StatusCallback = (arg: string) => void;
|
|
5
6
|
export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
6
7
|
calculatedIntervalTreeMap: Record<string, IntervalTree>;
|
|
7
|
-
|
|
8
|
+
gffFeatures?: Promise<{
|
|
8
9
|
header: string;
|
|
9
|
-
intervalTreeMap: Record<string, (
|
|
10
|
+
intervalTreeMap: Record<string, (sc?: StatusCallback) => IntervalTree>;
|
|
10
11
|
}>;
|
|
11
12
|
private loadDataP;
|
|
12
13
|
private loadData;
|
|
13
14
|
getRefNames(opts?: BaseOptions): Promise<string[]>;
|
|
14
15
|
getHeader(opts?: BaseOptions): Promise<string>;
|
|
15
16
|
getFeatures(query: NoAssemblyRegion, opts?: BaseOptions): import("rxjs").Observable<Feature>;
|
|
16
|
-
private featureData;
|
|
17
17
|
freeResources(): void;
|
|
18
18
|
}
|
|
19
|
+
export {};
|
|
@@ -5,10 +5,8 @@ import IntervalTree from '@flatten-js/interval-tree';
|
|
|
5
5
|
import SimpleFeature from '@jbrowse/core/util/simpleFeature';
|
|
6
6
|
import { unzip } from '@gmod/bgzf-filehandle';
|
|
7
7
|
import gff from '@gmod/gff';
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
}
|
|
11
|
-
const decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
|
|
8
|
+
import { isGzip, updateStatus } from '@jbrowse/core/util';
|
|
9
|
+
import { featureData } from '../featureData';
|
|
12
10
|
export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
13
11
|
constructor() {
|
|
14
12
|
super(...arguments);
|
|
@@ -16,12 +14,14 @@ export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
|
16
14
|
}
|
|
17
15
|
async loadDataP(opts) {
|
|
18
16
|
const { statusCallback = () => { } } = opts || {};
|
|
19
|
-
const
|
|
20
|
-
const
|
|
21
|
-
|
|
17
|
+
const buf = (await openLocation(this.getConf('gffLocation'), this.pluginManager).readFile(opts));
|
|
18
|
+
const buffer = isGzip(buf)
|
|
19
|
+
? await updateStatus('Unzipping', statusCallback, () => unzip(buf))
|
|
20
|
+
: buf;
|
|
22
21
|
const headerLines = [];
|
|
23
22
|
const featureMap = {};
|
|
24
23
|
let blockStart = 0;
|
|
24
|
+
const decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
|
|
25
25
|
let i = 0;
|
|
26
26
|
while (blockStart < buffer.length) {
|
|
27
27
|
const n = buffer.indexOf('\n', blockStart);
|
|
@@ -41,7 +41,7 @@ export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
|
41
41
|
if (!featureMap[refName]) {
|
|
42
42
|
featureMap[refName] = '';
|
|
43
43
|
}
|
|
44
|
-
featureMap[refName] += line
|
|
44
|
+
featureMap[refName] += `${line}\n`;
|
|
45
45
|
}
|
|
46
46
|
}
|
|
47
47
|
if (i++ % 10000 === 0) {
|
|
@@ -49,38 +49,39 @@ export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
|
49
49
|
}
|
|
50
50
|
blockStart = n + 1;
|
|
51
51
|
}
|
|
52
|
-
const intervalTreeMap = Object.fromEntries(Object.entries(featureMap).map(([refName, lines]) =>
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
(
|
|
56
|
-
sc === null || sc === void 0 ? void 0 : sc(
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
52
|
+
const intervalTreeMap = Object.fromEntries(Object.entries(featureMap).map(([refName, lines]) => [
|
|
53
|
+
refName,
|
|
54
|
+
(sc) => {
|
|
55
|
+
if (!this.calculatedIntervalTreeMap[refName]) {
|
|
56
|
+
sc === null || sc === void 0 ? void 0 : sc('Parsing GFF data');
|
|
57
|
+
const intervalTree = new IntervalTree();
|
|
58
|
+
gff
|
|
59
|
+
.parseStringSync(lines, {
|
|
60
|
+
parseFeatures: true,
|
|
61
|
+
parseComments: false,
|
|
62
|
+
parseDirectives: false,
|
|
63
|
+
parseSequences: false,
|
|
64
|
+
disableDerivesFromReferences: true,
|
|
65
|
+
})
|
|
66
|
+
.flat()
|
|
67
|
+
.map((f, i) => new SimpleFeature({
|
|
68
|
+
data: featureData(f),
|
|
69
|
+
id: `${this.id}-${refName}-${i}`,
|
|
70
|
+
}))
|
|
71
|
+
.forEach(obj => intervalTree.insert([obj.get('start'), obj.get('end')], obj));
|
|
72
|
+
this.calculatedIntervalTreeMap[refName] = intervalTree;
|
|
73
|
+
}
|
|
74
|
+
return this.calculatedIntervalTreeMap[refName];
|
|
75
|
+
},
|
|
76
|
+
]));
|
|
77
|
+
return {
|
|
78
|
+
header: headerLines.join('\n'),
|
|
79
|
+
intervalTreeMap,
|
|
80
|
+
};
|
|
80
81
|
}
|
|
81
82
|
async loadData(opts) {
|
|
82
83
|
if (!this.gffFeatures) {
|
|
83
|
-
this.gffFeatures = this.loadDataP(opts).catch(e => {
|
|
84
|
+
this.gffFeatures = this.loadDataP(opts).catch((e) => {
|
|
84
85
|
this.gffFeatures = undefined;
|
|
85
86
|
throw e;
|
|
86
87
|
});
|
|
@@ -97,11 +98,13 @@ export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
|
97
98
|
}
|
|
98
99
|
getFeatures(query, opts = {}) {
|
|
99
100
|
return ObservableCreate(async (observer) => {
|
|
100
|
-
var _a
|
|
101
|
+
var _a;
|
|
101
102
|
try {
|
|
102
103
|
const { start, end, refName } = query;
|
|
103
104
|
const { intervalTreeMap } = await this.loadData(opts);
|
|
104
|
-
(
|
|
105
|
+
(_a = intervalTreeMap[refName]) === null || _a === void 0 ? void 0 : _a.call(intervalTreeMap, opts.statusCallback).search([start, end]).forEach(f => {
|
|
106
|
+
observer.next(f);
|
|
107
|
+
});
|
|
105
108
|
observer.complete();
|
|
106
109
|
}
|
|
107
110
|
catch (e) {
|
|
@@ -109,67 +112,5 @@ export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
|
109
112
|
}
|
|
110
113
|
}, opts.signal);
|
|
111
114
|
}
|
|
112
|
-
featureData(data) {
|
|
113
|
-
const f = { ...data };
|
|
114
|
-
f.start -= 1; // convert to interbase
|
|
115
|
-
if (data.strand === '+') {
|
|
116
|
-
f.strand = 1;
|
|
117
|
-
}
|
|
118
|
-
else if (data.strand === '-') {
|
|
119
|
-
f.strand = -1;
|
|
120
|
-
}
|
|
121
|
-
else if (data.strand === '.') {
|
|
122
|
-
f.strand = 0;
|
|
123
|
-
}
|
|
124
|
-
else {
|
|
125
|
-
f.strand = undefined;
|
|
126
|
-
}
|
|
127
|
-
f.phase = Number(data.phase);
|
|
128
|
-
f.refName = data.seq_id;
|
|
129
|
-
if (data.score === null) {
|
|
130
|
-
delete f.score;
|
|
131
|
-
}
|
|
132
|
-
if (data.phase === null) {
|
|
133
|
-
delete f.score;
|
|
134
|
-
}
|
|
135
|
-
const defaultFields = new Set([
|
|
136
|
-
'start',
|
|
137
|
-
'end',
|
|
138
|
-
'seq_id',
|
|
139
|
-
'score',
|
|
140
|
-
'type',
|
|
141
|
-
'source',
|
|
142
|
-
'phase',
|
|
143
|
-
'strand',
|
|
144
|
-
]);
|
|
145
|
-
const dataAttributes = data.attributes || {};
|
|
146
|
-
for (const a of Object.keys(dataAttributes)) {
|
|
147
|
-
let b = a.toLowerCase();
|
|
148
|
-
if (defaultFields.has(b)) {
|
|
149
|
-
// add "suffix" to tag name if it already exists
|
|
150
|
-
// reproduces behavior of NCList
|
|
151
|
-
b += '2';
|
|
152
|
-
}
|
|
153
|
-
if (dataAttributes[a] !== null) {
|
|
154
|
-
let attr = dataAttributes[a];
|
|
155
|
-
if (Array.isArray(attr) && attr.length === 1) {
|
|
156
|
-
;
|
|
157
|
-
[attr] = attr;
|
|
158
|
-
}
|
|
159
|
-
f[b] = attr;
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
f.refName = f.seq_id;
|
|
163
|
-
// the SimpleFeature constructor takes care of recursively inflating subfeatures
|
|
164
|
-
if (data.child_features && data.child_features.length > 0) {
|
|
165
|
-
f.subfeatures = data.child_features.flatMap(childLocs => childLocs.map(childLoc => this.featureData(childLoc)));
|
|
166
|
-
}
|
|
167
|
-
delete f.child_features;
|
|
168
|
-
delete f.data;
|
|
169
|
-
// delete f.derived_features
|
|
170
|
-
delete f.attributes;
|
|
171
|
-
delete f.seq_id;
|
|
172
|
-
return f;
|
|
173
|
-
}
|
|
174
115
|
freeResources( /* { region } */) { }
|
|
175
116
|
}
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
/* eslint-disable no-underscore-dangle */
|
|
2
1
|
import { BaseFeatureDataAdapter, } from '@jbrowse/core/data_adapters/BaseAdapter';
|
|
3
2
|
import { doesIntersect2 } from '@jbrowse/core/util/range';
|
|
4
3
|
import { openLocation } from '@jbrowse/core/util/io';
|
|
@@ -7,6 +6,7 @@ import SimpleFeature from '@jbrowse/core/util/simpleFeature';
|
|
|
7
6
|
import { TabixIndexedFile } from '@gmod/tabix';
|
|
8
7
|
import gff from '@gmod/gff';
|
|
9
8
|
import { readConfObject, } from '@jbrowse/core/configuration';
|
|
9
|
+
import { featureData } from '../featureData';
|
|
10
10
|
export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
|
|
11
11
|
constructor(config, getSubAdapter, pluginManager) {
|
|
12
12
|
super(config, getSubAdapter, pluginManager);
|
|
@@ -39,19 +39,19 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
|
|
|
39
39
|
await this.getFeaturesHelper(query, opts, metadata, observer, true);
|
|
40
40
|
}, opts.signal);
|
|
41
41
|
}
|
|
42
|
-
async getFeaturesHelper(query, opts
|
|
42
|
+
async getFeaturesHelper(query, opts, metadata, observer, allowRedispatch, originalQuery = query) {
|
|
43
43
|
try {
|
|
44
44
|
const lines = [];
|
|
45
45
|
await this.gff.getLines(query.refName, query.start, query.end, (line, fileOffset) => {
|
|
46
46
|
lines.push(this.parseLine(metadata.columnNumbers, line, fileOffset));
|
|
47
47
|
});
|
|
48
48
|
if (allowRedispatch && lines.length) {
|
|
49
|
-
let minStart =
|
|
50
|
-
let maxEnd =
|
|
49
|
+
let minStart = Number.POSITIVE_INFINITY;
|
|
50
|
+
let maxEnd = Number.NEGATIVE_INFINITY;
|
|
51
51
|
lines.forEach(line => {
|
|
52
52
|
const featureType = line.fields[2];
|
|
53
|
-
// only expand redispatch range if feature is not a "dontRedispatch"
|
|
54
|
-
// skips large regions like chromosome,region
|
|
53
|
+
// only expand redispatch range if feature is not a "dontRedispatch"
|
|
54
|
+
// type skips large regions like chromosome,region
|
|
55
55
|
if (!this.dontRedispatch.includes(featureType)) {
|
|
56
56
|
const start = line.start - 1; // gff is 1-based
|
|
57
57
|
if (start < minStart) {
|
|
@@ -89,11 +89,13 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
|
|
|
89
89
|
parseSequences: false,
|
|
90
90
|
disableDerivesFromReferences: true,
|
|
91
91
|
});
|
|
92
|
-
features.forEach(featureLocs =>
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
92
|
+
features.forEach(featureLocs => {
|
|
93
|
+
this.formatFeatures(featureLocs).forEach(f => {
|
|
94
|
+
if (doesIntersect2(f.get('start'), f.get('end'), originalQuery.start, originalQuery.end)) {
|
|
95
|
+
observer.next(f);
|
|
96
|
+
}
|
|
97
|
+
});
|
|
98
|
+
});
|
|
97
99
|
observer.complete();
|
|
98
100
|
}
|
|
99
101
|
catch (e) {
|
|
@@ -111,73 +113,13 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
|
|
|
111
113
|
};
|
|
112
114
|
}
|
|
113
115
|
formatFeatures(featureLocs) {
|
|
114
|
-
return featureLocs.map(featureLoc =>
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
f.start -= 1; // convert to interbase
|
|
122
|
-
if (data.strand === '+') {
|
|
123
|
-
f.strand = 1;
|
|
124
|
-
}
|
|
125
|
-
else if (data.strand === '-') {
|
|
126
|
-
f.strand = -1;
|
|
127
|
-
}
|
|
128
|
-
else if (data.strand === '.') {
|
|
129
|
-
f.strand = 0;
|
|
130
|
-
}
|
|
131
|
-
else {
|
|
132
|
-
f.strand = undefined;
|
|
133
|
-
}
|
|
134
|
-
f.phase = Number(data.phase);
|
|
135
|
-
f.refName = data.seq_id;
|
|
136
|
-
if (data.score === null) {
|
|
137
|
-
delete f.score;
|
|
138
|
-
}
|
|
139
|
-
if (data.phase === null) {
|
|
140
|
-
delete f.score;
|
|
141
|
-
}
|
|
142
|
-
const defaultFields = new Set([
|
|
143
|
-
'start',
|
|
144
|
-
'end',
|
|
145
|
-
'seq_id',
|
|
146
|
-
'score',
|
|
147
|
-
'type',
|
|
148
|
-
'source',
|
|
149
|
-
'phase',
|
|
150
|
-
'strand',
|
|
151
|
-
]);
|
|
152
|
-
const dataAttributes = data.attributes || {};
|
|
153
|
-
for (const a of Object.keys(dataAttributes)) {
|
|
154
|
-
let b = a.toLowerCase();
|
|
155
|
-
if (defaultFields.has(b)) {
|
|
156
|
-
// add "suffix" to tag name if it already exists
|
|
157
|
-
// reproduces behavior of NCList
|
|
158
|
-
b += '2';
|
|
159
|
-
}
|
|
160
|
-
if (dataAttributes[a] !== null) {
|
|
161
|
-
let attr = dataAttributes[a];
|
|
162
|
-
if (Array.isArray(attr) && attr.length === 1) {
|
|
163
|
-
;
|
|
164
|
-
[attr] = attr;
|
|
165
|
-
}
|
|
166
|
-
f[b] = attr;
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
f.refName = f.seq_id;
|
|
170
|
-
// the SimpleFeature constructor takes care of recursively inflating subfeatures
|
|
171
|
-
if (data.child_features && data.child_features.length > 0) {
|
|
172
|
-
f.subfeatures = data.child_features.flatMap(childLocs => childLocs.map(childLoc => this.featureData(childLoc)));
|
|
173
|
-
}
|
|
174
|
-
delete f.child_features;
|
|
175
|
-
delete f.data;
|
|
176
|
-
// delete f.derived_features
|
|
177
|
-
delete f._linehash;
|
|
178
|
-
delete f.attributes;
|
|
179
|
-
delete f.seq_id;
|
|
180
|
-
return f;
|
|
116
|
+
return featureLocs.map(featureLoc => {
|
|
117
|
+
var _a, _b;
|
|
118
|
+
return new SimpleFeature({
|
|
119
|
+
data: featureData(featureLoc),
|
|
120
|
+
id: `${this.id}-offset-${(_b = (_a = featureLoc.attributes) === null || _a === void 0 ? void 0 : _a._lineHash) === null || _b === void 0 ? void 0 : _b[0]}`,
|
|
121
|
+
});
|
|
122
|
+
});
|
|
181
123
|
}
|
|
182
124
|
freeResources( /* { region } */) { }
|
|
183
125
|
}
|
package/esm/GuessGff3/index.js
CHANGED
|
@@ -32,7 +32,7 @@ export default function GuessGff3F(pluginManager) {
|
|
|
32
32
|
if (regexGuess.test(fileName) && !adapterHint) {
|
|
33
33
|
return obj;
|
|
34
34
|
}
|
|
35
|
-
|
|
35
|
+
if (adapterHint === adapterName) {
|
|
36
36
|
return obj;
|
|
37
37
|
}
|
|
38
38
|
return adapterGuesser(file, index, adapterHint);
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
export function featureData(data) {
|
|
2
|
+
const f = { ...data };
|
|
3
|
+
f.start -= 1; // convert to interbase
|
|
4
|
+
if (data.strand === '+') {
|
|
5
|
+
f.strand = 1;
|
|
6
|
+
}
|
|
7
|
+
else if (data.strand === '-') {
|
|
8
|
+
f.strand = -1;
|
|
9
|
+
}
|
|
10
|
+
else if (data.strand === '.') {
|
|
11
|
+
f.strand = 0;
|
|
12
|
+
}
|
|
13
|
+
else {
|
|
14
|
+
f.strand = undefined;
|
|
15
|
+
}
|
|
16
|
+
f.phase = data.phase === null ? undefined : Number(data.phase);
|
|
17
|
+
f.refName = data.seq_id;
|
|
18
|
+
if (data.score === null) {
|
|
19
|
+
f.score = undefined;
|
|
20
|
+
}
|
|
21
|
+
const defaultFields = new Set([
|
|
22
|
+
'start',
|
|
23
|
+
'end',
|
|
24
|
+
'seq_id',
|
|
25
|
+
'score',
|
|
26
|
+
'type',
|
|
27
|
+
'source',
|
|
28
|
+
'phase',
|
|
29
|
+
'strand',
|
|
30
|
+
]);
|
|
31
|
+
const dataAttributes = data.attributes || {};
|
|
32
|
+
for (const a of Object.keys(dataAttributes)) {
|
|
33
|
+
let b = a.toLowerCase();
|
|
34
|
+
if (defaultFields.has(b)) {
|
|
35
|
+
// add "suffix" to tag name if it already exists
|
|
36
|
+
// reproduces behavior of NCList
|
|
37
|
+
b += '2';
|
|
38
|
+
}
|
|
39
|
+
if (dataAttributes[a]) {
|
|
40
|
+
let attr = dataAttributes[a];
|
|
41
|
+
if (Array.isArray(attr) && attr.length === 1) {
|
|
42
|
+
;
|
|
43
|
+
[attr] = attr;
|
|
44
|
+
}
|
|
45
|
+
f[b] = attr;
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
f.refName = f.seq_id;
|
|
49
|
+
// the SimpleFeature constructor takes care of recursively inflating
|
|
50
|
+
// subfeatures
|
|
51
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
52
|
+
if (data.child_features && data.child_features.length > 0) {
|
|
53
|
+
f.subfeatures = data.child_features.flatMap(childLocs => childLocs.map(childLoc => featureData(childLoc)));
|
|
54
|
+
}
|
|
55
|
+
f.child_features = undefined;
|
|
56
|
+
f.data = undefined;
|
|
57
|
+
// delete f.derived_features
|
|
58
|
+
f.attributes = undefined;
|
|
59
|
+
f.seq_id = undefined;
|
|
60
|
+
return f;
|
|
61
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@jbrowse/plugin-gff3",
|
|
3
|
-
"version": "2.13.1",
|
|
3
|
+
"version": "2.14.0",
|
|
4
4
|
"description": "JBrowse 2 gff3.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"jbrowse",
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
],
|
|
25
25
|
"scripts": {
|
|
26
26
|
"build": "npm-run-all build:*",
|
|
27
|
-
"test": "cd ../..; jest plugins/gff3 --passWithNoTests",
|
|
27
|
+
"test": "cd ../..; jest --passWithNoTests plugins/gff3 --passWithNoTests",
|
|
28
28
|
"prepublishOnly": "yarn test",
|
|
29
29
|
"prepack": "yarn build && yarn useDist",
|
|
30
30
|
"postpack": "yarn useSrc",
|
|
@@ -55,5 +55,5 @@
|
|
|
55
55
|
"distModule": "esm/index.js",
|
|
56
56
|
"srcModule": "src/index.ts",
|
|
57
57
|
"module": "esm/index.js",
|
|
58
|
-
"gitHead": "
|
|
58
|
+
"gitHead": "9fb8231d932db40adf0a283081765431756c66ff"
|
|
59
59
|
}
|