@jbrowse/plugin-gff3 2.11.1 → 2.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -3,14 +3,15 @@ import { NoAssemblyRegion } from '@jbrowse/core/util/types';
|
|
|
3
3
|
import IntervalTree from '@flatten-js/interval-tree';
|
|
4
4
|
import { Feature } from '@jbrowse/core/util/simpleFeature';
|
|
5
5
|
export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
6
|
+
calculatedIntervalTreeMap: Record<string, IntervalTree>;
|
|
6
7
|
protected gffFeatures?: Promise<{
|
|
7
8
|
header: string;
|
|
8
|
-
|
|
9
|
+
intervalTreeMap: Record<string, (sc?: (arg: string) => void) => IntervalTree>;
|
|
9
10
|
}>;
|
|
10
11
|
private loadDataP;
|
|
11
12
|
private loadData;
|
|
12
|
-
getRefNames(
|
|
13
|
-
getHeader(): Promise<string>;
|
|
13
|
+
getRefNames(opts?: BaseOptions): Promise<string[]>;
|
|
14
|
+
getHeader(opts?: BaseOptions): Promise<string>;
|
|
14
15
|
getFeatures(query: NoAssemblyRegion, opts?: BaseOptions): import("rxjs").Observable<Feature>;
|
|
15
16
|
private featureData;
|
|
16
17
|
freeResources(): void;
|
|
@@ -13,57 +13,91 @@ const gff_1 = __importDefault(require("@gmod/gff"));
|
|
|
13
13
|
function isGzip(buf) {
|
|
14
14
|
return buf[0] === 31 && buf[1] === 139 && buf[2] === 8;
|
|
15
15
|
}
|
|
16
|
+
const decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
|
|
16
17
|
class Gff3Adapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
17
|
-
|
|
18
|
+
constructor() {
|
|
19
|
+
super(...arguments);
|
|
20
|
+
this.calculatedIntervalTreeMap = {};
|
|
21
|
+
}
|
|
22
|
+
async loadDataP(opts) {
|
|
23
|
+
const { statusCallback = () => { } } = opts || {};
|
|
18
24
|
const pm = this.pluginManager;
|
|
19
25
|
const buf = await (0, io_1.openLocation)(this.getConf('gffLocation'), pm).readFile();
|
|
20
26
|
const buffer = isGzip(buf) ? await (0, bgzf_filehandle_1.unzip)(buf) : buf;
|
|
21
|
-
// 512MB max chrome string length is 512MB
|
|
22
|
-
if (buffer.length > 536870888) {
|
|
23
|
-
throw new Error('Data exceeds maximum string length (512MB)');
|
|
24
|
-
}
|
|
25
|
-
const data = new TextDecoder('utf8', { fatal: true }).decode(buffer);
|
|
26
|
-
const lines = data.split(/\n|\r\n|\r/);
|
|
27
27
|
const headerLines = [];
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
28
|
+
const featureMap = {};
|
|
29
|
+
let blockStart = 0;
|
|
30
|
+
let i = 0;
|
|
31
|
+
while (blockStart < buffer.length) {
|
|
32
|
+
const n = buffer.indexOf('\n', blockStart);
|
|
33
|
+
// could be a non-newline ended file, so slice to end of file if n===-1
|
|
34
|
+
const b = n === -1 ? buffer.slice(blockStart) : buffer.slice(blockStart, n);
|
|
35
|
+
const line = ((decoder === null || decoder === void 0 ? void 0 : decoder.decode(b)) || b.toString()).trim();
|
|
36
|
+
if (line) {
|
|
37
|
+
if (line.startsWith('#')) {
|
|
38
|
+
headerLines.push(line);
|
|
39
|
+
}
|
|
40
|
+
else if (line.startsWith('>')) {
|
|
41
|
+
break;
|
|
42
|
+
}
|
|
43
|
+
else {
|
|
44
|
+
const ret = line.indexOf('\t');
|
|
45
|
+
const refName = line.slice(0, ret);
|
|
46
|
+
if (!featureMap[refName]) {
|
|
47
|
+
featureMap[refName] = '';
|
|
48
|
+
}
|
|
49
|
+
featureMap[refName] += line + '\n';
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
if (i++ % 10000 === 0) {
|
|
53
|
+
statusCallback(`Loading ${Math.floor(blockStart / 1000000).toLocaleString('en-US')}/${Math.floor(buffer.length / 1000000).toLocaleString('en-US')} MB`);
|
|
47
54
|
}
|
|
48
|
-
|
|
55
|
+
blockStart = n + 1;
|
|
49
56
|
}
|
|
50
|
-
|
|
57
|
+
const intervalTreeMap = Object.fromEntries(Object.entries(featureMap).map(([refName, lines]) => {
|
|
58
|
+
return [
|
|
59
|
+
refName,
|
|
60
|
+
(sc) => {
|
|
61
|
+
sc === null || sc === void 0 ? void 0 : sc(`Parsing GFF data`);
|
|
62
|
+
if (!this.calculatedIntervalTreeMap[refName]) {
|
|
63
|
+
const intervalTree = new interval_tree_1.default();
|
|
64
|
+
gff_1.default
|
|
65
|
+
.parseStringSync(lines, {
|
|
66
|
+
parseFeatures: true,
|
|
67
|
+
parseComments: false,
|
|
68
|
+
parseDirectives: false,
|
|
69
|
+
parseSequences: false,
|
|
70
|
+
disableDerivesFromReferences: true,
|
|
71
|
+
})
|
|
72
|
+
.flat()
|
|
73
|
+
.map((f, i) => new simpleFeature_1.default({
|
|
74
|
+
data: this.featureData(f),
|
|
75
|
+
id: `${this.id}-${refName}-${i}`,
|
|
76
|
+
}))
|
|
77
|
+
.forEach(obj => intervalTree.insert([obj.get('start'), obj.get('end')], obj));
|
|
78
|
+
this.calculatedIntervalTreeMap[refName] = intervalTree;
|
|
79
|
+
}
|
|
80
|
+
return this.calculatedIntervalTreeMap[refName];
|
|
81
|
+
},
|
|
82
|
+
];
|
|
83
|
+
}));
|
|
84
|
+
return { header: headerLines.join('\n'), intervalTreeMap };
|
|
51
85
|
}
|
|
52
|
-
async loadData() {
|
|
86
|
+
async loadData(opts) {
|
|
53
87
|
if (!this.gffFeatures) {
|
|
54
|
-
this.gffFeatures = this.loadDataP().catch(e => {
|
|
88
|
+
this.gffFeatures = this.loadDataP(opts).catch(e => {
|
|
55
89
|
this.gffFeatures = undefined;
|
|
56
90
|
throw e;
|
|
57
91
|
});
|
|
58
92
|
}
|
|
59
93
|
return this.gffFeatures;
|
|
60
94
|
}
|
|
61
|
-
async getRefNames(
|
|
62
|
-
const {
|
|
63
|
-
return Object.keys(
|
|
95
|
+
async getRefNames(opts = {}) {
|
|
96
|
+
const { intervalTreeMap } = await this.loadData(opts);
|
|
97
|
+
return Object.keys(intervalTreeMap);
|
|
64
98
|
}
|
|
65
|
-
async getHeader() {
|
|
66
|
-
const { header } = await this.loadData();
|
|
99
|
+
async getHeader(opts = {}) {
|
|
100
|
+
const { header } = await this.loadData(opts);
|
|
67
101
|
return header;
|
|
68
102
|
}
|
|
69
103
|
getFeatures(query, opts = {}) {
|
|
@@ -71,8 +105,8 @@ class Gff3Adapter extends BaseAdapter_1.BaseFeatureDataAdapter {
|
|
|
71
105
|
var _a;
|
|
72
106
|
try {
|
|
73
107
|
const { start, end, refName } = query;
|
|
74
|
-
const {
|
|
75
|
-
(_a =
|
|
108
|
+
const { intervalTreeMap } = await this.loadData(opts);
|
|
109
|
+
(_a = intervalTreeMap[refName](opts.statusCallback)) === null || _a === void 0 ? void 0 : _a.search([start, end]).forEach(f => observer.next(f));
|
|
76
110
|
observer.complete();
|
|
77
111
|
}
|
|
78
112
|
catch (e) {
|
|
@@ -3,14 +3,15 @@ import { NoAssemblyRegion } from '@jbrowse/core/util/types';
|
|
|
3
3
|
import IntervalTree from '@flatten-js/interval-tree';
|
|
4
4
|
import { Feature } from '@jbrowse/core/util/simpleFeature';
|
|
5
5
|
export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
6
|
+
calculatedIntervalTreeMap: Record<string, IntervalTree>;
|
|
6
7
|
protected gffFeatures?: Promise<{
|
|
7
8
|
header: string;
|
|
8
|
-
|
|
9
|
+
intervalTreeMap: Record<string, (sc?: (arg: string) => void) => IntervalTree>;
|
|
9
10
|
}>;
|
|
10
11
|
private loadDataP;
|
|
11
12
|
private loadData;
|
|
12
|
-
getRefNames(
|
|
13
|
-
getHeader(): Promise<string>;
|
|
13
|
+
getRefNames(opts?: BaseOptions): Promise<string[]>;
|
|
14
|
+
getHeader(opts?: BaseOptions): Promise<string>;
|
|
14
15
|
getFeatures(query: NoAssemblyRegion, opts?: BaseOptions): import("rxjs").Observable<Feature>;
|
|
15
16
|
private featureData;
|
|
16
17
|
freeResources(): void;
|
|
@@ -8,57 +8,91 @@ import gff from '@gmod/gff';
|
|
|
8
8
|
function isGzip(buf) {
|
|
9
9
|
return buf[0] === 31 && buf[1] === 139 && buf[2] === 8;
|
|
10
10
|
}
|
|
11
|
+
const decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
|
|
11
12
|
export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
12
|
-
|
|
13
|
+
constructor() {
|
|
14
|
+
super(...arguments);
|
|
15
|
+
this.calculatedIntervalTreeMap = {};
|
|
16
|
+
}
|
|
17
|
+
async loadDataP(opts) {
|
|
18
|
+
const { statusCallback = () => { } } = opts || {};
|
|
13
19
|
const pm = this.pluginManager;
|
|
14
20
|
const buf = await openLocation(this.getConf('gffLocation'), pm).readFile();
|
|
15
21
|
const buffer = isGzip(buf) ? await unzip(buf) : buf;
|
|
16
|
-
// 512MB max chrome string length is 512MB
|
|
17
|
-
if (buffer.length > 536870888) {
|
|
18
|
-
throw new Error('Data exceeds maximum string length (512MB)');
|
|
19
|
-
}
|
|
20
|
-
const data = new TextDecoder('utf8', { fatal: true }).decode(buffer);
|
|
21
|
-
const lines = data.split(/\n|\r\n|\r/);
|
|
22
22
|
const headerLines = [];
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
23
|
+
const featureMap = {};
|
|
24
|
+
let blockStart = 0;
|
|
25
|
+
let i = 0;
|
|
26
|
+
while (blockStart < buffer.length) {
|
|
27
|
+
const n = buffer.indexOf('\n', blockStart);
|
|
28
|
+
// could be a non-newline ended file, so slice to end of file if n===-1
|
|
29
|
+
const b = n === -1 ? buffer.slice(blockStart) : buffer.slice(blockStart, n);
|
|
30
|
+
const line = ((decoder === null || decoder === void 0 ? void 0 : decoder.decode(b)) || b.toString()).trim();
|
|
31
|
+
if (line) {
|
|
32
|
+
if (line.startsWith('#')) {
|
|
33
|
+
headerLines.push(line);
|
|
34
|
+
}
|
|
35
|
+
else if (line.startsWith('>')) {
|
|
36
|
+
break;
|
|
37
|
+
}
|
|
38
|
+
else {
|
|
39
|
+
const ret = line.indexOf('\t');
|
|
40
|
+
const refName = line.slice(0, ret);
|
|
41
|
+
if (!featureMap[refName]) {
|
|
42
|
+
featureMap[refName] = '';
|
|
43
|
+
}
|
|
44
|
+
featureMap[refName] += line + '\n';
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
if (i++ % 10000 === 0) {
|
|
48
|
+
statusCallback(`Loading ${Math.floor(blockStart / 1000000).toLocaleString('en-US')}/${Math.floor(buffer.length / 1000000).toLocaleString('en-US')} MB`);
|
|
42
49
|
}
|
|
43
|
-
|
|
50
|
+
blockStart = n + 1;
|
|
44
51
|
}
|
|
45
|
-
|
|
52
|
+
const intervalTreeMap = Object.fromEntries(Object.entries(featureMap).map(([refName, lines]) => {
|
|
53
|
+
return [
|
|
54
|
+
refName,
|
|
55
|
+
(sc) => {
|
|
56
|
+
sc === null || sc === void 0 ? void 0 : sc(`Parsing GFF data`);
|
|
57
|
+
if (!this.calculatedIntervalTreeMap[refName]) {
|
|
58
|
+
const intervalTree = new IntervalTree();
|
|
59
|
+
gff
|
|
60
|
+
.parseStringSync(lines, {
|
|
61
|
+
parseFeatures: true,
|
|
62
|
+
parseComments: false,
|
|
63
|
+
parseDirectives: false,
|
|
64
|
+
parseSequences: false,
|
|
65
|
+
disableDerivesFromReferences: true,
|
|
66
|
+
})
|
|
67
|
+
.flat()
|
|
68
|
+
.map((f, i) => new SimpleFeature({
|
|
69
|
+
data: this.featureData(f),
|
|
70
|
+
id: `${this.id}-${refName}-${i}`,
|
|
71
|
+
}))
|
|
72
|
+
.forEach(obj => intervalTree.insert([obj.get('start'), obj.get('end')], obj));
|
|
73
|
+
this.calculatedIntervalTreeMap[refName] = intervalTree;
|
|
74
|
+
}
|
|
75
|
+
return this.calculatedIntervalTreeMap[refName];
|
|
76
|
+
},
|
|
77
|
+
];
|
|
78
|
+
}));
|
|
79
|
+
return { header: headerLines.join('\n'), intervalTreeMap };
|
|
46
80
|
}
|
|
47
|
-
async loadData() {
|
|
81
|
+
async loadData(opts) {
|
|
48
82
|
if (!this.gffFeatures) {
|
|
49
|
-
this.gffFeatures = this.loadDataP().catch(e => {
|
|
83
|
+
this.gffFeatures = this.loadDataP(opts).catch(e => {
|
|
50
84
|
this.gffFeatures = undefined;
|
|
51
85
|
throw e;
|
|
52
86
|
});
|
|
53
87
|
}
|
|
54
88
|
return this.gffFeatures;
|
|
55
89
|
}
|
|
56
|
-
async getRefNames(
|
|
57
|
-
const {
|
|
58
|
-
return Object.keys(
|
|
90
|
+
async getRefNames(opts = {}) {
|
|
91
|
+
const { intervalTreeMap } = await this.loadData(opts);
|
|
92
|
+
return Object.keys(intervalTreeMap);
|
|
59
93
|
}
|
|
60
|
-
async getHeader() {
|
|
61
|
-
const { header } = await this.loadData();
|
|
94
|
+
async getHeader(opts = {}) {
|
|
95
|
+
const { header } = await this.loadData(opts);
|
|
62
96
|
return header;
|
|
63
97
|
}
|
|
64
98
|
getFeatures(query, opts = {}) {
|
|
@@ -66,8 +100,8 @@ export default class Gff3Adapter extends BaseFeatureDataAdapter {
|
|
|
66
100
|
var _a;
|
|
67
101
|
try {
|
|
68
102
|
const { start, end, refName } = query;
|
|
69
|
-
const {
|
|
70
|
-
(_a =
|
|
103
|
+
const { intervalTreeMap } = await this.loadData(opts);
|
|
104
|
+
(_a = intervalTreeMap[refName](opts.statusCallback)) === null || _a === void 0 ? void 0 : _a.search([start, end]).forEach(f => observer.next(f));
|
|
71
105
|
observer.complete();
|
|
72
106
|
}
|
|
73
107
|
catch (e) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@jbrowse/plugin-gff3",
|
|
3
|
-
"version": "2.11.1",
|
|
3
|
+
"version": "2.11.2",
|
|
4
4
|
"description": "JBrowse 2 gff3.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"jbrowse",
|
|
@@ -55,5 +55,5 @@
|
|
|
55
55
|
"distModule": "esm/index.js",
|
|
56
56
|
"srcModule": "src/index.ts",
|
|
57
57
|
"module": "esm/index.js",
|
|
58
|
-
"gitHead": "
|
|
58
|
+
"gitHead": "511048cb6965f0bf624c96de244e7fd47fce17d6"
|
|
59
59
|
}
|