@jbrowse/plugin-gff3 2.11.1 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,14 +3,15 @@ import { NoAssemblyRegion } from '@jbrowse/core/util/types';
3
3
  import IntervalTree from '@flatten-js/interval-tree';
4
4
  import { Feature } from '@jbrowse/core/util/simpleFeature';
5
5
  export default class Gff3Adapter extends BaseFeatureDataAdapter {
6
+ calculatedIntervalTreeMap: Record<string, IntervalTree>;
6
7
  protected gffFeatures?: Promise<{
7
8
  header: string;
8
- intervalTree: Record<string, IntervalTree>;
9
+ intervalTreeMap: Record<string, ((sc?: (arg: string) => void) => IntervalTree) | undefined>;
9
10
  }>;
10
11
  private loadDataP;
11
12
  private loadData;
12
- getRefNames(_opts?: BaseOptions): Promise<string[]>;
13
- getHeader(): Promise<string>;
13
+ getRefNames(opts?: BaseOptions): Promise<string[]>;
14
+ getHeader(opts?: BaseOptions): Promise<string>;
14
15
  getFeatures(query: NoAssemblyRegion, opts?: BaseOptions): import("rxjs").Observable<Feature>;
15
16
  private featureData;
16
17
  freeResources(): void;
@@ -13,66 +13,100 @@ const gff_1 = __importDefault(require("@gmod/gff"));
13
13
  function isGzip(buf) {
14
14
  return buf[0] === 31 && buf[1] === 139 && buf[2] === 8;
15
15
  }
16
+ const decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
16
17
  class Gff3Adapter extends BaseAdapter_1.BaseFeatureDataAdapter {
17
- async loadDataP() {
18
+ constructor() {
19
+ super(...arguments);
20
+ this.calculatedIntervalTreeMap = {};
21
+ }
22
+ async loadDataP(opts) {
23
+ const { statusCallback = () => { } } = opts || {};
18
24
  const pm = this.pluginManager;
19
25
  const buf = await (0, io_1.openLocation)(this.getConf('gffLocation'), pm).readFile();
20
26
  const buffer = isGzip(buf) ? await (0, bgzf_filehandle_1.unzip)(buf) : buf;
21
- // 512MB max chrome string length is 512MB
22
- if (buffer.length > 536870888) {
23
- throw new Error('Data exceeds maximum string length (512MB)');
24
- }
25
- const data = new TextDecoder('utf8', { fatal: true }).decode(buffer);
26
- const lines = data.split(/\n|\r\n|\r/);
27
27
  const headerLines = [];
28
- for (let i = 0; i < lines.length && lines[i].startsWith('#'); i++) {
29
- headerLines.push(lines[i]);
30
- }
31
- const header = headerLines.join('\n');
32
- const feats = gff_1.default.parseStringSync(data, {
33
- parseFeatures: true,
34
- parseComments: false,
35
- parseDirectives: false,
36
- parseSequences: false,
37
- disableDerivesFromReferences: true,
38
- });
39
- const intervalTree = {};
40
- for (const obj of feats.flat().map((f, i) => new simpleFeature_1.default({
41
- data: this.featureData(f),
42
- id: `${this.id}-offset-${i}`,
43
- }))) {
44
- const key = obj.get('refName');
45
- if (!intervalTree[key]) {
46
- intervalTree[key] = new interval_tree_1.default();
28
+ const featureMap = {};
29
+ let blockStart = 0;
30
+ let i = 0;
31
+ while (blockStart < buffer.length) {
32
+ const n = buffer.indexOf('\n', blockStart);
33
+ // could be a non-newline ended file, so slice to end of file if n===-1
34
+ const b = n === -1 ? buffer.slice(blockStart) : buffer.slice(blockStart, n);
35
+ const line = ((decoder === null || decoder === void 0 ? void 0 : decoder.decode(b)) || b.toString()).trim();
36
+ if (line) {
37
+ if (line.startsWith('#')) {
38
+ headerLines.push(line);
39
+ }
40
+ else if (line.startsWith('>')) {
41
+ break;
42
+ }
43
+ else {
44
+ const ret = line.indexOf('\t');
45
+ const refName = line.slice(0, ret);
46
+ if (!featureMap[refName]) {
47
+ featureMap[refName] = '';
48
+ }
49
+ featureMap[refName] += line + '\n';
50
+ }
51
+ }
52
+ if (i++ % 10000 === 0) {
53
+ statusCallback(`Loading ${Math.floor(blockStart / 1000000).toLocaleString('en-US')}/${Math.floor(buffer.length / 1000000).toLocaleString('en-US')} MB`);
47
54
  }
48
- intervalTree[key].insert([obj.get('start'), obj.get('end')], obj);
55
+ blockStart = n + 1;
49
56
  }
50
- return { header, intervalTree };
57
+ const intervalTreeMap = Object.fromEntries(Object.entries(featureMap).map(([refName, lines]) => {
58
+ return [
59
+ refName,
60
+ (sc) => {
61
+ sc === null || sc === void 0 ? void 0 : sc(`Parsing GFF data`);
62
+ if (!this.calculatedIntervalTreeMap[refName]) {
63
+ const intervalTree = new interval_tree_1.default();
64
+ gff_1.default
65
+ .parseStringSync(lines, {
66
+ parseFeatures: true,
67
+ parseComments: false,
68
+ parseDirectives: false,
69
+ parseSequences: false,
70
+ disableDerivesFromReferences: true,
71
+ })
72
+ .flat()
73
+ .map((f, i) => new simpleFeature_1.default({
74
+ data: this.featureData(f),
75
+ id: `${this.id}-${refName}-${i}`,
76
+ }))
77
+ .forEach(obj => intervalTree.insert([obj.get('start'), obj.get('end')], obj));
78
+ this.calculatedIntervalTreeMap[refName] = intervalTree;
79
+ }
80
+ return this.calculatedIntervalTreeMap[refName];
81
+ },
82
+ ];
83
+ }));
84
+ return { header: headerLines.join('\n'), intervalTreeMap };
51
85
  }
52
- async loadData() {
86
+ async loadData(opts) {
53
87
  if (!this.gffFeatures) {
54
- this.gffFeatures = this.loadDataP().catch(e => {
88
+ this.gffFeatures = this.loadDataP(opts).catch(e => {
55
89
  this.gffFeatures = undefined;
56
90
  throw e;
57
91
  });
58
92
  }
59
93
  return this.gffFeatures;
60
94
  }
61
- async getRefNames(_opts = {}) {
62
- const { intervalTree } = await this.loadData();
63
- return Object.keys(intervalTree);
95
+ async getRefNames(opts = {}) {
96
+ const { intervalTreeMap } = await this.loadData(opts);
97
+ return Object.keys(intervalTreeMap);
64
98
  }
65
- async getHeader() {
66
- const { header } = await this.loadData();
99
+ async getHeader(opts = {}) {
100
+ const { header } = await this.loadData(opts);
67
101
  return header;
68
102
  }
69
103
  getFeatures(query, opts = {}) {
70
104
  return (0, rxjs_1.ObservableCreate)(async (observer) => {
71
- var _a;
105
+ var _a, _b;
72
106
  try {
73
107
  const { start, end, refName } = query;
74
- const { intervalTree } = await this.loadData();
75
- (_a = intervalTree[refName]) === null || _a === void 0 ? void 0 : _a.search([start, end]).forEach(f => observer.next(f));
108
+ const { intervalTreeMap } = await this.loadData(opts);
109
+ (_b = (_a = intervalTreeMap[refName]) === null || _a === void 0 ? void 0 : _a.call(intervalTreeMap, opts.statusCallback)) === null || _b === void 0 ? void 0 : _b.search([start, end]).forEach(f => observer.next(f));
76
110
  observer.complete();
77
111
  }
78
112
  catch (e) {
@@ -3,14 +3,15 @@ import { NoAssemblyRegion } from '@jbrowse/core/util/types';
3
3
  import IntervalTree from '@flatten-js/interval-tree';
4
4
  import { Feature } from '@jbrowse/core/util/simpleFeature';
5
5
  export default class Gff3Adapter extends BaseFeatureDataAdapter {
6
+ calculatedIntervalTreeMap: Record<string, IntervalTree>;
6
7
  protected gffFeatures?: Promise<{
7
8
  header: string;
8
- intervalTree: Record<string, IntervalTree>;
9
+ intervalTreeMap: Record<string, ((sc?: (arg: string) => void) => IntervalTree) | undefined>;
9
10
  }>;
10
11
  private loadDataP;
11
12
  private loadData;
12
- getRefNames(_opts?: BaseOptions): Promise<string[]>;
13
- getHeader(): Promise<string>;
13
+ getRefNames(opts?: BaseOptions): Promise<string[]>;
14
+ getHeader(opts?: BaseOptions): Promise<string>;
14
15
  getFeatures(query: NoAssemblyRegion, opts?: BaseOptions): import("rxjs").Observable<Feature>;
15
16
  private featureData;
16
17
  freeResources(): void;
@@ -8,66 +8,100 @@ import gff from '@gmod/gff';
8
8
  function isGzip(buf) {
9
9
  return buf[0] === 31 && buf[1] === 139 && buf[2] === 8;
10
10
  }
11
+ const decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
11
12
  export default class Gff3Adapter extends BaseFeatureDataAdapter {
12
- async loadDataP() {
13
+ constructor() {
14
+ super(...arguments);
15
+ this.calculatedIntervalTreeMap = {};
16
+ }
17
+ async loadDataP(opts) {
18
+ const { statusCallback = () => { } } = opts || {};
13
19
  const pm = this.pluginManager;
14
20
  const buf = await openLocation(this.getConf('gffLocation'), pm).readFile();
15
21
  const buffer = isGzip(buf) ? await unzip(buf) : buf;
16
- // 512MB max chrome string length is 512MB
17
- if (buffer.length > 536870888) {
18
- throw new Error('Data exceeds maximum string length (512MB)');
19
- }
20
- const data = new TextDecoder('utf8', { fatal: true }).decode(buffer);
21
- const lines = data.split(/\n|\r\n|\r/);
22
22
  const headerLines = [];
23
- for (let i = 0; i < lines.length && lines[i].startsWith('#'); i++) {
24
- headerLines.push(lines[i]);
25
- }
26
- const header = headerLines.join('\n');
27
- const feats = gff.parseStringSync(data, {
28
- parseFeatures: true,
29
- parseComments: false,
30
- parseDirectives: false,
31
- parseSequences: false,
32
- disableDerivesFromReferences: true,
33
- });
34
- const intervalTree = {};
35
- for (const obj of feats.flat().map((f, i) => new SimpleFeature({
36
- data: this.featureData(f),
37
- id: `${this.id}-offset-${i}`,
38
- }))) {
39
- const key = obj.get('refName');
40
- if (!intervalTree[key]) {
41
- intervalTree[key] = new IntervalTree();
23
+ const featureMap = {};
24
+ let blockStart = 0;
25
+ let i = 0;
26
+ while (blockStart < buffer.length) {
27
+ const n = buffer.indexOf('\n', blockStart);
28
+ // could be a non-newline ended file, so slice to end of file if n===-1
29
+ const b = n === -1 ? buffer.slice(blockStart) : buffer.slice(blockStart, n);
30
+ const line = ((decoder === null || decoder === void 0 ? void 0 : decoder.decode(b)) || b.toString()).trim();
31
+ if (line) {
32
+ if (line.startsWith('#')) {
33
+ headerLines.push(line);
34
+ }
35
+ else if (line.startsWith('>')) {
36
+ break;
37
+ }
38
+ else {
39
+ const ret = line.indexOf('\t');
40
+ const refName = line.slice(0, ret);
41
+ if (!featureMap[refName]) {
42
+ featureMap[refName] = '';
43
+ }
44
+ featureMap[refName] += line + '\n';
45
+ }
46
+ }
47
+ if (i++ % 10000 === 0) {
48
+ statusCallback(`Loading ${Math.floor(blockStart / 1000000).toLocaleString('en-US')}/${Math.floor(buffer.length / 1000000).toLocaleString('en-US')} MB`);
42
49
  }
43
- intervalTree[key].insert([obj.get('start'), obj.get('end')], obj);
50
+ blockStart = n + 1;
44
51
  }
45
- return { header, intervalTree };
52
+ const intervalTreeMap = Object.fromEntries(Object.entries(featureMap).map(([refName, lines]) => {
53
+ return [
54
+ refName,
55
+ (sc) => {
56
+ sc === null || sc === void 0 ? void 0 : sc(`Parsing GFF data`);
57
+ if (!this.calculatedIntervalTreeMap[refName]) {
58
+ const intervalTree = new IntervalTree();
59
+ gff
60
+ .parseStringSync(lines, {
61
+ parseFeatures: true,
62
+ parseComments: false,
63
+ parseDirectives: false,
64
+ parseSequences: false,
65
+ disableDerivesFromReferences: true,
66
+ })
67
+ .flat()
68
+ .map((f, i) => new SimpleFeature({
69
+ data: this.featureData(f),
70
+ id: `${this.id}-${refName}-${i}`,
71
+ }))
72
+ .forEach(obj => intervalTree.insert([obj.get('start'), obj.get('end')], obj));
73
+ this.calculatedIntervalTreeMap[refName] = intervalTree;
74
+ }
75
+ return this.calculatedIntervalTreeMap[refName];
76
+ },
77
+ ];
78
+ }));
79
+ return { header: headerLines.join('\n'), intervalTreeMap };
46
80
  }
47
- async loadData() {
81
+ async loadData(opts) {
48
82
  if (!this.gffFeatures) {
49
- this.gffFeatures = this.loadDataP().catch(e => {
83
+ this.gffFeatures = this.loadDataP(opts).catch(e => {
50
84
  this.gffFeatures = undefined;
51
85
  throw e;
52
86
  });
53
87
  }
54
88
  return this.gffFeatures;
55
89
  }
56
- async getRefNames(_opts = {}) {
57
- const { intervalTree } = await this.loadData();
58
- return Object.keys(intervalTree);
90
+ async getRefNames(opts = {}) {
91
+ const { intervalTreeMap } = await this.loadData(opts);
92
+ return Object.keys(intervalTreeMap);
59
93
  }
60
- async getHeader() {
61
- const { header } = await this.loadData();
94
+ async getHeader(opts = {}) {
95
+ const { header } = await this.loadData(opts);
62
96
  return header;
63
97
  }
64
98
  getFeatures(query, opts = {}) {
65
99
  return ObservableCreate(async (observer) => {
66
- var _a;
100
+ var _a, _b;
67
101
  try {
68
102
  const { start, end, refName } = query;
69
- const { intervalTree } = await this.loadData();
70
- (_a = intervalTree[refName]) === null || _a === void 0 ? void 0 : _a.search([start, end]).forEach(f => observer.next(f));
103
+ const { intervalTreeMap } = await this.loadData(opts);
104
+ (_b = (_a = intervalTreeMap[refName]) === null || _a === void 0 ? void 0 : _a.call(intervalTreeMap, opts.statusCallback)) === null || _b === void 0 ? void 0 : _b.search([start, end]).forEach(f => observer.next(f));
71
105
  observer.complete();
72
106
  }
73
107
  catch (e) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jbrowse/plugin-gff3",
3
- "version": "2.11.1",
3
+ "version": "2.12.0",
4
4
  "description": "JBrowse 2 gff3.",
5
5
  "keywords": [
6
6
  "jbrowse",
@@ -55,5 +55,5 @@
55
55
  "distModule": "esm/index.js",
56
56
  "srcModule": "src/index.ts",
57
57
  "module": "esm/index.js",
58
- "gitHead": "11b28d66d782eb06f92ccb993108bb6c3c82819e"
58
+ "gitHead": "935f2602d29abc737bb1f493a922b6218d023ae2"
59
59
  }