gff-nostream 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +163 -0
- package/dist/api.d.ts +23 -0
- package/dist/api.js +40 -0
- package/dist/api.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/dist/parse.d.ts +55 -0
- package/dist/parse.js +341 -0
- package/dist/parse.js.map +1 -0
- package/dist/util.d.ts +166 -0
- package/dist/util.js +274 -0
- package/dist/util.js.map +1 -0
- package/esm/api.d.ts +23 -0
- package/esm/api.js +42 -0
- package/esm/api.js.map +1 -0
- package/esm/index.d.ts +3 -0
- package/esm/index.js +3 -0
- package/esm/index.js.map +1 -0
- package/esm/parse.d.ts +55 -0
- package/esm/parse.js +317 -0
- package/esm/parse.js.map +1 -0
- package/esm/util.d.ts +166 -0
- package/esm/util.js +268 -0
- package/esm/util.js.map +1 -0
- package/package.json +52 -0
- package/src/api.ts +86 -0
- package/src/index.ts +12 -0
- package/src/parse.ts +400 -0
- package/src/util.ts +425 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2017 Robert Buels
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# gff-nostream
|
|
2
|
+
|
|
3
|
+
[Build status](https://github.com/cmdcolin/gff-nostream/actions?query=branch%3Amaster+workflow%3APush+)
|
|
4
|
+
|
|
5
|
+
Parse GFF3 data. This is a simplified version of
|
|
6
|
+
[@gmod/gff](https://github.com/GMOD/gff-js) with just basic parsing and no
|
|
7
|
+
node.js stream module usage
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
$ npm install --save gff-nostream
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```js
|
|
16
|
+
const { parseStringSync } = require('gff-nostream')
|
|
17
|
+
// or in ES6 (recommended)
|
|
18
|
+
import { parseStringSync } from 'gff-nostream'
|
|
19
|
+
|
|
20
|
+
const fs = require('fs')
|
|
21
|
+
|
|
22
|
+
// parse a string of gff3 synchronously
|
|
23
|
+
const stringOfGFF3 = fs.readFileSync('my_annotations.gff3', 'utf8')
|
|
24
|
+
const arrayOfThings = parseStringSync(stringOfGFF3)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Object format
|
|
28
|
+
|
|
29
|
+
### features
|
|
30
|
+
|
|
31
|
+
In GFF3, features can have more than one location. We parse features as
|
|
32
|
+
arrays of all the lines that share that feature's ID. Values that are `.` in
|
|
33
|
+
the GFF3 are `null` in the output.
|
|
34
|
+
|
|
35
|
+
A simple feature that's located in just one place:
|
|
36
|
+
|
|
37
|
+
```json
|
|
38
|
+
[
|
|
39
|
+
{
|
|
40
|
+
"seq_id": "ctg123",
|
|
41
|
+
"source": null,
|
|
42
|
+
"type": "gene",
|
|
43
|
+
"start": 1000,
|
|
44
|
+
"end": 9000,
|
|
45
|
+
"score": null,
|
|
46
|
+
"strand": "+",
|
|
47
|
+
"phase": null,
|
|
48
|
+
"attributes": {
|
|
49
|
+
"ID": ["gene00001"],
|
|
50
|
+
"Name": ["EDEN"]
|
|
51
|
+
},
|
|
52
|
+
"child_features": [],
|
|
53
|
+
"derived_features": []
|
|
54
|
+
}
|
|
55
|
+
]
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
A CDS called `cds00001` located in two places:
|
|
59
|
+
|
|
60
|
+
```json
|
|
61
|
+
[
|
|
62
|
+
{
|
|
63
|
+
"seq_id": "ctg123",
|
|
64
|
+
"source": null,
|
|
65
|
+
"type": "CDS",
|
|
66
|
+
"start": 1201,
|
|
67
|
+
"end": 1500,
|
|
68
|
+
"score": null,
|
|
69
|
+
"strand": "+",
|
|
70
|
+
"phase": "0",
|
|
71
|
+
"attributes": {
|
|
72
|
+
"ID": ["cds00001"],
|
|
73
|
+
"Parent": ["mRNA00001"]
|
|
74
|
+
},
|
|
75
|
+
"child_features": [],
|
|
76
|
+
"derived_features": []
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
"seq_id": "ctg123",
|
|
80
|
+
"source": null,
|
|
81
|
+
"type": "CDS",
|
|
82
|
+
"start": 3000,
|
|
83
|
+
"end": 3902,
|
|
84
|
+
"score": null,
|
|
85
|
+
"strand": "+",
|
|
86
|
+
"phase": "0",
|
|
87
|
+
"attributes": {
|
|
88
|
+
"ID": ["cds00001"],
|
|
89
|
+
"Parent": ["mRNA00001"]
|
|
90
|
+
},
|
|
91
|
+
"child_features": [],
|
|
92
|
+
"derived_features": []
|
|
93
|
+
}
|
|
94
|
+
]
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## API
|
|
98
|
+
|
|
99
|
+
<!-- Generated by documentation.js. Update this documentation by updating the source code. -->
|
|
100
|
+
|
|
101
|
+
#### Table of Contents
|
|
102
|
+
|
|
103
|
+
- [ParseOptions](#parseoptions)
|
|
104
|
+
- [disableDerivesFromReferences](#disablederivesfromreferences)
|
|
105
|
+
- [encoding](#encoding)
|
|
106
|
+
- [parseFeatures](#parsefeatures)
|
|
107
|
+
- [parseDirectives](#parsedirectives)
|
|
108
|
+
- [parseComments](#parsecomments)
|
|
109
|
+
- [parseSequences](#parsesequences)
|
|
110
|
+
- [parseAll](#parseall)
|
|
111
|
+
|
|
112
|
+
### ParseOptions
|
|
113
|
+
|
|
114
|
+
Parser options
|
|
115
|
+
|
|
116
|
+
#### disableDerivesFromReferences
|
|
117
|
+
|
|
118
|
+
Whether to skip resolving `Derives_from` references between features, default false
|
|
119
|
+
|
|
120
|
+
Type:
|
|
121
|
+
[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
|
|
122
|
+
|
|
123
|
+
#### encoding
|
|
124
|
+
|
|
125
|
+
Text encoding of the input GFF3. default 'utf8'
|
|
126
|
+
|
|
127
|
+
Type: BufferEncoding
|
|
128
|
+
|
|
129
|
+
#### parseFeatures
|
|
130
|
+
|
|
131
|
+
Whether to parse features, default true
|
|
132
|
+
|
|
133
|
+
Type:
|
|
134
|
+
[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
|
|
135
|
+
|
|
136
|
+
#### parseDirectives
|
|
137
|
+
|
|
138
|
+
Whether to parse directives, default false
|
|
139
|
+
|
|
140
|
+
Type:
|
|
141
|
+
[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
|
|
142
|
+
|
|
143
|
+
#### parseComments
|
|
144
|
+
|
|
145
|
+
Whether to parse comments, default false
|
|
146
|
+
|
|
147
|
+
Type:
|
|
148
|
+
[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
|
|
149
|
+
|
|
150
|
+
#### parseSequences
|
|
151
|
+
|
|
152
|
+
Whether to parse sequences, default true
|
|
153
|
+
|
|
154
|
+
Type:
|
|
155
|
+
[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
|
|
156
|
+
|
|
157
|
+
#### parseAll
|
|
158
|
+
|
|
159
|
+
Parse all features, directives, comments, and sequences. Overrides other parsing
|
|
160
|
+
options. Default false.
|
|
161
|
+
|
|
162
|
+
Type:
|
|
163
|
+
[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
|
package/dist/api.d.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { GFF3Item } from './util';
|
|
2
|
+
/** Parser options */
|
|
3
|
+
export interface ParseOptions {
|
|
4
|
+
/** Whether to skip resolving Derives_from references between features, default false */
|
|
5
|
+
disableDerivesFromReferences?: boolean;
|
|
6
|
+
/** Text encoding of the input GFF3. default 'utf8' */
|
|
7
|
+
encoding?: BufferEncoding;
|
|
8
|
+
/** Whether to parse features, default true */
|
|
9
|
+
parseFeatures?: boolean;
|
|
10
|
+
/** Whether to parse directives, default false */
|
|
11
|
+
parseDirectives?: boolean;
|
|
12
|
+
/** Whether to parse comments, default false */
|
|
13
|
+
parseComments?: boolean;
|
|
14
|
+
/** Whether to parse sequences, default true */
|
|
15
|
+
parseSequences?: boolean;
|
|
16
|
+
/**
|
|
17
|
+
* Parse all features, directives, comments, and sequences. Overrides other
|
|
18
|
+
* parsing options. Default false.
|
|
19
|
+
*/
|
|
20
|
+
parseAll?: boolean;
|
|
21
|
+
}
|
|
22
|
+
export declare function parseStringSync(str: string, inputOptions?: ParseOptions): GFF3Item[];
|
|
23
|
+
export { type GFF3FeatureLine, type GFF3Comment, type GFF3FeatureLineWithRefs, type GFF3Directive, type GFF3Sequence, type GFF3Feature, type GFF3Item, } from './util';
|
package/dist/api.js
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Interop helper emitted by the TypeScript compiler. A module that is already
// flagged as an ES module is passed through untouched; anything else is wrapped
// so that it can be read through a `default` property.
var __importDefault = (this && this.__importDefault) || function (loaded) {
    if (loaded && loaded.__esModule) {
        return loaded;
    }
    return { "default": loaded };
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.parseStringSync = parseStringSync;
|
|
7
|
+
const parse_1 = __importDefault(require("./parse"));
|
|
8
|
+
// shared arg processing for the parse routines
|
|
9
|
+
/**
 * Build the effective option set for a parse call.
 *
 * Caller-supplied values are layered over the built-in defaults. When
 * `parseAll` is truthy, the four `parse*` switches are forced on regardless of
 * what the caller passed for them.
 *
 * @param {object} options caller-supplied options (must be an object)
 * @returns {object} defaults merged with `options`
 */
function _processParseOptions(options) {
    const defaults = {
        encoding: 'utf8',
        parseFeatures: true,
        parseDirectives: false,
        parseSequences: true,
        parseComments: false,
        disableDerivesFromReferences: false,
    };
    const resolved = Object.assign(defaults, options);
    if (!options.parseAll) {
        return resolved;
    }
    // parseAll overrides the individual switches
    for (const flag of ['parseFeatures', 'parseDirectives', 'parseComments', 'parseSequences']) {
        resolved[flag] = true;
    }
    return resolved;
}
|
|
19
|
+
/**
 * Synchronously parse a string of GFF3 text.
 *
 * The string is split on line breaks (`\n` or `\r\n`), each line is fed to a
 * parser instance, and everything the enabled callbacks receive is collected
 * into one array in the order it was emitted. Errors reported through the
 * parser's error callback are rethrown as-is.
 *
 * @param {string} str GFF3 text; a falsy value yields an empty array
 * @param {object} [inputOptions] parse options, see `_processParseOptions`
 * @returns {Array} the collected items
 */
function parseStringSync(str, inputOptions = {}) {
    if (!str) {
        return [];
    }
    const {
        parseFeatures,
        parseDirectives,
        parseComments,
        parseSequences,
        disableDerivesFromReferences,
    } = _processParseOptions(inputOptions);
    const collected = [];
    const collect = item => {
        collected.push(item);
    };
    // a disabled category gets no callback at all
    const callbackFor = enabled => (enabled ? collect : undefined);
    const parser = new parse_1.default({
        featureCallback: callbackFor(parseFeatures),
        directiveCallback: callbackFor(parseDirectives),
        commentCallback: callbackFor(parseComments),
        sequenceCallback: callbackFor(parseSequences),
        disableDerivesFromReferences: disableDerivesFromReferences || false,
        errorCallback: err => {
            throw err;
        },
    });
    for (const line of str.split(/\r?\n/)) {
        parser.addLine(line);
    }
    parser.finish();
    return collected;
}
|
|
40
|
+
//# sourceMappingURL=api.js.map
|
package/dist/api.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"api.js","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":";;;;;AAgDA,0CA2BC;AA3ED,oDAA4B;AA0B5B,+CAA+C;AAC/C,SAAS,oBAAoB,CAAC,OAAqB;IACjD,MAAM,GAAG,mBACP,QAAQ,EAAE,MAAe,EACzB,aAAa,EAAE,IAAI,EACnB,eAAe,EAAE,KAAK,EACtB,cAAc,EAAE,IAAI,EACpB,aAAa,EAAE,KAAK,EACpB,4BAA4B,EAAE,KAAK,IAChC,OAAO,CACX,CAAA;IAED,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACrB,GAAG,CAAC,aAAa,GAAG,IAAI,CAAA;QACxB,GAAG,CAAC,eAAe,GAAG,IAAI,CAAA;QAC1B,GAAG,CAAC,aAAa,GAAG,IAAI,CAAA;QACxB,GAAG,CAAC,cAAc,GAAG,IAAI,CAAA;IAC3B,CAAC;IAED,OAAO,GAAG,CAAA;AACZ,CAAC;AAED,SAAgB,eAAe,CAC7B,GAAW,EACX,eAA6B,EAAE;IAE/B,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,EAAE,CAAA;IACX,CAAC;IAED,MAAM,OAAO,GAAG,oBAAoB,CAAC,YAAY,CAAC,CAAA;IAClD,MAAM,KAAK,GAAe,EAAE,CAAA;IAC5B,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAEnC,MAAM,MAAM,GAAG,IAAI,eAAM,CAAC;QACxB,eAAe,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;QACzD,iBAAiB,EAAE,OAAO,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;QAC7D,eAAe,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;QACzD,gBAAgB,EAAE,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;QAC3D,4BAA4B,EAAE,OAAO,CAAC,4BAA4B,IAAI,KAAK;QAC3E,aAAa,EAAE,GAAG,CAAC,EAAE;YACnB,MAAM,GAAG,CAAA;QACX,CAAC;KACF,CAAC,CAAA;IAEF,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAA;IACvD,MAAM,CAAC,MAAM,EAAE,CAAA;IAEf,OAAO,KAAK,CAAA;AACd,CAAC"}
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.parseStringSync = void 0;
|
|
4
|
+
const api_1 = require("./api");
|
|
5
|
+
Object.defineProperty(exports, "parseStringSync", { enumerable: true, get: function () { return api_1.parseStringSync; } });
|
|
6
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,+BAAuC;AAC9B,gGADA,qBAAe,OACA"}
|
package/dist/parse.d.ts
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import * as GFF3 from './util';
|
|
2
|
+
export declare class FASTAParser {
|
|
3
|
+
seqCallback: (sequence: GFF3.GFF3Sequence) => void;
|
|
4
|
+
currentSequence: {
|
|
5
|
+
id: string;
|
|
6
|
+
sequence: string;
|
|
7
|
+
description?: string;
|
|
8
|
+
} | undefined;
|
|
9
|
+
constructor(seqCallback: (sequence: GFF3.GFF3Sequence) => void);
|
|
10
|
+
addLine(line: string): void;
|
|
11
|
+
private _flush;
|
|
12
|
+
finish(): void;
|
|
13
|
+
}
|
|
14
|
+
interface ParserArgs {
|
|
15
|
+
featureCallback?(feature: GFF3.GFF3Feature): void;
|
|
16
|
+
endCallback?(): void;
|
|
17
|
+
commentCallback?(comment: GFF3.GFF3Comment): void;
|
|
18
|
+
errorCallback?(error: string): void;
|
|
19
|
+
directiveCallback?(directive: GFF3.GFF3Directive): void;
|
|
20
|
+
sequenceCallback?(sequence: GFF3.GFF3Sequence): void;
|
|
21
|
+
bufferSize?: number;
|
|
22
|
+
disableDerivesFromReferences?: boolean;
|
|
23
|
+
}
|
|
24
|
+
export default class Parser {
|
|
25
|
+
featureCallback: (feature: GFF3.GFF3Feature) => void;
|
|
26
|
+
endCallback: () => void;
|
|
27
|
+
commentCallback: (comment: GFF3.GFF3Comment) => void;
|
|
28
|
+
errorCallback: (error: string) => void;
|
|
29
|
+
disableDerivesFromReferences: boolean;
|
|
30
|
+
directiveCallback: (directive: GFF3.GFF3Directive) => void;
|
|
31
|
+
sequenceCallback: (sequence: GFF3.GFF3Sequence) => void;
|
|
32
|
+
bufferSize: number;
|
|
33
|
+
fastaParser: FASTAParser | undefined;
|
|
34
|
+
eof: boolean;
|
|
35
|
+
lineNumber: number;
|
|
36
|
+
private _underConstructionTopLevel;
|
|
37
|
+
private _underConstructionById;
|
|
38
|
+
private _completedReferences;
|
|
39
|
+
private _underConstructionOrphans;
|
|
40
|
+
constructor(args: ParserArgs);
|
|
41
|
+
addLine(line: string): void;
|
|
42
|
+
finish(): void;
|
|
43
|
+
private _emitItem;
|
|
44
|
+
private _enforceBufferSizeLimit;
|
|
45
|
+
/**
|
|
46
|
+
* return all under-construction features, called when we know
|
|
47
|
+
* there will be no additional data to attach to them
|
|
48
|
+
*/
|
|
49
|
+
private _emitAllUnderConstructionFeatures;
|
|
50
|
+
private _bufferLine;
|
|
51
|
+
private _resolveReferencesTo;
|
|
52
|
+
private _parseError;
|
|
53
|
+
private _resolveReferencesFrom;
|
|
54
|
+
}
|
|
55
|
+
export {};
|
package/dist/parse.js
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Interop helpers emitted by the TypeScript compiler for `import * as x`.

// Expose source[key] on target under `alias` (defaults to `key`). When
// Object.create is available the property is defined through a descriptor: the
// source's own descriptor is reused unless it is missing, is an accessor on a
// non-ES module, or is a writable/configurable data property, in which case an
// enumerable getter reading source[key] is installed instead. Without
// Object.create the value is simply copied.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    if (alias === undefined) {
        alias = key;
    }
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    var needsGetter;
    if (!descriptor) {
        needsGetter = true;
    }
    else if ("get" in descriptor) {
        needsGetter = !source.__esModule;
    }
    else {
        needsGetter = descriptor.writable || descriptor.configurable;
    }
    if (needsGetter) {
        descriptor = { enumerable: true, get: function () { return source[key]; } };
    }
    Object.defineProperty(target, alias, descriptor);
}) : (function (target, source, key, alias) {
    if (alias === undefined) {
        alias = key;
    }
    target[alias] = source[key];
}));
// Attach `value` as the `default` member of a namespace object.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (target, value) {
    Object.defineProperty(target, "default", { enumerable: true, value: value });
}) : function (target, value) {
    target["default"] = value;
});
// Build a namespace object for a loaded module: ES modules pass through
// unchanged; otherwise every own property except "default" is bound onto a
// fresh object and the module itself becomes its `default`.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var name in mod) {
            if (name !== "default" && Object.prototype.hasOwnProperty.call(mod, name)) {
                __createBinding(namespace, mod, name);
            }
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.FASTAParser = void 0;
|
|
27
|
+
const GFF3 = __importStar(require("./util"));
|
|
28
|
+
const containerAttributes = {
|
|
29
|
+
Parent: 'child_features',
|
|
30
|
+
Derives_from: 'derived_features',
|
|
31
|
+
};
|
|
32
|
+
/**
 * Incremental, line-at-a-time FASTA reader.
 *
 * A line starting with `>` opens a new record: the first run of non-space
 * characters after it is the id and any remaining text (trimmed) is the
 * description. Every following non-blank line is appended to the record's
 * sequence with all whitespace removed. A record is handed to `seqCallback`
 * when the next header arrives or when `finish()` is called.
 */
class FASTAParser {
    /**
     * @param {function} seqCallback invoked with each `{ id, sequence, description? }` record
     */
    constructor(seqCallback) {
        this.seqCallback = seqCallback;
        this.currentSequence = undefined;
    }
    /**
     * Consume one line of FASTA text.
     * @param {string} line
     */
    addLine(line) {
        const header = /^>\s*(\S+)\s*(.*)/.exec(line);
        if (!header) {
            // sequence data; ignored until a header has been seen
            const active = this.currentSequence;
            if (active && /\S/.test(line)) {
                active.sequence += line.replaceAll(/\s/g, '');
            }
            return;
        }
        // a new header closes the record that was being accumulated
        this._flush();
        const [, id, rest] = header;
        const record = { id, sequence: '' };
        this.currentSequence = record;
        if (rest) {
            record.description = rest.trim();
        }
    }
    /** Hand the record being accumulated, if any, to the callback. */
    _flush() {
        const pending = this.currentSequence;
        if (pending) {
            this.seqCallback(pending);
        }
    }
    /** Signal end of input so the last record is delivered. */
    finish() {
        this._flush();
    }
}
|
|
59
|
+
exports.FASTAParser = FASTAParser;
|
|
60
|
+
/**
 * Line-oriented GFF3 parser.
 *
 * Lines are supplied one at a time through `addLine()`; `finish()` must be
 * called after the last line. Features, directives, comments and FASTA
 * sequences are delivered through the callbacks given to the constructor.
 *
 * A feature is an array of one or more feature lines that share an ID. Features
 * that carry an ID, or that refer to another feature through `Parent` /
 * `Derives_from`, are buffered so that children can be attached to them; they
 * are emitted when a `###` line is seen, when a FASTA section starts, when the
 * buffer limit is exceeded, or on `finish()`.
 */
class Parser {
    /**
     * @param {object} args
     * @param {function} [args.featureCallback] receives each completed feature
     * @param {function} [args.endCallback] called at the end of `finish()`
     * @param {function} [args.commentCallback] receives `{ comment }` objects
     * @param {function} [args.errorCallback] receives `"<lineNumber>: <message>"` strings
     * @param {function} [args.directiveCallback] receives parsed directives
     * @param {function} [args.sequenceCallback] receives FASTA records
     * @param {number} [args.bufferSize=1000] maximum number of buffered top-level features
     * @param {boolean} [args.disableDerivesFromReferences=false] when true,
     *   `Derives_from` attributes are not used to link features
     */
    constructor(args) {
        this.fastaParser = undefined;
        // if this is true, the parser ignores the rest of the lines in the
        // file. currently set when the file switches over to FASTA
        this.eof = false;
        this.lineNumber = 0;
        // features that we have to keep on hand for now because they might be
        // referenced by something else
        this._underConstructionTopLevel = [];
        // The three dictionaries below are keyed by IDs read from the input.
        // They are created without a prototype so that an ID such as
        // "constructor", "toString" or "__proto__" is an ordinary key and is
        // never confused with an inherited Object.prototype member.
        // index of the above by ID
        this._underConstructionById = Object.create(null);
        this._completedReferences = Object.create(null);
        // features that reference something we have not seen yet
        // structured as:
        // {  'some_id' : {
        //     'Parent' : [ orphans that have a Parent attr referencing it ],
        //     'Derives_from' : [ orphans that have a Derives_from attr referencing it ],
        //    }
        // }
        this._underConstructionOrphans = Object.create(null);
        // eslint-disable-next-line @typescript-eslint/no-empty-function
        const nullFunc = () => { };
        this.featureCallback = args.featureCallback || nullFunc;
        this.endCallback = args.endCallback || nullFunc;
        this.commentCallback = args.commentCallback || nullFunc;
        this.errorCallback = args.errorCallback || nullFunc;
        this.directiveCallback = args.directiveCallback || nullFunc;
        this.sequenceCallback = args.sequenceCallback || nullFunc;
        this.disableDerivesFromReferences =
            args.disableDerivesFromReferences || false;
        // number of lines to buffer
        this.bufferSize = args.bufferSize === undefined ? 1000 : args.bufferSize;
    }
    /**
     * Consume one line of input (without its line terminator).
     *
     * @param {string} line
     * @throws {Error} if the line cannot be classified, or if a `###` / FASTA
     *   boundary is reached while some features still reference unknown IDs
     */
    addLine(line) {
        // if we have transitioned to a fasta section, just delegate to that parser
        if (this.fastaParser) {
            this.fastaParser.addLine(line);
            return;
        }
        if (this.eof) {
            // otherwise, if we are done, ignore this line
            return;
        }
        this.lineNumber += 1;
        if (/^\s*[^#\s>]/.test(line)) {
            // feature line, most common case
            this._bufferLine(line);
            return;
        }
        const match = /^\s*(#+)(.*)/.exec(line);
        if (match) {
            // directive or comment
            const [, hashsigns, contents] = match;
            if (hashsigns.length === 3) {
                // sync directive, all forward-references are resolved.
                this._emitAllUnderConstructionFeatures();
            }
            else if (hashsigns.length === 2) {
                const directive = GFF3.parseDirective(line);
                if (directive) {
                    if (directive.directive === 'FASTA') {
                        this._emitAllUnderConstructionFeatures();
                        this.eof = true;
                        this.fastaParser = new FASTAParser(this.sequenceCallback);
                    }
                    else {
                        this._emitItem(directive);
                    }
                }
            }
            else {
                // any other number of '#' is a comment; drop leading whitespace
                this._emitItem({ comment: contents.replace(/\s*/, '') });
            }
        }
        else if (/^\s*$/.test(line)) {
            // blank line, do nothing
        }
        else if (/^\s*>/.test(line)) {
            // implicit beginning of a FASTA section
            this._emitAllUnderConstructionFeatures();
            this.eof = true;
            this.fastaParser = new FASTAParser(this.sequenceCallback);
            this.fastaParser.addLine(line);
        }
        else {
            // it's a parse error
            const errLine = line.replaceAll(/\r?\n?$/g, '');
            throw new Error(`GFF3 parse error.  Cannot parse '${errLine}'.`);
        }
    }
    /**
     * Signal end of input: emits everything still buffered, finishes the FASTA
     * parser if one was started, then calls `endCallback`.
     *
     * @throws {Error} if some features reference IDs that were never defined
     */
    finish() {
        this._emitAllUnderConstructionFeatures();
        if (this.fastaParser) {
            this.fastaParser.finish();
        }
        this.endCallback();
    }
    /**
     * Route an item to the matching callback: arrays are features, objects
     * with a `directive` key are directives, objects with a `comment` key are
     * comments. Anything else is dropped.
     */
    _emitItem(i) {
        if (Array.isArray(i)) {
            this.featureCallback(i);
        }
        else if ('directive' in i) {
            this.directiveCallback(i);
        }
        else if ('comment' in i) {
            this.commentCallback(i);
        }
    }
    /**
     * Emit the oldest buffered top-level features until the buffer, plus
     * `additionalItemCount` items about to be added, fits in `bufferSize`.
     * The IDs of each emitted feature and of all its descendants are removed
     * from the lookup tables.
     */
    _enforceBufferSizeLimit(additionalItemCount = 0) {
        const _unbufferItem = (item) => {
            if (!item || !Array.isArray(item)) {
                return;
            }
            const attributes = item[0].attributes;
            const ids = attributes && attributes.ID;
            if (!ids || !ids[0]) {
                return;
            }
            ids.forEach(id => {
                delete this._underConstructionById[id];
                delete this._completedReferences[id];
            });
            item.forEach(location => {
                if (location.child_features) {
                    location.child_features.forEach(c => _unbufferItem(c));
                }
                if (location.derived_features) {
                    location.derived_features.forEach(d => _unbufferItem(d));
                }
            });
        };
        while (this._underConstructionTopLevel.length + additionalItemCount >
            this.bufferSize) {
            const item = this._underConstructionTopLevel.shift();
            if (item) {
                this._emitItem(item);
                _unbufferItem(item);
            }
        }
    }
    /**
     * Emit all under-construction features and reset the lookup tables; called
     * when we know there will be no additional data to attach to them.
     *
     * @throws {Error} if any feature still references an ID that was not seen
     */
    _emitAllUnderConstructionFeatures() {
        this._underConstructionTopLevel.forEach(item => this._emitItem(item));
        this._underConstructionTopLevel = [];
        this._underConstructionById = Object.create(null);
        this._completedReferences = Object.create(null);
        // if we have any orphans hanging around still, this is a
        // problem. die with a parse error
        const orphanIds = Object.keys(this._underConstructionOrphans);
        if (orphanIds.length) {
            throw new Error(`some features reference other features that do not exist in the file (or in the same '###' scope). ${orphanIds.join(',')}`);
        }
    }
    /**
     * Do the right thing with a newly-parsed feature line: emit it at once if
     * it has no ID and references nothing, otherwise buffer it and link it to
     * the features it belongs to or refers to.
     */
    _bufferLine(line) {
        const featureLine = Object.assign({}, GFF3.parseFeature(line), { child_features: [], derived_features: [] });
        // featureLine._lineNumber = this.lineNumber //< debugging aid
        // NOTE: a feature is an array of one or more feature lines.
        const attributes = featureLine.attributes;
        const ids = (attributes && attributes.ID) || [];
        const parents = (attributes && attributes.Parent) || [];
        const derives = this.disableDerivesFromReferences
            ? []
            : (attributes && attributes.Derives_from) || [];
        if (!ids.length && !parents.length && !derives.length) {
            // if it has no IDs and does not refer to anything, we can just
            // output it
            this._emitItem([featureLine]);
            return;
        }
        let feature = undefined;
        ids.forEach(id => {
            const existing = this._underConstructionById[id];
            if (existing) {
                // another location of the same feature
                const previous = existing[existing.length - 1];
                if (previous.type !== featureLine.type) {
                    this._parseError(`multi-line feature "${id}" has inconsistent types: "${featureLine.type}", "${previous.type}"`);
                }
                existing.push(featureLine);
                feature = existing;
            }
            else {
                // haven't seen it yet, so buffer it so we can attach
                // child features to it
                feature = [featureLine];
                this._enforceBufferSizeLimit(1);
                if (!parents.length && !derives.length) {
                    this._underConstructionTopLevel.push(feature);
                }
                this._underConstructionById[id] = feature;
                // see if we have anything buffered that refers to it
                this._resolveReferencesTo(feature, id);
            }
        });
        // try to resolve all its references
        this._resolveReferencesFrom(feature || [featureLine], { Parent: parents, Derives_from: derives }, ids);
    }
    /**
     * Attach to `feature` every buffered orphan that was waiting for `id`.
     */
    _resolveReferencesTo(feature, id) {
        const references = this._underConstructionOrphans[id];
        //   references is of the form
        //   {
        //     'Parent' : [ orphans that have a Parent attr referencing this feature ],
        //     'Derives_from' : [ orphans that have a Derives_from attr referencing this feature ],
        //    }
        if (!references) {
            return;
        }
        feature.forEach(loc => {
            loc.child_features.push(...references.Parent);
        });
        feature.forEach(loc => {
            loc.derived_features.push(...references.Derives_from);
        });
        delete this._underConstructionOrphans[id];
    }
    /**
     * Stop parsing further lines and report `message`, prefixed with the
     * current line number, through `errorCallback`.
     */
    _parseError(message) {
        this.eof = true;
        this.errorCallback(`${this.lineNumber}: ${message}`);
    }
    /**
     * Link `feature` to the features named in `references`.
     *
     * If the target is already buffered, `feature` is pushed onto the target's
     * `child_features` / `derived_features` (once per referencing ID and
     * target); otherwise it is parked as an orphan until the target appears.
     *
     * @param {Array} feature the feature doing the referencing
     * @param {{Parent: string[], Derives_from: string[]}} references target IDs
     * @param {string[]} ids the IDs of `feature`
     */
    _resolveReferencesFrom(feature, references, ids) {
        // records that `fromId` has been linked under `key`; returns whether
        // that had already been recorded
        const markCompleted = (fromId, key) => {
            let seen = this._completedReferences[fromId];
            if (!seen) {
                seen = Object.create(null);
                this._completedReferences[fromId] = seen;
            }
            const already = seen[key] || false;
            seen[key] = true;
            return already;
        };
        // all Parent references are handled before any Derives_from reference
        ['Parent', 'Derives_from'].forEach(attrName => {
            references[attrName].forEach(toId => {
                const otherFeature = this._underConstructionById[toId];
                if (otherFeature) {
                    const pname = containerAttributes[attrName];
                    // filter (not some): every id has to be marked, so the
                    // scan must not stop at the first hit
                    const alreadyLinked = ids.filter(id => markCompleted(id, `${attrName},${toId}`));
                    if (!alreadyLinked.length) {
                        otherFeature.forEach(location => {
                            location[pname].push(feature);
                        });
                    }
                }
                else {
                    let ref = this._underConstructionOrphans[toId];
                    if (!ref) {
                        ref = {
                            Parent: [],
                            Derives_from: [],
                        };
                        this._underConstructionOrphans[toId] = ref;
                    }
                    ref[attrName].push(feature);
                }
            });
        });
    }
}
|
|
340
|
+
exports.default = Parser;
|
|
341
|
+
//# sourceMappingURL=parse.js.map
|