gff-nostream 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/parse.js ADDED
@@ -0,0 +1,317 @@
1
+ import * as GFF3 from './util';
2
+ const containerAttributes = {
3
+ Parent: 'child_features',
4
+ Derives_from: 'derived_features',
5
+ };
6
+ export class FASTAParser {
7
+ constructor(seqCallback) {
8
+ this.seqCallback = seqCallback;
9
+ this.currentSequence = undefined;
10
+ }
11
+ addLine(line) {
12
+ const defMatch = /^>\s*(\S+)\s*(.*)/.exec(line);
13
+ if (defMatch) {
14
+ this._flush();
15
+ this.currentSequence = { id: defMatch[1], sequence: '' };
16
+ if (defMatch[2]) {
17
+ this.currentSequence.description = defMatch[2].trim();
18
+ }
19
+ }
20
+ else if (this.currentSequence && /\S/.test(line)) {
21
+ this.currentSequence.sequence += line.replaceAll(/\s/g, '');
22
+ }
23
+ }
24
+ _flush() {
25
+ if (this.currentSequence) {
26
+ this.seqCallback(this.currentSequence);
27
+ }
28
+ }
29
+ finish() {
30
+ this._flush();
31
+ }
32
+ }
33
+ export default class Parser {
34
+ constructor(args) {
35
+ this.fastaParser = undefined;
36
+ // if this is true, the parser ignores the
37
+ // rest of the lines in the file. currently
38
+ // set when the file switches over to FASTA
39
+ this.eof = false;
40
+ this.lineNumber = 0;
41
+ // features that we have to keep on hand for now because they
42
+ // might be referenced by something else
43
+ this._underConstructionTopLevel = [];
44
+ // index of the above by ID
45
+ this._underConstructionById = {};
46
+ this._completedReferences = {};
47
+ // features that reference something we have not seen yet
48
+ // structured as:
49
+ // { 'some_id' : {
50
+ // 'Parent' : [ orphans that have a Parent attr referencing it ],
51
+ // 'Derives_from' : [ orphans that have a Derives_from attr referencing it ],
52
+ // }
53
+ // }
54
+ this._underConstructionOrphans = {};
55
+ // eslint-disable-next-line @typescript-eslint/no-empty-function
56
+ const nullFunc = () => { };
57
+ this.featureCallback = args.featureCallback || nullFunc;
58
+ this.endCallback = args.endCallback || nullFunc;
59
+ this.commentCallback = args.commentCallback || nullFunc;
60
+ this.errorCallback = args.errorCallback || nullFunc;
61
+ this.directiveCallback = args.directiveCallback || nullFunc;
62
+ this.sequenceCallback = args.sequenceCallback || nullFunc;
63
+ this.disableDerivesFromReferences =
64
+ args.disableDerivesFromReferences || false;
65
+ // number of lines to buffer
66
+ this.bufferSize = args.bufferSize === undefined ? 1000 : args.bufferSize;
67
+ }
68
+ addLine(line) {
69
+ // if we have transitioned to a fasta section, just delegate to that parser
70
+ if (this.fastaParser) {
71
+ this.fastaParser.addLine(line);
72
+ return;
73
+ }
74
+ if (this.eof) {
75
+ // otherwise, if we are done, ignore this line
76
+ return;
77
+ }
78
+ this.lineNumber += 1;
79
+ if (/^\s*[^#\s>]/.test(line)) {
80
+ // feature line, most common case
81
+ this._bufferLine(line);
82
+ return;
83
+ }
84
+ const match = /^\s*(#+)(.*)/.exec(line);
85
+ if (match) {
86
+ // directive or comment
87
+ const [, hashsigns] = match;
88
+ let [, , contents] = match;
89
+ if (hashsigns.length === 3) {
90
+ // sync directive, all forward-references are resolved.
91
+ this._emitAllUnderConstructionFeatures();
92
+ }
93
+ else if (hashsigns.length === 2) {
94
+ const directive = GFF3.parseDirective(line);
95
+ if (directive) {
96
+ if (directive.directive === 'FASTA') {
97
+ this._emitAllUnderConstructionFeatures();
98
+ this.eof = true;
99
+ this.fastaParser = new FASTAParser(this.sequenceCallback);
100
+ }
101
+ else {
102
+ this._emitItem(directive);
103
+ }
104
+ }
105
+ }
106
+ else {
107
+ contents = contents.replace(/\s*/, '');
108
+ this._emitItem({ comment: contents });
109
+ }
110
+ }
111
+ else if (/^\s*$/.test(line)) {
112
+ // blank line, do nothing
113
+ }
114
+ else if (/^\s*>/.test(line)) {
115
+ // implicit beginning of a FASTA section
116
+ this._emitAllUnderConstructionFeatures();
117
+ this.eof = true;
118
+ this.fastaParser = new FASTAParser(this.sequenceCallback);
119
+ this.fastaParser.addLine(line);
120
+ }
121
+ else {
122
+ // it's a parse error
123
+ const errLine = line.replaceAll(/\r?\n?$/g, '');
124
+ throw new Error(`GFF3 parse error. Cannot parse '${errLine}'.`);
125
+ }
126
+ }
127
+ finish() {
128
+ this._emitAllUnderConstructionFeatures();
129
+ if (this.fastaParser) {
130
+ this.fastaParser.finish();
131
+ }
132
+ this.endCallback();
133
+ }
134
+ _emitItem(i) {
135
+ if (Array.isArray(i)) {
136
+ this.featureCallback(i);
137
+ }
138
+ else if ('directive' in i) {
139
+ this.directiveCallback(i);
140
+ }
141
+ else if ('comment' in i) {
142
+ this.commentCallback(i);
143
+ }
144
+ }
145
+ _enforceBufferSizeLimit(additionalItemCount = 0) {
146
+ const _unbufferItem = (item) => {
147
+ var _a, _b;
148
+ if (item && Array.isArray(item) && ((_b = (_a = item[0].attributes) === null || _a === void 0 ? void 0 : _a.ID) === null || _b === void 0 ? void 0 : _b[0])) {
149
+ const ids = item[0].attributes.ID;
150
+ ids.forEach(id => {
151
+ delete this._underConstructionById[id];
152
+ delete this._completedReferences[id];
153
+ });
154
+ item.forEach(i => {
155
+ if (i.child_features) {
156
+ i.child_features.forEach(c => _unbufferItem(c));
157
+ }
158
+ if (i.derived_features) {
159
+ i.derived_features.forEach(d => _unbufferItem(d));
160
+ }
161
+ });
162
+ }
163
+ };
164
+ while (this._underConstructionTopLevel.length + additionalItemCount >
165
+ this.bufferSize) {
166
+ const item = this._underConstructionTopLevel.shift();
167
+ if (item) {
168
+ this._emitItem(item);
169
+ _unbufferItem(item);
170
+ }
171
+ }
172
+ }
173
+ /**
174
+ * return all under-construction features, called when we know
175
+ * there will be no additional data to attach to them
176
+ */
177
+ _emitAllUnderConstructionFeatures() {
178
+ this._underConstructionTopLevel.forEach(this._emitItem.bind(this));
179
+ this._underConstructionTopLevel = [];
180
+ this._underConstructionById = {};
181
+ this._completedReferences = {};
182
+ // if we have any orphans hanging around still, this is a
183
+ // problem. die with a parse error
184
+ if (Array.from(Object.values(this._underConstructionOrphans)).length) {
185
+ throw new Error(`some features reference other features that do not exist in the file (or in the same '###' scope). ${Object.keys(this._underConstructionOrphans).join(',')}`);
186
+ }
187
+ }
188
+ // do the right thing with a newly-parsed feature line
189
+ _bufferLine(line) {
190
+ var _a, _b, _c;
191
+ const rawFeatureLine = GFF3.parseFeature(line);
192
+ const featureLine = {
193
+ ...rawFeatureLine,
194
+ child_features: [],
195
+ derived_features: [],
196
+ };
197
+ // featureLine._lineNumber = this.lineNumber //< debugging aid
198
+ // NOTE: a feature is an arrayref of one or more feature lines.
199
+ const ids = ((_a = featureLine.attributes) === null || _a === void 0 ? void 0 : _a.ID) || [];
200
+ const parents = ((_b = featureLine.attributes) === null || _b === void 0 ? void 0 : _b.Parent) || [];
201
+ const derives = this.disableDerivesFromReferences
202
+ ? []
203
+ : ((_c = featureLine.attributes) === null || _c === void 0 ? void 0 : _c.Derives_from) || [];
204
+ if (!ids.length && !parents.length && !derives.length) {
205
+ // if it has no IDs and does not refer to anything, we can just
206
+ // output it
207
+ this._emitItem([featureLine]);
208
+ return;
209
+ }
210
+ let feature = undefined;
211
+ ids.forEach(id => {
212
+ const existing = this._underConstructionById[id];
213
+ if (existing) {
214
+ // another location of the same feature
215
+ if (existing[existing.length - 1].type !== featureLine.type) {
216
+ this._parseError(`multi-line feature "${id}" has inconsistent types: "${featureLine.type}", "${existing[existing.length - 1].type}"`);
217
+ }
218
+ existing.push(featureLine);
219
+ feature = existing;
220
+ }
221
+ else {
222
+ // haven't seen it yet, so buffer it so we can attach
223
+ // child features to it
224
+ feature = [featureLine];
225
+ this._enforceBufferSizeLimit(1);
226
+ if (!parents.length && !derives.length) {
227
+ this._underConstructionTopLevel.push(feature);
228
+ }
229
+ this._underConstructionById[id] = feature;
230
+ // see if we have anything buffered that refers to it
231
+ this._resolveReferencesTo(feature, id);
232
+ }
233
+ });
234
+ // try to resolve all its references
235
+ this._resolveReferencesFrom(feature || [featureLine], { Parent: parents, Derives_from: derives }, ids);
236
+ }
237
+ _resolveReferencesTo(feature, id) {
238
+ const references = this._underConstructionOrphans[id];
239
+ // references is of the form
240
+ // {
241
+ // 'Parent' : [ orphans that have a Parent attr referencing this feature ],
242
+ // 'Derives_from' : [ orphans that have a Derives_from attr referencing this feature ],
243
+ // }
244
+ if (!references) {
245
+ return;
246
+ }
247
+ feature.forEach(loc => {
248
+ loc.child_features.push(...references.Parent);
249
+ });
250
+ feature.forEach(loc => {
251
+ loc.derived_features.push(...references.Derives_from);
252
+ });
253
+ delete this._underConstructionOrphans[id];
254
+ }
255
+ _parseError(message) {
256
+ this.eof = true;
257
+ this.errorCallback(`${this.lineNumber}: ${message}`);
258
+ }
259
+ _resolveReferencesFrom(feature, references, ids) {
260
+ // this is all a bit more awkward in javascript than it was in perl
261
+ function postSet(obj, slot1, slot2) {
262
+ let subObj = obj[slot1];
263
+ if (!subObj) {
264
+ subObj = {};
265
+ obj[slot1] = subObj;
266
+ }
267
+ const returnVal = subObj[slot2] || false;
268
+ subObj[slot2] = true;
269
+ return returnVal;
270
+ }
271
+ references.Parent.forEach(toId => {
272
+ const otherFeature = this._underConstructionById[toId];
273
+ if (otherFeature) {
274
+ const pname = containerAttributes.Parent;
275
+ if (!ids.filter(id => postSet(this._completedReferences, id, `Parent,${toId}`)).length) {
276
+ otherFeature.forEach(location => {
277
+ location[pname].push(feature);
278
+ });
279
+ }
280
+ }
281
+ else {
282
+ let ref = this._underConstructionOrphans[toId];
283
+ if (!ref) {
284
+ ref = {
285
+ Parent: [],
286
+ Derives_from: [],
287
+ };
288
+ this._underConstructionOrphans[toId] = ref;
289
+ }
290
+ ref.Parent.push(feature);
291
+ }
292
+ });
293
+ references.Derives_from.forEach(toId => {
294
+ const otherFeature = this._underConstructionById[toId];
295
+ if (otherFeature) {
296
+ const pname = containerAttributes.Derives_from;
297
+ if (!ids.filter(id => postSet(this._completedReferences, id, `Derives_from,${toId}`)).length) {
298
+ otherFeature.forEach(location => {
299
+ location[pname].push(feature);
300
+ });
301
+ }
302
+ }
303
+ else {
304
+ let ref = this._underConstructionOrphans[toId];
305
+ if (!ref) {
306
+ ref = {
307
+ Parent: [],
308
+ Derives_from: [],
309
+ };
310
+ this._underConstructionOrphans[toId] = ref;
311
+ }
312
+ ref.Derives_from.push(feature);
313
+ }
314
+ });
315
+ }
316
+ }
317
+ //# sourceMappingURL=parse.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parse.js","sourceRoot":"","sources":["../src/parse.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,QAAQ,CAAA;AAE9B,MAAM,mBAAmB,GAAG;IAC1B,MAAM,EAAE,gBAAyB;IACjC,YAAY,EAAE,kBAA2B;CAC1C,CAAA;AAED,MAAM,OAAO,WAAW;IAMtB,YAAY,WAAkD;QAC5D,IAAI,CAAC,WAAW,GAAG,WAAW,CAAA;QAC9B,IAAI,CAAC,eAAe,GAAG,SAAS,CAAA;IAClC,CAAC;IAED,OAAO,CAAC,IAAY;QAClB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC/C,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,CAAC,MAAM,EAAE,CAAA;YACb,IAAI,CAAC,eAAe,GAAG,EAAE,EAAE,EAAE,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAA;YACxD,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;gBAChB,IAAI,CAAC,eAAe,CAAC,WAAW,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;YACvD,CAAC;QACH,CAAC;aAAM,IAAI,IAAI,CAAC,eAAe,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACnD,IAAI,CAAC,eAAe,CAAC,QAAQ,IAAI,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;QAC7D,CAAC;IACH,CAAC;IAEO,MAAM;QACZ,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,eAAe,CAAC,CAAA;QACxC,CAAC;IACH,CAAC;IAED,MAAM;QACJ,IAAI,CAAC,MAAM,EAAE,CAAA;IACf,CAAC;CACF;AAkBD,MAAM,CAAC,OAAO,OAAO,MAAM;IAkCzB,YAAY,IAAgB;QAzB5B,gBAAW,GAA4B,SAAS,CAAA;QAChD,0CAA0C;QAC1C,4CAA4C;QAC5C,2CAA2C;QAC3C,QAAG,GAAG,KAAK,CAAA;QACX,eAAU,GAAG,CAAC,CAAA;QACd,6DAA6D;QAC7D,wCAAwC;QAChC,+BAA0B,GAAuB,EAAE,CAAA;QAC3D,2BAA2B;QACnB,2BAAsB,GAC5B,EAAE,CAAA;QACI,yBAAoB,GAGxB,EAAE,CAAA;QACN,yDAAyD;QACzD,iBAAiB;QACjB,mBAAmB;QACnB,qEAAqE;QACrE,iFAAiF;QACjF,OAAO;QACP,IAAI;QACI,8BAAyB,GAA2C,EAAE,CAAA;QAG5E,gEAAgE;QAChE,MAAM,QAAQ,GAAG,GAAG,EAAE,GAAE,CAAC,CAAA;QAEzB,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,eAAe,IAAI,QAAQ,CAAA;QACvD,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,QAAQ,CAAA;QAC/C,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,eAAe,IAAI,QAAQ,CAAA;QACvD,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,QAAQ,CAAA;QACnD,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC,iBAAiB,IAAI,QAAQ,CAAA;QAC3D,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC,gBAAgB,IAAI,QAAQ,CAAA;QACzD,IAAI,CAAC,4BAA4B;YAC/B,IAAI,CAAC,4BAA4B,IAAI,KAAK,CAAA;QAE5C,4BAA4B;QAC5B,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAA
C,CAAC,CAAC,IAAI,CAAC,UAAU,CAAA;IAC1E,CAAC;IAED,OAAO,CAAC,IAAY;QAClB,2EAA2E;QAC3E,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;YAC9B,OAAM;QACR,CAAC;QACD,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC;YACb,8CAA8C;YAC9C,OAAM;QACR,CAAC;QAED,IAAI,CAAC,UAAU,IAAI,CAAC,CAAA;QAEpB,IAAI,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,iCAAiC;YACjC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAA;YACtB,OAAM;QACR,CAAC;QAED,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACvC,IAAI,KAAK,EAAE,CAAC;YACV,uBAAuB;YACvB,MAAM,CAAC,EAAE,SAAS,CAAC,GAAG,KAAK,CAAA;YAC3B,IAAI,CAAC,EAAE,AAAD,EAAG,QAAQ,CAAC,GAAG,KAAK,CAAA;YAE1B,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC3B,uDAAuD;gBACvD,IAAI,CAAC,iCAAiC,EAAE,CAAA;YAC1C,CAAC;iBAAM,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAClC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAA;gBAC3C,IAAI,SAAS,EAAE,CAAC;oBACd,IAAI,SAAS,CAAC,SAAS,KAAK,OAAO,EAAE,CAAC;wBACpC,IAAI,CAAC,iCAAiC,EAAE,CAAA;wBACxC,IAAI,CAAC,GAAG,GAAG,IAAI,CAAA;wBACf,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAA;oBAC3D,CAAC;yBAAM,CAAC;wBACN,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAA;oBAC3B,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;gBACtC,IAAI,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAA;YACvC,CAAC;QACH,CAAC;aAAM,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9B,yBAAyB;QAC3B,CAAC;aAAM,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9B,wCAAwC;YACxC,IAAI,CAAC,iCAAiC,EAAE,CAAA;YACxC,IAAI,CAAC,GAAG,GAAG,IAAI,CAAA;YACf,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAA;YACzD,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QAChC,CAAC;aAAM,CAAC;YACN,qBAAqB;YACrB,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE,EAAE,CAAC,CAAA;YAC/C,MAAM,IAAI,KAAK,CAAC,oCAAoC,OAAO,IAAI,CAAC,CAAA;QAClE,CAAC;IACH,CAAC;IAED,MAAM;QACJ,IAAI,CAAC,iCAAiC,EAAE,CAAA;QACxC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAA;QAC3B,CAAC;QACD,IAAI,CAAC,WAAW,EAAE,CAAA;IACpB,CAAC;IAEO,SAAS,CACf,CAA2D;QAE3D,IAAI,KAAK,CAAC,OAAO,CAAC,
CAAC,CAAC,EAAE,CAAC;YACrB,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QACzB,CAAC;aAAM,IAAI,WAAW,IAAI,CAAC,EAAE,CAAC;YAC5B,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAA;QAC3B,CAAC;aAAM,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;YAC1B,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QACzB,CAAC;IACH,CAAC;IAEO,uBAAuB,CAAC,mBAAmB,GAAG,CAAC;QACrD,MAAM,aAAa,GAAG,CAAC,IAAuB,EAAE,EAAE;;YAChD,IAAI,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,KAAI,MAAA,MAAA,IAAI,CAAC,CAAC,CAAC,CAAC,UAAU,0CAAE,EAAE,0CAAG,CAAC,CAAC,CAAA,EAAE,CAAC;gBAC/D,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,CAAA;gBACjC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE;oBACf,OAAO,IAAI,CAAC,sBAAsB,CAAC,EAAE,CAAC,CAAA;oBACtC,OAAO,IAAI,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAA;gBACtC,CAAC,CAAC,CAAA;gBACF,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;oBACf,IAAI,CAAC,CAAC,cAAc,EAAE,CAAC;wBACrB,CAAC,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAA;oBACjD,CAAC;oBACD,IAAI,CAAC,CAAC,gBAAgB,EAAE,CAAC;wBACvB,CAAC,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAA;oBACnD,CAAC;gBACH,CAAC,CAAC,CAAA;YACJ,CAAC;QACH,CAAC,CAAA;QAED,OACE,IAAI,CAAC,0BAA0B,CAAC,MAAM,GAAG,mBAAmB;YAC5D,IAAI,CAAC,UAAU,EACf,CAAC;YACD,MAAM,IAAI,GAAG,IAAI,CAAC,0BAA0B,CAAC,KAAK,EAAE,CAAA;YACpD,IAAI,IAAI,EAAE,CAAC;gBACT,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;gBACpB,aAAa,CAAC,IAAI,CAAC,CAAA;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,iCAAiC;QACvC,IAAI,CAAC,0BAA0B,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAElE,IAAI,CAAC,0BAA0B,GAAG,EAAE,CAAA;QACpC,IAAI,CAAC,sBAAsB,GAAG,EAAE,CAAA;QAChC,IAAI,CAAC,oBAAoB,GAAG,EAAE,CAAA;QAE9B,yDAAyD;QACzD,kCAAkC;QAClC,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;YACrE,MAAM,IAAI,KAAK,CACb,sGAAsG,MAAM,CAAC,IAAI,CAC/G,IAAI,CAAC,yBAAyB,CAC/B,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CACd,CAAA;QACH,CAAC;IACH,CAAC;IAED,sDAAsD;IAC9C,WAAW,CAAC,IAAY;;QAC9B,MAAM,cAAc,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAA;QAC9C,MAAM,WAAW,GAAiC;YAChD,GAAG,cAAc;YACjB,cAAc,EAAE,EAAE;YAClB,gBAAgB,EAAE,EAAE;SACrB,CAAA;Q
ACD,8DAA8D;QAE9D,+DAA+D;QAC/D,MAAM,GAAG,GAAG,CAAA,MAAA,WAAW,CAAC,UAAU,0CAAE,EAAE,KAAI,EAAE,CAAA;QAC5C,MAAM,OAAO,GAAG,CAAA,MAAA,WAAW,CAAC,UAAU,0CAAE,MAAM,KAAI,EAAE,CAAA;QACpD,MAAM,OAAO,GAAG,IAAI,CAAC,4BAA4B;YAC/C,CAAC,CAAC,EAAE;YACJ,CAAC,CAAC,CAAA,MAAA,WAAW,CAAC,UAAU,0CAAE,YAAY,KAAI,EAAE,CAAA;QAE9C,IAAI,CAAC,GAAG,CAAC,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YACtD,+DAA+D;YAC/D,YAAY;YACZ,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,CAAC,CAAC,CAAA;YAC7B,OAAM;QACR,CAAC;QAED,IAAI,OAAO,GAAiC,SAAS,CAAA;QACrD,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE;YACf,MAAM,QAAQ,GAAG,IAAI,CAAC,sBAAsB,CAAC,EAAE,CAAC,CAAA;YAChD,IAAI,QAAQ,EAAE,CAAC;gBACb,uCAAuC;gBACvC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,IAAI,EAAE,CAAC;oBAC5D,IAAI,CAAC,WAAW,CACd,uBAAuB,EAAE,8BACvB,WAAW,CAAC,IACd,OAAO,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAC7C,CAAA;gBACH,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;gBAC1B,OAAO,GAAG,QAAQ,CAAA;YACpB,CAAC;iBAAM,CAAC;gBACN,qDAAqD;gBACrD,uBAAuB;gBACvB,OAAO,GAAG,CAAC,WAAW,CAAC,CAAA;gBAEvB,IAAI,CAAC,uBAAuB,CAAC,CAAC,CAAC,CAAA;gBAC/B,IAAI,CAAC,OAAO,CAAC,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;oBACvC,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;gBAC/C,CAAC;gBACD,IAAI,CAAC,sBAAsB,CAAC,EAAE,CAAC,GAAG,OAAO,CAAA;gBAEzC,qDAAqD;gBACrD,IAAI,CAAC,oBAAoB,CAAC,OAAO,EAAE,EAAE,CAAC,CAAA;YACxC,CAAC;QACH,CAAC,CAAC,CAAA;QAEF,oCAAoC;QACpC,IAAI,CAAC,sBAAsB,CACzB,OAAO,IAAI,CAAC,WAAW,CAAC,EACxB,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,OAAO,EAAE,EAC1C,GAAG,CACJ,CAAA;IACH,CAAC;IAEO,oBAAoB,CAAC,OAAyB,EAAE,EAAU;QAChE,MAAM,UAAU,GAAG,IAAI,CAAC,yBAAyB,CAAC,EAAE,CAAC,CAAA;QACrD,8BAA8B;QAC9B,MAAM;QACN,+EAA+E;QAC/E,2FAA2F;QAC3F,OAAO;QACP,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,OAAM;QACR,CAAC;QACD,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YACpB,GAAG,CAAC,cAAc,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC,CAAA;QAC/C,CAAC,CAAC,CAAA;QACF,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YACpB,GAAG,CAAC,gBAAgB,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,YAAY,CAAC,CAAA;QACvD,CAAC,CAAC,CAAA;QACF,OAAO,IAAI,CAAC,yBAAyB,CA
AC,EAAE,CAAC,CAAA;IAC3C,CAAC;IAEO,WAAW,CAAC,OAAe;QACjC,IAAI,CAAC,GAAG,GAAG,IAAI,CAAA;QACf,IAAI,CAAC,aAAa,CAAC,GAAG,IAAI,CAAC,UAAU,KAAK,OAAO,EAAE,CAAC,CAAA;IACtD,CAAC;IAEO,sBAAsB,CAC5B,OAAyB,EACzB,UAAwD,EACxD,GAAa;QAEb,mEAAmE;QACnE,SAAS,OAAO,CACd,GAAoE,EACpE,KAAa,EACb,KAAa;YAEb,IAAI,MAAM,GAAG,GAAG,CAAC,KAAK,CAAC,CAAA;YACvB,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,MAAM,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,KAAK,CAAC,GAAG,MAAM,CAAA;YACrB,CAAC;YACD,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,KAAK,CAAA;YACxC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAA;YACpB,OAAO,SAAS,CAAA;QAClB,CAAC;QAED,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YAC/B,MAAM,YAAY,GAAG,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAA;YACtD,IAAI,YAAY,EAAE,CAAC;gBACjB,MAAM,KAAK,GAAG,mBAAmB,CAAC,MAAM,CAAA;gBACxC,IACE,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CACf,OAAO,CAAC,IAAI,CAAC,oBAAoB,EAAE,EAAE,EAAE,UAAU,IAAI,EAAE,CAAC,CACzD,CAAC,MAAM,EACR,CAAC;oBACD,YAAY,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE;wBAC9B,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;oBAC/B,CAAC,CAAC,CAAA;gBACJ,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,IAAI,GAAG,GAAG,IAAI,CAAC,yBAAyB,CAAC,IAAI,CAAC,CAAA;gBAC9C,IAAI,CAAC,GAAG,EAAE,CAAC;oBACT,GAAG,GAAG;wBACJ,MAAM,EAAE,EAAE;wBACV,YAAY,EAAE,EAAE;qBACjB,CAAA;oBACD,IAAI,CAAC,yBAAyB,CAAC,IAAI,CAAC,GAAG,GAAG,CAAA;gBAC5C,CAAC;gBACD,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YAC1B,CAAC;QACH,CAAC,CAAC,CAAA;QAEF,UAAU,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YACrC,MAAM,YAAY,GAAG,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAA;YACtD,IAAI,YAAY,EAAE,CAAC;gBACjB,MAAM,KAAK,GAAG,mBAAmB,CAAC,YAAY,CAAA;gBAC9C,IACE,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CACf,OAAO,CAAC,IAAI,CAAC,oBAAoB,EAAE,EAAE,EAAE,gBAAgB,IAAI,EAAE,CAAC,CAC/D,CAAC,MAAM,EACR,CAAC;oBACD,YAAY,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE;wBAC9B,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;oBAC/B,CAAC,CAAC,CAAA;gBACJ,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,IAAI,GAAG,GAAG,IAAI,CAAC,yBAAyB,CAAC,IAAI,CAAC,CAAA;gBAC9C,IAAI,CAAC,GAAG,EAAE,CAAC;oBACT,GAAG,GAAG;wBACJ,MAAM,EAAE,EAAE;wBACV,YAAY,EAAE,EAAE;qBACjB,CAAA;oBACD,IAAI,CAAC,yBAAyB,CAA
C,IAAI,CAAC,GAAG,GAAG,CAAA;gBAC5C,CAAC;gBACD,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YAChC,CAAC;QACH,CAAC,CAAC,CAAA;IACJ,CAAC;CACF"}
package/esm/util.d.ts ADDED
@@ -0,0 +1,166 @@
1
+ /**
2
+ * Unescape a string value used in a GFF3 attribute.
3
+ *
4
+ * @param stringVal - Escaped GFF3 string value
5
+ * @returns An unescaped string value
6
+ */
7
+ export declare function unescape(stringVal: string): string;
8
+ /**
9
+ * Escape a value for use in a GFF3 attribute value.
10
+ *
11
+ * @param rawVal - Raw GFF3 attribute value
12
+ * @returns An escaped string value
13
+ */
14
+ export declare function escape(rawVal: string | number): string;
15
+ /**
16
+ * Escape a value for use in a GFF3 column value.
17
+ *
18
+ * @param rawVal - Raw GFF3 column value
19
+ * @returns An escaped column value
20
+ */
21
+ export declare function escapeColumn(rawVal: string | number): string;
22
+ /**
23
+ * Parse the 9th column (attributes) of a GFF3 feature line.
24
+ *
25
+ * @param attrString - String of GFF3 9th column
26
+ * @returns Parsed attributes
27
+ */
28
+ export declare function parseAttributes(attrString: string): GFF3Attributes;
29
+ /**
30
+ * Parse a GFF3 feature line
31
+ *
32
+ * @param line - GFF3 feature line
33
+ * @returns The parsed feature
34
+ */
35
+ export declare function parseFeature(line: string): GFF3FeatureLine;
36
+ /**
37
+ * Parse a GFF3 directive line.
38
+ *
39
+ * @param line - GFF3 directive line
40
+ * @returns The parsed directive, or `null` (the declared return type allows it; presumably for a line that is not a recognizable directive — confirm against the implementation)
41
+ */
42
+ export declare function parseDirective(line: string): GFF3Directive | GFF3SequenceRegionDirective | GFF3GenomeBuildDirective | null;
43
+ /**
44
+ * Format an attributes object into a string suitable for the 9th column of GFF3.
45
+ *
46
+ * @param attrs - Attributes
47
+ * @returns GFF3 9th column string
48
+ */
49
+ export declare function formatAttributes(attrs: GFF3Attributes): string;
50
+ /**
51
+ * Format a feature object or array of feature objects into one or more lines of
52
+ * GFF3.
53
+ *
54
+ * @param featureOrFeatures - A feature object or array of feature objects
55
+ * @returns A string of one or more GFF3 lines
56
+ */
57
+ export declare function formatFeature(featureOrFeatures: GFF3FeatureLine | GFF3FeatureLineWithRefs | (GFF3FeatureLine | GFF3FeatureLineWithRefs)[]): string;
58
+ /**
59
+ * Format a directive into a line of GFF3.
60
+ *
61
+ * @param directive - A directive object
62
+ * @returns A directive line string
63
+ */
64
+ export declare function formatDirective(directive: GFF3Directive): string;
65
+ /**
66
+ * Format a comment into a GFF3 comment.
67
+ * Yes I know this is just adding a # and a newline.
68
+ *
69
+ * @param comment - A comment object
70
+ * @returns A comment line string
71
+ */
72
+ export declare function formatComment(comment: GFF3Comment): string;
73
+ /**
74
+ * Format a sequence object as FASTA
75
+ *
76
+ * @param seq - A sequence object
77
+ * @returns Formatted single FASTA sequence string
78
+ */
79
+ export declare function formatSequence(seq: GFF3Sequence): string;
80
+ /**
81
+ * Format a directive, comment, sequence, or feature, or array of such items,
82
+ * into one or more lines of GFF3.
83
+ *
84
+ * @param itemOrItems - A directive, comment, sequence, or feature, or array of such items
85
+ * @returns A formatted string or array of strings
86
+ */
87
+ export declare function formatItem(itemOrItems: GFF3FeatureLineWithRefs | GFF3Directive | GFF3Comment | GFF3Sequence | (GFF3FeatureLineWithRefs | GFF3Directive | GFF3Comment | GFF3Sequence)[]): string | string[];
88
+ /** A record of GFF3 attribute identifiers and the values of those identifiers. Each value is an array of strings; looking up an identifier that is not present yields `undefined`. */
89
+ export type GFF3Attributes = Record<string, string[] | undefined>;
90
+ /** A representation of a single line of a GFF3 file */
91
+ export interface GFF3FeatureLine {
92
+ /** The ID of the landmark used to establish the coordinate system for the current feature */
93
+ seq_id: string | null;
94
+ /** A free text qualifier intended to describe the algorithm or operating procedure that generated this feature */
95
+ source: string | null;
96
+ /** The type of the feature */
97
+ type: string | null;
98
+ /** The start coordinates of the feature */
99
+ start: number | null;
100
+ /** The end coordinates of the feature */
101
+ end: number | null;
102
+ /** The score of the feature */
103
+ score: number | null;
104
+ /** The strand of the feature */
105
+ strand: string | null;
106
+ /** For features of type "CDS", the phase indicates where the next codon begins relative to the 5' end of the current CDS feature */
107
+ phase: string | null;
108
+ /** Feature attributes */
109
+ attributes: GFF3Attributes | null;
110
+ }
111
+ /**
112
+ * A GFF3 Feature line that includes references to other features defined in
113
+ * their "Parent" or "Derives_from" attributes
114
+ */
115
+ export interface GFF3FeatureLineWithRefs extends GFF3FeatureLine {
116
+ /** An array of child features */
117
+ child_features: GFF3Feature[];
118
+ /** An array of features derived from this feature */
119
+ derived_features: GFF3Feature[];
120
+ }
121
+ /**
122
+ * A GFF3 feature, which may include multiple individual feature lines
123
+ */
124
+ export type GFF3Feature = GFF3FeatureLineWithRefs[];
125
+ /** A GFF3 directive */
126
+ export interface GFF3Directive {
127
+ /** The name of the directive */
128
+ directive: string;
129
+ /** The string value of the directive */
130
+ value?: string;
131
+ }
132
+ /** A GFF3 sequence-region directive */
133
+ export interface GFF3SequenceRegionDirective extends GFF3Directive {
134
+ /** The string value of the directive (required here, optional on the base GFF3Directive) */
135
+ value: string;
136
+ /** The sequence ID parsed from the directive */
137
+ seq_id: string;
138
+ /** The sequence start parsed from the directive; declared as a string, not a number */
139
+ start: string;
140
+ /** The sequence end parsed from the directive; declared as a string, not a number */
141
+ end: string;
142
+ }
143
+ /** A GFF3 genome-build directive */
144
+ export interface GFF3GenomeBuildDirective extends GFF3Directive {
145
+ /** The string value of the directive */
146
+ value: string;
147
+ /** The genome build source parsed from the directive */
148
+ source: string;
149
+ /** The genome build name parsed from the directive */
150
+ buildName: string;
151
+ }
152
+ /** A GFF3 comment */
153
+ export interface GFF3Comment {
154
+ /** The text of the comment */
155
+ comment: string;
156
+ }
157
+ /** A GFF3 FASTA single sequence */
158
+ export interface GFF3Sequence {
159
+ /** The ID of the sequence */
160
+ id: string;
161
+ /** The description of the sequence; optional (the FASTAParser in parse.js sets it only when the definition line has text after the ID) */
162
+ description?: string;
163
+ /** The sequence */
164
+ sequence: string;
165
+ }
166
+ /** Any one of the item kinds declared in this file: a feature, a directive, a comment, or a FASTA sequence */
+ export type GFF3Item = GFF3Feature | GFF3Directive | GFF3Comment | GFF3Sequence;