gff-nostream 1.3.9 → 2.0.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/api.d.ts +33 -2
- package/dist/api.js +91 -3
- package/dist/api.js.map +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.js +6 -3
- package/dist/index.js.map +1 -1
- package/dist/parse.d.ts +3 -16
- package/dist/parse.js +80 -124
- package/dist/parse.js.map +1 -1
- package/dist/util.d.ts +17 -0
- package/dist/util.js +160 -59
- package/dist/util.js.map +1 -1
- package/esm/api.d.ts +33 -2
- package/esm/api.js +87 -2
- package/esm/api.js.map +1 -1
- package/esm/index.d.ts +3 -3
- package/esm/index.js +2 -2
- package/esm/index.js.map +1 -1
- package/esm/parse.d.ts +3 -16
- package/esm/parse.js +103 -142
- package/esm/parse.js.map +1 -1
- package/esm/util.d.ts +17 -0
- package/esm/util.js +148 -58
- package/esm/util.js.map +1 -1
- package/package.json +27 -16
- package/src/api.ts +113 -12
- package/src/index.ts +6 -5
- package/src/parse.ts +80 -146
- package/src/util.ts +165 -62
package/src/parse.ts
CHANGED
|
@@ -1,44 +1,10 @@
|
|
|
1
|
-
import * as GFF3 from './util'
|
|
1
|
+
import * as GFF3 from './util.ts'
|
|
2
2
|
|
|
3
|
-
const
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
export class FASTAParser {
|
|
9
|
-
seqCallback: (sequence: GFF3.GFF3Sequence) => void
|
|
10
|
-
currentSequence:
|
|
11
|
-
| { id: string; sequence: string; description?: string }
|
|
12
|
-
| undefined
|
|
13
|
-
|
|
14
|
-
constructor(seqCallback: (sequence: GFF3.GFF3Sequence) => void) {
|
|
15
|
-
this.seqCallback = seqCallback
|
|
16
|
-
this.currentSequence = undefined
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
addLine(line: string): void {
|
|
20
|
-
const defMatch = /^>\s*(\S+)\s*(.*)/.exec(line)
|
|
21
|
-
if (defMatch) {
|
|
22
|
-
this._flush()
|
|
23
|
-
this.currentSequence = { id: defMatch[1], sequence: '' }
|
|
24
|
-
if (defMatch[2]) {
|
|
25
|
-
this.currentSequence.description = defMatch[2].trim()
|
|
26
|
-
}
|
|
27
|
-
} else if (this.currentSequence && /\S/.test(line)) {
|
|
28
|
-
this.currentSequence.sequence += line.replaceAll(/\s/g, '')
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
private _flush() {
|
|
33
|
-
if (this.currentSequence) {
|
|
34
|
-
this.seqCallback(this.currentSequence)
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
finish(): void {
|
|
39
|
-
this._flush()
|
|
40
|
-
}
|
|
41
|
-
}
|
|
3
|
+
const featureLineRegex = /^\s*[^#\s>]/
|
|
4
|
+
const commentOrDirectiveRegex = /^\s*(#+)(.*)/
|
|
5
|
+
const blankLineRegex = /^\s*$/
|
|
6
|
+
const fastaStartRegex = /^\s*>/
|
|
7
|
+
const lineEndingRegex = /\r?\n?$/g
|
|
42
8
|
|
|
43
9
|
interface ParserArgs {
|
|
44
10
|
featureCallback?(feature: GFF3.GFF3Feature): void
|
|
@@ -46,7 +12,6 @@ interface ParserArgs {
|
|
|
46
12
|
commentCallback?(comment: GFF3.GFF3Comment): void
|
|
47
13
|
errorCallback?(error: string): void
|
|
48
14
|
directiveCallback?(directive: GFF3.GFF3Directive): void
|
|
49
|
-
sequenceCallback?(sequence: GFF3.GFF3Sequence): void
|
|
50
15
|
bufferSize?: number
|
|
51
16
|
disableDerivesFromReferences?: boolean
|
|
52
17
|
}
|
|
@@ -63,12 +28,7 @@ export default class Parser {
|
|
|
63
28
|
errorCallback: (error: string) => void
|
|
64
29
|
disableDerivesFromReferences: boolean
|
|
65
30
|
directiveCallback: (directive: GFF3.GFF3Directive) => void
|
|
66
|
-
sequenceCallback: (sequence: GFF3.GFF3Sequence) => void
|
|
67
31
|
bufferSize: number
|
|
68
|
-
fastaParser: FASTAParser | undefined = undefined
|
|
69
|
-
// if this is true, the parser ignores the
|
|
70
|
-
// rest of the lines in the file. currently
|
|
71
|
-
// set when the file switches over to FASTA
|
|
72
32
|
eof = false
|
|
73
33
|
lineNumber = 0
|
|
74
34
|
// features that we have to keep on hand for now because they
|
|
@@ -99,7 +59,6 @@ export default class Parser {
|
|
|
99
59
|
this.commentCallback = args.commentCallback || nullFunc
|
|
100
60
|
this.errorCallback = args.errorCallback || nullFunc
|
|
101
61
|
this.directiveCallback = args.directiveCallback || nullFunc
|
|
102
|
-
this.sequenceCallback = args.sequenceCallback || nullFunc
|
|
103
62
|
this.disableDerivesFromReferences =
|
|
104
63
|
args.disableDerivesFromReferences || false
|
|
105
64
|
|
|
@@ -108,68 +67,63 @@ export default class Parser {
|
|
|
108
67
|
}
|
|
109
68
|
|
|
110
69
|
addLine(line: string): void {
|
|
111
|
-
// if we have transitioned to a fasta section, just delegate to that parser
|
|
112
|
-
if (this.fastaParser) {
|
|
113
|
-
this.fastaParser.addLine(line)
|
|
114
|
-
return
|
|
115
|
-
}
|
|
116
70
|
if (this.eof) {
|
|
117
|
-
// otherwise, if we are done, ignore this line
|
|
118
71
|
return
|
|
119
72
|
}
|
|
120
73
|
|
|
121
74
|
this.lineNumber += 1
|
|
122
75
|
|
|
123
|
-
if (
|
|
76
|
+
if (featureLineRegex.test(line)) {
|
|
124
77
|
// feature line, most common case
|
|
125
78
|
this._bufferLine(line)
|
|
126
79
|
return
|
|
127
80
|
}
|
|
128
81
|
|
|
129
|
-
const match =
|
|
82
|
+
const match = commentOrDirectiveRegex.exec(line)
|
|
130
83
|
if (match) {
|
|
131
84
|
// directive or comment
|
|
132
85
|
const [, hashsigns] = match
|
|
133
86
|
let [, , contents] = match
|
|
134
87
|
|
|
135
|
-
if (hashsigns
|
|
88
|
+
if (hashsigns!.length === 3) {
|
|
136
89
|
// sync directive, all forward-references are resolved.
|
|
137
90
|
this._emitAllUnderConstructionFeatures()
|
|
138
|
-
} else if (hashsigns
|
|
91
|
+
} else if (hashsigns!.length === 2) {
|
|
139
92
|
const directive = GFF3.parseDirective(line)
|
|
140
93
|
if (directive) {
|
|
141
94
|
if (directive.directive === 'FASTA') {
|
|
142
95
|
this._emitAllUnderConstructionFeatures()
|
|
143
96
|
this.eof = true
|
|
144
|
-
this.fastaParser = new FASTAParser(this.sequenceCallback)
|
|
145
97
|
} else {
|
|
146
98
|
this._emitItem(directive)
|
|
147
99
|
}
|
|
148
100
|
}
|
|
149
101
|
} else {
|
|
150
|
-
|
|
151
|
-
this._emitItem({ comment: contents })
|
|
102
|
+
this._emitItem({ comment: contents!.trimStart() })
|
|
152
103
|
}
|
|
153
|
-
} else if (
|
|
104
|
+
} else if (blankLineRegex.test(line)) {
|
|
154
105
|
// blank line, do nothing
|
|
155
|
-
} else if (
|
|
156
|
-
// implicit beginning of a FASTA section
|
|
106
|
+
} else if (fastaStartRegex.test(line)) {
|
|
107
|
+
// implicit beginning of a FASTA section, stop parsing
|
|
157
108
|
this._emitAllUnderConstructionFeatures()
|
|
158
109
|
this.eof = true
|
|
159
|
-
this.fastaParser = new FASTAParser(this.sequenceCallback)
|
|
160
|
-
this.fastaParser.addLine(line)
|
|
161
110
|
} else {
|
|
162
111
|
// it's a parse error
|
|
163
|
-
const errLine = line.replaceAll(
|
|
112
|
+
const errLine = line.replaceAll(lineEndingRegex, '')
|
|
164
113
|
throw new Error(`GFF3 parse error. Cannot parse '${errLine}'.`)
|
|
165
114
|
}
|
|
166
115
|
}
|
|
167
116
|
|
|
117
|
+
addParsedFeatureLine(featureLine: GFF3.GFF3FeatureLine): void {
|
|
118
|
+
if (this.eof) {
|
|
119
|
+
return
|
|
120
|
+
}
|
|
121
|
+
this.lineNumber += 1
|
|
122
|
+
this._bufferParsedLine(featureLine)
|
|
123
|
+
}
|
|
124
|
+
|
|
168
125
|
finish(): void {
|
|
169
126
|
this._emitAllUnderConstructionFeatures()
|
|
170
|
-
if (this.fastaParser) {
|
|
171
|
-
this.fastaParser.finish()
|
|
172
|
-
}
|
|
173
127
|
this.endCallback()
|
|
174
128
|
}
|
|
175
129
|
|
|
@@ -194,11 +148,17 @@ export default class Parser {
|
|
|
194
148
|
delete this._completedReferences[id]
|
|
195
149
|
})
|
|
196
150
|
item.forEach(i => {
|
|
151
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
197
152
|
if (i.child_features) {
|
|
198
|
-
i.child_features.forEach(c =>
|
|
153
|
+
i.child_features.forEach(c => {
|
|
154
|
+
_unbufferItem(c)
|
|
155
|
+
})
|
|
199
156
|
}
|
|
157
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
200
158
|
if (i.derived_features) {
|
|
201
|
-
i.derived_features.forEach(d =>
|
|
159
|
+
i.derived_features.forEach(d => {
|
|
160
|
+
_unbufferItem(d)
|
|
161
|
+
})
|
|
202
162
|
}
|
|
203
163
|
})
|
|
204
164
|
}
|
|
@@ -229,26 +189,23 @@ export default class Parser {
|
|
|
229
189
|
|
|
230
190
|
// if we have any orphans hanging around still, this is a
|
|
231
191
|
// problem. die with a parse error
|
|
232
|
-
|
|
192
|
+
const orphanKeys = Object.keys(this._underConstructionOrphans)
|
|
193
|
+
if (orphanKeys.length) {
|
|
233
194
|
throw new Error(
|
|
234
|
-
`some features reference other features that do not exist in the file (or in the same '###' scope). ${
|
|
235
|
-
this._underConstructionOrphans,
|
|
236
|
-
).join(',')}`,
|
|
195
|
+
`some features reference other features that do not exist in the file (or in the same '###' scope). ${orphanKeys.join(',')}`,
|
|
237
196
|
)
|
|
238
197
|
}
|
|
239
198
|
}
|
|
240
199
|
|
|
241
|
-
// do the right thing with a newly-parsed feature line
|
|
242
200
|
private _bufferLine(line: string) {
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
201
|
+
this._bufferParsedLine(GFF3.parseFeature(line))
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
private _bufferParsedLine(rawFeatureLine: GFF3.GFF3FeatureLine) {
|
|
205
|
+
const featureLine = rawFeatureLine as GFF3.GFF3FeatureLineWithRefs
|
|
206
|
+
featureLine.child_features = []
|
|
207
|
+
featureLine.derived_features = []
|
|
250
208
|
|
|
251
|
-
// NOTE: a feature is an arrayref of one or more feature lines.
|
|
252
209
|
const ids = featureLine.attributes?.ID || []
|
|
253
210
|
const parents = featureLine.attributes?.Parent || []
|
|
254
211
|
const derives = this.disableDerivesFromReferences
|
|
@@ -256,8 +213,6 @@ export default class Parser {
|
|
|
256
213
|
: featureLine.attributes?.Derives_from || []
|
|
257
214
|
|
|
258
215
|
if (!ids.length && !parents.length && !derives.length) {
|
|
259
|
-
// if it has no IDs and does not refer to anything, we can just
|
|
260
|
-
// output it
|
|
261
216
|
this._emitItem([featureLine])
|
|
262
217
|
return
|
|
263
218
|
}
|
|
@@ -266,7 +221,6 @@ export default class Parser {
|
|
|
266
221
|
ids.forEach(id => {
|
|
267
222
|
const existing = this._underConstructionById[id]
|
|
268
223
|
if (existing) {
|
|
269
|
-
// another location of the same feature
|
|
270
224
|
if (existing[existing.length - 1].type !== featureLine.type) {
|
|
271
225
|
this._parseError(
|
|
272
226
|
`multi-line feature "${id}" has inconsistent types: "${
|
|
@@ -277,8 +231,6 @@ export default class Parser {
|
|
|
277
231
|
existing.push(featureLine)
|
|
278
232
|
feature = existing
|
|
279
233
|
} else {
|
|
280
|
-
// haven't seen it yet, so buffer it so we can attach
|
|
281
|
-
// child features to it
|
|
282
234
|
feature = [featureLine]
|
|
283
235
|
|
|
284
236
|
this._enforceBufferSizeLimit(1)
|
|
@@ -287,13 +239,12 @@ export default class Parser {
|
|
|
287
239
|
}
|
|
288
240
|
this._underConstructionById[id] = feature
|
|
289
241
|
|
|
290
|
-
// see if we have anything buffered that refers to it
|
|
291
242
|
this._resolveReferencesTo(feature, id)
|
|
292
243
|
}
|
|
293
244
|
})
|
|
294
245
|
|
|
295
|
-
// try to resolve all its references
|
|
296
246
|
this._resolveReferencesFrom(
|
|
247
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
297
248
|
feature || [featureLine],
|
|
298
249
|
{ Parent: parents, Derives_from: derives },
|
|
299
250
|
ids,
|
|
@@ -302,20 +253,13 @@ export default class Parser {
|
|
|
302
253
|
|
|
303
254
|
private _resolveReferencesTo(feature: GFF3.GFF3Feature, id: string) {
|
|
304
255
|
const references = this._underConstructionOrphans[id]
|
|
305
|
-
// references is of the form
|
|
306
|
-
// {
|
|
307
|
-
// 'Parent' : [ orphans that have a Parent attr referencing this feature ],
|
|
308
|
-
// 'Derives_from' : [ orphans that have a Derives_from attr referencing this feature ],
|
|
309
|
-
// }
|
|
310
256
|
if (!references) {
|
|
311
257
|
return
|
|
312
258
|
}
|
|
313
|
-
|
|
259
|
+
for (const loc of feature) {
|
|
314
260
|
loc.child_features.push(...references.Parent)
|
|
315
|
-
})
|
|
316
|
-
feature.forEach(loc => {
|
|
317
261
|
loc.derived_features.push(...references.Derives_from)
|
|
318
|
-
}
|
|
262
|
+
}
|
|
319
263
|
delete this._underConstructionOrphans[id]
|
|
320
264
|
}
|
|
321
265
|
|
|
@@ -329,72 +273,62 @@ export default class Parser {
|
|
|
329
273
|
references: { Parent: string[]; Derives_from: string[] },
|
|
330
274
|
ids: string[],
|
|
331
275
|
) {
|
|
332
|
-
|
|
333
|
-
function postSet(
|
|
334
|
-
obj: Record<string, Record<string, boolean | undefined> | undefined>,
|
|
335
|
-
slot1: string,
|
|
336
|
-
slot2: string,
|
|
337
|
-
) {
|
|
338
|
-
let subObj = obj[slot1]
|
|
339
|
-
if (!subObj) {
|
|
340
|
-
subObj = {}
|
|
341
|
-
obj[slot1] = subObj
|
|
342
|
-
}
|
|
343
|
-
const returnVal = subObj[slot2] || false
|
|
344
|
-
subObj[slot2] = true
|
|
345
|
-
return returnVal
|
|
346
|
-
}
|
|
347
|
-
|
|
348
|
-
references.Parent.forEach(toId => {
|
|
276
|
+
for (const toId of references.Parent) {
|
|
349
277
|
const otherFeature = this._underConstructionById[toId]
|
|
350
278
|
if (otherFeature) {
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
}
|
|
279
|
+
let dominated = false
|
|
280
|
+
for (const id of ids) {
|
|
281
|
+
const domKey = `Parent,${toId}`
|
|
282
|
+
const rec =
|
|
283
|
+
this._completedReferences[id] ||
|
|
284
|
+
(this._completedReferences[id] = {})
|
|
285
|
+
if (rec[domKey]) {
|
|
286
|
+
dominated = true
|
|
287
|
+
}
|
|
288
|
+
rec[domKey] = true
|
|
289
|
+
}
|
|
290
|
+
if (!dominated) {
|
|
291
|
+
for (const location of otherFeature) {
|
|
292
|
+
location.child_features.push(feature)
|
|
293
|
+
}
|
|
360
294
|
}
|
|
361
295
|
} else {
|
|
362
296
|
let ref = this._underConstructionOrphans[toId]
|
|
363
297
|
if (!ref) {
|
|
364
|
-
ref = {
|
|
365
|
-
Parent: [],
|
|
366
|
-
Derives_from: [],
|
|
367
|
-
}
|
|
298
|
+
ref = { Parent: [], Derives_from: [] }
|
|
368
299
|
this._underConstructionOrphans[toId] = ref
|
|
369
300
|
}
|
|
370
301
|
ref.Parent.push(feature)
|
|
371
302
|
}
|
|
372
|
-
}
|
|
303
|
+
}
|
|
373
304
|
|
|
374
|
-
references.Derives_from
|
|
305
|
+
for (const toId of references.Derives_from) {
|
|
375
306
|
const otherFeature = this._underConstructionById[toId]
|
|
376
307
|
if (otherFeature) {
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
}
|
|
308
|
+
let dominated = false
|
|
309
|
+
for (const id of ids) {
|
|
310
|
+
const domKey = `Derives_from,${toId}`
|
|
311
|
+
const rec =
|
|
312
|
+
this._completedReferences[id] ||
|
|
313
|
+
(this._completedReferences[id] = {})
|
|
314
|
+
if (rec[domKey]) {
|
|
315
|
+
dominated = true
|
|
316
|
+
}
|
|
317
|
+
rec[domKey] = true
|
|
318
|
+
}
|
|
319
|
+
if (!dominated) {
|
|
320
|
+
for (const location of otherFeature) {
|
|
321
|
+
location.derived_features.push(feature)
|
|
322
|
+
}
|
|
386
323
|
}
|
|
387
324
|
} else {
|
|
388
325
|
let ref = this._underConstructionOrphans[toId]
|
|
389
326
|
if (!ref) {
|
|
390
|
-
ref = {
|
|
391
|
-
Parent: [],
|
|
392
|
-
Derives_from: [],
|
|
393
|
-
}
|
|
327
|
+
ref = { Parent: [], Derives_from: [] }
|
|
394
328
|
this._underConstructionOrphans[toId] = ref
|
|
395
329
|
}
|
|
396
330
|
ref.Derives_from.push(feature)
|
|
397
331
|
}
|
|
398
|
-
}
|
|
332
|
+
}
|
|
399
333
|
}
|
|
400
334
|
}
|