gff-nostream 1.3.9 → 2.0.1

This diff shows the differences between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/src/parse.ts CHANGED
@@ -1,44 +1,10 @@
1
- import * as GFF3 from './util'
1
+ import * as GFF3 from './util.ts'
2
2
 
3
- const containerAttributes = {
4
- Parent: 'child_features' as const,
5
- Derives_from: 'derived_features' as const,
6
- }
7
-
8
- export class FASTAParser {
9
- seqCallback: (sequence: GFF3.GFF3Sequence) => void
10
- currentSequence:
11
- | { id: string; sequence: string; description?: string }
12
- | undefined
13
-
14
- constructor(seqCallback: (sequence: GFF3.GFF3Sequence) => void) {
15
- this.seqCallback = seqCallback
16
- this.currentSequence = undefined
17
- }
18
-
19
- addLine(line: string): void {
20
- const defMatch = /^>\s*(\S+)\s*(.*)/.exec(line)
21
- if (defMatch) {
22
- this._flush()
23
- this.currentSequence = { id: defMatch[1], sequence: '' }
24
- if (defMatch[2]) {
25
- this.currentSequence.description = defMatch[2].trim()
26
- }
27
- } else if (this.currentSequence && /\S/.test(line)) {
28
- this.currentSequence.sequence += line.replaceAll(/\s/g, '')
29
- }
30
- }
31
-
32
- private _flush() {
33
- if (this.currentSequence) {
34
- this.seqCallback(this.currentSequence)
35
- }
36
- }
37
-
38
- finish(): void {
39
- this._flush()
40
- }
41
- }
3
+ const featureLineRegex = /^\s*[^#\s>]/
4
+ const commentOrDirectiveRegex = /^\s*(#+)(.*)/
5
+ const blankLineRegex = /^\s*$/
6
+ const fastaStartRegex = /^\s*>/
7
+ const lineEndingRegex = /\r?\n?$/g
42
8
 
43
9
  interface ParserArgs {
44
10
  featureCallback?(feature: GFF3.GFF3Feature): void
@@ -46,7 +12,6 @@ interface ParserArgs {
46
12
  commentCallback?(comment: GFF3.GFF3Comment): void
47
13
  errorCallback?(error: string): void
48
14
  directiveCallback?(directive: GFF3.GFF3Directive): void
49
- sequenceCallback?(sequence: GFF3.GFF3Sequence): void
50
15
  bufferSize?: number
51
16
  disableDerivesFromReferences?: boolean
52
17
  }
@@ -63,12 +28,7 @@ export default class Parser {
63
28
  errorCallback: (error: string) => void
64
29
  disableDerivesFromReferences: boolean
65
30
  directiveCallback: (directive: GFF3.GFF3Directive) => void
66
- sequenceCallback: (sequence: GFF3.GFF3Sequence) => void
67
31
  bufferSize: number
68
- fastaParser: FASTAParser | undefined = undefined
69
- // if this is true, the parser ignores the
70
- // rest of the lines in the file. currently
71
- // set when the file switches over to FASTA
72
32
  eof = false
73
33
  lineNumber = 0
74
34
  // features that we have to keep on hand for now because they
@@ -99,7 +59,6 @@ export default class Parser {
99
59
  this.commentCallback = args.commentCallback || nullFunc
100
60
  this.errorCallback = args.errorCallback || nullFunc
101
61
  this.directiveCallback = args.directiveCallback || nullFunc
102
- this.sequenceCallback = args.sequenceCallback || nullFunc
103
62
  this.disableDerivesFromReferences =
104
63
  args.disableDerivesFromReferences || false
105
64
 
@@ -108,68 +67,63 @@ export default class Parser {
108
67
  }
109
68
 
110
69
  addLine(line: string): void {
111
- // if we have transitioned to a fasta section, just delegate to that parser
112
- if (this.fastaParser) {
113
- this.fastaParser.addLine(line)
114
- return
115
- }
116
70
  if (this.eof) {
117
- // otherwise, if we are done, ignore this line
118
71
  return
119
72
  }
120
73
 
121
74
  this.lineNumber += 1
122
75
 
123
- if (/^\s*[^#\s>]/.test(line)) {
76
+ if (featureLineRegex.test(line)) {
124
77
  // feature line, most common case
125
78
  this._bufferLine(line)
126
79
  return
127
80
  }
128
81
 
129
- const match = /^\s*(#+)(.*)/.exec(line)
82
+ const match = commentOrDirectiveRegex.exec(line)
130
83
  if (match) {
131
84
  // directive or comment
132
85
  const [, hashsigns] = match
133
86
  let [, , contents] = match
134
87
 
135
- if (hashsigns.length === 3) {
88
+ if (hashsigns!.length === 3) {
136
89
  // sync directive, all forward-references are resolved.
137
90
  this._emitAllUnderConstructionFeatures()
138
- } else if (hashsigns.length === 2) {
91
+ } else if (hashsigns!.length === 2) {
139
92
  const directive = GFF3.parseDirective(line)
140
93
  if (directive) {
141
94
  if (directive.directive === 'FASTA') {
142
95
  this._emitAllUnderConstructionFeatures()
143
96
  this.eof = true
144
- this.fastaParser = new FASTAParser(this.sequenceCallback)
145
97
  } else {
146
98
  this._emitItem(directive)
147
99
  }
148
100
  }
149
101
  } else {
150
- contents = contents.replace(/\s*/, '')
151
- this._emitItem({ comment: contents })
102
+ this._emitItem({ comment: contents!.trimStart() })
152
103
  }
153
- } else if (/^\s*$/.test(line)) {
104
+ } else if (blankLineRegex.test(line)) {
154
105
  // blank line, do nothing
155
- } else if (/^\s*>/.test(line)) {
156
- // implicit beginning of a FASTA section
106
+ } else if (fastaStartRegex.test(line)) {
107
+ // implicit beginning of a FASTA section, stop parsing
157
108
  this._emitAllUnderConstructionFeatures()
158
109
  this.eof = true
159
- this.fastaParser = new FASTAParser(this.sequenceCallback)
160
- this.fastaParser.addLine(line)
161
110
  } else {
162
111
  // it's a parse error
163
- const errLine = line.replaceAll(/\r?\n?$/g, '')
112
+ const errLine = line.replaceAll(lineEndingRegex, '')
164
113
  throw new Error(`GFF3 parse error. Cannot parse '${errLine}'.`)
165
114
  }
166
115
  }
167
116
 
117
+ addParsedFeatureLine(featureLine: GFF3.GFF3FeatureLine): void {
118
+ if (this.eof) {
119
+ return
120
+ }
121
+ this.lineNumber += 1
122
+ this._bufferParsedLine(featureLine)
123
+ }
124
+
168
125
  finish(): void {
169
126
  this._emitAllUnderConstructionFeatures()
170
- if (this.fastaParser) {
171
- this.fastaParser.finish()
172
- }
173
127
  this.endCallback()
174
128
  }
175
129
 
@@ -194,11 +148,17 @@ export default class Parser {
194
148
  delete this._completedReferences[id]
195
149
  })
196
150
  item.forEach(i => {
151
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
197
152
  if (i.child_features) {
198
- i.child_features.forEach(c => _unbufferItem(c))
153
+ i.child_features.forEach(c => {
154
+ _unbufferItem(c)
155
+ })
199
156
  }
157
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
200
158
  if (i.derived_features) {
201
- i.derived_features.forEach(d => _unbufferItem(d))
159
+ i.derived_features.forEach(d => {
160
+ _unbufferItem(d)
161
+ })
202
162
  }
203
163
  })
204
164
  }
@@ -229,26 +189,23 @@ export default class Parser {
229
189
 
230
190
  // if we have any orphans hanging around still, this is a
231
191
  // problem. die with a parse error
232
- if (Array.from(Object.values(this._underConstructionOrphans)).length) {
192
+ const orphanKeys = Object.keys(this._underConstructionOrphans)
193
+ if (orphanKeys.length) {
233
194
  throw new Error(
234
- `some features reference other features that do not exist in the file (or in the same '###' scope). ${Object.keys(
235
- this._underConstructionOrphans,
236
- ).join(',')}`,
195
+ `some features reference other features that do not exist in the file (or in the same '###' scope). ${orphanKeys.join(',')}`,
237
196
  )
238
197
  }
239
198
  }
240
199
 
241
- // do the right thing with a newly-parsed feature line
242
200
  private _bufferLine(line: string) {
243
- const rawFeatureLine = GFF3.parseFeature(line)
244
- const featureLine: GFF3.GFF3FeatureLineWithRefs = {
245
- ...rawFeatureLine,
246
- child_features: [],
247
- derived_features: [],
248
- }
249
- // featureLine._lineNumber = this.lineNumber //< debugging aid
201
+ this._bufferParsedLine(GFF3.parseFeature(line))
202
+ }
203
+
204
+ private _bufferParsedLine(rawFeatureLine: GFF3.GFF3FeatureLine) {
205
+ const featureLine = rawFeatureLine as GFF3.GFF3FeatureLineWithRefs
206
+ featureLine.child_features = []
207
+ featureLine.derived_features = []
250
208
 
251
- // NOTE: a feature is an arrayref of one or more feature lines.
252
209
  const ids = featureLine.attributes?.ID || []
253
210
  const parents = featureLine.attributes?.Parent || []
254
211
  const derives = this.disableDerivesFromReferences
@@ -256,8 +213,6 @@ export default class Parser {
256
213
  : featureLine.attributes?.Derives_from || []
257
214
 
258
215
  if (!ids.length && !parents.length && !derives.length) {
259
- // if it has no IDs and does not refer to anything, we can just
260
- // output it
261
216
  this._emitItem([featureLine])
262
217
  return
263
218
  }
@@ -266,7 +221,6 @@ export default class Parser {
266
221
  ids.forEach(id => {
267
222
  const existing = this._underConstructionById[id]
268
223
  if (existing) {
269
- // another location of the same feature
270
224
  if (existing[existing.length - 1].type !== featureLine.type) {
271
225
  this._parseError(
272
226
  `multi-line feature "${id}" has inconsistent types: "${
@@ -277,8 +231,6 @@ export default class Parser {
277
231
  existing.push(featureLine)
278
232
  feature = existing
279
233
  } else {
280
- // haven't seen it yet, so buffer it so we can attach
281
- // child features to it
282
234
  feature = [featureLine]
283
235
 
284
236
  this._enforceBufferSizeLimit(1)
@@ -287,13 +239,12 @@ export default class Parser {
287
239
  }
288
240
  this._underConstructionById[id] = feature
289
241
 
290
- // see if we have anything buffered that refers to it
291
242
  this._resolveReferencesTo(feature, id)
292
243
  }
293
244
  })
294
245
 
295
- // try to resolve all its references
296
246
  this._resolveReferencesFrom(
247
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
297
248
  feature || [featureLine],
298
249
  { Parent: parents, Derives_from: derives },
299
250
  ids,
@@ -302,20 +253,13 @@ export default class Parser {
302
253
 
303
254
  private _resolveReferencesTo(feature: GFF3.GFF3Feature, id: string) {
304
255
  const references = this._underConstructionOrphans[id]
305
- // references is of the form
306
- // {
307
- // 'Parent' : [ orphans that have a Parent attr referencing this feature ],
308
- // 'Derives_from' : [ orphans that have a Derives_from attr referencing this feature ],
309
- // }
310
256
  if (!references) {
311
257
  return
312
258
  }
313
- feature.forEach(loc => {
259
+ for (const loc of feature) {
314
260
  loc.child_features.push(...references.Parent)
315
- })
316
- feature.forEach(loc => {
317
261
  loc.derived_features.push(...references.Derives_from)
318
- })
262
+ }
319
263
  delete this._underConstructionOrphans[id]
320
264
  }
321
265
 
@@ -329,72 +273,62 @@ export default class Parser {
329
273
  references: { Parent: string[]; Derives_from: string[] },
330
274
  ids: string[],
331
275
  ) {
332
- // this is all a bit more awkward in javascript than it was in perl
333
- function postSet(
334
- obj: Record<string, Record<string, boolean | undefined> | undefined>,
335
- slot1: string,
336
- slot2: string,
337
- ) {
338
- let subObj = obj[slot1]
339
- if (!subObj) {
340
- subObj = {}
341
- obj[slot1] = subObj
342
- }
343
- const returnVal = subObj[slot2] || false
344
- subObj[slot2] = true
345
- return returnVal
346
- }
347
-
348
- references.Parent.forEach(toId => {
276
+ for (const toId of references.Parent) {
349
277
  const otherFeature = this._underConstructionById[toId]
350
278
  if (otherFeature) {
351
- const pname = containerAttributes.Parent
352
- if (
353
- !ids.filter(id =>
354
- postSet(this._completedReferences, id, `Parent,${toId}`),
355
- ).length
356
- ) {
357
- otherFeature.forEach(location => {
358
- location[pname].push(feature)
359
- })
279
+ let dominated = false
280
+ for (const id of ids) {
281
+ const domKey = `Parent,${toId}`
282
+ const rec =
283
+ this._completedReferences[id] ||
284
+ (this._completedReferences[id] = {})
285
+ if (rec[domKey]) {
286
+ dominated = true
287
+ }
288
+ rec[domKey] = true
289
+ }
290
+ if (!dominated) {
291
+ for (const location of otherFeature) {
292
+ location.child_features.push(feature)
293
+ }
360
294
  }
361
295
  } else {
362
296
  let ref = this._underConstructionOrphans[toId]
363
297
  if (!ref) {
364
- ref = {
365
- Parent: [],
366
- Derives_from: [],
367
- }
298
+ ref = { Parent: [], Derives_from: [] }
368
299
  this._underConstructionOrphans[toId] = ref
369
300
  }
370
301
  ref.Parent.push(feature)
371
302
  }
372
- })
303
+ }
373
304
 
374
- references.Derives_from.forEach(toId => {
305
+ for (const toId of references.Derives_from) {
375
306
  const otherFeature = this._underConstructionById[toId]
376
307
  if (otherFeature) {
377
- const pname = containerAttributes.Derives_from
378
- if (
379
- !ids.filter(id =>
380
- postSet(this._completedReferences, id, `Derives_from,${toId}`),
381
- ).length
382
- ) {
383
- otherFeature.forEach(location => {
384
- location[pname].push(feature)
385
- })
308
+ let dominated = false
309
+ for (const id of ids) {
310
+ const domKey = `Derives_from,${toId}`
311
+ const rec =
312
+ this._completedReferences[id] ||
313
+ (this._completedReferences[id] = {})
314
+ if (rec[domKey]) {
315
+ dominated = true
316
+ }
317
+ rec[domKey] = true
318
+ }
319
+ if (!dominated) {
320
+ for (const location of otherFeature) {
321
+ location.derived_features.push(feature)
322
+ }
386
323
  }
387
324
  } else {
388
325
  let ref = this._underConstructionOrphans[toId]
389
326
  if (!ref) {
390
- ref = {
391
- Parent: [],
392
- Derives_from: [],
393
- }
327
+ ref = { Parent: [], Derives_from: [] }
394
328
  this._underConstructionOrphans[toId] = ref
395
329
  }
396
330
  ref.Derives_from.push(feature)
397
331
  }
398
- })
332
+ }
399
333
  }
400
334
  }