gff-nostream 3.0.5 → 3.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/api.ts CHANGED
@@ -14,8 +14,10 @@ import type {
14
14
  export interface LineRecord {
15
15
  line: string
16
16
  lineHash?: string | number
17
- start: number
18
- end: number
17
+ /** Optional passthrough byte offsets — not used by the parser */
18
+ start?: number
19
+ /** Optional passthrough byte offsets — not used by the parser */
20
+ end?: number
19
21
  hasEscapes: boolean
20
22
  }
21
23
 
@@ -44,19 +46,14 @@ function stringToRecords(str: string) {
44
46
  const lines = str.split(/\r?\n/)
45
47
  const records: LineRecord[] = []
46
48
  for (const line of lines) {
49
+ if (line.startsWith('##FASTA') || line.startsWith('>')) {
50
+ break
51
+ }
47
52
  if (line.length === 0 || line.startsWith('#')) {
48
- if (line.startsWith('##FASTA')) {
49
- break
50
- }
51
53
  continue
52
54
  }
53
- if (line.startsWith('>')) {
54
- break
55
- }
56
55
  records.push({
57
56
  line,
58
- start: 0,
59
- end: 0,
60
57
  hasEscapes: line.includes('%'),
61
58
  })
62
59
  }
@@ -95,48 +92,52 @@ export function parseRecords(records: LineRecord[]): GFF3Feature[] {
95
92
 
96
93
  if (!ids && !parents) {
97
94
  items.push([featureLine])
98
- continue
99
- }
100
-
101
- let feature: GFF3Feature
102
- if (ids) {
103
- const id = ids[0]!
104
- const existing = byId.get(id)
105
- if (existing) {
106
- existing.push(featureLine)
107
- feature = existing
108
- } else {
109
- feature = [featureLine]
110
- if (!parents) {
111
- items.push(feature)
112
- }
113
- byId.set(id, feature)
114
- const waiting = orphans.get(id)
115
- if (waiting) {
116
- for (const w of waiting) {
117
- featureLine.child_features.push(w)
95
+ } else {
96
+ let feature: GFF3Feature
97
+ if (ids) {
98
+ const id = ids[0]!
99
+ const existing = byId.get(id)
100
+ if (existing) {
101
+ // Multi-location continuation: share child_features/derived_features
102
+ // with the first line so children remain visible across all lines
103
+ // regardless of arrival order.
104
+ featureLine.child_features = existing[0]!.child_features
105
+ featureLine.derived_features = existing[0]!.derived_features
106
+ existing.push(featureLine)
107
+ feature = existing
108
+ } else {
109
+ feature = [featureLine]
110
+ if (!parents) {
111
+ items.push(feature)
112
+ }
113
+ byId.set(id, feature)
114
+ const waiting = orphans.get(id)
115
+ if (waiting) {
116
+ for (const w of waiting) {
117
+ featureLine.child_features.push(w)
118
+ }
119
+ orphans.delete(id)
118
120
  }
119
- orphans.delete(id)
120
121
  }
122
+ } else {
123
+ feature = [featureLine]
121
124
  }
122
- } else {
123
- feature = [featureLine]
124
- }
125
125
 
126
- if (parents) {
127
- for (const parentId of parents) {
128
- const parent = byId.get(parentId)
129
- if (parent) {
130
- for (const p of parent) {
131
- p.child_features.push(feature)
126
+ if (parents) {
127
+ for (const parentId of parents) {
128
+ const parent = byId.get(parentId)
129
+ if (parent) {
130
+ // child_features is shared across all parent feature lines,
131
+ // so push once via the first line.
132
+ parent[0]!.child_features.push(feature)
133
+ } else {
134
+ let arr = orphans.get(parentId)
135
+ if (!arr) {
136
+ arr = []
137
+ orphans.set(parentId, arr)
138
+ }
139
+ arr.push(feature)
132
140
  }
133
- } else {
134
- let arr = orphans.get(parentId)
135
- if (!arr) {
136
- arr = []
137
- orphans.set(parentId, arr)
138
- }
139
- arr.push(feature)
140
141
  }
141
142
  }
142
143
  }
@@ -166,17 +167,16 @@ export function parseRecordsJBrowse(records: LineRecord[]): JBrowseFeature[] {
166
167
  feature._lineHash = String(record.lineHash)
167
168
  }
168
169
 
169
- const id = feature.id as string | undefined
170
+ // attribute parsing collapses single-element arrays to scalars, so id can
171
+ // be string | string[]; defensively take the first if multi-valued.
172
+ const rawId = feature.id as string | string[] | undefined
173
+ const id = Array.isArray(rawId) ? rawId[0] : rawId
170
174
  const parent = feature.parent as string | string[] | undefined
171
175
 
172
176
  if (!id && !parent) {
173
177
  items.push(feature)
174
- continue
175
- }
176
-
177
- if (id) {
178
- const existing = byId.get(id)
179
- if (!existing) {
178
+ } else if (!id || !byId.has(id)) {
179
+ if (id) {
180
180
  if (!parent) {
181
181
  items.push(feature)
182
182
  }
@@ -189,21 +189,21 @@ export function parseRecordsJBrowse(records: LineRecord[]): JBrowseFeature[] {
189
189
  orphans.delete(id)
190
190
  }
191
191
  }
192
- }
193
192
 
194
- if (parent) {
195
- const parents = Array.isArray(parent) ? parent : [parent]
196
- for (const parentId of parents) {
197
- const parentFeature = byId.get(parentId)
198
- if (parentFeature) {
199
- parentFeature.subfeatures.push(feature)
200
- } else {
201
- let arr = orphans.get(parentId)
202
- if (!arr) {
203
- arr = []
204
- orphans.set(parentId, arr)
193
+ if (parent) {
194
+ const parents = Array.isArray(parent) ? parent : [parent]
195
+ for (const parentId of parents) {
196
+ const parentFeature = byId.get(parentId)
197
+ if (parentFeature) {
198
+ parentFeature.subfeatures.push(feature)
199
+ } else {
200
+ let arr = orphans.get(parentId)
201
+ if (!arr) {
202
+ arr = []
203
+ orphans.set(parentId, arr)
204
+ }
205
+ arr.push(feature)
205
206
  }
206
- arr.push(feature)
207
207
  }
208
208
  }
209
209
  }
package/src/util.ts CHANGED
@@ -2,7 +2,6 @@
2
2
  // JavaScript port of Robert Buels's Bio::GFF3::LowLevel Perl module.
3
3
 
4
4
  const directiveRegex = /^\s*##\s*(\S+)\s*(.*)/
5
- const lineEndRegex = /\r?\n$/
6
5
  const whitespaceRegex = /\s+/
7
6
  const nonDigitRegex = /\D/g
8
7
 
@@ -123,11 +122,20 @@ export function parseAttributesNoUnescape(attrString: string): GFF3Attributes {
123
122
  return parseAttributesImpl(attrString, false)
124
123
  }
125
124
 
126
- function normImpl(s: string, shouldUnescape: boolean) {
127
- if (s.length === 0 || s === '.') {
128
- return null
129
- }
130
- return shouldUnescape ? unescape(s) : s
125
+ function isEmpty(s: string) {
126
+ return s.length === 0 || s === '.'
127
+ }
128
+
129
+ function strField<E extends null | ''>(
130
+ s: string,
131
+ shouldUnescape: boolean,
132
+ empty: E,
133
+ ) {
134
+ return isEmpty(s) ? empty : shouldUnescape ? unescape(s) : s
135
+ }
136
+
137
+ function numField(s: string) {
138
+ return isEmpty(s) ? null : +s
131
139
  }
132
140
 
133
141
  function parseFeatureImpl(
@@ -135,26 +143,19 @@ function parseFeatureImpl(
135
143
  shouldUnescape: boolean,
136
144
  ): GFF3FeatureLine {
137
145
  const f = line.split('\t')
138
- const startStr = f[3]!
139
- const endStr = f[4]!
140
- const scoreStr = f[5]!
141
- const strand = f[6]!
142
- const phase = f[7]!
143
146
  const attrString = f[8]!
144
-
145
147
  return {
146
- seq_id: normImpl(f[0]!, shouldUnescape),
147
- source: normImpl(f[1]!, shouldUnescape),
148
- type: normImpl(f[2]!, shouldUnescape),
149
- start: startStr.length === 0 || startStr === '.' ? null : +startStr,
150
- end: endStr.length === 0 || endStr === '.' ? null : +endStr,
151
- score: scoreStr.length === 0 || scoreStr === '.' ? null : +scoreStr,
152
- strand: normImpl(strand, false),
153
- phase: normImpl(phase, false),
154
- attributes:
155
- attrString.length === 0 || attrString === '.'
156
- ? null
157
- : parseAttributesImpl(attrString, shouldUnescape),
148
+ seq_id: strField(f[0]!, shouldUnescape, null),
149
+ source: strField(f[1]!, shouldUnescape, null),
150
+ type: strField(f[2]!, shouldUnescape, null),
151
+ start: numField(f[3]!),
152
+ end: numField(f[4]!),
153
+ score: numField(f[5]!),
154
+ strand: strField(f[6]!, false, null),
155
+ phase: strField(f[7]!, false, null),
156
+ attributes: isEmpty(attrString)
157
+ ? null
158
+ : parseAttributesImpl(attrString, shouldUnescape),
158
159
  }
159
160
  }
160
161
 
@@ -179,61 +180,6 @@ export function parseFeatureNoUnescape(line: string): GFF3FeatureLine {
179
180
  return parseFeatureImpl(line, false)
180
181
  }
181
182
 
182
- function parseFieldsArrayImpl(
183
- f: (string | null | undefined)[],
184
- shouldUnescape: boolean,
185
- ): GFF3FeatureLine {
186
- const seq_id = f[0]
187
- const source = f[1]
188
- const type = f[2]
189
- const startStr = f[3]
190
- const endStr = f[4]
191
- const scoreStr = f[5]
192
- const strand = f[6]
193
- const phase = f[7]
194
- const attrString = f[8]
195
-
196
- return {
197
- seq_id: seq_id ? normImpl(seq_id, shouldUnescape) : null,
198
- source: source ? normImpl(source, shouldUnescape) : null,
199
- type: type ? normImpl(type, shouldUnescape) : null,
200
- start: !startStr || startStr === '.' ? null : +startStr,
201
- end: !endStr || endStr === '.' ? null : +endStr,
202
- score: !scoreStr || scoreStr === '.' ? null : +scoreStr,
203
- strand: strand && strand !== '.' ? strand : null,
204
- phase: phase && phase !== '.' ? phase : null,
205
- attributes:
206
- !attrString || attrString === '.'
207
- ? null
208
- : parseAttributesImpl(attrString, shouldUnescape),
209
- }
210
- }
211
-
212
- /**
213
- * Parse a GFF3 feature from a pre-split fields array
214
- *
215
- * @param f - Array of 9 GFF3 column values (use null or '.' for empty values)
216
- * @returns The parsed feature
217
- */
218
- export function parseFieldsArray(
219
- f: (string | null | undefined)[],
220
- ): GFF3FeatureLine {
221
- return parseFieldsArrayImpl(f, true)
222
- }
223
-
224
- /**
225
- * Parse a GFF3 feature from a pre-split fields array without unescaping.
226
- * Fast path for data known to contain no escaped characters.
227
- *
228
- * @param f - Array of 9 GFF3 column values (use null or '.' for empty values)
229
- * @returns The parsed feature
230
- */
231
- export function parseFieldsArrayNoUnescape(
232
- f: (string | null | undefined)[],
233
- ): GFF3FeatureLine {
234
- return parseFieldsArrayImpl(f, false)
235
- }
236
-
237
183
  /**
238
184
  * Parse a GFF3 directive line.
239
185
  *
@@ -252,17 +198,16 @@ export function parseDirective(
252
198
  return null
253
199
  }
254
200
 
255
- const [, name] = match
256
- let [, , contents] = match
201
+ const name = match[1]!
202
+ const contents = match[2]!
257
203
 
258
- const parsed: GFF3Directive = { directive: name! }
259
- if (contents!.length) {
260
- contents = contents!.replace(lineEndRegex, '')
261
- parsed.value = contents
204
+ const parsed: GFF3Directive = { directive: name }
205
+ if (contents.length) {
206
+ parsed.value = contents.trimEnd()
262
207
  }
263
208
 
264
209
  if (name === 'sequence-region') {
265
- const c = contents!.split(whitespaceRegex, 3)
210
+ const c = contents.split(whitespaceRegex, 3)
266
211
  return {
267
212
  ...parsed,
268
213
  seq_id: c[0]!,
@@ -270,7 +215,7 @@ export function parseDirective(
270
215
  end: c[2]!.replaceAll(nonDigitRegex, ''),
271
216
  }
272
217
  } else if (name === 'genome-build') {
273
- const [source, buildName] = contents!.split(whitespaceRegex, 2)
218
+ const [source, buildName] = contents.split(whitespaceRegex, 2)
274
219
  return {
275
220
  ...parsed,
276
221
  source: source!,
@@ -446,11 +391,6 @@ function parseAttributesJBrowseImpl(
446
391
  const eqIdx = attrString.indexOf('=', start)
447
392
  if (eqIdx !== -1 && eqIdx < semiIdx && eqIdx + 1 < semiIdx) {
448
393
  const tag = attrString.slice(start, eqIdx)
449
- if (tag === '_lineHash') {
450
- start = semiIdx + 1
451
- continue
452
- }
453
-
454
394
  let key = COMMON_ATTRS[tag]
455
395
  if (key === undefined) {
456
396
  key = tag.toLowerCase()
@@ -494,46 +434,32 @@ export function parseAttributesJBrowseNoUnescape(
494
434
  parseAttributesJBrowseImpl(attrString, result, false)
495
435
  }
496
436
 
437
+ const STRAND_MAP: Record<string, number | undefined> = {
438
+ '+': 1,
439
+ '-': -1,
440
+ '.': 0,
441
+ }
442
+
497
443
  function parseFeatureJBrowseImpl(
498
444
  line: string,
499
445
  shouldUnescape: boolean,
500
446
  ): JBrowseFeature {
501
447
  const f = line.split('\t')
502
- const seq_id = f[0]!
503
- const source = f[1]!
504
- const type = f[2]!
505
448
  const startStr = f[3]!
506
449
  const endStr = f[4]!
507
450
  const scoreStr = f[5]!
508
- const strand = f[6]!
509
451
  const phase = f[7]!
510
452
  const attrString = f[8]!
511
453
 
512
454
  const result: JBrowseFeature = {
513
- refName:
514
- seq_id.length === 0 || seq_id === '.'
515
- ? ''
516
- : shouldUnescape
517
- ? unescape(seq_id)
518
- : seq_id,
519
- source:
520
- source.length === 0 || source === '.'
521
- ? null
522
- : shouldUnescape
523
- ? unescape(source)
524
- : source,
525
- type:
526
- type.length === 0 || type === '.'
527
- ? null
528
- : shouldUnescape
529
- ? unescape(type)
530
- : type,
531
- start: startStr.length === 0 || startStr === '.' ? 0 : +startStr - 1,
532
- end: endStr.length === 0 || endStr === '.' ? 0 : +endStr,
533
- score: scoreStr.length === 0 || scoreStr === '.' ? undefined : +scoreStr,
534
- strand:
535
- strand === '+' ? 1 : strand === '-' ? -1 : strand === '.' ? 0 : undefined,
536
- phase: phase.length === 0 || phase === '.' ? undefined : +phase,
455
+ refName: strField(f[0]!, shouldUnescape, ''),
456
+ source: strField(f[1]!, shouldUnescape, null),
457
+ type: strField(f[2]!, shouldUnescape, null),
458
+ start: isEmpty(startStr) ? 0 : +startStr - 1,
459
+ end: isEmpty(endStr) ? 0 : +endStr,
460
+ score: isEmpty(scoreStr) ? undefined : +scoreStr,
461
+ strand: STRAND_MAP[f[6]!],
462
+ phase: isEmpty(phase) ? undefined : +phase,
537
463
  subfeatures: [],
538
464
  }
539
465