gff-nostream 3.0.11 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/util.js CHANGED
@@ -1,8 +1,5 @@
1
- // Fast, low-level functions for parsing and formatting GFF3.
1
+ // Fast, low-level functions for parsing GFF3.
2
2
  // JavaScript port of Robert Buels's Bio::GFF3::LowLevel Perl module.
3
- const directiveRegex = /^\s*##\s*(\S+)\s*(.*)/;
4
- const whitespaceRegex = /\s+/;
5
- const nonDigitRegex = /\D/g;
6
3
  const HEX_LOOKUP = {};
7
4
  for (let i = 0; i < 256; i++) {
8
5
  const hex = i.toString(16).toUpperCase().padStart(2, '0');
@@ -24,169 +21,28 @@ export function unescape(stringVal) {
24
21
  let lastIdx = 0;
25
22
  let i = idx;
26
23
  while (i < stringVal.length) {
27
- if (stringVal[i] === '%' && i + 2 < stringVal.length) {
28
- result += stringVal.slice(lastIdx, i);
29
- const hex = stringVal.slice(i + 1, i + 3);
30
- const char = HEX_LOOKUP[hex];
31
- if (char !== undefined) {
32
- result += char;
33
- }
34
- else {
35
- result += stringVal.slice(i, i + 3);
36
- }
24
+ const char = stringVal[i] === '%' && i + 2 < stringVal.length
25
+ ? HEX_LOOKUP[stringVal.slice(i + 1, i + 3)]
26
+ : undefined;
27
+ if (char !== undefined) {
28
+ result += stringVal.slice(lastIdx, i) + char;
37
29
  i += 3;
38
30
  lastIdx = i;
39
31
  }
40
32
  else {
33
+ // Not a valid escape: advance one char so a '%' that begins a real
34
+ // escape immediately after isn't swallowed (e.g. the %20 in "a%b%20c").
41
35
  i++;
42
36
  }
43
37
  }
44
38
  return result + stringVal.slice(lastIdx);
45
39
  }
46
- function parseAttributesImpl(attrString, shouldUnescape) {
47
- if (attrString.length === 0 || attrString === '.') {
48
- return {};
49
- }
50
- const attrs = {};
51
- let len = attrString.length;
52
- if (attrString[len - 1] === '\n') {
53
- len = attrString[len - 2] === '\r' ? len - 2 : len - 1;
54
- attrString = attrString.slice(0, len);
55
- }
56
- let start = 0;
57
- while (start < len) {
58
- let semiIdx = attrString.indexOf(';', start);
59
- if (semiIdx === -1) {
60
- semiIdx = len;
61
- }
62
- if (semiIdx > start) {
63
- const eqIdx = attrString.indexOf('=', start);
64
- if (eqIdx !== -1 && eqIdx < semiIdx && eqIdx + 1 < semiIdx) {
65
- const tag = attrString.slice(start, eqIdx);
66
- let arec = attrs[tag];
67
- if (!arec) {
68
- arec = [];
69
- attrs[tag] = arec;
70
- }
71
- let valStart = eqIdx + 1;
72
- while (valStart < semiIdx) {
73
- let commaIdx = attrString.indexOf(',', valStart);
74
- if (commaIdx === -1 || commaIdx > semiIdx) {
75
- commaIdx = semiIdx;
76
- }
77
- if (commaIdx > valStart) {
78
- const val = attrString.slice(valStart, commaIdx);
79
- arec.push(shouldUnescape ? unescape(val) : val);
80
- }
81
- valStart = commaIdx + 1;
82
- }
83
- }
84
- }
85
- start = semiIdx + 1;
86
- }
87
- return attrs;
88
- }
89
- /**
90
- * Parse the 9th column (attributes) of a GFF3 feature line.
91
- *
92
- * @param attrString - String of GFF3 9th column
93
- * @returns Parsed attributes
94
- */
95
- export function parseAttributes(attrString) {
96
- return parseAttributesImpl(attrString, true);
97
- }
98
- /**
99
- * Parse the 9th column (attributes) of a GFF3 feature line without unescaping.
100
- * Fast path for data known to contain no escaped characters.
101
- *
102
- * @param attrString - String of GFF3 9th column
103
- * @returns Parsed attributes
104
- */
105
- export function parseAttributesNoUnescape(attrString) {
106
- return parseAttributesImpl(attrString, false);
107
- }
108
40
  function isEmpty(s) {
109
41
  return s.length === 0 || s === '.';
110
42
  }
111
43
  function strField(s, shouldUnescape, empty) {
112
44
  return isEmpty(s) ? empty : shouldUnescape ? unescape(s) : s;
113
45
  }
114
- function numField(s) {
115
- return isEmpty(s) ? null : +s;
116
- }
117
- function parseFeatureImpl(line, shouldUnescape) {
118
- const f = line.split('\t');
119
- const attrString = f[8];
120
- return {
121
- seq_id: strField(f[0], shouldUnescape, null),
122
- source: strField(f[1], shouldUnescape, null),
123
- type: strField(f[2], shouldUnescape, null),
124
- start: numField(f[3]),
125
- end: numField(f[4]),
126
- score: numField(f[5]),
127
- strand: strField(f[6], false, null),
128
- phase: strField(f[7], false, null),
129
- attributes: isEmpty(attrString)
130
- ? null
131
- : parseAttributesImpl(attrString, shouldUnescape),
132
- };
133
- }
134
- /**
135
- * Parse a GFF3 feature line
136
- *
137
- * @param line - GFF3 feature line
138
- * @returns The parsed feature
139
- */
140
- export function parseFeature(line) {
141
- return parseFeatureImpl(line, true);
142
- }
143
- /**
144
- * Parse a GFF3 feature line without unescaping.
145
- * Fast path for data known to contain no escaped characters.
146
- *
147
- * @param line - GFF3 feature line
148
- * @returns The parsed feature
149
- */
150
- export function parseFeatureNoUnescape(line) {
151
- return parseFeatureImpl(line, false);
152
- }
153
- /**
154
- * Parse a GFF3 directive line.
155
- *
156
- * @param line - GFF3 directive line
157
- * @returns The parsed directive
158
- */
159
- export function parseDirective(line) {
160
- const match = directiveRegex.exec(line);
161
- if (!match) {
162
- return null;
163
- }
164
- const name = match[1];
165
- const contents = match[2];
166
- const parsed = { directive: name };
167
- if (contents.length) {
168
- parsed.value = contents.trimEnd();
169
- }
170
- if (name === 'sequence-region') {
171
- const c = contents.split(whitespaceRegex, 3);
172
- return {
173
- ...parsed,
174
- seq_id: c[0],
175
- start: c[1].replaceAll(nonDigitRegex, ''),
176
- end: c[2].replaceAll(nonDigitRegex, ''),
177
- };
178
- }
179
- else if (name === 'genome-build') {
180
- const [source, buildName] = contents.split(whitespaceRegex, 2);
181
- return {
182
- ...parsed,
183
- source: source,
184
- buildName: buildName,
185
- };
186
- }
187
- return parsed;
188
- }
189
- // JBrowse format types and parsing functions
190
46
  const JBROWSE_DEFAULT_FIELDS = new Set([
191
47
  'start',
192
48
  'end',
@@ -220,7 +76,17 @@ const COMMON_ATTRS = {
220
76
  target: 'target',
221
77
  gap: 'gap',
222
78
  };
223
- function parseAttributesJBrowseImpl(attrString, result, shouldUnescape) {
79
+ const STRAND_MAP = {
80
+ '+': 1,
81
+ '-': -1,
82
+ '.': 0,
83
+ };
84
+ /**
85
+ * Parse the 9th column (attributes) of a GFF3 feature line into `result`,
86
+ * lowercasing keys and suffixing any that collide with a default field name.
87
+ * Pass shouldUnescape=false as a fast path for data with no escaped characters.
88
+ */
89
+ export function parseAttributes(attrString, result, shouldUnescape) {
224
90
  if (attrString.length === 0 || attrString === '.') {
225
91
  return;
226
92
  }
@@ -265,19 +131,16 @@ function parseAttributesJBrowseImpl(attrString, result, shouldUnescape) {
265
131
  start = semiIdx + 1;
266
132
  }
267
133
  }
268
- export function parseAttributesJBrowse(attrString, result) {
269
- parseAttributesJBrowseImpl(attrString, result, true);
270
- }
271
- export function parseAttributesJBrowseNoUnescape(attrString, result) {
272
- parseAttributesJBrowseImpl(attrString, result, false);
273
- }
274
- const STRAND_MAP = {
275
- '+': 1,
276
- '-': -1,
277
- '.': 0,
278
- };
279
- function parseFeatureJBrowseImpl(line, shouldUnescape) {
134
+ /**
135
+ * Parse a GFF3 feature line. Unescaping is skipped entirely for lines with no
136
+ * '%' character, which is the common case.
137
+ *
138
+ * @param line - GFF3 feature line
139
+ * @returns The parsed feature
140
+ */
141
+ export function parseFeature(line) {
280
142
  const f = line.split('\t');
143
+ const shouldUnescape = line.includes('%');
281
144
  const startStr = f[3];
282
145
  const endStr = f[4];
283
146
  const scoreStr = f[5];
@@ -294,13 +157,7 @@ function parseFeatureJBrowseImpl(line, shouldUnescape) {
294
157
  phase: isEmpty(phase) ? undefined : +phase,
295
158
  subfeatures: [],
296
159
  };
297
- parseAttributesJBrowseImpl(attrString, result, shouldUnescape);
160
+ parseAttributes(attrString, result, shouldUnescape);
298
161
  return result;
299
162
  }
300
- export function parseFeatureJBrowse(line) {
301
- return parseFeatureJBrowseImpl(line, true);
302
- }
303
- export function parseFeatureJBrowseNoUnescape(line) {
304
- return parseFeatureJBrowseImpl(line, false);
305
- }
306
163
  //# sourceMappingURL=util.js.map
package/esm/util.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,6DAA6D;AAC7D,qEAAqE;AAErE,MAAM,cAAc,GAAG,uBAAuB,CAAA;AAC9C,MAAM,eAAe,GAAG,KAAK,CAAA;AAC7B,MAAM,aAAa,GAAG,KAAK,CAAA;AAE3B,MAAM,UAAU,GAAuC,EAAE,CAAA;AACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;IAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACzD,UAAU,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IACxC,UAAU,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;AACxD,CAAC;AAED;;;;;GAKG;AAEH,MAAM,UAAU,QAAQ,CAAC,SAAiB;IACxC,MAAM,GAAG,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAClC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;QACf,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,MAAM,GAAG,EAAE,CAAA;IACf,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,IAAI,CAAC,GAAG,GAAG,CAAA;IAEX,OAAO,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;QAC5B,IAAI,SAAS,CAAC,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;YACrD,MAAM,IAAI,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAA;YACrC,MAAM,GAAG,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAA;YACzC,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAA;YAC5B,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;gBACvB,MAAM,IAAI,IAAI,CAAA;YAChB,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAA;YACrC,CAAC;YACD,CAAC,IAAI,CAAC,CAAA;YACN,OAAO,GAAG,CAAC,CAAA;QACb,CAAC;aAAM,CAAC;YACN,CAAC,EAAE,CAAA;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;AAC1C,CAAC;AAED,SAAS,mBAAmB,CAC1B,UAAkB,EAClB,cAAuB;IAEvB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;QAClD,OAAO,EAAE,CAAA;IACX,CAAC;IAED,MAAM,KAAK,GAAmB,EAAE,CAAA;IAChC,IAAI,GAAG,GAAG,UAAU,CAAC,MAAM,CAAA;IAE3B,IAAI,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,GAAG,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAA;QACtD,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACvC,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;QACnB,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAC5C,IAAI,OAAO,KAAK,CAAC,CAAC,EAAE,CAAC;YACnB,OAAO,GAAG,GAAG,CAAA;QACf,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;YACpB,MAAM,KAAK,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;YAC5C,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;gBAC3D,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;gBAC1C,IAAI,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAA;gBACrB,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,IAAI,GAAG,EAAE,CAAA;oBACT,KAAK,CAAC,GAAG,CAAC,GAAG,IAAI,CAAA;gBACnB,CAAC;gBAED,IAAI,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAA;gBACxB,OAAO,QAAQ,GAAG,OAAO,EAAE,CAAC;oBAC1B,IAAI,QAAQ,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAA;oBAChD,IAAI,QAAQ,KAAK,CAAC,CAAC,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;wBAC1C,QAAQ,GAAG,OAAO,CAAA;oBACpB,CAAC;oBACD,IAAI,QAAQ,GAAG,QAAQ,EAAE,CAAC;wBACxB,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;wBAChD,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;oBACjD,CAAC;oBACD,QAAQ,GAAG,QAAQ,GAAG,CAAC,CAAA;gBACzB,CAAC;YACH,CAAC;QACH,CAAC;QACD,KAAK,GAAG,OAAO,GAAG,CAAC,CAAA;IACrB,CAAC;IACD,OAAO,KAAK,CAAA;AACd,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAAC,UAAkB;IAChD,OAAO,mBAAmB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAA;AAC9C,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,yBAAyB,CAAC,UAAkB;IAC1D,OAAO,mBAAmB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAA;AAC/C,CAAC;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAA;AACpC,CAAC;AAED,SAAS,QAAQ,CACf,CAAS,EACT,cAAuB,EACvB,KAAQ;IAER,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAC9D,CAAC;AAED,SAAS,QAAQ,CAAC,CAAS;IACzB,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAC/B,CAAC;AAED,SAAS,gBAAgB,CACvB,IAAY,EACZ,cAAuB;IAEvB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC1B,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACxB,OAAO;QACL,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC7C,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC7C,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC3C,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC;QACtB,GAAG,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC;QACpB,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC;QACtB,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,KAAK,EAAE,IAAI,CAAC;QACpC,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,KAAK,EAAE,IAAI,CAAC;QACnC,UAAU,EAAE,OAAO,CAAC,UAAU,CAAC;YAC7B,CAAC,CAAC,IAAI;YACN,CAAC,CAAC,mBAAmB,CAAC,UAAU,EAAE,cAAc,CAAC;KACpD,CAAA;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,OAAO,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;AACrC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,sBAAsB,CAAC,IAAY;IACjD,OAAO,gBAAgB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAA;AACtC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAC5B,IAAY;IAMZ,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACvC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,IAAI,CAAA;IACb,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAE,CAAA;IAE1B,MAAM,MAAM,GAAkB,EAAE,SAAS,EAAE,IAAI,EAAE,CAAA;IACjD,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,CAAC,KAAK,GAAG,QAAQ,CAAC,OAAO,EAAE,CAAA;IACnC,CAAC;IAED,IAAI,IAAI,KAAK,iBAAiB,EAAE,CAAC;QAC/B,MAAM,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,eAAe,EAAE,CAAC,CAAC,CAAA;QAC5C,OAAO;YACL,GAAG,MAAM;YACT,MAAM,EAAE,CAAC,CAAC,CAAC,CAAE;YACb,KAAK,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,UAAU,CAAC,aAAa,EAAE,EAAE,CAAC;YAC1C,GAAG,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,UAAU,CAAC,aAAa,EAAE,EAAE,CAAC;SACzC,CAAA;IACH,CAAC;SAAM,IAAI,IAAI,KAAK,cAAc,EAAE,CAAC;QACnC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,eAAe,EAAE,CAAC,CAAC,CAAA;QAC9D,OAAO;YACL,GAAG,MAAM;YACT,MAAM,EAAE,MAAO;YACf,SAAS,EAAE,SAAU;SACtB,CAAA;IACH,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC;AA2FD,6CAA6C;AAE7C,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC;IACrC,OAAO;IACP,KAAK;IACL,QAAQ;IACR,OAAO;IACP,MAAM;IACN,QAAQ;IACR,OAAO;IACP,QAAQ;CACT,CAAC,CAAA;AAEF,uEAAuE;AACvE,sCAAsC;AACtC,MAAM,YAAY,GAAuC;IACvD,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,aAAa,EAAE,eAAe;IAC9B,WAAW,EAAE,aAAa;IAC1B,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;IACV,YAAY,EAAE,cAAc;IAC5B,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;CACX,CAAA;AAeD,SAAS,0BAA0B,CACjC,UAAkB,EAClB,MAA+B,EAC/B,cAAuB;IAEvB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;QAClD,OAAM;IACR,CAAC;IAED,IAAI,GAAG,GAAG,UAAU,CAAC,MAAM,CAAA;IAC3B,IAAI,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,GAAG,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAA;QACtD,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACvC,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;QACnB,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAC5C,IAAI,OAAO,KAAK,CAAC,CAAC,EAAE,CAAC;YACnB,OAAO,GAAG,GAAG,CAAA;QACf,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;YACpB,MAAM,KAAK,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;YAC5C,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;gBAC3D,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;gBAC1C,IAAI,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;gBAC3B,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;oBACtB,GAAG,GAAG,GAAG,CAAC,WAAW,EAAE,CAAA;oBACvB,IAAI,sBAAsB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;wBACpC,GAAG,IAAI,GAAG,CAAA;oBACZ,CAAC;gBACH,CAAC;gBAED,MAAM,MAAM,GAAa,EAAE,CAAA;gBAC3B,IAAI,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAA;gBACxB,OAAO,QAAQ,GAAG,OAAO,EAAE,CAAC;oBAC1B,IAAI,QAAQ,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAA;oBAChD,IAAI,QAAQ,KAAK,CAAC,CAAC,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;wBAC1C,QAAQ,GAAG,OAAO,CAAA;oBACpB,CAAC;oBACD,IAAI,QAAQ,GAAG,QAAQ,EAAE,CAAC;wBACxB,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;wBAChD,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;oBACnD,CAAC;oBACD,QAAQ,GAAG,QAAQ,GAAG,CAAC,CAAA;gBACzB,CAAC;gBAED,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAA;YACxD,CAAC;QACH,CAAC;QACD,KAAK,GAAG,OAAO,GAAG,CAAC,CAAA;IACrB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,sBAAsB,CACpC,UAAkB,EAClB,MAA+B;IAE/B,0BAA0B,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,CAAA;AACtD,CAAC;AAED,MAAM,UAAU,gCAAgC,CAC9C,UAAkB,EAClB,MAA+B;IAE/B,0BAA0B,CAAC,UAAU,EAAE,MAAM,EAAE,KAAK,CAAC,CAAA;AACvD,CAAC;AAED,MAAM,UAAU,GAAuC;IACrD,GAAG,EAAE,CAAC;IACN,GAAG,EAAE,CAAC,CAAC;IACP,GAAG,EAAE,CAAC;CACP,CAAA;AAED,SAAS,uBAAuB,CAC9B,IAAY,EACZ,cAAuB;IAEvB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC1B,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACpB,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACnB,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IAExB,MAAM,MAAM,GAAmB;QAC7B,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,EAAE,CAAC;QAC5C,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC7C,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC3C,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC;QAC5C,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM;QAClC,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ;QAChD,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC;QACzB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK;QAC1C,WAAW,EAAE,EAAE;KAChB,CAAA;IAED,0BAA0B,CAAC,UAAU,EAAE,MAAM,EAAE,cAAc,CAAC,CAAA;IAC9D,OAAO,MAAM,CAAA;AACf,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,OAAO,uBAAuB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;AAC5C,CAAC;AAED,MAAM,UAAU,6BAA6B,CAAC,IAAY;IACxD,OAAO,uBAAuB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAA;AAC7C,CAAC"}
1
+ {"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,8CAA8C;AAC9C,qEAAqE;AAErE,MAAM,UAAU,GAAuC,EAAE,CAAA;AACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;IAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACzD,UAAU,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IACxC,UAAU,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;AACxD,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,QAAQ,CAAC,SAAiB;IACxC,MAAM,GAAG,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAClC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;QACf,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,MAAM,GAAG,EAAE,CAAA;IACf,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,IAAI,CAAC,GAAG,GAAG,CAAA;IAEX,OAAO,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,IAAI,GACR,SAAS,CAAC,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM;YAC9C,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3C,CAAC,CAAC,SAAS,CAAA;QACf,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,IAAI,CAAA;YAC5C,CAAC,IAAI,CAAC,CAAA;YACN,OAAO,GAAG,CAAC,CAAA;QACb,CAAC;aAAM,CAAC;YACN,mEAAmE;YACnE,wEAAwE;YACxE,CAAC,EAAE,CAAA;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;AAC1C,CAAC;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAA;AACpC,CAAC;AAED,SAAS,QAAQ,CACf,CAAS,EACT,cAAuB,EACvB,KAAQ;IAER,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAC9D,CAAC;AAED,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC;IACrC,OAAO;IACP,KAAK;IACL,QAAQ;IACR,OAAO;IACP,MAAM;IACN,QAAQ;IACR,OAAO;IACP,QAAQ;CACT,CAAC,CAAA;AAEF,uEAAuE;AACvE,sCAAsC;AACtC,MAAM,YAAY,GAAuC;IACvD,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,aAAa,EAAE,eAAe;IAC9B,WAAW,EAAE,aAAa;IAC1B,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;IACV,YAAY,EAAE,cAAc;IAC5B,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;CACX,CAAA;AAED,MAAM,UAAU,GAAuC;IACrD,GAAG,EAAE,CAAC;IACN,GAAG,EAAE,CAAC,CAAC;IACP,GAAG,EAAE,CAAC;CACP,CAAA;AAoBD;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAC7B,UAAkB,EAClB,MAA+B,EAC/B,cAAuB;IAEvB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;QAClD,OAAM;IACR,CAAC;IAED,IAAI,GAAG,GAAG,UAAU,CAAC,MAAM,CAAA;IAC3B,IAAI,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,GAAG,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAA;QACtD,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACvC,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;QACnB,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAC5C,IAAI,OAAO,KAAK,CAAC,CAAC,EAAE,CAAC;YACnB,OAAO,GAAG,GAAG,CAAA;QACf,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;YACpB,MAAM,KAAK,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;YAC5C,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;gBAC3D,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;gBAC1C,IAAI,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;gBAC3B,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;oBACtB,GAAG,GAAG,GAAG,CAAC,WAAW,EAAE,CAAA;oBACvB,IAAI,sBAAsB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;wBACpC,GAAG,IAAI,GAAG,CAAA;oBACZ,CAAC;gBACH,CAAC;gBAED,MAAM,MAAM,GAAa,EAAE,CAAA;gBAC3B,IAAI,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAA;gBACxB,OAAO,QAAQ,GAAG,OAAO,EAAE,CAAC;oBAC1B,IAAI,QAAQ,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAA;oBAChD,IAAI,QAAQ,KAAK,CAAC,CAAC,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;wBAC1C,QAAQ,GAAG,OAAO,CAAA;oBACpB,CAAC;oBACD,IAAI,QAAQ,GAAG,QAAQ,EAAE,CAAC;wBACxB,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;wBAChD,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;oBACnD,CAAC;oBACD,QAAQ,GAAG,QAAQ,GAAG,CAAC,CAAA;gBACzB,CAAC;gBAED,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAA;YACxD,CAAC;QACH,CAAC;QACD,KAAK,GAAG,OAAO,GAAG,CAAC,CAAA;IACrB,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC1B,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAA;IACzC,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACpB,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACnB,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IAExB,MAAM,MAAM,GAAe;QACzB,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,EAAE,CAAC;QAC5C,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC7C,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC3C,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC;QAC5C,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM;QAClC,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ;QAChD,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC;QACzB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK;QAC1C,WAAW,EAAE,EAAE;KAChB,CAAA;IAED,eAAe,CAAC,UAAU,EAAE,MAAM,EAAE,cAAc,CAAC,CAAA;IACnD,OAAO,MAAM,CAAA;AACf,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gff-nostream",
3
- "version": "3.0.11",
3
+ "version": "5.0.0",
4
4
  "description": "utilities to read GFF3 data",
5
5
  "license": "MIT",
6
6
  "repository": {
package/src/api.ts CHANGED
@@ -1,29 +1,20 @@
1
- import {
2
- parseFeature,
3
- parseFeatureJBrowse,
4
- parseFeatureJBrowseNoUnescape,
5
- parseFeatureNoUnescape,
6
- } from './util.ts'
1
+ import { parseFeature } from './util.ts'
7
2
 
8
- import type {
9
- GFF3Feature,
10
- GFF3FeatureLineWithRefs,
11
- JBrowseFeature,
12
- } from './util.ts'
3
+ import type { GffFeature } from './util.ts'
13
4
 
14
- interface ParseInput {
5
+ export interface LineRecord {
6
+ /** Raw GFF3 feature line */
15
7
  line: string
16
- lineHash?: string | number
17
- hasEscapes: boolean
18
8
  }
19
9
 
20
- export interface LineRecord extends ParseInput {
21
- /** Genomic start coordinate from the tabix index (1-based) */
22
- start: number
23
- /** Genomic end coordinate from the tabix index */
24
- end: number
25
- /** GFF3 feature type (column 3) */
26
- type: string
10
+ /**
11
+ * A top-level parsed feature paired with the input record it came from. The
12
+ * parser stamps no identity onto the feature itself; callers that need a stable
13
+ * per-feature id (e.g. from a tabix byte offset) read it off their own `record`.
14
+ */
15
+ export interface ParsedRecord<R extends LineRecord = LineRecord> {
16
+ feature: GffFeature
17
+ record: R
27
18
  }
28
19
 
29
20
  /** Extract the GFF3 feature type (column 3) from a raw line without a full split. */
@@ -45,8 +36,8 @@ function appendOrphan<T>(orphans: Map<string, T[]>, key: string, value: T) {
45
36
  }
46
37
 
47
38
  /**
48
- * The JBrowse parser collapses single-element attribute arrays to scalars, so a
49
- * raw ID/Parent value can be a string, a string array, or absent. These coerce
39
+ * The parser collapses single-element attribute arrays to scalars, so a raw
40
+ * ID/Parent value can be a string, a string array, or absent. These coerce
50
41
  * those `unknown` values without typecasts.
51
42
  */
52
43
  function firstString(value: unknown): string | undefined {
@@ -62,119 +53,68 @@ function toStringArray(value: unknown): string[] {
62
53
  }
63
54
 
64
55
  /**
65
- * Synchronously parse a string containing GFF3 and return an array of the
66
- * parsed items.
67
- *
68
- * @param str - GFF3 string
69
- * @returns array of parsed features
70
- */
71
- export function parseStringSync(str: string): GFF3Feature[] {
72
- return parseRecords(stringToRecords(str))
73
- }
74
-
75
- /**
76
- * Synchronously parse a string containing GFF3 directly into JBrowse format.
77
- *
78
- * @param str - GFF3 string
79
- * @returns array of JBrowse-format features
56
+ * Register a feature's ID and attach it to its parent(s), building the
57
+ * subfeature tree in `byId`/`orphans`. Returns true when the feature is
58
+ * top-level (has no Parent) and the caller should collect it.
80
59
  */
81
- export function parseStringSyncJBrowse(str: string): JBrowseFeature[] {
82
- return parseRecordsJBrowse(stringToRecords(str))
83
- }
84
-
85
- function stringToRecords(str: string) {
86
- const lines = str.split(/\r?\n/)
87
- const records: ParseInput[] = []
88
- for (const line of lines) {
89
- if (line.startsWith('##FASTA') || line.startsWith('>')) {
90
- break
60
+ function linkFeature(
61
+ feature: GffFeature,
62
+ byId: Map<string, GffFeature>,
63
+ orphans: Map<string, GffFeature[]>,
64
+ ): boolean {
65
+ const id = firstString(feature.id)
66
+ const parents = toStringArray(feature.parent)
67
+
68
+ // Register the id only the first time it is seen. Continuation lines
69
+ // (multi-location features such as a CDS spanning several segments share one
70
+ // ID across lines) skip registration but must still be attached to their
71
+ // parent below, so this is independent of the parent handling.
72
+ if (id && !byId.has(id)) {
73
+ byId.set(id, feature)
74
+ const waiting = orphans.get(id)
75
+ if (waiting) {
76
+ for (const w of waiting) {
77
+ feature.subfeatures.push(w)
78
+ }
79
+ orphans.delete(id)
91
80
  }
92
- if (line.length === 0 || line.startsWith('#')) {
93
- continue
81
+ }
82
+
83
+ for (const parentId of parents) {
84
+ const parentFeature = byId.get(parentId)
85
+ if (parentFeature) {
86
+ parentFeature.subfeatures.push(feature)
87
+ } else {
88
+ appendOrphan(orphans, parentId, feature)
94
89
  }
95
- records.push({
96
- line,
97
- hasEscapes: line.includes('%'),
98
- })
99
90
  }
100
- return records
91
+
92
+ // Every line of a top-level discontinuous feature (e.g. cDNA_match spanning
93
+ // several segments under one shared ID, with no Parent) is its own top-level
94
+ // item, so this is independent of whether the id was just registered.
95
+ return parents.length === 0
101
96
  }
102
97
 
103
98
  /**
104
- * Parse an array of LineRecord objects containing raw GFF3 lines.
105
- * Supports parent/child relationships.
99
+ * Synchronously parse a string containing GFF3 and return an array of the
100
+ * parsed features. Comments, directives, and `##FASTA` sections are ignored.
106
101
  *
107
- * @param records - Array of LineRecord objects with raw line and metadata
102
+ * @param str - GFF3 string
108
103
  * @returns array of parsed features
109
104
  */
110
- export function parseRecords(records: ParseInput[]): GFF3Feature[] {
111
- const items: GFF3Feature[] = []
112
- const byId = new Map<string, GFF3Feature>()
113
- const orphans = new Map<string, GFF3Feature[]>()
105
+ export function parseStringSync(str: string): GffFeature[] {
106
+ const items: GffFeature[] = []
107
+ const byId = new Map<string, GffFeature>()
108
+ const orphans = new Map<string, GffFeature[]>()
114
109
 
115
- for (const record of records) {
116
- const parsed = record.hasEscapes
117
- ? parseFeature(record.line)
118
- : parseFeatureNoUnescape(record.line)
119
- const featureLine: GFF3FeatureLineWithRefs = {
120
- ...parsed,
121
- child_features: [],
122
- derived_features: [],
123
- }
124
-
125
- if (record.lineHash !== undefined) {
126
- featureLine.attributes ??= {}
127
- featureLine.attributes._lineHash = [String(record.lineHash)]
110
+ for (const line of str.split(/\r?\n/)) {
111
+ if (line.startsWith('##FASTA') || line.startsWith('>')) {
112
+ break
128
113
  }
129
-
130
- const attrs = featureLine.attributes
131
- const ids = attrs?.ID
132
- const parents = attrs?.Parent
133
-
134
- if (!ids && !parents) {
135
- items.push([featureLine])
136
- } else {
137
- let feature: GFF3Feature
138
- if (ids) {
139
- const id = ids[0]!
140
- const existing = byId.get(id)
141
- if (existing) {
142
- // Multi-location continuation: share child_features/derived_features
143
- // with the first line so children remain visible across all lines
144
- // regardless of arrival order.
145
- featureLine.child_features = existing[0]!.child_features
146
- featureLine.derived_features = existing[0]!.derived_features
147
- existing.push(featureLine)
148
- feature = existing
149
- } else {
150
- feature = [featureLine]
151
- if (!parents) {
152
- items.push(feature)
153
- }
154
- byId.set(id, feature)
155
- const waiting = orphans.get(id)
156
- if (waiting) {
157
- for (const w of waiting) {
158
- featureLine.child_features.push(w)
159
- }
160
- orphans.delete(id)
161
- }
162
- }
163
- } else {
164
- feature = [featureLine]
165
- }
166
-
167
- if (parents) {
168
- for (const parentId of parents) {
169
- const parent = byId.get(parentId)
170
- if (parent) {
171
- // child_features is shared across all parent feature lines,
172
- // so push once via the first line.
173
- parent[0]!.child_features.push(feature)
174
- } else {
175
- appendOrphan(orphans, parentId, feature)
176
- }
177
- }
114
+ if (line.length !== 0 && !line.startsWith('#')) {
115
+ const feature = parseFeature(line)
116
+ if (linkFeature(feature, byId, orphans)) {
117
+ items.push(feature)
178
118
  }
179
119
  }
180
120
  }
@@ -183,76 +123,29 @@ export function parseRecords(records: ParseInput[]): GFF3Feature[] {
183
123
  }
184
124
 
185
125
  /**
186
- * Parse an array of LineRecord objects directly into JBrowse feature format.
187
- * Supports parent/child relationships via subfeatures.
126
+ * Parse an array of records wrapping raw GFF3 lines, resolving parent/child
127
+ * relationships into `subfeatures`. Returns each top-level feature paired with
128
+ * the record it came from, so callers can attach their own identity (e.g. a
129
+ * byte offset) without the parser stamping anything onto the feature.
188
130
  *
189
- * @param records - Array of LineRecord objects with raw line and metadata
190
- * @returns array of JBrowse-format features
131
+ * @param records - Array of records, each carrying a raw GFF3 `line`
132
+ * @returns top-level features, each paired with its originating record
191
133
  */
192
- export function parseRecordsJBrowse(records: ParseInput[]): JBrowseFeature[] {
193
- const items: JBrowseFeature[] = []
194
- const byId = new Map<string, JBrowseFeature>()
195
- const orphans = new Map<string, JBrowseFeature[]>()
134
+ export function parseRecords<R extends LineRecord>(
135
+ records: readonly R[],
136
+ ): ParsedRecord<R>[] {
137
+ const items: ParsedRecord<R>[] = []
138
+ const byId = new Map<string, GffFeature>()
139
+ const orphans = new Map<string, GffFeature[]>()
196
140
 
197
141
  for (const record of records) {
198
- const feature = record.hasEscapes
199
- ? parseFeatureJBrowse(record.line)
200
- : parseFeatureJBrowseNoUnescape(record.line)
201
-
202
- if (record.lineHash !== undefined) {
203
- feature._lineHash = String(record.lineHash)
204
- }
205
-
206
- const id = firstString(feature.id)
207
- const parents = toStringArray(feature.parent)
208
-
209
- if (!id && parents.length === 0) {
210
- items.push(feature)
211
- } else {
212
- // A parentless line is a top-level item. Every line of a top-level
213
- // discontinuous feature (e.g. cDNA_match/EST_match spanning several
214
- // segments under one shared ID, with no Parent) is its own top-level
215
- // item, so push here regardless of whether the id is already registered.
216
- if (parents.length === 0) {
217
- items.push(feature)
218
- }
219
-
220
- // Register the id only the first time it is seen. Continuation lines
221
- // (multi-location features such as a CDS spanning several segments share
222
- // one ID across lines) skip registration but must still be attached to
223
- // their parent below, so this is independent of the parent handling.
224
- if (id && !byId.has(id)) {
225
- byId.set(id, feature)
226
- const waiting = orphans.get(id)
227
- if (waiting) {
228
- for (const w of waiting) {
229
- feature.subfeatures.push(w)
230
- }
231
- orphans.delete(id)
232
- }
233
- }
234
-
235
- for (const parentId of parents) {
236
- const parentFeature = byId.get(parentId)
237
- if (parentFeature) {
238
- parentFeature.subfeatures.push(feature)
239
- } else {
240
- appendOrphan(orphans, parentId, feature)
241
- }
242
- }
142
+ const feature = parseFeature(record.line)
143
+ if (linkFeature(feature, byId, orphans)) {
144
+ items.push({ feature, record })
243
145
  }
244
146
  }
245
147
 
246
148
  return items
247
149
  }
248
150
 
249
- export type {
250
- GFF3Comment,
251
- GFF3Directive,
252
- GFF3Feature,
253
- GFF3FeatureLine,
254
- GFF3FeatureLineWithRefs,
255
- GFF3Item,
256
- GFF3Sequence,
257
- JBrowseFeature,
258
- } from './util.ts'
151
+ export type { GffFeature } from './util.ts'
package/src/index.ts CHANGED
@@ -1,19 +1,3 @@
1
- export {
2
- extractType,
3
- parseRecords,
4
- parseRecordsJBrowse,
5
- parseStringSync,
6
- parseStringSyncJBrowse,
7
- } from './api.ts'
1
+ export { extractType, parseRecords, parseStringSync } from './api.ts'
8
2
 
9
- export type {
10
- GFF3Comment,
11
- GFF3Directive,
12
- GFF3Feature,
13
- GFF3FeatureLine,
14
- GFF3FeatureLineWithRefs,
15
- GFF3Item,
16
- GFF3Sequence,
17
- JBrowseFeature,
18
- LineRecord,
19
- } from './api.ts'
3
+ export type { GffFeature, LineRecord, ParsedRecord } from './api.ts'