gff-nostream 3.0.11 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -89
- package/dist/api.d.ts +6 -21
- package/dist/api.js +29 -126
- package/dist/api.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -3
- package/dist/index.js.map +1 -1
- package/dist/util.d.ts +20 -120
- package/dist/util.js +29 -179
- package/dist/util.js.map +1 -1
- package/esm/api.d.ts +6 -21
- package/esm/api.js +30 -125
- package/esm/api.js.map +1 -1
- package/esm/index.d.ts +2 -2
- package/esm/index.js +1 -1
- package/esm/index.js.map +1 -1
- package/esm/util.d.ts +20 -120
- package/esm/util.js +29 -172
- package/esm/util.js.map +1 -1
- package/package.json +1 -1
- package/src/api.ts +37 -153
- package/src/index.ts +2 -18
- package/src/util.ts +39 -308
package/dist/util.js
CHANGED
|
@@ -1,20 +1,10 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
// Fast, low-level functions for parsing
|
|
2
|
+
// Fast, low-level functions for parsing GFF3.
|
|
3
3
|
// JavaScript port of Robert Buels's Bio::GFF3::LowLevel Perl module.
|
|
4
4
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
5
|
exports.unescape = unescape;
|
|
6
6
|
exports.parseAttributes = parseAttributes;
|
|
7
|
-
exports.parseAttributesNoUnescape = parseAttributesNoUnescape;
|
|
8
7
|
exports.parseFeature = parseFeature;
|
|
9
|
-
exports.parseFeatureNoUnescape = parseFeatureNoUnescape;
|
|
10
|
-
exports.parseDirective = parseDirective;
|
|
11
|
-
exports.parseAttributesJBrowse = parseAttributesJBrowse;
|
|
12
|
-
exports.parseAttributesJBrowseNoUnescape = parseAttributesJBrowseNoUnescape;
|
|
13
|
-
exports.parseFeatureJBrowse = parseFeatureJBrowse;
|
|
14
|
-
exports.parseFeatureJBrowseNoUnescape = parseFeatureJBrowseNoUnescape;
|
|
15
|
-
const directiveRegex = /^\s*##\s*(\S+)\s*(.*)/;
|
|
16
|
-
const whitespaceRegex = /\s+/;
|
|
17
|
-
const nonDigitRegex = /\D/g;
|
|
18
8
|
const HEX_LOOKUP = {};
|
|
19
9
|
for (let i = 0; i < 256; i++) {
|
|
20
10
|
const hex = i.toString(16).toUpperCase().padStart(2, '0');
|
|
@@ -36,169 +26,28 @@ function unescape(stringVal) {
|
|
|
36
26
|
let lastIdx = 0;
|
|
37
27
|
let i = idx;
|
|
38
28
|
while (i < stringVal.length) {
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
result += char;
|
|
45
|
-
}
|
|
46
|
-
else {
|
|
47
|
-
result += stringVal.slice(i, i + 3);
|
|
48
|
-
}
|
|
29
|
+
const char = stringVal[i] === '%' && i + 2 < stringVal.length
|
|
30
|
+
? HEX_LOOKUP[stringVal.slice(i + 1, i + 3)]
|
|
31
|
+
: undefined;
|
|
32
|
+
if (char !== undefined) {
|
|
33
|
+
result += stringVal.slice(lastIdx, i) + char;
|
|
49
34
|
i += 3;
|
|
50
35
|
lastIdx = i;
|
|
51
36
|
}
|
|
52
37
|
else {
|
|
38
|
+
// Not a valid escape: advance one char so a '%' that begins a real
|
|
39
|
+
// escape immediately after isn't swallowed (e.g. the %20 in "a%b%20c").
|
|
53
40
|
i++;
|
|
54
41
|
}
|
|
55
42
|
}
|
|
56
43
|
return result + stringVal.slice(lastIdx);
|
|
57
44
|
}
|
|
58
|
-
function parseAttributesImpl(attrString, shouldUnescape) {
|
|
59
|
-
if (attrString.length === 0 || attrString === '.') {
|
|
60
|
-
return {};
|
|
61
|
-
}
|
|
62
|
-
const attrs = {};
|
|
63
|
-
let len = attrString.length;
|
|
64
|
-
if (attrString[len - 1] === '\n') {
|
|
65
|
-
len = attrString[len - 2] === '\r' ? len - 2 : len - 1;
|
|
66
|
-
attrString = attrString.slice(0, len);
|
|
67
|
-
}
|
|
68
|
-
let start = 0;
|
|
69
|
-
while (start < len) {
|
|
70
|
-
let semiIdx = attrString.indexOf(';', start);
|
|
71
|
-
if (semiIdx === -1) {
|
|
72
|
-
semiIdx = len;
|
|
73
|
-
}
|
|
74
|
-
if (semiIdx > start) {
|
|
75
|
-
const eqIdx = attrString.indexOf('=', start);
|
|
76
|
-
if (eqIdx !== -1 && eqIdx < semiIdx && eqIdx + 1 < semiIdx) {
|
|
77
|
-
const tag = attrString.slice(start, eqIdx);
|
|
78
|
-
let arec = attrs[tag];
|
|
79
|
-
if (!arec) {
|
|
80
|
-
arec = [];
|
|
81
|
-
attrs[tag] = arec;
|
|
82
|
-
}
|
|
83
|
-
let valStart = eqIdx + 1;
|
|
84
|
-
while (valStart < semiIdx) {
|
|
85
|
-
let commaIdx = attrString.indexOf(',', valStart);
|
|
86
|
-
if (commaIdx === -1 || commaIdx > semiIdx) {
|
|
87
|
-
commaIdx = semiIdx;
|
|
88
|
-
}
|
|
89
|
-
if (commaIdx > valStart) {
|
|
90
|
-
const val = attrString.slice(valStart, commaIdx);
|
|
91
|
-
arec.push(shouldUnescape ? unescape(val) : val);
|
|
92
|
-
}
|
|
93
|
-
valStart = commaIdx + 1;
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
start = semiIdx + 1;
|
|
98
|
-
}
|
|
99
|
-
return attrs;
|
|
100
|
-
}
|
|
101
|
-
/**
|
|
102
|
-
* Parse the 9th column (attributes) of a GFF3 feature line.
|
|
103
|
-
*
|
|
104
|
-
* @param attrString - String of GFF3 9th column
|
|
105
|
-
* @returns Parsed attributes
|
|
106
|
-
*/
|
|
107
|
-
function parseAttributes(attrString) {
|
|
108
|
-
return parseAttributesImpl(attrString, true);
|
|
109
|
-
}
|
|
110
|
-
/**
|
|
111
|
-
* Parse the 9th column (attributes) of a GFF3 feature line without unescaping.
|
|
112
|
-
* Fast path for data known to contain no escaped characters.
|
|
113
|
-
*
|
|
114
|
-
* @param attrString - String of GFF3 9th column
|
|
115
|
-
* @returns Parsed attributes
|
|
116
|
-
*/
|
|
117
|
-
function parseAttributesNoUnescape(attrString) {
|
|
118
|
-
return parseAttributesImpl(attrString, false);
|
|
119
|
-
}
|
|
120
45
|
function isEmpty(s) {
|
|
121
46
|
return s.length === 0 || s === '.';
|
|
122
47
|
}
|
|
123
48
|
function strField(s, shouldUnescape, empty) {
|
|
124
49
|
return isEmpty(s) ? empty : shouldUnescape ? unescape(s) : s;
|
|
125
50
|
}
|
|
126
|
-
function numField(s) {
|
|
127
|
-
return isEmpty(s) ? null : +s;
|
|
128
|
-
}
|
|
129
|
-
function parseFeatureImpl(line, shouldUnescape) {
|
|
130
|
-
const f = line.split('\t');
|
|
131
|
-
const attrString = f[8];
|
|
132
|
-
return {
|
|
133
|
-
seq_id: strField(f[0], shouldUnescape, null),
|
|
134
|
-
source: strField(f[1], shouldUnescape, null),
|
|
135
|
-
type: strField(f[2], shouldUnescape, null),
|
|
136
|
-
start: numField(f[3]),
|
|
137
|
-
end: numField(f[4]),
|
|
138
|
-
score: numField(f[5]),
|
|
139
|
-
strand: strField(f[6], false, null),
|
|
140
|
-
phase: strField(f[7], false, null),
|
|
141
|
-
attributes: isEmpty(attrString)
|
|
142
|
-
? null
|
|
143
|
-
: parseAttributesImpl(attrString, shouldUnescape),
|
|
144
|
-
};
|
|
145
|
-
}
|
|
146
|
-
/**
|
|
147
|
-
* Parse a GFF3 feature line
|
|
148
|
-
*
|
|
149
|
-
* @param line - GFF3 feature line
|
|
150
|
-
* @returns The parsed feature
|
|
151
|
-
*/
|
|
152
|
-
function parseFeature(line) {
|
|
153
|
-
return parseFeatureImpl(line, true);
|
|
154
|
-
}
|
|
155
|
-
/**
|
|
156
|
-
* Parse a GFF3 feature line without unescaping.
|
|
157
|
-
* Fast path for data known to contain no escaped characters.
|
|
158
|
-
*
|
|
159
|
-
* @param line - GFF3 feature line
|
|
160
|
-
* @returns The parsed feature
|
|
161
|
-
*/
|
|
162
|
-
function parseFeatureNoUnescape(line) {
|
|
163
|
-
return parseFeatureImpl(line, false);
|
|
164
|
-
}
|
|
165
|
-
/**
|
|
166
|
-
* Parse a GFF3 directive line.
|
|
167
|
-
*
|
|
168
|
-
* @param line - GFF3 directive line
|
|
169
|
-
* @returns The parsed directive
|
|
170
|
-
*/
|
|
171
|
-
function parseDirective(line) {
|
|
172
|
-
const match = directiveRegex.exec(line);
|
|
173
|
-
if (!match) {
|
|
174
|
-
return null;
|
|
175
|
-
}
|
|
176
|
-
const name = match[1];
|
|
177
|
-
const contents = match[2];
|
|
178
|
-
const parsed = { directive: name };
|
|
179
|
-
if (contents.length) {
|
|
180
|
-
parsed.value = contents.trimEnd();
|
|
181
|
-
}
|
|
182
|
-
if (name === 'sequence-region') {
|
|
183
|
-
const c = contents.split(whitespaceRegex, 3);
|
|
184
|
-
return {
|
|
185
|
-
...parsed,
|
|
186
|
-
seq_id: c[0],
|
|
187
|
-
start: c[1].replaceAll(nonDigitRegex, ''),
|
|
188
|
-
end: c[2].replaceAll(nonDigitRegex, ''),
|
|
189
|
-
};
|
|
190
|
-
}
|
|
191
|
-
else if (name === 'genome-build') {
|
|
192
|
-
const [source, buildName] = contents.split(whitespaceRegex, 2);
|
|
193
|
-
return {
|
|
194
|
-
...parsed,
|
|
195
|
-
source: source,
|
|
196
|
-
buildName: buildName,
|
|
197
|
-
};
|
|
198
|
-
}
|
|
199
|
-
return parsed;
|
|
200
|
-
}
|
|
201
|
-
// JBrowse format types and parsing functions
|
|
202
51
|
const JBROWSE_DEFAULT_FIELDS = new Set([
|
|
203
52
|
'start',
|
|
204
53
|
'end',
|
|
@@ -232,7 +81,17 @@ const COMMON_ATTRS = {
|
|
|
232
81
|
target: 'target',
|
|
233
82
|
gap: 'gap',
|
|
234
83
|
};
|
|
235
|
-
|
|
84
|
+
const STRAND_MAP = {
|
|
85
|
+
'+': 1,
|
|
86
|
+
'-': -1,
|
|
87
|
+
'.': 0,
|
|
88
|
+
};
|
|
89
|
+
/**
|
|
90
|
+
* Parse the 9th column (attributes) of a GFF3 feature line into `result`,
|
|
91
|
+
* lowercasing keys and suffixing any that collide with a default field name.
|
|
92
|
+
* Pass shouldUnescape=false as a fast path for data with no escaped characters.
|
|
93
|
+
*/
|
|
94
|
+
function parseAttributes(attrString, result, shouldUnescape) {
|
|
236
95
|
if (attrString.length === 0 || attrString === '.') {
|
|
237
96
|
return;
|
|
238
97
|
}
|
|
@@ -277,18 +136,15 @@ function parseAttributesJBrowseImpl(attrString, result, shouldUnescape) {
|
|
|
277
136
|
start = semiIdx + 1;
|
|
278
137
|
}
|
|
279
138
|
}
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
'.': 0,
|
|
290
|
-
};
|
|
291
|
-
function parseFeatureJBrowseImpl(line, shouldUnescape) {
|
|
139
|
+
/**
|
|
140
|
+
* Parse a GFF3 feature line. Pass shouldUnescape=false as a fast path for data
|
|
141
|
+
* known to contain no escaped characters.
|
|
142
|
+
*
|
|
143
|
+
* @param line - GFF3 feature line
|
|
144
|
+
* @param shouldUnescape - whether to unescape percent-encoded values
|
|
145
|
+
* @returns The parsed feature
|
|
146
|
+
*/
|
|
147
|
+
function parseFeature(line, shouldUnescape) {
|
|
292
148
|
const f = line.split('\t');
|
|
293
149
|
const startStr = f[3];
|
|
294
150
|
const endStr = f[4];
|
|
@@ -306,13 +162,7 @@ function parseFeatureJBrowseImpl(line, shouldUnescape) {
|
|
|
306
162
|
phase: isEmpty(phase) ? undefined : +phase,
|
|
307
163
|
subfeatures: [],
|
|
308
164
|
};
|
|
309
|
-
|
|
165
|
+
parseAttributes(attrString, result, shouldUnescape);
|
|
310
166
|
return result;
|
|
311
167
|
}
|
|
312
|
-
function parseFeatureJBrowse(line) {
|
|
313
|
-
return parseFeatureJBrowseImpl(line, true);
|
|
314
|
-
}
|
|
315
|
-
function parseFeatureJBrowseNoUnescape(line) {
|
|
316
|
-
return parseFeatureJBrowseImpl(line, false);
|
|
317
|
-
}
|
|
318
168
|
//# sourceMappingURL=util.js.map
|
package/dist/util.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":";AAAA,
|
|
1
|
+
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":";AAAA,8CAA8C;AAC9C,qEAAqE;;AAerE,4BA2BC;AA8ED,0CAqDC;AAUD,oCAsBC;AA3MD,MAAM,UAAU,GAAuC,EAAE,CAAA;AACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;IAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACzD,UAAU,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IACxC,UAAU,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;AACxD,CAAC;AAED;;;;;GAKG;AACH,SAAgB,QAAQ,CAAC,SAAiB;IACxC,MAAM,GAAG,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAClC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;QACf,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,MAAM,GAAG,EAAE,CAAA;IACf,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,IAAI,CAAC,GAAG,GAAG,CAAA;IAEX,OAAO,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,IAAI,GACR,SAAS,CAAC,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM;YAC9C,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3C,CAAC,CAAC,SAAS,CAAA;QACf,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,IAAI,CAAA;YAC5C,CAAC,IAAI,CAAC,CAAA;YACN,OAAO,GAAG,CAAC,CAAA;QACb,CAAC;aAAM,CAAC;YACN,mEAAmE;YACnE,wEAAwE;YACxE,CAAC,EAAE,CAAA;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;AAC1C,CAAC;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAA;AACpC,CAAC;AAED,SAAS,QAAQ,CACf,CAAS,EACT,cAAuB,EACvB,KAAQ;IAER,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAC9D,CAAC;AAED,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC;IACrC,OAAO;IACP,KAAK;IACL,QAAQ;IACR,OAAO;IACP,MAAM;IACN,QAAQ;IACR,OAAO;IACP,QAAQ;CACT,CAAC,CAAA;AAEF,uEAAuE;AACvE,sCAAsC;AACtC,MAAM,YAAY,GAAuC;IACvD,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,aAAa,EAAE,eAAe;IAC9B,WAAW,EAAE,aAAa;IAC1B,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;IACV,YAAY,EAAE,cAAc;IAC5B,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;CACX,CAAA;AAED,MAAM,UAAU,GAAuC;IACrD,GAAG,EAAE,CAAC;IACN,GAAG,EAAE,CAAC,CAAC;IACP,GAAG,EAAE,CAAC;CACP,CAAA;AAoBD;;;;GAIG;AACH,SAAgB,eAAe,CAC7B,UAAkB,EAClB,MAA+B,EAC/B,cAAuB;IAEvB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;QAClD,OAAM;IACR,CAAC;IAED,IAAI,GAAG,GAAG,UAAU,CAAC,MAAM,CAAA;IAC3B,IAAI,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,GAAG,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAA;QACtD,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACvC,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;QACnB,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAC5C,IAAI,OAAO,KAAK,CAAC,CAAC,EAAE,CAAC;YACnB,OAAO,GAAG,GAAG,CAAA;QACf,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;YACpB,MAAM,KAAK,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;YAC5C,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;gBAC3D,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;gBAC1C,IAAI,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;gBAC3B,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;oBACtB,GAAG,GAAG,GAAG,CAAC,WAAW,EAAE,CAAA;oBACvB,IAAI,sBAAsB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;wBACpC,GAAG,IAAI,GAAG,CAAA;oBACZ,CAAC;gBACH,CAAC;gBAED,MAAM,MAAM,GAAa,EAAE,CAAA;gBAC3B,IAAI,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAA;gBACxB,OAAO,QAAQ,GAAG,OAAO,EAAE,CAAC;oBAC1B,IAAI,QAAQ,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAA;oBAChD,IAAI,QAAQ,KAAK,CAAC,CAAC,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;wBAC1C,QAAQ,GAAG,OAAO,CAAA;oBACpB,CAAC;oBACD,IAAI,QAAQ,GAAG,QAAQ,EAAE,CAAC;wBACxB,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;wBAChD,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;oBACnD,CAAC;oBACD,QAAQ,GAAG,QAAQ,GAAG,CAAC,CAAA;gBACzB,CAAC;gBAED,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAA;YACxD,CAAC;QACH,CAAC;QACD,KAAK,GAAG,OAAO,GAAG,CAAC,CAAA;IACrB,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,SAAgB,YAAY,CAAC,IAAY,EAAE,cAAuB;IAChE,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC1B,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACpB,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACnB,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IAExB,MAAM,MAAM,GAAe;QACzB,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,EAAE,CAAC;QAC5C,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC7C,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC3C,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC;QAC5C,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM;QAClC,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ;QAChD,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC;QACzB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK;QAC1C,WAAW,EAAE,EAAE;KAChB,CAAA;IAED,eAAe,CAAC,UAAU,EAAE,MAAM,EAAE,cAAc,CAAC,CAAA;IACnD,OAAO,MAAM,CAAA;AACf,CAAC"}
|
package/esm/api.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { GffFeature } from './util.ts';
|
|
2
2
|
interface ParseInput {
|
|
3
3
|
line: string;
|
|
4
4
|
lineHash?: string | number;
|
|
@@ -16,33 +16,18 @@ export interface LineRecord extends ParseInput {
|
|
|
16
16
|
export declare function extractType(line: string): string;
|
|
17
17
|
/**
|
|
18
18
|
* Synchronously parse a string containing GFF3 and return an array of the
|
|
19
|
-
* parsed
|
|
19
|
+
* parsed features. Comments, directives, and `##FASTA` sections are ignored.
|
|
20
20
|
*
|
|
21
21
|
* @param str - GFF3 string
|
|
22
22
|
* @returns array of parsed features
|
|
23
23
|
*/
|
|
24
|
-
export declare function parseStringSync(str: string):
|
|
25
|
-
/**
|
|
26
|
-
* Synchronously parse a string containing GFF3 directly into JBrowse format.
|
|
27
|
-
*
|
|
28
|
-
* @param str - GFF3 string
|
|
29
|
-
* @returns array of JBrowse-format features
|
|
30
|
-
*/
|
|
31
|
-
export declare function parseStringSyncJBrowse(str: string): JBrowseFeature[];
|
|
24
|
+
export declare function parseStringSync(str: string): GffFeature[];
|
|
32
25
|
/**
|
|
33
26
|
* Parse an array of LineRecord objects containing raw GFF3 lines.
|
|
34
|
-
* Supports parent/child relationships.
|
|
35
|
-
*
|
|
36
|
-
* @param records - Array of LineRecord objects with raw line and metadata
|
|
37
|
-
* @returns array of parsed features
|
|
38
|
-
*/
|
|
39
|
-
export declare function parseRecords(records: ParseInput[]): GFF3Feature[];
|
|
40
|
-
/**
|
|
41
|
-
* Parse an array of LineRecord objects directly into JBrowse feature format.
|
|
42
27
|
* Supports parent/child relationships via subfeatures.
|
|
43
28
|
*
|
|
44
29
|
* @param records - Array of LineRecord objects with raw line and metadata
|
|
45
|
-
* @returns array of
|
|
30
|
+
* @returns array of parsed features
|
|
46
31
|
*/
|
|
47
|
-
export declare function
|
|
48
|
-
export type {
|
|
32
|
+
export declare function parseRecords(records: ParseInput[]): GffFeature[];
|
|
33
|
+
export type { GffFeature } from './util.ts';
|
package/esm/api.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { parseFeature
|
|
1
|
+
import { parseFeature } from "./util.js";
|
|
2
2
|
/** Extract the GFF3 feature type (column 3) from a raw line without a full split. */
|
|
3
3
|
export function extractType(line) {
|
|
4
4
|
const t1 = line.indexOf('\t');
|
|
@@ -17,8 +17,8 @@ function appendOrphan(orphans, key, value) {
|
|
|
17
17
|
}
|
|
18
18
|
}
|
|
19
19
|
/**
|
|
20
|
-
* The
|
|
21
|
-
*
|
|
20
|
+
* The parser collapses single-element attribute arrays to scalars, so a raw
|
|
21
|
+
* ID/Parent value can be a string, a string array, or absent. These coerce
|
|
22
22
|
* those `unknown` values without typecasts.
|
|
23
23
|
*/
|
|
24
24
|
function firstString(value) {
|
|
@@ -33,7 +33,7 @@ function toStringArray(value) {
|
|
|
33
33
|
}
|
|
34
34
|
/**
|
|
35
35
|
* Synchronously parse a string containing GFF3 and return an array of the
|
|
36
|
-
* parsed
|
|
36
|
+
* parsed features. Comments, directives, and `##FASTA` sections are ignored.
|
|
37
37
|
*
|
|
38
38
|
* @param str - GFF3 string
|
|
39
39
|
* @returns array of parsed features
|
|
@@ -41,15 +41,6 @@ function toStringArray(value) {
|
|
|
41
41
|
export function parseStringSync(str) {
|
|
42
42
|
return parseRecords(stringToRecords(str));
|
|
43
43
|
}
|
|
44
|
-
/**
|
|
45
|
-
* Synchronously parse a string containing GFF3 directly into JBrowse format.
|
|
46
|
-
*
|
|
47
|
-
* @param str - GFF3 string
|
|
48
|
-
* @returns array of JBrowse-format features
|
|
49
|
-
*/
|
|
50
|
-
export function parseStringSyncJBrowse(str) {
|
|
51
|
-
return parseRecordsJBrowse(stringToRecords(str));
|
|
52
|
-
}
|
|
53
44
|
function stringToRecords(str) {
|
|
54
45
|
const lines = str.split(/\r?\n/);
|
|
55
46
|
const records = [];
|
|
@@ -69,7 +60,7 @@ function stringToRecords(str) {
|
|
|
69
60
|
}
|
|
70
61
|
/**
|
|
71
62
|
* Parse an array of LineRecord objects containing raw GFF3 lines.
|
|
72
|
-
* Supports parent/child relationships.
|
|
63
|
+
* Supports parent/child relationships via subfeatures.
|
|
73
64
|
*
|
|
74
65
|
* @param records - Array of LineRecord objects with raw line and metadata
|
|
75
66
|
* @returns array of parsed features
|
|
@@ -79,126 +70,40 @@ export function parseRecords(records) {
|
|
|
79
70
|
const byId = new Map();
|
|
80
71
|
const orphans = new Map();
|
|
81
72
|
for (const record of records) {
|
|
82
|
-
const
|
|
83
|
-
? parseFeature(record.line)
|
|
84
|
-
: parseFeatureNoUnescape(record.line);
|
|
85
|
-
const featureLine = {
|
|
86
|
-
...parsed,
|
|
87
|
-
child_features: [],
|
|
88
|
-
derived_features: [],
|
|
89
|
-
};
|
|
90
|
-
if (record.lineHash !== undefined) {
|
|
91
|
-
featureLine.attributes ??= {};
|
|
92
|
-
featureLine.attributes._lineHash = [String(record.lineHash)];
|
|
93
|
-
}
|
|
94
|
-
const attrs = featureLine.attributes;
|
|
95
|
-
const ids = attrs?.ID;
|
|
96
|
-
const parents = attrs?.Parent;
|
|
97
|
-
if (!ids && !parents) {
|
|
98
|
-
items.push([featureLine]);
|
|
99
|
-
}
|
|
100
|
-
else {
|
|
101
|
-
let feature;
|
|
102
|
-
if (ids) {
|
|
103
|
-
const id = ids[0];
|
|
104
|
-
const existing = byId.get(id);
|
|
105
|
-
if (existing) {
|
|
106
|
-
// Multi-location continuation: share child_features/derived_features
|
|
107
|
-
// with the first line so children remain visible across all lines
|
|
108
|
-
// regardless of arrival order.
|
|
109
|
-
featureLine.child_features = existing[0].child_features;
|
|
110
|
-
featureLine.derived_features = existing[0].derived_features;
|
|
111
|
-
existing.push(featureLine);
|
|
112
|
-
feature = existing;
|
|
113
|
-
}
|
|
114
|
-
else {
|
|
115
|
-
feature = [featureLine];
|
|
116
|
-
if (!parents) {
|
|
117
|
-
items.push(feature);
|
|
118
|
-
}
|
|
119
|
-
byId.set(id, feature);
|
|
120
|
-
const waiting = orphans.get(id);
|
|
121
|
-
if (waiting) {
|
|
122
|
-
for (const w of waiting) {
|
|
123
|
-
featureLine.child_features.push(w);
|
|
124
|
-
}
|
|
125
|
-
orphans.delete(id);
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
else {
|
|
130
|
-
feature = [featureLine];
|
|
131
|
-
}
|
|
132
|
-
if (parents) {
|
|
133
|
-
for (const parentId of parents) {
|
|
134
|
-
const parent = byId.get(parentId);
|
|
135
|
-
if (parent) {
|
|
136
|
-
// child_features is shared across all parent feature lines,
|
|
137
|
-
// so push once via the first line.
|
|
138
|
-
parent[0].child_features.push(feature);
|
|
139
|
-
}
|
|
140
|
-
else {
|
|
141
|
-
appendOrphan(orphans, parentId, feature);
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
}
|
|
147
|
-
return items;
|
|
148
|
-
}
|
|
149
|
-
/**
|
|
150
|
-
* Parse an array of LineRecord objects directly into JBrowse feature format.
|
|
151
|
-
* Supports parent/child relationships via subfeatures.
|
|
152
|
-
*
|
|
153
|
-
* @param records - Array of LineRecord objects with raw line and metadata
|
|
154
|
-
* @returns array of JBrowse-format features
|
|
155
|
-
*/
|
|
156
|
-
export function parseRecordsJBrowse(records) {
|
|
157
|
-
const items = [];
|
|
158
|
-
const byId = new Map();
|
|
159
|
-
const orphans = new Map();
|
|
160
|
-
for (const record of records) {
|
|
161
|
-
const feature = record.hasEscapes
|
|
162
|
-
? parseFeatureJBrowse(record.line)
|
|
163
|
-
: parseFeatureJBrowseNoUnescape(record.line);
|
|
73
|
+
const feature = parseFeature(record.line, record.hasEscapes);
|
|
164
74
|
if (record.lineHash !== undefined) {
|
|
165
75
|
feature._lineHash = String(record.lineHash);
|
|
166
76
|
}
|
|
167
77
|
const id = firstString(feature.id);
|
|
168
78
|
const parents = toStringArray(feature.parent);
|
|
169
|
-
|
|
79
|
+
// A parentless line is a top-level item. Every line of a top-level
|
|
80
|
+
// discontinuous feature (e.g. cDNA_match/EST_match spanning several
|
|
81
|
+
// segments under one shared ID, with no Parent) is its own top-level
|
|
82
|
+
// item, so push regardless of whether the id is already registered.
|
|
83
|
+
if (parents.length === 0) {
|
|
170
84
|
items.push(feature);
|
|
171
85
|
}
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
// one ID across lines) skip registration but must still be attached to
|
|
183
|
-
// their parent below, so this is independent of the parent handling.
|
|
184
|
-
if (id && !byId.has(id)) {
|
|
185
|
-
byId.set(id, feature);
|
|
186
|
-
const waiting = orphans.get(id);
|
|
187
|
-
if (waiting) {
|
|
188
|
-
for (const w of waiting) {
|
|
189
|
-
feature.subfeatures.push(w);
|
|
190
|
-
}
|
|
191
|
-
orphans.delete(id);
|
|
86
|
+
// Register the id only the first time it is seen. Continuation lines
|
|
87
|
+
// (multi-location features such as a CDS spanning several segments share
|
|
88
|
+
// one ID across lines) skip registration but must still be attached to
|
|
89
|
+
// their parent below, so this is independent of the parent handling.
|
|
90
|
+
if (id && !byId.has(id)) {
|
|
91
|
+
byId.set(id, feature);
|
|
92
|
+
const waiting = orphans.get(id);
|
|
93
|
+
if (waiting) {
|
|
94
|
+
for (const w of waiting) {
|
|
95
|
+
feature.subfeatures.push(w);
|
|
192
96
|
}
|
|
97
|
+
orphans.delete(id);
|
|
193
98
|
}
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
99
|
+
}
|
|
100
|
+
for (const parentId of parents) {
|
|
101
|
+
const parentFeature = byId.get(parentId);
|
|
102
|
+
if (parentFeature) {
|
|
103
|
+
parentFeature.subfeatures.push(feature);
|
|
104
|
+
}
|
|
105
|
+
else {
|
|
106
|
+
appendOrphan(orphans, parentId, feature);
|
|
202
107
|
}
|
|
203
108
|
}
|
|
204
109
|
}
|
package/esm/api.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api.js","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"api.js","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAA;AAmBxC,qFAAqF;AACrF,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;IAC7B,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,CAAA;IACrC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,CAAA;IACrC,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC,CAAA;AAC/B,CAAC;AAED,kFAAkF;AAClF,SAAS,YAAY,CAAI,OAAyB,EAAE,GAAW,EAAE,KAAQ;IACvE,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAC5B,IAAI,GAAG,EAAE,CAAC;QACR,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IACjB,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,CAAA;IAC3B,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,KAAc;IACjC,MAAM,CAAC,GAAY,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;IAC1D,OAAO,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;AAC9C,CAAC;AAED,SAAS,aAAa,CAAC,KAAc;IACnC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAA;IAChE,CAAC;IACD,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;AACjD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,OAAO,YAAY,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAA;AAC3C,CAAC;AAED,SAAS,eAAe,CAAC,GAAW;IAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;IAChC,MAAM,OAAO,GAAiB,EAAE,CAAA;IAChC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACvD,MAAK;QACP,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC9C,SAAQ;QACV,CAAC;QACD,OAAO,CAAC,IAAI,CAAC;YACX,IAAI;YACJ,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;SAC/B,CAAC,CAAA;IACJ,CAAC;IACD,OAAO,OAAO,CAAA;AAChB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,OAAqB;IAChD,MAAM,KAAK,GAAiB,EAAE,CAAA;IAC9B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAsB,CAAA;IAC1C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAwB,CAAA;IAE/C,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,CAAC,CAAA;QAE5D,IAAI,MAAM,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;YAClC,OAAO,CAAC,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;QAC7C,CAAC;QAED,MAAM,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QAClC,MAAM,OAAO,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM,CAAC,CAAA;QAE7C,mEAAmE;QACnE,oEAAoE;QACpE,qEAAqE;QACrE,oEAAoE;QACpE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACrB,CAAC;QAED,qEAAqE;QACrE,yEAAyE;QACzE,uEAAuE;QACvE,qEAAqE;QACrE,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YACxB,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAAC,CAAA;YACrB,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;YAC/B,IAAI,OAAO,EAAE,CAAC;gBACZ,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;oBACxB,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBAC7B,CAAC;gBACD,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAA;YACpB,CAAC;QACH,CAAC;QAED,KAAK,MAAM,QAAQ,IAAI,OAAO,EAAE,CAAC;YAC/B,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAA;YACxC,IAAI,aAAa,EAAE,CAAC;gBAClB,aAAa,CAAC,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACzC,CAAC;iBAAM,CAAC;gBACN,YAAY,CAAC,OAAO,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAA;YAC1C,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAA;AACd,CAAC"}
|
package/esm/index.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { extractType, parseRecords,
|
|
2
|
-
export type {
|
|
1
|
+
export { extractType, parseRecords, parseStringSync } from './api.ts';
|
|
2
|
+
export type { GffFeature, LineRecord } from './api.ts';
|
package/esm/index.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { extractType, parseRecords,
|
|
1
|
+
export { extractType, parseRecords, parseStringSync } from "./api.js";
|
|
2
2
|
//# sourceMappingURL=index.js.map
|
package/esm/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,UAAU,CAAA"}
|