gff-nostream 3.0.11 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -94
- package/dist/api.d.ts +21 -34
- package/dist/api.js +53 -147
- package/dist/api.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -3
- package/dist/index.js.map +1 -1
- package/dist/util.d.ts +19 -120
- package/dist/util.js +29 -179
- package/dist/util.js.map +1 -1
- package/esm/api.d.ts +21 -34
- package/esm/api.js +54 -146
- package/esm/api.js.map +1 -1
- package/esm/index.d.ts +2 -2
- package/esm/index.js +1 -1
- package/esm/index.js.map +1 -1
- package/esm/util.d.ts +19 -120
- package/esm/util.js +29 -172
- package/esm/util.js.map +1 -1
- package/package.json +1 -1
- package/src/api.ts +80 -187
- package/src/index.ts +2 -18
- package/src/util.ts +39 -308
package/esm/util.js
CHANGED
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
// Fast, low-level functions for parsing
|
|
1
|
+
// Fast, low-level functions for parsing GFF3.
|
|
2
2
|
// JavaScript port of Robert Buels's Bio::GFF3::LowLevel Perl module.
|
|
3
|
-
const directiveRegex = /^\s*##\s*(\S+)\s*(.*)/;
|
|
4
|
-
const whitespaceRegex = /\s+/;
|
|
5
|
-
const nonDigitRegex = /\D/g;
|
|
6
3
|
const HEX_LOOKUP = {};
|
|
7
4
|
for (let i = 0; i < 256; i++) {
|
|
8
5
|
const hex = i.toString(16).toUpperCase().padStart(2, '0');
|
|
@@ -24,169 +21,28 @@ export function unescape(stringVal) {
|
|
|
24
21
|
let lastIdx = 0;
|
|
25
22
|
let i = idx;
|
|
26
23
|
while (i < stringVal.length) {
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
result += char;
|
|
33
|
-
}
|
|
34
|
-
else {
|
|
35
|
-
result += stringVal.slice(i, i + 3);
|
|
36
|
-
}
|
|
24
|
+
const char = stringVal[i] === '%' && i + 2 < stringVal.length
|
|
25
|
+
? HEX_LOOKUP[stringVal.slice(i + 1, i + 3)]
|
|
26
|
+
: undefined;
|
|
27
|
+
if (char !== undefined) {
|
|
28
|
+
result += stringVal.slice(lastIdx, i) + char;
|
|
37
29
|
i += 3;
|
|
38
30
|
lastIdx = i;
|
|
39
31
|
}
|
|
40
32
|
else {
|
|
33
|
+
// Not a valid escape: advance one char so a '%' that begins a real
|
|
34
|
+
// escape immediately after isn't swallowed (e.g. the %20 in "a%b%20c").
|
|
41
35
|
i++;
|
|
42
36
|
}
|
|
43
37
|
}
|
|
44
38
|
return result + stringVal.slice(lastIdx);
|
|
45
39
|
}
|
|
46
|
-
function parseAttributesImpl(attrString, shouldUnescape) {
|
|
47
|
-
if (attrString.length === 0 || attrString === '.') {
|
|
48
|
-
return {};
|
|
49
|
-
}
|
|
50
|
-
const attrs = {};
|
|
51
|
-
let len = attrString.length;
|
|
52
|
-
if (attrString[len - 1] === '\n') {
|
|
53
|
-
len = attrString[len - 2] === '\r' ? len - 2 : len - 1;
|
|
54
|
-
attrString = attrString.slice(0, len);
|
|
55
|
-
}
|
|
56
|
-
let start = 0;
|
|
57
|
-
while (start < len) {
|
|
58
|
-
let semiIdx = attrString.indexOf(';', start);
|
|
59
|
-
if (semiIdx === -1) {
|
|
60
|
-
semiIdx = len;
|
|
61
|
-
}
|
|
62
|
-
if (semiIdx > start) {
|
|
63
|
-
const eqIdx = attrString.indexOf('=', start);
|
|
64
|
-
if (eqIdx !== -1 && eqIdx < semiIdx && eqIdx + 1 < semiIdx) {
|
|
65
|
-
const tag = attrString.slice(start, eqIdx);
|
|
66
|
-
let arec = attrs[tag];
|
|
67
|
-
if (!arec) {
|
|
68
|
-
arec = [];
|
|
69
|
-
attrs[tag] = arec;
|
|
70
|
-
}
|
|
71
|
-
let valStart = eqIdx + 1;
|
|
72
|
-
while (valStart < semiIdx) {
|
|
73
|
-
let commaIdx = attrString.indexOf(',', valStart);
|
|
74
|
-
if (commaIdx === -1 || commaIdx > semiIdx) {
|
|
75
|
-
commaIdx = semiIdx;
|
|
76
|
-
}
|
|
77
|
-
if (commaIdx > valStart) {
|
|
78
|
-
const val = attrString.slice(valStart, commaIdx);
|
|
79
|
-
arec.push(shouldUnescape ? unescape(val) : val);
|
|
80
|
-
}
|
|
81
|
-
valStart = commaIdx + 1;
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
start = semiIdx + 1;
|
|
86
|
-
}
|
|
87
|
-
return attrs;
|
|
88
|
-
}
|
|
89
|
-
/**
|
|
90
|
-
* Parse the 9th column (attributes) of a GFF3 feature line.
|
|
91
|
-
*
|
|
92
|
-
* @param attrString - String of GFF3 9th column
|
|
93
|
-
* @returns Parsed attributes
|
|
94
|
-
*/
|
|
95
|
-
export function parseAttributes(attrString) {
|
|
96
|
-
return parseAttributesImpl(attrString, true);
|
|
97
|
-
}
|
|
98
|
-
/**
|
|
99
|
-
* Parse the 9th column (attributes) of a GFF3 feature line without unescaping.
|
|
100
|
-
* Fast path for data known to contain no escaped characters.
|
|
101
|
-
*
|
|
102
|
-
* @param attrString - String of GFF3 9th column
|
|
103
|
-
* @returns Parsed attributes
|
|
104
|
-
*/
|
|
105
|
-
export function parseAttributesNoUnescape(attrString) {
|
|
106
|
-
return parseAttributesImpl(attrString, false);
|
|
107
|
-
}
|
|
108
40
|
function isEmpty(s) {
|
|
109
41
|
return s.length === 0 || s === '.';
|
|
110
42
|
}
|
|
111
43
|
function strField(s, shouldUnescape, empty) {
|
|
112
44
|
return isEmpty(s) ? empty : shouldUnescape ? unescape(s) : s;
|
|
113
45
|
}
|
|
114
|
-
function numField(s) {
|
|
115
|
-
return isEmpty(s) ? null : +s;
|
|
116
|
-
}
|
|
117
|
-
function parseFeatureImpl(line, shouldUnescape) {
|
|
118
|
-
const f = line.split('\t');
|
|
119
|
-
const attrString = f[8];
|
|
120
|
-
return {
|
|
121
|
-
seq_id: strField(f[0], shouldUnescape, null),
|
|
122
|
-
source: strField(f[1], shouldUnescape, null),
|
|
123
|
-
type: strField(f[2], shouldUnescape, null),
|
|
124
|
-
start: numField(f[3]),
|
|
125
|
-
end: numField(f[4]),
|
|
126
|
-
score: numField(f[5]),
|
|
127
|
-
strand: strField(f[6], false, null),
|
|
128
|
-
phase: strField(f[7], false, null),
|
|
129
|
-
attributes: isEmpty(attrString)
|
|
130
|
-
? null
|
|
131
|
-
: parseAttributesImpl(attrString, shouldUnescape),
|
|
132
|
-
};
|
|
133
|
-
}
|
|
134
|
-
/**
|
|
135
|
-
* Parse a GFF3 feature line
|
|
136
|
-
*
|
|
137
|
-
* @param line - GFF3 feature line
|
|
138
|
-
* @returns The parsed feature
|
|
139
|
-
*/
|
|
140
|
-
export function parseFeature(line) {
|
|
141
|
-
return parseFeatureImpl(line, true);
|
|
142
|
-
}
|
|
143
|
-
/**
|
|
144
|
-
* Parse a GFF3 feature line without unescaping.
|
|
145
|
-
* Fast path for data known to contain no escaped characters.
|
|
146
|
-
*
|
|
147
|
-
* @param line - GFF3 feature line
|
|
148
|
-
* @returns The parsed feature
|
|
149
|
-
*/
|
|
150
|
-
export function parseFeatureNoUnescape(line) {
|
|
151
|
-
return parseFeatureImpl(line, false);
|
|
152
|
-
}
|
|
153
|
-
/**
|
|
154
|
-
* Parse a GFF3 directive line.
|
|
155
|
-
*
|
|
156
|
-
* @param line - GFF3 directive line
|
|
157
|
-
* @returns The parsed directive
|
|
158
|
-
*/
|
|
159
|
-
export function parseDirective(line) {
|
|
160
|
-
const match = directiveRegex.exec(line);
|
|
161
|
-
if (!match) {
|
|
162
|
-
return null;
|
|
163
|
-
}
|
|
164
|
-
const name = match[1];
|
|
165
|
-
const contents = match[2];
|
|
166
|
-
const parsed = { directive: name };
|
|
167
|
-
if (contents.length) {
|
|
168
|
-
parsed.value = contents.trimEnd();
|
|
169
|
-
}
|
|
170
|
-
if (name === 'sequence-region') {
|
|
171
|
-
const c = contents.split(whitespaceRegex, 3);
|
|
172
|
-
return {
|
|
173
|
-
...parsed,
|
|
174
|
-
seq_id: c[0],
|
|
175
|
-
start: c[1].replaceAll(nonDigitRegex, ''),
|
|
176
|
-
end: c[2].replaceAll(nonDigitRegex, ''),
|
|
177
|
-
};
|
|
178
|
-
}
|
|
179
|
-
else if (name === 'genome-build') {
|
|
180
|
-
const [source, buildName] = contents.split(whitespaceRegex, 2);
|
|
181
|
-
return {
|
|
182
|
-
...parsed,
|
|
183
|
-
source: source,
|
|
184
|
-
buildName: buildName,
|
|
185
|
-
};
|
|
186
|
-
}
|
|
187
|
-
return parsed;
|
|
188
|
-
}
|
|
189
|
-
// JBrowse format types and parsing functions
|
|
190
46
|
const JBROWSE_DEFAULT_FIELDS = new Set([
|
|
191
47
|
'start',
|
|
192
48
|
'end',
|
|
@@ -220,7 +76,17 @@ const COMMON_ATTRS = {
|
|
|
220
76
|
target: 'target',
|
|
221
77
|
gap: 'gap',
|
|
222
78
|
};
|
|
223
|
-
|
|
79
|
+
const STRAND_MAP = {
|
|
80
|
+
'+': 1,
|
|
81
|
+
'-': -1,
|
|
82
|
+
'.': 0,
|
|
83
|
+
};
|
|
84
|
+
/**
|
|
85
|
+
* Parse the 9th column (attributes) of a GFF3 feature line into `result`,
|
|
86
|
+
* lowercasing keys and suffixing any that collide with a default field name.
|
|
87
|
+
* Pass shouldUnescape=false as a fast path for data with no escaped characters.
|
|
88
|
+
*/
|
|
89
|
+
export function parseAttributes(attrString, result, shouldUnescape) {
|
|
224
90
|
if (attrString.length === 0 || attrString === '.') {
|
|
225
91
|
return;
|
|
226
92
|
}
|
|
@@ -265,19 +131,16 @@ function parseAttributesJBrowseImpl(attrString, result, shouldUnescape) {
|
|
|
265
131
|
start = semiIdx + 1;
|
|
266
132
|
}
|
|
267
133
|
}
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
'-': -1,
|
|
277
|
-
'.': 0,
|
|
278
|
-
};
|
|
279
|
-
function parseFeatureJBrowseImpl(line, shouldUnescape) {
|
|
134
|
+
/**
|
|
135
|
+
* Parse a GFF3 feature line. Unescaping is skipped entirely for lines with no
|
|
136
|
+
* '%' character, which is the common case.
|
|
137
|
+
*
|
|
138
|
+
* @param line - GFF3 feature line
|
|
139
|
+
* @returns The parsed feature
|
|
140
|
+
*/
|
|
141
|
+
export function parseFeature(line) {
|
|
280
142
|
const f = line.split('\t');
|
|
143
|
+
const shouldUnescape = line.includes('%');
|
|
281
144
|
const startStr = f[3];
|
|
282
145
|
const endStr = f[4];
|
|
283
146
|
const scoreStr = f[5];
|
|
@@ -294,13 +157,7 @@ function parseFeatureJBrowseImpl(line, shouldUnescape) {
|
|
|
294
157
|
phase: isEmpty(phase) ? undefined : +phase,
|
|
295
158
|
subfeatures: [],
|
|
296
159
|
};
|
|
297
|
-
|
|
160
|
+
parseAttributes(attrString, result, shouldUnescape);
|
|
298
161
|
return result;
|
|
299
162
|
}
|
|
300
|
-
export function parseFeatureJBrowse(line) {
|
|
301
|
-
return parseFeatureJBrowseImpl(line, true);
|
|
302
|
-
}
|
|
303
|
-
export function parseFeatureJBrowseNoUnescape(line) {
|
|
304
|
-
return parseFeatureJBrowseImpl(line, false);
|
|
305
|
-
}
|
|
306
163
|
//# sourceMappingURL=util.js.map
|
package/esm/util.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,
|
|
1
|
+
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,8CAA8C;AAC9C,qEAAqE;AAErE,MAAM,UAAU,GAAuC,EAAE,CAAA;AACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;IAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACzD,UAAU,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IACxC,UAAU,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;AACxD,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,QAAQ,CAAC,SAAiB;IACxC,MAAM,GAAG,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAClC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;QACf,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,MAAM,GAAG,EAAE,CAAA;IACf,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,IAAI,CAAC,GAAG,GAAG,CAAA;IAEX,OAAO,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,IAAI,GACR,SAAS,CAAC,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM;YAC9C,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3C,CAAC,CAAC,SAAS,CAAA;QACf,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,IAAI,CAAA;YAC5C,CAAC,IAAI,CAAC,CAAA;YACN,OAAO,GAAG,CAAC,CAAA;QACb,CAAC;aAAM,CAAC;YACN,mEAAmE;YACnE,wEAAwE;YACxE,CAAC,EAAE,CAAA;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;AAC1C,CAAC;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAA;AACpC,CAAC;AAED,SAAS,QAAQ,CACf,CAAS,EACT,cAAuB,EACvB,KAAQ;IAER,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAC9D,CAAC;AAED,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC;IACrC,OAAO;IACP,KAAK;IACL,QAAQ;IACR,OAAO;IACP,MAAM;IACN,QAAQ;IACR,OAAO;IACP,QAAQ;CACT,CAAC,CAAA;AAEF,uEAAuE;AACvE,sCAAsC;AACtC,MAAM,YAAY,GAAuC;IACvD,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,aAAa,EAAE,eAAe;IAC9B,WAAW,EAAE,aAAa;IAC1B,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;IACV,YAAY,EAAE,cAAc;IAC5B,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;CACX,CAAA;AAED,MAAM,UAAU,GAAuC;IACrD,GAAG,EAAE,CAAC;IACN,GAAG,EAAE,CAAC,CAAC;IACP,GAAG,EAAE,CAAC;CACP,CAAA;AAoBD;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAC7B,UAAkB,EAClB,MAA+B,EAC/B,cAAuB;IAEvB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;QAClD,OAAM;IACR,CAAC;IAED,IAAI,GAAG,GAAG,UAAU,CAAC,MAAM,CAAA;IAC3B,IAAI,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,GAAG,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAA;QACtD,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACvC,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;QACnB,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAC5C,IAAI,OAAO,KAAK,CAAC,CAAC,EAAE,CAAC;YACnB,OAAO,GAAG,GAAG,CAAA;QACf,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;YACpB,MAAM,KAAK,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;YAC5C,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;gBAC3D,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;gBAC1C,IAAI,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;gBAC3B,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;oBACtB,GAAG,GAAG,GAAG,CAAC,WAAW,EAAE,CAAA;oBACvB,IAAI,sBAAsB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;wBACpC,GAAG,IAAI,GAAG,CAAA;oBACZ,CAAC;gBACH,CAAC;gBAED,MAAM,MAAM,GAAa,EAAE,CAAA;gBAC3B,IAAI,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAA;gBACxB,OAAO,QAAQ,GAAG,OAAO,EAAE,CAAC;oBAC1B,IAAI,QAAQ,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAA;oBAChD,IAAI,QAAQ,KAAK,CAAC,CAAC,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;wBAC1C,QAAQ,GAAG,OAAO,CAAA;oBACpB,CAAC;oBACD,IAAI,QAAQ,GAAG,QAAQ,EAAE,CAAC;wBACxB,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;wBAChD,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;oBACnD,CAAC;oBACD,QAAQ,GAAG,QAAQ,GAAG,CAAC,CAAA;gBACzB,CAAC;gBAED,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAA;YACxD,CAAC;QACH,CAAC;QACD,KAAK,GAAG,OAAO,GAAG,CAAC,CAAA;IACrB,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC1B,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAA;IACzC,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACpB,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACnB,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IAExB,MAAM,MAAM,GAAe;QACzB,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,EAAE,CAAC;QAC5C,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC7C,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC3C,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC;QAC5C,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM;QAClC,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ;QAChD,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC;QACzB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK;QAC1C,WAAW,EAAE,EAAE;KAChB,CAAA;IAED,eAAe,CAAC,UAAU,EAAE,MAAM,EAAE,cAAc,CAAC,CAAA;IACnD,OAAO,MAAM,CAAA;AACf,CAAC"}
|
package/package.json
CHANGED
package/src/api.ts
CHANGED
|
@@ -1,29 +1,20 @@
|
|
|
1
|
-
import {
|
|
2
|
-
parseFeature,
|
|
3
|
-
parseFeatureJBrowse,
|
|
4
|
-
parseFeatureJBrowseNoUnescape,
|
|
5
|
-
parseFeatureNoUnescape,
|
|
6
|
-
} from './util.ts'
|
|
1
|
+
import { parseFeature } from './util.ts'
|
|
7
2
|
|
|
8
|
-
import type {
|
|
9
|
-
GFF3Feature,
|
|
10
|
-
GFF3FeatureLineWithRefs,
|
|
11
|
-
JBrowseFeature,
|
|
12
|
-
} from './util.ts'
|
|
3
|
+
import type { GffFeature } from './util.ts'
|
|
13
4
|
|
|
14
|
-
interface
|
|
5
|
+
export interface LineRecord {
|
|
6
|
+
/** Raw GFF3 feature line */
|
|
15
7
|
line: string
|
|
16
|
-
lineHash?: string | number
|
|
17
|
-
hasEscapes: boolean
|
|
18
8
|
}
|
|
19
9
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
10
|
+
/**
|
|
11
|
+
* A top-level parsed feature paired with the input record it came from. The
|
|
12
|
+
* parser stamps no identity onto the feature itself; callers that need a stable
|
|
13
|
+
* per-feature id (e.g. from a tabix byte offset) read it off their own `record`.
|
|
14
|
+
*/
|
|
15
|
+
export interface ParsedRecord<R extends LineRecord = LineRecord> {
|
|
16
|
+
feature: GffFeature
|
|
17
|
+
record: R
|
|
27
18
|
}
|
|
28
19
|
|
|
29
20
|
/** Extract the GFF3 feature type (column 3) from a raw line without a full split. */
|
|
@@ -45,8 +36,8 @@ function appendOrphan<T>(orphans: Map<string, T[]>, key: string, value: T) {
|
|
|
45
36
|
}
|
|
46
37
|
|
|
47
38
|
/**
|
|
48
|
-
* The
|
|
49
|
-
*
|
|
39
|
+
* The parser collapses single-element attribute arrays to scalars, so a raw
|
|
40
|
+
* ID/Parent value can be a string, a string array, or absent. These coerce
|
|
50
41
|
* those `unknown` values without typecasts.
|
|
51
42
|
*/
|
|
52
43
|
function firstString(value: unknown): string | undefined {
|
|
@@ -62,119 +53,68 @@ function toStringArray(value: unknown): string[] {
|
|
|
62
53
|
}
|
|
63
54
|
|
|
64
55
|
/**
|
|
65
|
-
*
|
|
66
|
-
*
|
|
67
|
-
*
|
|
68
|
-
* @param str - GFF3 string
|
|
69
|
-
* @returns array of parsed features
|
|
70
|
-
*/
|
|
71
|
-
export function parseStringSync(str: string): GFF3Feature[] {
|
|
72
|
-
return parseRecords(stringToRecords(str))
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
/**
|
|
76
|
-
* Synchronously parse a string containing GFF3 directly into JBrowse format.
|
|
77
|
-
*
|
|
78
|
-
* @param str - GFF3 string
|
|
79
|
-
* @returns array of JBrowse-format features
|
|
56
|
+
* Register a feature's ID and attach it to its parent(s), building the
|
|
57
|
+
* subfeature tree in `byId`/`orphans`. Returns true when the feature is
|
|
58
|
+
* top-level (has no Parent) and the caller should collect it.
|
|
80
59
|
*/
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
const
|
|
87
|
-
const
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
60
|
+
function linkFeature(
|
|
61
|
+
feature: GffFeature,
|
|
62
|
+
byId: Map<string, GffFeature>,
|
|
63
|
+
orphans: Map<string, GffFeature[]>,
|
|
64
|
+
): boolean {
|
|
65
|
+
const id = firstString(feature.id)
|
|
66
|
+
const parents = toStringArray(feature.parent)
|
|
67
|
+
|
|
68
|
+
// Register the id only the first time it is seen. Continuation lines
|
|
69
|
+
// (multi-location features such as a CDS spanning several segments share one
|
|
70
|
+
// ID across lines) skip registration but must still be attached to their
|
|
71
|
+
// parent below, so this is independent of the parent handling.
|
|
72
|
+
if (id && !byId.has(id)) {
|
|
73
|
+
byId.set(id, feature)
|
|
74
|
+
const waiting = orphans.get(id)
|
|
75
|
+
if (waiting) {
|
|
76
|
+
for (const w of waiting) {
|
|
77
|
+
feature.subfeatures.push(w)
|
|
78
|
+
}
|
|
79
|
+
orphans.delete(id)
|
|
91
80
|
}
|
|
92
|
-
|
|
93
|
-
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
for (const parentId of parents) {
|
|
84
|
+
const parentFeature = byId.get(parentId)
|
|
85
|
+
if (parentFeature) {
|
|
86
|
+
parentFeature.subfeatures.push(feature)
|
|
87
|
+
} else {
|
|
88
|
+
appendOrphan(orphans, parentId, feature)
|
|
94
89
|
}
|
|
95
|
-
records.push({
|
|
96
|
-
line,
|
|
97
|
-
hasEscapes: line.includes('%'),
|
|
98
|
-
})
|
|
99
90
|
}
|
|
100
|
-
|
|
91
|
+
|
|
92
|
+
// Every line of a top-level discontinuous feature (e.g. cDNA_match spanning
|
|
93
|
+
// several segments under one shared ID, with no Parent) is its own top-level
|
|
94
|
+
// item, so this is independent of whether the id was just registered.
|
|
95
|
+
return parents.length === 0
|
|
101
96
|
}
|
|
102
97
|
|
|
103
98
|
/**
|
|
104
|
-
*
|
|
105
|
-
*
|
|
99
|
+
* Synchronously parse a string containing GFF3 and return an array of the
|
|
100
|
+
* parsed features. Comments, directives, and `##FASTA` sections are ignored.
|
|
106
101
|
*
|
|
107
|
-
* @param
|
|
102
|
+
* @param str - GFF3 string
|
|
108
103
|
* @returns array of parsed features
|
|
109
104
|
*/
|
|
110
|
-
export function
|
|
111
|
-
const items:
|
|
112
|
-
const byId = new Map<string,
|
|
113
|
-
const orphans = new Map<string,
|
|
105
|
+
export function parseStringSync(str: string): GffFeature[] {
|
|
106
|
+
const items: GffFeature[] = []
|
|
107
|
+
const byId = new Map<string, GffFeature>()
|
|
108
|
+
const orphans = new Map<string, GffFeature[]>()
|
|
114
109
|
|
|
115
|
-
for (const
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
: parseFeatureNoUnescape(record.line)
|
|
119
|
-
const featureLine: GFF3FeatureLineWithRefs = {
|
|
120
|
-
...parsed,
|
|
121
|
-
child_features: [],
|
|
122
|
-
derived_features: [],
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
if (record.lineHash !== undefined) {
|
|
126
|
-
featureLine.attributes ??= {}
|
|
127
|
-
featureLine.attributes._lineHash = [String(record.lineHash)]
|
|
110
|
+
for (const line of str.split(/\r?\n/)) {
|
|
111
|
+
if (line.startsWith('##FASTA') || line.startsWith('>')) {
|
|
112
|
+
break
|
|
128
113
|
}
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
if (!ids && !parents) {
|
|
135
|
-
items.push([featureLine])
|
|
136
|
-
} else {
|
|
137
|
-
let feature: GFF3Feature
|
|
138
|
-
if (ids) {
|
|
139
|
-
const id = ids[0]!
|
|
140
|
-
const existing = byId.get(id)
|
|
141
|
-
if (existing) {
|
|
142
|
-
// Multi-location continuation: share child_features/derived_features
|
|
143
|
-
// with the first line so children remain visible across all lines
|
|
144
|
-
// regardless of arrival order.
|
|
145
|
-
featureLine.child_features = existing[0]!.child_features
|
|
146
|
-
featureLine.derived_features = existing[0]!.derived_features
|
|
147
|
-
existing.push(featureLine)
|
|
148
|
-
feature = existing
|
|
149
|
-
} else {
|
|
150
|
-
feature = [featureLine]
|
|
151
|
-
if (!parents) {
|
|
152
|
-
items.push(feature)
|
|
153
|
-
}
|
|
154
|
-
byId.set(id, feature)
|
|
155
|
-
const waiting = orphans.get(id)
|
|
156
|
-
if (waiting) {
|
|
157
|
-
for (const w of waiting) {
|
|
158
|
-
featureLine.child_features.push(w)
|
|
159
|
-
}
|
|
160
|
-
orphans.delete(id)
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
} else {
|
|
164
|
-
feature = [featureLine]
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
if (parents) {
|
|
168
|
-
for (const parentId of parents) {
|
|
169
|
-
const parent = byId.get(parentId)
|
|
170
|
-
if (parent) {
|
|
171
|
-
// child_features is shared across all parent feature lines,
|
|
172
|
-
// so push once via the first line.
|
|
173
|
-
parent[0]!.child_features.push(feature)
|
|
174
|
-
} else {
|
|
175
|
-
appendOrphan(orphans, parentId, feature)
|
|
176
|
-
}
|
|
177
|
-
}
|
|
114
|
+
if (line.length !== 0 && !line.startsWith('#')) {
|
|
115
|
+
const feature = parseFeature(line)
|
|
116
|
+
if (linkFeature(feature, byId, orphans)) {
|
|
117
|
+
items.push(feature)
|
|
178
118
|
}
|
|
179
119
|
}
|
|
180
120
|
}
|
|
@@ -183,76 +123,29 @@ export function parseRecords(records: ParseInput[]): GFF3Feature[] {
|
|
|
183
123
|
}
|
|
184
124
|
|
|
185
125
|
/**
|
|
186
|
-
* Parse an array of
|
|
187
|
-
*
|
|
126
|
+
* Parse an array of records wrapping raw GFF3 lines, resolving parent/child
|
|
127
|
+
* relationships into `subfeatures`. Returns each top-level feature paired with
|
|
128
|
+
* the record it came from, so callers can attach their own identity (e.g. a
|
|
129
|
+
* byte offset) without the parser stamping anything onto the feature.
|
|
188
130
|
*
|
|
189
|
-
* @param records - Array of
|
|
190
|
-
* @returns
|
|
131
|
+
* @param records - Array of records, each carrying a raw GFF3 `line`
|
|
132
|
+
* @returns top-level features, each paired with its originating record
|
|
191
133
|
*/
|
|
192
|
-
export function
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
const
|
|
134
|
+
export function parseRecords<R extends LineRecord>(
|
|
135
|
+
records: readonly R[],
|
|
136
|
+
): ParsedRecord<R>[] {
|
|
137
|
+
const items: ParsedRecord<R>[] = []
|
|
138
|
+
const byId = new Map<string, GffFeature>()
|
|
139
|
+
const orphans = new Map<string, GffFeature[]>()
|
|
196
140
|
|
|
197
141
|
for (const record of records) {
|
|
198
|
-
const feature = record.
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
if (record.lineHash !== undefined) {
|
|
203
|
-
feature._lineHash = String(record.lineHash)
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
const id = firstString(feature.id)
|
|
207
|
-
const parents = toStringArray(feature.parent)
|
|
208
|
-
|
|
209
|
-
if (!id && parents.length === 0) {
|
|
210
|
-
items.push(feature)
|
|
211
|
-
} else {
|
|
212
|
-
// A parentless line is a top-level item. Every line of a top-level
|
|
213
|
-
// discontinuous feature (e.g. cDNA_match/EST_match spanning several
|
|
214
|
-
// segments under one shared ID, with no Parent) is its own top-level
|
|
215
|
-
// item, so push here regardless of whether the id is already registered.
|
|
216
|
-
if (parents.length === 0) {
|
|
217
|
-
items.push(feature)
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
// Register the id only the first time it is seen. Continuation lines
|
|
221
|
-
// (multi-location features such as a CDS spanning several segments share
|
|
222
|
-
// one ID across lines) skip registration but must still be attached to
|
|
223
|
-
// their parent below, so this is independent of the parent handling.
|
|
224
|
-
if (id && !byId.has(id)) {
|
|
225
|
-
byId.set(id, feature)
|
|
226
|
-
const waiting = orphans.get(id)
|
|
227
|
-
if (waiting) {
|
|
228
|
-
for (const w of waiting) {
|
|
229
|
-
feature.subfeatures.push(w)
|
|
230
|
-
}
|
|
231
|
-
orphans.delete(id)
|
|
232
|
-
}
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
for (const parentId of parents) {
|
|
236
|
-
const parentFeature = byId.get(parentId)
|
|
237
|
-
if (parentFeature) {
|
|
238
|
-
parentFeature.subfeatures.push(feature)
|
|
239
|
-
} else {
|
|
240
|
-
appendOrphan(orphans, parentId, feature)
|
|
241
|
-
}
|
|
242
|
-
}
|
|
142
|
+
const feature = parseFeature(record.line)
|
|
143
|
+
if (linkFeature(feature, byId, orphans)) {
|
|
144
|
+
items.push({ feature, record })
|
|
243
145
|
}
|
|
244
146
|
}
|
|
245
147
|
|
|
246
148
|
return items
|
|
247
149
|
}
|
|
248
150
|
|
|
249
|
-
export type {
|
|
250
|
-
GFF3Comment,
|
|
251
|
-
GFF3Directive,
|
|
252
|
-
GFF3Feature,
|
|
253
|
-
GFF3FeatureLine,
|
|
254
|
-
GFF3FeatureLineWithRefs,
|
|
255
|
-
GFF3Item,
|
|
256
|
-
GFF3Sequence,
|
|
257
|
-
JBrowseFeature,
|
|
258
|
-
} from './util.ts'
|
|
151
|
+
export type { GffFeature } from './util.ts'
|
package/src/index.ts
CHANGED
|
@@ -1,19 +1,3 @@
|
|
|
1
|
-
export {
|
|
2
|
-
extractType,
|
|
3
|
-
parseRecords,
|
|
4
|
-
parseRecordsJBrowse,
|
|
5
|
-
parseStringSync,
|
|
6
|
-
parseStringSyncJBrowse,
|
|
7
|
-
} from './api.ts'
|
|
1
|
+
export { extractType, parseRecords, parseStringSync } from './api.ts'
|
|
8
2
|
|
|
9
|
-
export type {
|
|
10
|
-
GFF3Comment,
|
|
11
|
-
GFF3Directive,
|
|
12
|
-
GFF3Feature,
|
|
13
|
-
GFF3FeatureLine,
|
|
14
|
-
GFF3FeatureLineWithRefs,
|
|
15
|
-
GFF3Item,
|
|
16
|
-
GFF3Sequence,
|
|
17
|
-
JBrowseFeature,
|
|
18
|
-
LineRecord,
|
|
19
|
-
} from './api.ts'
|
|
3
|
+
export type { GffFeature, LineRecord, ParsedRecord } from './api.ts'
|