gff-nostream 4.0.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -8
- package/dist/api.d.ts +17 -15
- package/dist/api.js +55 -52
- package/dist/api.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/util.d.ts +3 -4
- package/dist/util.js +4 -4
- package/dist/util.js.map +1 -1
- package/esm/api.d.ts +17 -15
- package/esm/api.js +55 -52
- package/esm/api.js.map +1 -1
- package/esm/index.d.ts +1 -1
- package/esm/util.d.ts +3 -4
- package/esm/util.js +4 -4
- package/esm/util.js.map +1 -1
- package/package.json +1 -1
- package/src/api.ts +77 -68
- package/src/index.ts +1 -1
- package/src/util.ts +4 -4
package/README.md
CHANGED
|
@@ -77,25 +77,36 @@ to its parent (or kept as a top-level item) independently.
|
|
|
77
77
|
Synchronously parse a GFF3 string and return an array of features. Comments,
|
|
78
78
|
directives, and `##FASTA` sections are ignored.
|
|
79
79
|
|
|
80
|
-
### `parseRecords(records:
|
|
80
|
+
### `parseRecords<R>(records: readonly R[]): ParsedRecord<R>[]`
|
|
81
81
|
|
|
82
|
-
Parse an array of
|
|
83
|
-
directly (e.g. from a tabix-indexed file
|
|
82
|
+
Parse an array of records wrapping raw GFF3 lines. Useful when managing raw line
|
|
83
|
+
data directly (e.g. from a tabix-indexed file). Each top-level feature is
|
|
84
|
+
returned paired with the record it came from, so a caller can attach its own
|
|
85
|
+
stable id (a byte offset, a hash, …) without the parser stamping anything onto
|
|
86
|
+
the feature. Records may carry extra fields (`R` is inferred), which pass
|
|
87
|
+
through untouched on `record`.
|
|
88
|
+
|
|
89
|
+
```ts
|
|
90
|
+
const features = parseRecords(
|
|
91
|
+
lines.map(line => ({ line, offset })),
|
|
92
|
+
).map(({ feature, record }) => ({ ...feature, id: record.offset }))
|
|
93
|
+
```
|
|
84
94
|
|
|
85
95
|
### `extractType(line: string): string`
|
|
86
96
|
|
|
87
97
|
Extract the feature type (GFF3 column 3) from a raw line without fully splitting
|
|
88
98
|
it.
|
|
89
99
|
|
|
90
|
-
### `LineRecord`
|
|
100
|
+
### `LineRecord` / `ParsedRecord`
|
|
91
101
|
|
|
92
102
|
```ts
|
|
93
103
|
interface LineRecord {
|
|
94
104
|
line: string
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
interface ParsedRecord<R extends LineRecord = LineRecord> {
|
|
108
|
+
feature: GffFeature
|
|
109
|
+
record: R // the input record this top-level feature was parsed from
|
|
99
110
|
}
|
|
100
111
|
```
|
|
101
112
|
|
package/dist/api.d.ts
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import type { GffFeature } from './util.ts';
|
|
2
|
-
interface
|
|
2
|
+
export interface LineRecord {
|
|
3
|
+
/** Raw GFF3 feature line */
|
|
3
4
|
line: string;
|
|
4
|
-
lineHash?: string | number;
|
|
5
|
-
hasEscapes: boolean;
|
|
6
5
|
}
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
6
|
+
/**
|
|
7
|
+
* A top-level parsed feature paired with the input record it came from. The
|
|
8
|
+
* parser stamps no identity onto the feature itself; callers that need a stable
|
|
9
|
+
* per-feature id (e.g. from a tabix byte offset) read it off their own `record`.
|
|
10
|
+
*/
|
|
11
|
+
export interface ParsedRecord<R extends LineRecord = LineRecord> {
|
|
12
|
+
feature: GffFeature;
|
|
13
|
+
record: R;
|
|
14
14
|
}
|
|
15
15
|
/** Extract the GFF3 feature type (column 3) from a raw line without a full split. */
|
|
16
16
|
export declare function extractType(line: string): string;
|
|
@@ -23,11 +23,13 @@ export declare function extractType(line: string): string;
|
|
|
23
23
|
*/
|
|
24
24
|
export declare function parseStringSync(str: string): GffFeature[];
|
|
25
25
|
/**
|
|
26
|
-
* Parse an array of
|
|
27
|
-
*
|
|
26
|
+
* Parse an array of records wrapping raw GFF3 lines, resolving parent/child
|
|
27
|
+
* relationships into `subfeatures`. Returns each top-level feature paired with
|
|
28
|
+
* the record it came from, so callers can attach their own identity (e.g. a
|
|
29
|
+
* byte offset) without the parser stamping anything onto the feature.
|
|
28
30
|
*
|
|
29
|
-
* @param records - Array of
|
|
30
|
-
* @returns
|
|
31
|
+
* @param records - Array of records, each carrying a raw GFF3 `line`
|
|
32
|
+
* @returns top-level features, each paired with its originating record
|
|
31
33
|
*/
|
|
32
|
-
export declare function parseRecords(records:
|
|
34
|
+
export declare function parseRecords<R extends LineRecord>(records: readonly R[]): ParsedRecord<R>[];
|
|
33
35
|
export type { GffFeature } from './util.ts';
|
package/dist/api.js
CHANGED
|
@@ -36,6 +36,42 @@ function toStringArray(value) {
|
|
|
36
36
|
}
|
|
37
37
|
return typeof value === 'string' ? [value] : [];
|
|
38
38
|
}
|
|
39
|
+
/**
|
|
40
|
+
* Register a feature's ID and attach it to its parent(s), building the
|
|
41
|
+
* subfeature tree in `byId`/`orphans`. Returns true when the feature is
|
|
42
|
+
* top-level (has no Parent) and the caller should collect it.
|
|
43
|
+
*/
|
|
44
|
+
function linkFeature(feature, byId, orphans) {
|
|
45
|
+
const id = firstString(feature.id);
|
|
46
|
+
const parents = toStringArray(feature.parent);
|
|
47
|
+
// Register the id only the first time it is seen. Continuation lines
|
|
48
|
+
// (multi-location features such as a CDS spanning several segments share one
|
|
49
|
+
// ID across lines) skip registration but must still be attached to their
|
|
50
|
+
// parent below, so this is independent of the parent handling.
|
|
51
|
+
if (id && !byId.has(id)) {
|
|
52
|
+
byId.set(id, feature);
|
|
53
|
+
const waiting = orphans.get(id);
|
|
54
|
+
if (waiting) {
|
|
55
|
+
for (const w of waiting) {
|
|
56
|
+
feature.subfeatures.push(w);
|
|
57
|
+
}
|
|
58
|
+
orphans.delete(id);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
for (const parentId of parents) {
|
|
62
|
+
const parentFeature = byId.get(parentId);
|
|
63
|
+
if (parentFeature) {
|
|
64
|
+
parentFeature.subfeatures.push(feature);
|
|
65
|
+
}
|
|
66
|
+
else {
|
|
67
|
+
appendOrphan(orphans, parentId, feature);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
// Every line of a top-level discontinuous feature (e.g. cDNA_match spanning
|
|
71
|
+
// several segments under one shared ID, with no Parent) is its own top-level
|
|
72
|
+
// item, so this is independent of whether the id was just registered.
|
|
73
|
+
return parents.length === 0;
|
|
74
|
+
}
|
|
39
75
|
/**
|
|
40
76
|
* Synchronously parse a string containing GFF3 and return an array of the
|
|
41
77
|
* parsed features. Comments, directives, and `##FASTA` sections are ignored.
|
|
@@ -44,72 +80,39 @@ function toStringArray(value) {
|
|
|
44
80
|
* @returns array of parsed features
|
|
45
81
|
*/
|
|
46
82
|
function parseStringSync(str) {
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
const
|
|
51
|
-
const records = [];
|
|
52
|
-
for (const line of lines) {
|
|
83
|
+
const items = [];
|
|
84
|
+
const byId = new Map();
|
|
85
|
+
const orphans = new Map();
|
|
86
|
+
for (const line of str.split(/\r?\n/)) {
|
|
53
87
|
if (line.startsWith('##FASTA') || line.startsWith('>')) {
|
|
54
88
|
break;
|
|
55
89
|
}
|
|
56
|
-
if (line.length
|
|
57
|
-
|
|
90
|
+
if (line.length !== 0 && !line.startsWith('#')) {
|
|
91
|
+
const feature = (0, util_ts_1.parseFeature)(line);
|
|
92
|
+
if (linkFeature(feature, byId, orphans)) {
|
|
93
|
+
items.push(feature);
|
|
94
|
+
}
|
|
58
95
|
}
|
|
59
|
-
records.push({
|
|
60
|
-
line,
|
|
61
|
-
hasEscapes: line.includes('%'),
|
|
62
|
-
});
|
|
63
96
|
}
|
|
64
|
-
return
|
|
97
|
+
return items;
|
|
65
98
|
}
|
|
66
99
|
/**
|
|
67
|
-
* Parse an array of
|
|
68
|
-
*
|
|
100
|
+
* Parse an array of records wrapping raw GFF3 lines, resolving parent/child
|
|
101
|
+
* relationships into `subfeatures`. Returns each top-level feature paired with
|
|
102
|
+
* the record it came from, so callers can attach their own identity (e.g. a
|
|
103
|
+
* byte offset) without the parser stamping anything onto the feature.
|
|
69
104
|
*
|
|
70
|
-
* @param records - Array of
|
|
71
|
-
* @returns
|
|
105
|
+
* @param records - Array of records, each carrying a raw GFF3 `line`
|
|
106
|
+
* @returns top-level features, each paired with its originating record
|
|
72
107
|
*/
|
|
73
108
|
function parseRecords(records) {
|
|
74
109
|
const items = [];
|
|
75
110
|
const byId = new Map();
|
|
76
111
|
const orphans = new Map();
|
|
77
112
|
for (const record of records) {
|
|
78
|
-
const feature = (0, util_ts_1.parseFeature)(record.line
|
|
79
|
-
if (
|
|
80
|
-
|
|
81
|
-
}
|
|
82
|
-
const id = firstString(feature.id);
|
|
83
|
-
const parents = toStringArray(feature.parent);
|
|
84
|
-
// A parentless line is a top-level item. Every line of a top-level
|
|
85
|
-
// discontinuous feature (e.g. cDNA_match/EST_match spanning several
|
|
86
|
-
// segments under one shared ID, with no Parent) is its own top-level
|
|
87
|
-
// item, so push regardless of whether the id is already registered.
|
|
88
|
-
if (parents.length === 0) {
|
|
89
|
-
items.push(feature);
|
|
90
|
-
}
|
|
91
|
-
// Register the id only the first time it is seen. Continuation lines
|
|
92
|
-
// (multi-location features such as a CDS spanning several segments share
|
|
93
|
-
// one ID across lines) skip registration but must still be attached to
|
|
94
|
-
// their parent below, so this is independent of the parent handling.
|
|
95
|
-
if (id && !byId.has(id)) {
|
|
96
|
-
byId.set(id, feature);
|
|
97
|
-
const waiting = orphans.get(id);
|
|
98
|
-
if (waiting) {
|
|
99
|
-
for (const w of waiting) {
|
|
100
|
-
feature.subfeatures.push(w);
|
|
101
|
-
}
|
|
102
|
-
orphans.delete(id);
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
for (const parentId of parents) {
|
|
106
|
-
const parentFeature = byId.get(parentId);
|
|
107
|
-
if (parentFeature) {
|
|
108
|
-
parentFeature.subfeatures.push(feature);
|
|
109
|
-
}
|
|
110
|
-
else {
|
|
111
|
-
appendOrphan(orphans, parentId, feature);
|
|
112
|
-
}
|
|
113
|
+
const feature = (0, util_ts_1.parseFeature)(record.line);
|
|
114
|
+
if (linkFeature(feature, byId, orphans)) {
|
|
115
|
+
items.push({ feature, record });
|
|
113
116
|
}
|
|
114
117
|
}
|
|
115
118
|
return items;
|
package/dist/api.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api.js","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":";;AAoBA,kCAKC;
|
|
1
|
+
{"version":3,"file":"api.js","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":";;AAoBA,kCAKC;AA+ED,0CAkBC;AAWD,oCAeC;AApJD,uCAAwC;AAmBxC,qFAAqF;AACrF,SAAgB,WAAW,CAAC,IAAY;IACtC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;IAC7B,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,CAAA;IACrC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,CAAA;IACrC,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC,CAAA;AAC/B,CAAC;AAED,kFAAkF;AAClF,SAAS,YAAY,CAAI,OAAyB,EAAE,GAAW,EAAE,KAAQ;IACvE,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAC5B,IAAI,GAAG,EAAE,CAAC;QACR,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IACjB,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,CAAA;IAC3B,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,KAAc;IACjC,MAAM,CAAC,GAAY,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;IAC1D,OAAO,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;AAC9C,CAAC;AAED,SAAS,aAAa,CAAC,KAAc;IACnC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAA;IAChE,CAAC;IACD,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;AACjD,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAClB,OAAmB,EACnB,IAA6B,EAC7B,OAAkC;IAElC,MAAM,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;IAClC,MAAM,OAAO,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM,CAAC,CAAA;IAE7C,qEAAqE;IACrE,6EAA6E;IAC7E,yEAAyE;IACzE,+DAA+D;IAC/D,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;QACxB,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAAC,CAAA;QACrB,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;QAC/B,IAAI,OAAO,EAAE,CAAC;YACZ,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;gBACxB,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAC7B,CAAC;YACD,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAA;QACpB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,QAAQ,IAAI,OAAO,EAAE,CAAC;QAC/B,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAA;QACxC,IAAI,aAAa,EAAE,CAAC;YAClB,aAAa,CAAC,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACzC,CAAC;aAAM,CAAC;YACN,YAAY,CAAC,OAAO,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAA;QAC1C,CAAC;IACH,CAAC;IAED,4EAA4E;IAC5E,6EAA6E;IAC7E,sEAAsE;IACtE,OAAO,OAAO,CAAC,MAAM,KAAK,CAAC,CAAA;AAC7B,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,eAAe,CAAC,GAAW;IACzC,MAAM,KAAK,GAAiB,EAAE,CAAA;IAC9B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAsB,CAAA;IAC1C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAwB,CAAA;IAE/C,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;QACtC,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACvD,MAAK;QACP,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC/C,MAAM,OAAO,GAAG,IAAA,sBAAY,EAAC,IAAI,CAAC,CAAA;YAClC,IAAI,WAAW,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;gBACxC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAA;AACd,CAAC;AAED;;;;;;;;GAQG;AACH,SAAgB,YAAY,CAC1B,OAAqB;IAErB,MAAM,KAAK,GAAsB,EAAE,CAAA;IACnC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAsB,CAAA;IAC1C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAwB,CAAA;IAE/C,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,IAAA,sBAAY,EAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACzC,IAAI,WAAW,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;YACxC,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAA;QACjC,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAA;AACd,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
export { extractType, parseRecords, parseStringSync } from './api.ts';
|
|
2
|
-
export type { GffFeature, LineRecord } from './api.ts';
|
|
2
|
+
export type { GffFeature, LineRecord, ParsedRecord } from './api.ts';
|
package/dist/util.d.ts
CHANGED
|
@@ -29,11 +29,10 @@ export interface GffFeature {
|
|
|
29
29
|
*/
|
|
30
30
|
export declare function parseAttributes(attrString: string, result: Record<string, unknown>, shouldUnescape: boolean): void;
|
|
31
31
|
/**
|
|
32
|
-
* Parse a GFF3 feature line.
|
|
33
|
-
*
|
|
32
|
+
* Parse a GFF3 feature line. Unescaping is skipped entirely for lines with no
|
|
33
|
+
* '%' character, which is the common case.
|
|
34
34
|
*
|
|
35
35
|
* @param line - GFF3 feature line
|
|
36
|
-
* @param shouldUnescape - whether to unescape percent-encoded values
|
|
37
36
|
* @returns The parsed feature
|
|
38
37
|
*/
|
|
39
|
-
export declare function parseFeature(line: string
|
|
38
|
+
export declare function parseFeature(line: string): GffFeature;
|
package/dist/util.js
CHANGED
|
@@ -137,15 +137,15 @@ function parseAttributes(attrString, result, shouldUnescape) {
|
|
|
137
137
|
}
|
|
138
138
|
}
|
|
139
139
|
/**
|
|
140
|
-
* Parse a GFF3 feature line.
|
|
141
|
-
*
|
|
140
|
+
* Parse a GFF3 feature line. Unescaping is skipped entirely for lines with no
|
|
141
|
+
* '%' character, which is the common case.
|
|
142
142
|
*
|
|
143
143
|
* @param line - GFF3 feature line
|
|
144
|
-
* @param shouldUnescape - whether to unescape percent-encoded values
|
|
145
144
|
* @returns The parsed feature
|
|
146
145
|
*/
|
|
147
|
-
function parseFeature(line
|
|
146
|
+
function parseFeature(line) {
|
|
148
147
|
const f = line.split('\t');
|
|
148
|
+
const shouldUnescape = line.includes('%');
|
|
149
149
|
const startStr = f[3];
|
|
150
150
|
const endStr = f[4];
|
|
151
151
|
const scoreStr = f[5];
|
package/dist/util.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":";AAAA,8CAA8C;AAC9C,qEAAqE;;AAerE,4BA2BC;AA8ED,0CAqDC;
|
|
1
|
+
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":";AAAA,8CAA8C;AAC9C,qEAAqE;;AAerE,4BA2BC;AA8ED,0CAqDC;AASD,oCAuBC;AA3MD,MAAM,UAAU,GAAuC,EAAE,CAAA;AACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;IAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACzD,UAAU,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IACxC,UAAU,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;AACxD,CAAC;AAED;;;;;GAKG;AACH,SAAgB,QAAQ,CAAC,SAAiB;IACxC,MAAM,GAAG,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAClC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;QACf,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,MAAM,GAAG,EAAE,CAAA;IACf,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,IAAI,CAAC,GAAG,GAAG,CAAA;IAEX,OAAO,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,IAAI,GACR,SAAS,CAAC,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM;YAC9C,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3C,CAAC,CAAC,SAAS,CAAA;QACf,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,IAAI,CAAA;YAC5C,CAAC,IAAI,CAAC,CAAA;YACN,OAAO,GAAG,CAAC,CAAA;QACb,CAAC;aAAM,CAAC;YACN,mEAAmE;YACnE,wEAAwE;YACxE,CAAC,EAAE,CAAA;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;AAC1C,CAAC;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAA;AACpC,CAAC;AAED,SAAS,QAAQ,CACf,CAAS,EACT,cAAuB,EACvB,KAAQ;IAER,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAC9D,CAAC;AAED,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC;IACrC,OAAO;IACP,KAAK;IACL,QAAQ;IACR,OAAO;IACP,MAAM;IACN,QAAQ;IACR,OAAO;IACP,QAAQ;CACT,CAAC,CAAA;AAEF,uEAAuE;AACvE,sCAAsC;AACtC,MAAM,YAAY,GAAuC;IACvD,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,aAAa,EAAE,eAAe;IAC9B,WAAW,EAAE,aAAa;IAC1B,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;IACV,YAAY,EAAE,cAAc;IAC5B,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;CACX,CAAA;AAED,MAAM,UAAU,GAAuC;IACrD,GAAG,EAAE,CAAC;IACN,GAAG,EAAE,CAAC,CAAC;IACP,GAAG,EAAE,CAAC;CACP,CAAA;AAoBD;;;;GAIG;AACH,SAAgB,eAAe,CAC7B,UAAkB,EAClB,MAA+B,EAC/B,cAAuB;IAEvB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;QAClD,OAAM;IACR,CAAC;IAED,IAAI,GAAG,GAAG,UAAU,CAAC,MAAM,CAAA;IAC3B,IAAI,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,GAAG,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAA;QACtD,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACvC,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;QACnB,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAC5C,IAAI,OAAO,KAAK,CAAC,CAAC,EAAE,CAAC;YACnB,OAAO,GAAG,GAAG,CAAA;QACf,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;YACpB,MAAM,KAAK,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;YAC5C,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;gBAC3D,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;gBAC1C,IAAI,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;gBAC3B,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;oBACtB,GAAG,GAAG,GAAG,CAAC,WAAW,EAAE,CAAA;oBACvB,IAAI,sBAAsB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;wBACpC,GAAG,IAAI,GAAG,CAAA;oBACZ,CAAC;gBACH,CAAC;gBAED,MAAM,MAAM,GAAa,EAAE,CAAA;gBAC3B,IAAI,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAA;gBACxB,OAAO,QAAQ,GAAG,OAAO,EAAE,CAAC;oBAC1B,IAAI,QAAQ,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAA;oBAChD,IAAI,QAAQ,KAAK,CAAC,CAAC,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;wBAC1C,QAAQ,GAAG,OAAO,CAAA;oBACpB,CAAC;oBACD,IAAI,QAAQ,GAAG,QAAQ,EAAE,CAAC;wBACxB,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;wBAChD,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;oBACnD,CAAC;oBACD,QAAQ,GAAG,QAAQ,GAAG,CAAC,CAAA;gBACzB,CAAC;gBAED,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAA;YACxD,CAAC;QACH,CAAC;QACD,KAAK,GAAG,OAAO,GAAG,CAAC,CAAA;IACrB,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,YAAY,CAAC,IAAY;IACvC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC1B,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAA;IACzC,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACpB,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACnB,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IAExB,MAAM,MAAM,GAAe;QACzB,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,EAAE,CAAC;QAC5C,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC7C,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC3C,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC;QAC5C,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM;QAClC,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ;QAChD,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC;QACzB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK;QAC1C,WAAW,EAAE,EAAE;KAChB,CAAA;IAED,eAAe,CAAC,UAAU,EAAE,MAAM,EAAE,cAAc,CAAC,CAAA;IACnD,OAAO,MAAM,CAAA;AACf,CAAC"}
|
package/esm/api.d.ts
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import type { GffFeature } from './util.ts';
|
|
2
|
-
interface
|
|
2
|
+
export interface LineRecord {
|
|
3
|
+
/** Raw GFF3 feature line */
|
|
3
4
|
line: string;
|
|
4
|
-
lineHash?: string | number;
|
|
5
|
-
hasEscapes: boolean;
|
|
6
5
|
}
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
6
|
+
/**
|
|
7
|
+
* A top-level parsed feature paired with the input record it came from. The
|
|
8
|
+
* parser stamps no identity onto the feature itself; callers that need a stable
|
|
9
|
+
* per-feature id (e.g. from a tabix byte offset) read it off their own `record`.
|
|
10
|
+
*/
|
|
11
|
+
export interface ParsedRecord<R extends LineRecord = LineRecord> {
|
|
12
|
+
feature: GffFeature;
|
|
13
|
+
record: R;
|
|
14
14
|
}
|
|
15
15
|
/** Extract the GFF3 feature type (column 3) from a raw line without a full split. */
|
|
16
16
|
export declare function extractType(line: string): string;
|
|
@@ -23,11 +23,13 @@ export declare function extractType(line: string): string;
|
|
|
23
23
|
*/
|
|
24
24
|
export declare function parseStringSync(str: string): GffFeature[];
|
|
25
25
|
/**
|
|
26
|
-
* Parse an array of
|
|
27
|
-
*
|
|
26
|
+
* Parse an array of records wrapping raw GFF3 lines, resolving parent/child
|
|
27
|
+
* relationships into `subfeatures`. Returns each top-level feature paired with
|
|
28
|
+
* the record it came from, so callers can attach their own identity (e.g. a
|
|
29
|
+
* byte offset) without the parser stamping anything onto the feature.
|
|
28
30
|
*
|
|
29
|
-
* @param records - Array of
|
|
30
|
-
* @returns
|
|
31
|
+
* @param records - Array of records, each carrying a raw GFF3 `line`
|
|
32
|
+
* @returns top-level features, each paired with its originating record
|
|
31
33
|
*/
|
|
32
|
-
export declare function parseRecords(records:
|
|
34
|
+
export declare function parseRecords<R extends LineRecord>(records: readonly R[]): ParsedRecord<R>[];
|
|
33
35
|
export type { GffFeature } from './util.ts';
|
package/esm/api.js
CHANGED
|
@@ -31,6 +31,42 @@ function toStringArray(value) {
|
|
|
31
31
|
}
|
|
32
32
|
return typeof value === 'string' ? [value] : [];
|
|
33
33
|
}
|
|
34
|
+
/**
|
|
35
|
+
* Register a feature's ID and attach it to its parent(s), building the
|
|
36
|
+
* subfeature tree in `byId`/`orphans`. Returns true when the feature is
|
|
37
|
+
* top-level (has no Parent) and the caller should collect it.
|
|
38
|
+
*/
|
|
39
|
+
function linkFeature(feature, byId, orphans) {
|
|
40
|
+
const id = firstString(feature.id);
|
|
41
|
+
const parents = toStringArray(feature.parent);
|
|
42
|
+
// Register the id only the first time it is seen. Continuation lines
|
|
43
|
+
// (multi-location features such as a CDS spanning several segments share one
|
|
44
|
+
// ID across lines) skip registration but must still be attached to their
|
|
45
|
+
// parent below, so this is independent of the parent handling.
|
|
46
|
+
if (id && !byId.has(id)) {
|
|
47
|
+
byId.set(id, feature);
|
|
48
|
+
const waiting = orphans.get(id);
|
|
49
|
+
if (waiting) {
|
|
50
|
+
for (const w of waiting) {
|
|
51
|
+
feature.subfeatures.push(w);
|
|
52
|
+
}
|
|
53
|
+
orphans.delete(id);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
for (const parentId of parents) {
|
|
57
|
+
const parentFeature = byId.get(parentId);
|
|
58
|
+
if (parentFeature) {
|
|
59
|
+
parentFeature.subfeatures.push(feature);
|
|
60
|
+
}
|
|
61
|
+
else {
|
|
62
|
+
appendOrphan(orphans, parentId, feature);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
// Every line of a top-level discontinuous feature (e.g. cDNA_match spanning
|
|
66
|
+
// several segments under one shared ID, with no Parent) is its own top-level
|
|
67
|
+
// item, so this is independent of whether the id was just registered.
|
|
68
|
+
return parents.length === 0;
|
|
69
|
+
}
|
|
34
70
|
/**
|
|
35
71
|
* Synchronously parse a string containing GFF3 and return an array of the
|
|
36
72
|
* parsed features. Comments, directives, and `##FASTA` sections are ignored.
|
|
@@ -39,72 +75,39 @@ function toStringArray(value) {
|
|
|
39
75
|
* @returns array of parsed features
|
|
40
76
|
*/
|
|
41
77
|
export function parseStringSync(str) {
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
const
|
|
46
|
-
const records = [];
|
|
47
|
-
for (const line of lines) {
|
|
78
|
+
const items = [];
|
|
79
|
+
const byId = new Map();
|
|
80
|
+
const orphans = new Map();
|
|
81
|
+
for (const line of str.split(/\r?\n/)) {
|
|
48
82
|
if (line.startsWith('##FASTA') || line.startsWith('>')) {
|
|
49
83
|
break;
|
|
50
84
|
}
|
|
51
|
-
if (line.length
|
|
52
|
-
|
|
85
|
+
if (line.length !== 0 && !line.startsWith('#')) {
|
|
86
|
+
const feature = parseFeature(line);
|
|
87
|
+
if (linkFeature(feature, byId, orphans)) {
|
|
88
|
+
items.push(feature);
|
|
89
|
+
}
|
|
53
90
|
}
|
|
54
|
-
records.push({
|
|
55
|
-
line,
|
|
56
|
-
hasEscapes: line.includes('%'),
|
|
57
|
-
});
|
|
58
91
|
}
|
|
59
|
-
return
|
|
92
|
+
return items;
|
|
60
93
|
}
|
|
61
94
|
/**
|
|
62
|
-
* Parse an array of
|
|
63
|
-
*
|
|
95
|
+
* Parse an array of records wrapping raw GFF3 lines, resolving parent/child
|
|
96
|
+
* relationships into `subfeatures`. Returns each top-level feature paired with
|
|
97
|
+
* the record it came from, so callers can attach their own identity (e.g. a
|
|
98
|
+
* byte offset) without the parser stamping anything onto the feature.
|
|
64
99
|
*
|
|
65
|
-
* @param records - Array of
|
|
66
|
-
* @returns
|
|
100
|
+
* @param records - Array of records, each carrying a raw GFF3 `line`
|
|
101
|
+
* @returns top-level features, each paired with its originating record
|
|
67
102
|
*/
|
|
68
103
|
export function parseRecords(records) {
|
|
69
104
|
const items = [];
|
|
70
105
|
const byId = new Map();
|
|
71
106
|
const orphans = new Map();
|
|
72
107
|
for (const record of records) {
|
|
73
|
-
const feature = parseFeature(record.line
|
|
74
|
-
if (
|
|
75
|
-
|
|
76
|
-
}
|
|
77
|
-
const id = firstString(feature.id);
|
|
78
|
-
const parents = toStringArray(feature.parent);
|
|
79
|
-
// A parentless line is a top-level item. Every line of a top-level
|
|
80
|
-
// discontinuous feature (e.g. cDNA_match/EST_match spanning several
|
|
81
|
-
// segments under one shared ID, with no Parent) is its own top-level
|
|
82
|
-
// item, so push regardless of whether the id is already registered.
|
|
83
|
-
if (parents.length === 0) {
|
|
84
|
-
items.push(feature);
|
|
85
|
-
}
|
|
86
|
-
// Register the id only the first time it is seen. Continuation lines
|
|
87
|
-
// (multi-location features such as a CDS spanning several segments share
|
|
88
|
-
// one ID across lines) skip registration but must still be attached to
|
|
89
|
-
// their parent below, so this is independent of the parent handling.
|
|
90
|
-
if (id && !byId.has(id)) {
|
|
91
|
-
byId.set(id, feature);
|
|
92
|
-
const waiting = orphans.get(id);
|
|
93
|
-
if (waiting) {
|
|
94
|
-
for (const w of waiting) {
|
|
95
|
-
feature.subfeatures.push(w);
|
|
96
|
-
}
|
|
97
|
-
orphans.delete(id);
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
for (const parentId of parents) {
|
|
101
|
-
const parentFeature = byId.get(parentId);
|
|
102
|
-
if (parentFeature) {
|
|
103
|
-
parentFeature.subfeatures.push(feature);
|
|
104
|
-
}
|
|
105
|
-
else {
|
|
106
|
-
appendOrphan(orphans, parentId, feature);
|
|
107
|
-
}
|
|
108
|
+
const feature = parseFeature(record.line);
|
|
109
|
+
if (linkFeature(feature, byId, orphans)) {
|
|
110
|
+
items.push({ feature, record });
|
|
108
111
|
}
|
|
109
112
|
}
|
|
110
113
|
return items;
|
package/esm/api.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api.js","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAA;AAmBxC,qFAAqF;AACrF,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;IAC7B,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,CAAA;IACrC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,CAAA;IACrC,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC,CAAA;AAC/B,CAAC;AAED,kFAAkF;AAClF,SAAS,YAAY,CAAI,OAAyB,EAAE,GAAW,EAAE,KAAQ;IACvE,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAC5B,IAAI,GAAG,EAAE,CAAC;QACR,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IACjB,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,CAAA;IAC3B,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,KAAc;IACjC,MAAM,CAAC,GAAY,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;IAC1D,OAAO,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;AAC9C,CAAC;AAED,SAAS,aAAa,CAAC,KAAc;IACnC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAA;IAChE,CAAC;IACD,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;AACjD,CAAC;AAED
|
|
1
|
+
{"version":3,"file":"api.js","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAA;AAmBxC,qFAAqF;AACrF,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;IAC7B,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,CAAA;IACrC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,CAAA;IACrC,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC,CAAA;AAC/B,CAAC;AAED,kFAAkF;AAClF,SAAS,YAAY,CAAI,OAAyB,EAAE,GAAW,EAAE,KAAQ;IACvE,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAC5B,IAAI,GAAG,EAAE,CAAC;QACR,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IACjB,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,CAAA;IAC3B,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,KAAc;IACjC,MAAM,CAAC,GAAY,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;IAC1D,OAAO,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;AAC9C,CAAC;AAED,SAAS,aAAa,CAAC,KAAc;IACnC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAA;IAChE,CAAC;IACD,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;AACjD,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAClB,OAAmB,EACnB,IAA6B,EAC7B,OAAkC;IAElC,MAAM,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;IAClC,MAAM,OAAO,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM,CAAC,CAAA;IAE7C,qEAAqE;IACrE,6EAA6E;IAC7E,yEAAyE;IACzE,+DAA+D;IAC/D,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;QACxB,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAAC,CAAA;QACrB,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;QAC/B,IAAI,OAAO,EAAE,CAAC;YACZ,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;gBACxB,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAC7B,CAAC;YACD,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAA;QACpB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,QAAQ,IAAI,OAAO,EAAE,CAAC;QAC/B,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAA;QACxC,IAAI,aAAa,EAAE,CAAC;YAClB,aAAa,CAAC,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACzC,CAAC;aAAM,CAAC;YACN,YAAY,CAAC,OAAO,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAA;QAC1C,CAAC;IACH,CAAC;IAED,4EAA4E;IAC5E,6EAA6E;IAC7E,sEAAsE;IACtE,OAAO,OAAO,CAAC,MAAM,KAAK,CAAC,CAAA;AAC7B,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,MAAM,KAAK,GAAiB,EAAE,CAAA;IAC9B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAsB,CAAA;IAC1C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAwB,CAAA;IAE/C,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;QACtC,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACvD,MAAK;QACP,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC/C,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,CAAA;YAClC,IAAI,WAAW,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;gBACxC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAA;AACd,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,YAAY,CAC1B,OAAqB;IAErB,MAAM,KAAK,GAAsB,EAAE,CAAA;IACnC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAsB,CAAA;IAC1C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAwB,CAAA;IAE/C,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACzC,IAAI,WAAW,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;YACxC,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAA;QACjC,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAA;AACd,CAAC"}
|
package/esm/index.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
export { extractType, parseRecords, parseStringSync } from './api.ts';
|
|
2
|
-
export type { GffFeature, LineRecord } from './api.ts';
|
|
2
|
+
export type { GffFeature, LineRecord, ParsedRecord } from './api.ts';
|
package/esm/util.d.ts
CHANGED
|
@@ -29,11 +29,10 @@ export interface GffFeature {
|
|
|
29
29
|
*/
|
|
30
30
|
export declare function parseAttributes(attrString: string, result: Record<string, unknown>, shouldUnescape: boolean): void;
|
|
31
31
|
/**
|
|
32
|
-
* Parse a GFF3 feature line.
|
|
33
|
-
*
|
|
32
|
+
* Parse a GFF3 feature line. Unescaping is skipped entirely for lines with no
|
|
33
|
+
* '%' character, which is the common case.
|
|
34
34
|
*
|
|
35
35
|
* @param line - GFF3 feature line
|
|
36
|
-
* @param shouldUnescape - whether to unescape percent-encoded values
|
|
37
36
|
* @returns The parsed feature
|
|
38
37
|
*/
|
|
39
|
-
export declare function parseFeature(line: string
|
|
38
|
+
export declare function parseFeature(line: string): GffFeature;
|
package/esm/util.js
CHANGED
|
@@ -132,15 +132,15 @@ export function parseAttributes(attrString, result, shouldUnescape) {
|
|
|
132
132
|
}
|
|
133
133
|
}
|
|
134
134
|
/**
|
|
135
|
-
* Parse a GFF3 feature line.
|
|
136
|
-
*
|
|
135
|
+
* Parse a GFF3 feature line. Unescaping is skipped entirely for lines with no
|
|
136
|
+
* '%' character, which is the common case.
|
|
137
137
|
*
|
|
138
138
|
* @param line - GFF3 feature line
|
|
139
|
-
* @param shouldUnescape - whether to unescape percent-encoded values
|
|
140
139
|
* @returns The parsed feature
|
|
141
140
|
*/
|
|
142
|
-
export function parseFeature(line
|
|
141
|
+
export function parseFeature(line) {
|
|
143
142
|
const f = line.split('\t');
|
|
143
|
+
const shouldUnescape = line.includes('%');
|
|
144
144
|
const startStr = f[3];
|
|
145
145
|
const endStr = f[4];
|
|
146
146
|
const scoreStr = f[5];
|
package/esm/util.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,8CAA8C;AAC9C,qEAAqE;AAErE,MAAM,UAAU,GAAuC,EAAE,CAAA;AACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;IAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACzD,UAAU,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IACxC,UAAU,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;AACxD,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,QAAQ,CAAC,SAAiB;IACxC,MAAM,GAAG,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAClC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;QACf,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,MAAM,GAAG,EAAE,CAAA;IACf,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,IAAI,CAAC,GAAG,GAAG,CAAA;IAEX,OAAO,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,IAAI,GACR,SAAS,CAAC,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM;YAC9C,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3C,CAAC,CAAC,SAAS,CAAA;QACf,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,IAAI,CAAA;YAC5C,CAAC,IAAI,CAAC,CAAA;YACN,OAAO,GAAG,CAAC,CAAA;QACb,CAAC;aAAM,CAAC;YACN,mEAAmE;YACnE,wEAAwE;YACxE,CAAC,EAAE,CAAA;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;AAC1C,CAAC;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAA;AACpC,CAAC;AAED,SAAS,QAAQ,CACf,CAAS,EACT,cAAuB,EACvB,KAAQ;IAER,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAC9D,CAAC;AAED,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC;IACrC,OAAO;IACP,KAAK;IACL,QAAQ;IACR,OAAO;IACP,MAAM;IACN,QAAQ;IACR,OAAO;IACP,QAAQ;CACT,CAAC,CAAA;AAEF,uEAAuE;AACvE,sCAAsC;AACtC,MAAM,YAAY,GAAuC;IACvD,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,aAAa,EAAE,eAAe;IAC9B,WAAW,EAAE,aAAa;IAC1B,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;IACV,YAAY,EAAE,cAAc;IAC5B,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;CACX,CAAA;AAED,MAAM,UAAU,GAAuC;IACrD,GAAG,EAAE,CAAC;IACN,GAAG,EAAE,CAAC,CAAC;IACP,GAAG,EAAE,CAAC;CACP,CAAA;AAoBD;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAC7B,UAAkB,EAClB,MAA+B,EAC/B,cAAuB;IAEvB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;QAClD,OAAM;IACR,CAAC;IAED,IAAI,GAAG,GAAG,UAAU,CAAC,MAAM,CAAA;IAC3B,IAAI,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,GAAG,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAA;QACtD,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACvC,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;QACnB,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAC5C,IAAI,OAAO,KAAK,CAAC,CAAC,EAAE,CAAC;YACnB,OAAO,GAAG,GAAG,CAAA;QACf,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;YACpB,MAAM,KAAK,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;YAC5C,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;gBAC3D,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;gBAC1C,IAAI,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;gBAC3B,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;oBACtB,GAAG,GAAG,GAAG,CAAC,WAAW,EAAE,CAAA;oBACvB,IAAI,sBAAsB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;wBACpC,GAAG,IAAI,GAAG,CAAA;oBACZ,CAAC;gBACH,CAAC;gBAED,MAAM,MAAM,GAAa,EAAE,CAAA;gBAC3B,IAAI,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAA;gBACxB,OAAO,QAAQ,GAAG,OAAO,EAAE,CAAC;oBAC1B,IAAI,QAAQ,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAA;oBAChD,IAAI,QAAQ,KAAK,CAAC,CAAC,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;wBAC1C,QAAQ,GAAG,OAAO,CAAA;oBACpB,CAAC;oBACD,IAAI,QAAQ,GAAG,QAAQ,EAAE,CAAC;wBACxB,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;wBAChD,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;oBACnD,CAAC;oBACD,QAAQ,GAAG,QAAQ,GAAG,CAAC,CAAA;gBACzB,CAAC;gBAED,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAA;YACxD,CAAC;QACH,CAAC;QACD,KAAK,GAAG,OAAO,GAAG,CAAC,CAAA;IACrB,CAAC;AACH,CAAC;AAED
|
|
1
|
+
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,8CAA8C;AAC9C,qEAAqE;AAErE,MAAM,UAAU,GAAuC,EAAE,CAAA;AACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;IAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACzD,UAAU,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IACxC,UAAU,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;AACxD,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,QAAQ,CAAC,SAAiB;IACxC,MAAM,GAAG,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAClC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;QACf,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,MAAM,GAAG,EAAE,CAAA;IACf,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,IAAI,CAAC,GAAG,GAAG,CAAA;IAEX,OAAO,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,IAAI,GACR,SAAS,CAAC,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM;YAC9C,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3C,CAAC,CAAC,SAAS,CAAA;QACf,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,IAAI,CAAA;YAC5C,CAAC,IAAI,CAAC,CAAA;YACN,OAAO,GAAG,CAAC,CAAA;QACb,CAAC;aAAM,CAAC;YACN,mEAAmE;YACnE,wEAAwE;YACxE,CAAC,EAAE,CAAA;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;AAC1C,CAAC;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAA;AACpC,CAAC;AAED,SAAS,QAAQ,CACf,CAAS,EACT,cAAuB,EACvB,KAAQ;IAER,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAC9D,CAAC;AAED,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC;IACrC,OAAO;IACP,KAAK;IACL,QAAQ;IACR,OAAO;IACP,MAAM;IACN,QAAQ;IACR,OAAO;IACP,QAAQ;CACT,CAAC,CAAA;AAEF,uEAAuE;AACvE,sCAAsC;AACtC,MAAM,YAAY,GAAuC;IACvD,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,aAAa,EAAE,eAAe;IAC9B,WAAW,EAAE,aAAa;IAC1B,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;IACV,YAAY,EAAE,cAAc;IAC5B,EAAE,EAAE,IAAI;IACR,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,QAAQ;IAChB,GAAG,EAAE,KAAK;CACX,CAAA;AAED,MAAM,UAAU,GAAuC;IACrD,GAAG,EAAE,CAAC;IACN,GAAG,EAAE,CAAC,CAAC;IACP,GAAG,EAAE,CAAC;CACP,CAAA;AAoBD;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAC7B,UAAkB,EAClB,MAA+B,EAC/B,cAAuB;IAEvB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;QAClD,OAAM;IACR,CAAC;IAED,IAAI,GAAG,GAAG,UAAU,CAAC,MAAM,CAAA;IAC3B,IAAI,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,GAAG,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAA;QACtD,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACvC,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;QACnB,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAC5C,IAAI,OAAO,KAAK,CAAC,CAAC,EAAE,CAAC;YACnB,OAAO,GAAG,GAAG,CAAA;QACf,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;YACpB,MAAM,KAAK,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;YAC5C,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;gBAC3D,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;gBAC1C,IAAI,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;gBAC3B,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;oBACtB,GAAG,GAAG,GAAG,CAAC,WAAW,EAAE,CAAA;oBACvB,IAAI,sBAAsB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;wBACpC,GAAG,IAAI,GAAG,CAAA;oBACZ,CAAC;gBACH,CAAC;gBAED,MAAM,MAAM,GAAa,EAAE,CAAA;gBAC3B,IAAI,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAA;gBACxB,OAAO,QAAQ,GAAG,OAAO,EAAE,CAAC;oBAC1B,IAAI,QAAQ,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAA;oBAChD,IAAI,QAAQ,KAAK,CAAC,CAAC,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;wBAC1C,QAAQ,GAAG,OAAO,CAAA;oBACpB,CAAC;oBACD,IAAI,QAAQ,GAAG,QAAQ,EAAE,CAAC;wBACxB,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;wBAChD,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;oBACnD,CAAC;oBACD,QAAQ,GAAG,QAAQ,GAAG,CAAC,CAAA;gBACzB,CAAC;gBAED,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAA;YACxD,CAAC;QACH,CAAC;QACD,KAAK,GAAG,OAAO,GAAG,CAAC,CAAA;IACrB,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC1B,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAA;IACzC,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACpB,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACtB,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACnB,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IAExB,MAAM,MAAM,GAAe;QACzB,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,EAAE,CAAC;QAC5C,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC7C,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,cAAc,EAAE,IAAI,CAAC;QAC3C,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC;QAC5C,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM;QAClC,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ;QAChD,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC;QACzB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK;QAC1C,WAAW,EAAE,EAAE;KAChB,CAAA;IAED,eAAe,CAAC,UAAU,EAAE,MAAM,EAAE,cAAc,CAAC,CAAA;IACnD,OAAO,MAAM,CAAA;AACf,CAAC"}
|
package/package.json
CHANGED
package/src/api.ts
CHANGED
|
@@ -2,19 +2,19 @@ import { parseFeature } from './util.ts'
|
|
|
2
2
|
|
|
3
3
|
import type { GffFeature } from './util.ts'
|
|
4
4
|
|
|
5
|
-
interface
|
|
5
|
+
export interface LineRecord {
|
|
6
|
+
/** Raw GFF3 feature line */
|
|
6
7
|
line: string
|
|
7
|
-
lineHash?: string | number
|
|
8
|
-
hasEscapes: boolean
|
|
9
8
|
}
|
|
10
9
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
10
|
+
/**
|
|
11
|
+
* A top-level parsed feature paired with the input record it came from. The
|
|
12
|
+
* parser stamps no identity onto the feature itself; callers that need a stable
|
|
13
|
+
* per-feature id (e.g. from a tabix byte offset) read it off their own `record`.
|
|
14
|
+
*/
|
|
15
|
+
export interface ParsedRecord<R extends LineRecord = LineRecord> {
|
|
16
|
+
feature: GffFeature
|
|
17
|
+
record: R
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
/** Extract the GFF3 feature type (column 3) from a raw line without a full split. */
|
|
@@ -52,6 +52,49 @@ function toStringArray(value: unknown): string[] {
|
|
|
52
52
|
return typeof value === 'string' ? [value] : []
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
+
/**
|
|
56
|
+
* Register a feature's ID and attach it to its parent(s), building the
|
|
57
|
+
* subfeature tree in `byId`/`orphans`. Returns true when the feature is
|
|
58
|
+
* top-level (has no Parent) and the caller should collect it.
|
|
59
|
+
*/
|
|
60
|
+
function linkFeature(
|
|
61
|
+
feature: GffFeature,
|
|
62
|
+
byId: Map<string, GffFeature>,
|
|
63
|
+
orphans: Map<string, GffFeature[]>,
|
|
64
|
+
): boolean {
|
|
65
|
+
const id = firstString(feature.id)
|
|
66
|
+
const parents = toStringArray(feature.parent)
|
|
67
|
+
|
|
68
|
+
// Register the id only the first time it is seen. Continuation lines
|
|
69
|
+
// (multi-location features such as a CDS spanning several segments share one
|
|
70
|
+
// ID across lines) skip registration but must still be attached to their
|
|
71
|
+
// parent below, so this is independent of the parent handling.
|
|
72
|
+
if (id && !byId.has(id)) {
|
|
73
|
+
byId.set(id, feature)
|
|
74
|
+
const waiting = orphans.get(id)
|
|
75
|
+
if (waiting) {
|
|
76
|
+
for (const w of waiting) {
|
|
77
|
+
feature.subfeatures.push(w)
|
|
78
|
+
}
|
|
79
|
+
orphans.delete(id)
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
for (const parentId of parents) {
|
|
84
|
+
const parentFeature = byId.get(parentId)
|
|
85
|
+
if (parentFeature) {
|
|
86
|
+
parentFeature.subfeatures.push(feature)
|
|
87
|
+
} else {
|
|
88
|
+
appendOrphan(orphans, parentId, feature)
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Every line of a top-level discontinuous feature (e.g. cDNA_match spanning
|
|
93
|
+
// several segments under one shared ID, with no Parent) is its own top-level
|
|
94
|
+
// item, so this is independent of whether the id was just registered.
|
|
95
|
+
return parents.length === 0
|
|
96
|
+
}
|
|
97
|
+
|
|
55
98
|
/**
|
|
56
99
|
* Synchronously parse a string containing GFF3 and return an array of the
|
|
57
100
|
* parsed features. Comments, directives, and `##FASTA` sections are ignored.
|
|
@@ -60,79 +103,45 @@ function toStringArray(value: unknown): string[] {
|
|
|
60
103
|
* @returns array of parsed features
|
|
61
104
|
*/
|
|
62
105
|
export function parseStringSync(str: string): GffFeature[] {
|
|
63
|
-
|
|
64
|
-
|
|
106
|
+
const items: GffFeature[] = []
|
|
107
|
+
const byId = new Map<string, GffFeature>()
|
|
108
|
+
const orphans = new Map<string, GffFeature[]>()
|
|
65
109
|
|
|
66
|
-
|
|
67
|
-
const lines = str.split(/\r?\n/)
|
|
68
|
-
const records: ParseInput[] = []
|
|
69
|
-
for (const line of lines) {
|
|
110
|
+
for (const line of str.split(/\r?\n/)) {
|
|
70
111
|
if (line.startsWith('##FASTA') || line.startsWith('>')) {
|
|
71
112
|
break
|
|
72
113
|
}
|
|
73
|
-
if (line.length
|
|
74
|
-
|
|
114
|
+
if (line.length !== 0 && !line.startsWith('#')) {
|
|
115
|
+
const feature = parseFeature(line)
|
|
116
|
+
if (linkFeature(feature, byId, orphans)) {
|
|
117
|
+
items.push(feature)
|
|
118
|
+
}
|
|
75
119
|
}
|
|
76
|
-
records.push({
|
|
77
|
-
line,
|
|
78
|
-
hasEscapes: line.includes('%'),
|
|
79
|
-
})
|
|
80
120
|
}
|
|
81
|
-
|
|
121
|
+
|
|
122
|
+
return items
|
|
82
123
|
}
|
|
83
124
|
|
|
84
125
|
/**
|
|
85
|
-
* Parse an array of
|
|
86
|
-
*
|
|
126
|
+
* Parse an array of records wrapping raw GFF3 lines, resolving parent/child
|
|
127
|
+
* relationships into `subfeatures`. Returns each top-level feature paired with
|
|
128
|
+
* the record it came from, so callers can attach their own identity (e.g. a
|
|
129
|
+
* byte offset) without the parser stamping anything onto the feature.
|
|
87
130
|
*
|
|
88
|
-
* @param records - Array of
|
|
89
|
-
* @returns
|
|
131
|
+
* @param records - Array of records, each carrying a raw GFF3 `line`
|
|
132
|
+
* @returns top-level features, each paired with its originating record
|
|
90
133
|
*/
|
|
91
|
-
export function parseRecords
|
|
92
|
-
|
|
134
|
+
export function parseRecords<R extends LineRecord>(
|
|
135
|
+
records: readonly R[],
|
|
136
|
+
): ParsedRecord<R>[] {
|
|
137
|
+
const items: ParsedRecord<R>[] = []
|
|
93
138
|
const byId = new Map<string, GffFeature>()
|
|
94
139
|
const orphans = new Map<string, GffFeature[]>()
|
|
95
140
|
|
|
96
141
|
for (const record of records) {
|
|
97
|
-
const feature = parseFeature(record.line
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
feature._lineHash = String(record.lineHash)
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
const id = firstString(feature.id)
|
|
104
|
-
const parents = toStringArray(feature.parent)
|
|
105
|
-
|
|
106
|
-
// A parentless line is a top-level item. Every line of a top-level
|
|
107
|
-
// discontinuous feature (e.g. cDNA_match/EST_match spanning several
|
|
108
|
-
// segments under one shared ID, with no Parent) is its own top-level
|
|
109
|
-
// item, so push regardless of whether the id is already registered.
|
|
110
|
-
if (parents.length === 0) {
|
|
111
|
-
items.push(feature)
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
// Register the id only the first time it is seen. Continuation lines
|
|
115
|
-
// (multi-location features such as a CDS spanning several segments share
|
|
116
|
-
// one ID across lines) skip registration but must still be attached to
|
|
117
|
-
// their parent below, so this is independent of the parent handling.
|
|
118
|
-
if (id && !byId.has(id)) {
|
|
119
|
-
byId.set(id, feature)
|
|
120
|
-
const waiting = orphans.get(id)
|
|
121
|
-
if (waiting) {
|
|
122
|
-
for (const w of waiting) {
|
|
123
|
-
feature.subfeatures.push(w)
|
|
124
|
-
}
|
|
125
|
-
orphans.delete(id)
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
for (const parentId of parents) {
|
|
130
|
-
const parentFeature = byId.get(parentId)
|
|
131
|
-
if (parentFeature) {
|
|
132
|
-
parentFeature.subfeatures.push(feature)
|
|
133
|
-
} else {
|
|
134
|
-
appendOrphan(orphans, parentId, feature)
|
|
135
|
-
}
|
|
142
|
+
const feature = parseFeature(record.line)
|
|
143
|
+
if (linkFeature(feature, byId, orphans)) {
|
|
144
|
+
items.push({ feature, record })
|
|
136
145
|
}
|
|
137
146
|
}
|
|
138
147
|
|
package/src/index.ts
CHANGED
package/src/util.ts
CHANGED
|
@@ -175,15 +175,15 @@ export function parseAttributes(
|
|
|
175
175
|
}
|
|
176
176
|
|
|
177
177
|
/**
|
|
178
|
-
* Parse a GFF3 feature line.
|
|
179
|
-
*
|
|
178
|
+
* Parse a GFF3 feature line. Unescaping is skipped entirely for lines with no
|
|
179
|
+
* '%' character, which is the common case.
|
|
180
180
|
*
|
|
181
181
|
* @param line - GFF3 feature line
|
|
182
|
-
* @param shouldUnescape - whether to unescape percent-encoded values
|
|
183
182
|
* @returns The parsed feature
|
|
184
183
|
*/
|
|
185
|
-
export function parseFeature(line: string
|
|
184
|
+
export function parseFeature(line: string): GffFeature {
|
|
186
185
|
const f = line.split('\t')
|
|
186
|
+
const shouldUnescape = line.includes('%')
|
|
187
187
|
const startStr = f[3]!
|
|
188
188
|
const endStr = f[4]!
|
|
189
189
|
const scoreStr = f[5]!
|