gff-nostream 1.3.9 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api.d.ts +23 -2
- package/dist/api.js +58 -3
- package/dist/api.js.map +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.js +5 -3
- package/dist/index.js.map +1 -1
- package/dist/parse.d.ts +3 -16
- package/dist/parse.js +78 -120
- package/dist/parse.js.map +1 -1
- package/dist/util.d.ts +1 -0
- package/dist/util.js +92 -59
- package/dist/util.js.map +1 -1
- package/esm/api.d.ts +23 -2
- package/esm/api.js +55 -2
- package/esm/api.js.map +1 -1
- package/esm/index.d.ts +3 -3
- package/esm/index.js +2 -2
- package/esm/index.js.map +1 -1
- package/esm/parse.d.ts +3 -16
- package/esm/parse.js +101 -138
- package/esm/parse.js.map +1 -1
- package/esm/util.d.ts +1 -0
- package/esm/util.js +82 -58
- package/esm/util.js.map +1 -1
- package/package.json +27 -16
- package/src/api.ts +77 -12
- package/src/index.ts +6 -5
- package/src/parse.ts +76 -141
- package/src/util.ts +91 -62
package/src/util.ts
CHANGED
|
@@ -1,14 +1,28 @@
|
|
|
1
1
|
// Fast, low-level functions for parsing and formatting GFF3.
|
|
2
2
|
// JavaScript port of Robert Buels's Bio::GFF3::LowLevel Perl module.
|
|
3
3
|
|
|
4
|
+
// Matches one percent-escape: '%' followed by exactly two hex digits (captured).
const escapeRegex = /%([0-9A-Fa-f]{2})/g
|
|
5
|
+
// Matches a '##' directive line, capturing the directive name and the remaining text.
const directiveRegex = /^\s*##\s*(\S+)\s*(.*)/
|
|
6
|
+
// Matches a single trailing LF or CRLF at the very end of a string.
const lineEndRegex = /\r?\n$/
|
|
7
|
+
// Matches one run of whitespace; used as the separator when splitting directive contents.
const whitespaceRegex = /\s+/
|
|
8
|
+
// Matches every non-digit character (global), used to strip them from sequence-region coordinates.
const nonDigitRegex = /\D/g
|
|
9
|
+
// Characters escaped in attribute tags and values: ';', '=', '%', '&', ',',
// control characters (U+0000-U+001F) and U+007F-U+00FF.
// eslint-disable-next-line no-control-regex
|
|
10
|
+
const attrEscapeRegex = /[\n;\r\t=%&,\u0000-\u001f\u007f-\u00ff]/g
|
|
11
|
+
// Characters escaped in column values: '%', control characters (U+0000-U+001F)
// and U+007F-U+00FF. Unlike the attribute pattern, ';', '=', '&' and ',' are left alone.
// eslint-disable-next-line no-control-regex
|
|
12
|
+
const columnEscapeRegex = /[\n\r\t%\u0000-\u001f\u007f-\u00ff]/g
|
|
13
|
+
|
|
4
14
|
/**
|
|
5
15
|
* Unescape a percent-encoded string value from a GFF3 column or attribute.
|
|
6
16
|
*
|
|
7
17
|
* @param stringVal - Escaped GFF3 string value
|
|
8
18
|
* @returns The string with each %XX escape replaced by the character with that code
|
|
9
19
|
*/
|
|
20
|
+
|
|
10
21
|
export function unescape(stringVal: string): string {
|
|
11
|
-
|
|
22
|
+
// Fast path: with no '%' present there is nothing to decode, so skip the regex replace.
if (!stringVal.includes('%')) {
|
|
23
|
+
return stringVal
|
|
24
|
+
}
|
|
25
|
+
return stringVal.replaceAll(escapeRegex, (_match, seq) =>
|
|
12
26
|
// Each %XX becomes a single UTF-16 code unit.
// NOTE(review): multi-byte UTF-8 escape sequences are not recombined into one character - confirm this is intended.
String.fromCharCode(parseInt(seq, 16)),
|
|
13
27
|
)
|
|
14
28
|
}
|
|
@@ -27,7 +41,7 @@ function _escape(regex: RegExp, s: string | number) {
|
|
|
27
41
|
* @returns An escaped string value
|
|
28
42
|
*/
|
|
29
43
|
export function escape(rawVal: string | number): string {
|
|
30
|
-
return _escape(
|
|
44
|
+
// Attribute escaping: attrEscapeRegex covers ';', '=', '&' and ',' in addition to
// the characters handled by the column pattern.
return _escape(attrEscapeRegex, rawVal)
|
|
31
45
|
}
|
|
32
46
|
|
|
33
47
|
/**
|
|
@@ -37,7 +51,7 @@ export function escape(rawVal: string | number): string {
|
|
|
37
51
|
* @returns An escaped column value
|
|
38
52
|
*/
|
|
39
53
|
export function escapeColumn(rawVal: string | number): string {
|
|
40
|
-
return _escape(
|
|
54
|
+
// Column escaping: columnEscapeRegex leaves ';', '=', '&' and ',' untouched,
// unlike the attribute pattern used by escape().
return _escape(columnEscapeRegex, rawVal)
|
|
41
55
|
}
|
|
42
56
|
|
|
43
57
|
/**
|
|
@@ -53,29 +67,32 @@ export function parseAttributes(attrString: string): GFF3Attributes {
|
|
|
53
67
|
|
|
54
68
|
const attrs: GFF3Attributes = {}
|
|
55
69
|
|
|
56
|
-
attrString
|
|
57
|
-
|
|
58
|
-
.
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
70
|
+
let str = attrString
|
|
71
|
+
if (str.endsWith('\n')) {
|
|
72
|
+
str = str.slice(0, str.endsWith('\r\n') ? -2 : -1)
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
for (const a of str.split(';')) {
|
|
76
|
+
const eqIdx = a.indexOf('=')
|
|
77
|
+
if (eqIdx === -1) {
|
|
78
|
+
continue
|
|
79
|
+
}
|
|
80
|
+
const value = a.slice(eqIdx + 1)
|
|
81
|
+
if (!value.length) {
|
|
82
|
+
continue
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
const tag = a.slice(0, eqIdx).trim()
|
|
86
|
+
let arec = attrs[tag]
|
|
87
|
+
if (!arec) {
|
|
88
|
+
arec = []
|
|
89
|
+
attrs[tag] = arec
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
for (const s of value.split(',')) {
|
|
93
|
+
arec.push(unescape(s.trim()))
|
|
94
|
+
}
|
|
95
|
+
}
|
|
79
96
|
return attrs
|
|
80
97
|
}
|
|
81
98
|
|
|
@@ -86,22 +103,41 @@ export function parseAttributes(attrString: string): GFF3Attributes {
|
|
|
86
103
|
* @returns The parsed feature
|
|
87
104
|
*/
|
|
88
105
|
export function parseFeature(line: string): GFF3FeatureLine {
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
106
|
+
// Split the raw line on tab characters and hand the columns to parseFieldsArray.
// The line is not trimmed first, so any trailing newline stays on the last field.
return parseFieldsArray(line.split('\t'))
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Normalize one raw column value: '.', '' and undefined become null; any other value (including null) is returned as given.
|
|
111
|
+
*
|
|
112
|
+
* @param a - Raw column value
|
|
113
|
+
* @returns null for an empty column, otherwise the value unchanged
|
|
114
|
+
*/
|
|
115
|
+
function norm(a: string | null | undefined) {
|
|
116
|
+
return a === '.' || a === '' || a === undefined ? null : a
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
/**
 * Parse a GFF3 feature from a pre-split fields array.
 *
 * @param f - Array of 9 GFF3 column values (null, undefined, '' or '.' mark an empty column)
 * @returns The parsed feature
 */
export function parseFieldsArray(f: (string | null | undefined)[]): GFF3FeatureLine {
|
|
120
|
+
// Read the nine columns by position; columns missing from a short array are
// undefined and therefore normalize to null.
const seq_id = norm(f[0])
|
|
121
|
+
const source = norm(f[1])
|
|
122
|
+
const type = norm(f[2])
|
|
123
|
+
const start = norm(f[3])
|
|
124
|
+
const end = norm(f[4])
|
|
125
|
+
const score = norm(f[5])
|
|
126
|
+
const strand = norm(f[6])
|
|
127
|
+
const phase = norm(f[7])
|
|
128
|
+
const attrString = norm(f[8])
|
|
129
|
+
|
|
130
|
+
return {
|
|
131
|
+
// Only seq_id, source and type are percent-decoded here.
seq_id: seq_id ? unescape(seq_id) : null,
|
|
132
|
+
source: source ? unescape(source) : null,
|
|
133
|
+
type: type ? unescape(type) : null,
|
|
134
|
+
// NOTE(review): parseInt/parseFloat yield NaN for non-numeric text rather than null - confirm callers expect that.
start: start === null ? null : parseInt(start, 10),
|
|
135
|
+
end: end === null ? null : parseInt(end, 10),
|
|
136
|
+
score: score === null ? null : parseFloat(score),
|
|
137
|
+
// strand and phase are passed through as normalized text, without decoding or conversion.
strand,
|
|
138
|
+
phase,
|
|
139
|
+
attributes: attrString === null ? null : parseAttributes(attrString),
|
|
103
140
|
}
|
|
104
|
-
return parsed
|
|
105
141
|
}
|
|
106
142
|
|
|
107
143
|
/**
|
|
@@ -117,7 +153,7 @@ export function parseDirective(
|
|
|
117
153
|
| GFF3SequenceRegionDirective
|
|
118
154
|
| GFF3GenomeBuildDirective
|
|
119
155
|
| null {
|
|
120
|
-
const match =
|
|
156
|
+
const match = directiveRegex.exec(line)
|
|
121
157
|
if (!match) {
|
|
122
158
|
return null
|
|
123
159
|
}
|
|
@@ -126,22 +162,22 @@ export function parseDirective(
|
|
|
126
162
|
let [, , contents] = match
|
|
127
163
|
|
|
128
164
|
const parsed: GFF3Directive = { directive: name }
|
|
129
|
-
if (contents
|
|
130
|
-
contents = contents
|
|
165
|
+
if (contents!.length) {
|
|
166
|
+
contents = contents!.replace(lineEndRegex, '')
|
|
131
167
|
parsed.value = contents
|
|
132
168
|
}
|
|
133
169
|
|
|
134
170
|
// do a little additional parsing for sequence-region and genome-build directives
|
|
135
171
|
if (name === 'sequence-region') {
|
|
136
|
-
const c = contents
|
|
172
|
+
const c = contents!.split(whitespaceRegex, 3)
|
|
137
173
|
return {
|
|
138
174
|
...parsed,
|
|
139
175
|
seq_id: c[0],
|
|
140
|
-
start: c[1]?.replaceAll(
|
|
141
|
-
end: c[2]?.replaceAll(
|
|
176
|
+
start: c[1]?.replaceAll(nonDigitRegex, ''),
|
|
177
|
+
end: c[2]?.replaceAll(nonDigitRegex, ''),
|
|
142
178
|
} as GFF3SequenceRegionDirective
|
|
143
179
|
} else if (name === 'genome-build') {
|
|
144
|
-
const [source, buildName] = contents
|
|
180
|
+
const [source, buildName] = contents!.split(whitespaceRegex, 2)
|
|
145
181
|
return {
|
|
146
182
|
...parsed,
|
|
147
183
|
source,
|
|
@@ -160,22 +196,12 @@ export function parseDirective(
|
|
|
160
196
|
*/
|
|
161
197
|
export function formatAttributes(attrs: GFF3Attributes): string {
|
|
162
198
|
// Collects one "tag=value,value" entry per attribute, in Object.entries order.
const attrOrder: string[] = []
|
|
163
|
-
|
|
199
|
+
for (const [tag, val] of Object.entries(attrs)) {
|
|
164
200
|
if (!val) {
|
|
165
|
-
|
|
201
|
+
// Skip tags whose value is falsy (e.g. undefined).
continue
|
|
166
202
|
}
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
valstring = escape(val.toString())
|
|
170
|
-
// } else if (Array.isArray(val.values)) {
|
|
171
|
-
// valstring = val.values.map(escape).join(',')
|
|
172
|
-
} else if (Array.isArray(val)) {
|
|
173
|
-
valstring = val.map(escape).join(',')
|
|
174
|
-
} else {
|
|
175
|
-
valstring = escape(val)
|
|
176
|
-
}
|
|
177
|
-
attrOrder.push(`${escape(tag)}=${valstring}`)
|
|
178
|
-
})
|
|
203
|
+
// Escape the tag and each value; multiple values for one tag are joined with ','.
attrOrder.push(`${escape(tag)}=${val.map(escape).join(',')}`)
|
|
204
|
+
}
|
|
179
205
|
// Entries are joined with ';'; when there are no entries the result is '.'.
return attrOrder.length ? attrOrder.join(';') : '.'
|
|
180
206
|
}
|
|
181
207
|
|
|
@@ -184,6 +210,7 @@ function _formatSingleFeature(
|
|
|
184
210
|
seenFeature: Record<string, boolean | undefined>,
|
|
185
211
|
) {
|
|
186
212
|
const attrString =
|
|
213
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
187
214
|
f.attributes === null || f.attributes === undefined
|
|
188
215
|
? '.'
|
|
189
216
|
: formatAttributes(f.attributes)
|
|
@@ -366,7 +393,9 @@ function _isFeatureLineWithRefs(
|
|
|
366
393
|
featureLine: GFF3FeatureLine | GFF3FeatureLineWithRefs,
|
|
367
394
|
): featureLine is GFF3FeatureLineWithRefs {
|
|
368
395
|
return (
|
|
396
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
369
397
|
(featureLine as GFF3FeatureLineWithRefs).child_features !== undefined &&
|
|
398
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
370
399
|
(featureLine as GFF3FeatureLineWithRefs).derived_features !== undefined
|
|
371
400
|
)
|
|
372
401
|
}
|