gff-nostream 3.0.1 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -75
- package/dist/api.js +14 -21
- package/dist/api.js.map +1 -1
- package/dist/util.d.ts +1 -1
- package/dist/util.js +135 -216
- package/dist/util.js.map +1 -1
- package/esm/api.js +14 -21
- package/esm/api.js.map +1 -1
- package/esm/util.d.ts +1 -1
- package/esm/util.js +135 -216
- package/esm/util.js.map +1 -1
- package/package.json +15 -22
- package/src/api.ts +15 -22
- package/src/util.ts +185 -254
package/src/api.ts
CHANGED
|
@@ -43,15 +43,14 @@ export function parseStringSyncJBrowse(str: string): JBrowseFeature[] {
|
|
|
43
43
|
function stringToRecords(str: string) {
|
|
44
44
|
const lines = str.split(/\r?\n/)
|
|
45
45
|
const records: LineRecord[] = []
|
|
46
|
-
for (
|
|
47
|
-
|
|
48
|
-
if (line.length === 0 || line[0] === '#') {
|
|
46
|
+
for (const line of lines) {
|
|
47
|
+
if (line.length === 0 || line.startsWith('#')) {
|
|
49
48
|
if (line.startsWith('##FASTA')) {
|
|
50
49
|
break
|
|
51
50
|
}
|
|
52
51
|
continue
|
|
53
52
|
}
|
|
54
|
-
if (line
|
|
53
|
+
if (line.startsWith('>')) {
|
|
55
54
|
break
|
|
56
55
|
}
|
|
57
56
|
records.push({
|
|
@@ -76,8 +75,7 @@ export function parseRecords(records: LineRecord[]): GFF3Feature[] {
|
|
|
76
75
|
const byId = new Map<string, GFF3Feature>()
|
|
77
76
|
const orphans = new Map<string, GFF3Feature[]>()
|
|
78
77
|
|
|
79
|
-
for (
|
|
80
|
-
const record = records[i]!
|
|
78
|
+
for (const record of records) {
|
|
81
79
|
const featureLine = (
|
|
82
80
|
record.hasEscapes
|
|
83
81
|
? parseFeature(record.line)
|
|
@@ -87,9 +85,7 @@ export function parseRecords(records: LineRecord[]): GFF3Feature[] {
|
|
|
87
85
|
featureLine.derived_features = []
|
|
88
86
|
|
|
89
87
|
if (record.lineHash !== undefined) {
|
|
90
|
-
|
|
91
|
-
featureLine.attributes = {}
|
|
92
|
-
}
|
|
88
|
+
featureLine.attributes ??= {}
|
|
93
89
|
featureLine.attributes._lineHash = [String(record.lineHash)]
|
|
94
90
|
}
|
|
95
91
|
|
|
@@ -104,7 +100,7 @@ export function parseRecords(records: LineRecord[]): GFF3Feature[] {
|
|
|
104
100
|
|
|
105
101
|
let feature: GFF3Feature
|
|
106
102
|
if (ids) {
|
|
107
|
-
const id = ids[0]
|
|
103
|
+
const id = ids[0]
|
|
108
104
|
const existing = byId.get(id)
|
|
109
105
|
if (existing) {
|
|
110
106
|
existing.push(featureLine)
|
|
@@ -117,8 +113,8 @@ export function parseRecords(records: LineRecord[]): GFF3Feature[] {
|
|
|
117
113
|
byId.set(id, feature)
|
|
118
114
|
const waiting = orphans.get(id)
|
|
119
115
|
if (waiting) {
|
|
120
|
-
for (
|
|
121
|
-
featureLine.child_features.push(
|
|
116
|
+
for (const w of waiting) {
|
|
117
|
+
featureLine.child_features.push(w)
|
|
122
118
|
}
|
|
123
119
|
orphans.delete(id)
|
|
124
120
|
}
|
|
@@ -128,12 +124,11 @@ export function parseRecords(records: LineRecord[]): GFF3Feature[] {
|
|
|
128
124
|
}
|
|
129
125
|
|
|
130
126
|
if (parents) {
|
|
131
|
-
for (
|
|
132
|
-
const parentId = parents[j]!
|
|
127
|
+
for (const parentId of parents) {
|
|
133
128
|
const parent = byId.get(parentId)
|
|
134
129
|
if (parent) {
|
|
135
|
-
for (
|
|
136
|
-
|
|
130
|
+
for (const p of parent) {
|
|
131
|
+
p.child_features.push(feature)
|
|
137
132
|
}
|
|
138
133
|
} else {
|
|
139
134
|
let arr = orphans.get(parentId)
|
|
@@ -162,8 +157,7 @@ export function parseRecordsJBrowse(records: LineRecord[]): JBrowseFeature[] {
|
|
|
162
157
|
const byId = new Map<string, JBrowseFeature>()
|
|
163
158
|
const orphans = new Map<string, JBrowseFeature[]>()
|
|
164
159
|
|
|
165
|
-
for (
|
|
166
|
-
const record = records[i]!
|
|
160
|
+
for (const record of records) {
|
|
167
161
|
const feature = record.hasEscapes
|
|
168
162
|
? parseFeatureJBrowse(record.line)
|
|
169
163
|
: parseFeatureJBrowseNoUnescape(record.line)
|
|
@@ -189,8 +183,8 @@ export function parseRecordsJBrowse(records: LineRecord[]): JBrowseFeature[] {
|
|
|
189
183
|
byId.set(id, feature)
|
|
190
184
|
const waiting = orphans.get(id)
|
|
191
185
|
if (waiting) {
|
|
192
|
-
for (
|
|
193
|
-
feature.subfeatures.push(
|
|
186
|
+
for (const w of waiting) {
|
|
187
|
+
feature.subfeatures.push(w)
|
|
194
188
|
}
|
|
195
189
|
orphans.delete(id)
|
|
196
190
|
}
|
|
@@ -199,8 +193,7 @@ export function parseRecordsJBrowse(records: LineRecord[]): JBrowseFeature[] {
|
|
|
199
193
|
|
|
200
194
|
if (parent) {
|
|
201
195
|
const parents = Array.isArray(parent) ? parent : [parent]
|
|
202
|
-
for (
|
|
203
|
-
const parentId = parents[j]!
|
|
196
|
+
for (const parentId of parents) {
|
|
204
197
|
const parentFeature = byId.get(parentId)
|
|
205
198
|
if (parentFeature) {
|
|
206
199
|
parentFeature.subfeatures.push(feature)
|
package/src/util.ts
CHANGED
|
@@ -1,17 +1,12 @@
|
|
|
1
1
|
// Fast, low-level functions for parsing and formatting GFF3.
|
|
2
2
|
// JavaScript port of Robert Buels's Bio::GFF3::LowLevel Perl module.
|
|
3
3
|
|
|
4
|
-
const escapeRegex = /%([0-9A-Fa-f]{2})/g
|
|
5
4
|
const directiveRegex = /^\s*##\s*(\S+)\s*(.*)/
|
|
6
5
|
const lineEndRegex = /\r?\n$/
|
|
7
6
|
const whitespaceRegex = /\s+/
|
|
8
7
|
const nonDigitRegex = /\D/g
|
|
9
|
-
// eslint-disable-next-line no-control-regex
|
|
10
|
-
const attrEscapeRegex = /[\n;\r\t=%&,\u0000-\u001f\u007f-\u00ff]/g
|
|
11
|
-
// eslint-disable-next-line no-control-regex
|
|
12
|
-
const columnEscapeRegex = /[\n\r\t%\u0000-\u001f\u007f-\u00ff]/g
|
|
13
8
|
|
|
14
|
-
const HEX_LOOKUP: Record<string, string> = {}
|
|
9
|
+
const HEX_LOOKUP: Record<string, string | undefined> = {}
|
|
15
10
|
for (let i = 0; i < 256; i++) {
|
|
16
11
|
const hex = i.toString(16).toUpperCase().padStart(2, '0')
|
|
17
12
|
HEX_LOOKUP[hex] = String.fromCharCode(i)
|
|
@@ -25,17 +20,40 @@ for (let i = 0; i < 256; i++) {
|
|
|
25
20
|
* @returns An unescaped string value
|
|
26
21
|
*/
|
|
27
22
|
|
|
28
|
-
export function unescape(
|
|
29
|
-
|
|
23
|
+
export function unescape(stringVal: string) {
|
|
24
|
+
const idx = stringVal.indexOf('%')
|
|
25
|
+
if (idx === -1) {
|
|
26
|
+
return stringVal
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
let result = ''
|
|
30
|
+
let lastIdx = 0
|
|
31
|
+
let i = idx
|
|
32
|
+
|
|
33
|
+
while (i < stringVal.length) {
|
|
34
|
+
if (stringVal[i] === '%' && i + 2 < stringVal.length) {
|
|
35
|
+
result += stringVal.slice(lastIdx, i)
|
|
36
|
+
const hex = stringVal.slice(i + 1, i + 3)
|
|
37
|
+
const char = HEX_LOOKUP[hex]
|
|
38
|
+
if (char !== undefined) {
|
|
39
|
+
result += char
|
|
40
|
+
} else {
|
|
41
|
+
result += stringVal.slice(i, i + 3)
|
|
42
|
+
}
|
|
43
|
+
i += 3
|
|
44
|
+
lastIdx = i
|
|
45
|
+
} else {
|
|
46
|
+
i++
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
return result + stringVal.slice(lastIdx)
|
|
30
51
|
}
|
|
31
52
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
* @returns Parsed attributes
|
|
37
|
-
*/
|
|
38
|
-
export function parseAttributes(attrString: string): GFF3Attributes {
|
|
53
|
+
function parseAttributesImpl(
|
|
54
|
+
attrString: string,
|
|
55
|
+
shouldUnescape: boolean,
|
|
56
|
+
): GFF3Attributes {
|
|
39
57
|
if (attrString.length === 0 || attrString === '.') {
|
|
40
58
|
return {}
|
|
41
59
|
}
|
|
@@ -73,7 +91,7 @@ export function parseAttributes(attrString: string): GFF3Attributes {
|
|
|
73
91
|
}
|
|
74
92
|
if (commaIdx > valStart) {
|
|
75
93
|
const val = attrString.slice(valStart, commaIdx)
|
|
76
|
-
arec.push(unescape(val))
|
|
94
|
+
arec.push(shouldUnescape ? unescape(val) : val)
|
|
77
95
|
}
|
|
78
96
|
valStart = commaIdx + 1
|
|
79
97
|
}
|
|
@@ -84,6 +102,16 @@ export function parseAttributes(attrString: string): GFF3Attributes {
|
|
|
84
102
|
return attrs
|
|
85
103
|
}
|
|
86
104
|
|
|
105
|
+
/**
|
|
106
|
+
* Parse the 9th column (attributes) of a GFF3 feature line.
|
|
107
|
+
*
|
|
108
|
+
* @param attrString - String of GFF3 9th column
|
|
109
|
+
* @returns Parsed attributes
|
|
110
|
+
*/
|
|
111
|
+
export function parseAttributes(attrString: string): GFF3Attributes {
|
|
112
|
+
return parseAttributesImpl(attrString, true)
|
|
113
|
+
}
|
|
114
|
+
|
|
87
115
|
/**
|
|
88
116
|
* Parse the 9th column (attributes) of a GFF3 feature line without unescaping.
|
|
89
117
|
* Fast path for data known to contain no escaped characters.
|
|
@@ -92,59 +120,42 @@ export function parseAttributes(attrString: string): GFF3Attributes {
|
|
|
92
120
|
* @returns Parsed attributes
|
|
93
121
|
*/
|
|
94
122
|
export function parseAttributesNoUnescape(attrString: string): GFF3Attributes {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
const attrs: GFF3Attributes = {}
|
|
100
|
-
let len = attrString.length
|
|
101
|
-
|
|
102
|
-
if (attrString[len - 1] === '\n') {
|
|
103
|
-
len = attrString[len - 2] === '\r' ? len - 2 : len - 1
|
|
104
|
-
attrString = attrString.slice(0, len)
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
let start = 0
|
|
108
|
-
while (start < len) {
|
|
109
|
-
let semiIdx = attrString.indexOf(';', start)
|
|
110
|
-
if (semiIdx === -1) {
|
|
111
|
-
semiIdx = len
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
if (semiIdx > start) {
|
|
115
|
-
const eqIdx = attrString.indexOf('=', start)
|
|
116
|
-
if (eqIdx !== -1 && eqIdx < semiIdx && eqIdx + 1 < semiIdx) {
|
|
117
|
-
const tag = attrString.slice(start, eqIdx)
|
|
118
|
-
let arec = attrs[tag]
|
|
119
|
-
if (!arec) {
|
|
120
|
-
arec = []
|
|
121
|
-
attrs[tag] = arec
|
|
122
|
-
}
|
|
123
|
+
return parseAttributesImpl(attrString, false)
|
|
124
|
+
}
|
|
123
125
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
if (commaIdx === -1 || commaIdx > semiIdx) {
|
|
128
|
-
commaIdx = semiIdx
|
|
129
|
-
}
|
|
130
|
-
if (commaIdx > valStart) {
|
|
131
|
-
arec.push(attrString.slice(valStart, commaIdx))
|
|
132
|
-
}
|
|
133
|
-
valStart = commaIdx + 1
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
start = semiIdx + 1
|
|
126
|
+
function normImpl(s: string, shouldUnescape: boolean) {
|
|
127
|
+
if (s.length === 0 || s === '.') {
|
|
128
|
+
return null
|
|
138
129
|
}
|
|
139
|
-
return
|
|
130
|
+
return shouldUnescape ? unescape(s) : s
|
|
140
131
|
}
|
|
141
132
|
|
|
142
|
-
function
|
|
143
|
-
|
|
144
|
-
|
|
133
|
+
function parseFeatureImpl(
|
|
134
|
+
line: string,
|
|
135
|
+
shouldUnescape: boolean,
|
|
136
|
+
): GFF3FeatureLine {
|
|
137
|
+
const f = line.split('\t')
|
|
138
|
+
const startStr = f[3]
|
|
139
|
+
const endStr = f[4]
|
|
140
|
+
const scoreStr = f[5]
|
|
141
|
+
const strand = f[6]
|
|
142
|
+
const phase = f[7]
|
|
143
|
+
const attrString = f[8]
|
|
145
144
|
|
|
146
|
-
|
|
147
|
-
|
|
145
|
+
return {
|
|
146
|
+
seq_id: normImpl(f[0], shouldUnescape),
|
|
147
|
+
source: normImpl(f[1], shouldUnescape),
|
|
148
|
+
type: normImpl(f[2], shouldUnescape),
|
|
149
|
+
start: startStr.length === 0 || startStr === '.' ? null : +startStr,
|
|
150
|
+
end: endStr.length === 0 || endStr === '.' ? null : +endStr,
|
|
151
|
+
score: scoreStr.length === 0 || scoreStr === '.' ? null : +scoreStr,
|
|
152
|
+
strand: normImpl(strand, false),
|
|
153
|
+
phase: normImpl(phase, false),
|
|
154
|
+
attributes:
|
|
155
|
+
attrString.length === 0 || attrString === '.'
|
|
156
|
+
? null
|
|
157
|
+
: parseAttributesImpl(attrString, shouldUnescape),
|
|
158
|
+
}
|
|
148
159
|
}
|
|
149
160
|
|
|
150
161
|
/**
|
|
@@ -154,28 +165,7 @@ function norm(s: string) {
|
|
|
154
165
|
* @returns The parsed feature
|
|
155
166
|
*/
|
|
156
167
|
export function parseFeature(line: string): GFF3FeatureLine {
|
|
157
|
-
|
|
158
|
-
const seq_id = f[0]!
|
|
159
|
-
const source = f[1]!
|
|
160
|
-
const type = f[2]!
|
|
161
|
-
const startStr = f[3]!
|
|
162
|
-
const endStr = f[4]!
|
|
163
|
-
const scoreStr = f[5]!
|
|
164
|
-
const strand = f[6]!
|
|
165
|
-
const phase = f[7]!
|
|
166
|
-
const attrString = f[8]!
|
|
167
|
-
|
|
168
|
-
return {
|
|
169
|
-
seq_id: normUnescape(seq_id),
|
|
170
|
-
source: normUnescape(source),
|
|
171
|
-
type: normUnescape(type),
|
|
172
|
-
start: startStr.length === 0 || startStr === '.' ? null : +startStr,
|
|
173
|
-
end: endStr.length === 0 || endStr === '.' ? null : +endStr,
|
|
174
|
-
score: scoreStr.length === 0 || scoreStr === '.' ? null : +scoreStr,
|
|
175
|
-
strand: norm(strand),
|
|
176
|
-
phase: norm(phase),
|
|
177
|
-
attributes: attrString.length === 0 || attrString === '.' ? null : parseAttributes(attrString),
|
|
178
|
-
}
|
|
168
|
+
return parseFeatureImpl(line, true)
|
|
179
169
|
}
|
|
180
170
|
|
|
181
171
|
/**
|
|
@@ -186,38 +176,12 @@ export function parseFeature(line: string): GFF3FeatureLine {
|
|
|
186
176
|
* @returns The parsed feature
|
|
187
177
|
*/
|
|
188
178
|
export function parseFeatureNoUnescape(line: string): GFF3FeatureLine {
|
|
189
|
-
|
|
190
|
-
const seq_id = f[0]!
|
|
191
|
-
const source = f[1]!
|
|
192
|
-
const type = f[2]!
|
|
193
|
-
const startStr = f[3]!
|
|
194
|
-
const endStr = f[4]!
|
|
195
|
-
const scoreStr = f[5]!
|
|
196
|
-
const strand = f[6]!
|
|
197
|
-
const phase = f[7]!
|
|
198
|
-
const attrString = f[8]!
|
|
199
|
-
|
|
200
|
-
return {
|
|
201
|
-
seq_id: norm(seq_id),
|
|
202
|
-
source: norm(source),
|
|
203
|
-
type: norm(type),
|
|
204
|
-
start: startStr.length === 0 || startStr === '.' ? null : +startStr,
|
|
205
|
-
end: endStr.length === 0 || endStr === '.' ? null : +endStr,
|
|
206
|
-
score: scoreStr.length === 0 || scoreStr === '.' ? null : +scoreStr,
|
|
207
|
-
strand: norm(strand),
|
|
208
|
-
phase: norm(phase),
|
|
209
|
-
attributes: attrString.length === 0 || attrString === '.' ? null : parseAttributesNoUnescape(attrString),
|
|
210
|
-
}
|
|
179
|
+
return parseFeatureImpl(line, false)
|
|
211
180
|
}
|
|
212
181
|
|
|
213
|
-
|
|
214
|
-
* Parse a GFF3 feature from a pre-split fields array
|
|
215
|
-
*
|
|
216
|
-
* @param f - Array of 9 GFF3 column values (use null or '.' for empty values)
|
|
217
|
-
* @returns The parsed feature
|
|
218
|
-
*/
|
|
219
|
-
export function parseFieldsArray(
|
|
182
|
+
function parseFieldsArrayImpl(
|
|
220
183
|
f: (string | null | undefined)[],
|
|
184
|
+
shouldUnescape: boolean,
|
|
221
185
|
): GFF3FeatureLine {
|
|
222
186
|
const seq_id = f[0]
|
|
223
187
|
const source = f[1]
|
|
@@ -230,18 +194,33 @@ export function parseFieldsArray(
|
|
|
230
194
|
const attrString = f[8]
|
|
231
195
|
|
|
232
196
|
return {
|
|
233
|
-
seq_id: seq_id ?
|
|
234
|
-
source: source ?
|
|
235
|
-
type: type ?
|
|
197
|
+
seq_id: seq_id ? normImpl(seq_id, shouldUnescape) : null,
|
|
198
|
+
source: source ? normImpl(source, shouldUnescape) : null,
|
|
199
|
+
type: type ? normImpl(type, shouldUnescape) : null,
|
|
236
200
|
start: !startStr || startStr === '.' ? null : +startStr,
|
|
237
201
|
end: !endStr || endStr === '.' ? null : +endStr,
|
|
238
202
|
score: !scoreStr || scoreStr === '.' ? null : +scoreStr,
|
|
239
203
|
strand: strand && strand !== '.' ? strand : null,
|
|
240
204
|
phase: phase && phase !== '.' ? phase : null,
|
|
241
|
-
attributes:
|
|
205
|
+
attributes:
|
|
206
|
+
!attrString || attrString === '.'
|
|
207
|
+
? null
|
|
208
|
+
: parseAttributesImpl(attrString, shouldUnescape),
|
|
242
209
|
}
|
|
243
210
|
}
|
|
244
211
|
|
|
212
|
+
/**
|
|
213
|
+
* Parse a GFF3 feature from a pre-split fields array
|
|
214
|
+
*
|
|
215
|
+
* @param f - Array of 9 GFF3 column values (use null or '.' for empty values)
|
|
216
|
+
* @returns The parsed feature
|
|
217
|
+
*/
|
|
218
|
+
export function parseFieldsArray(
|
|
219
|
+
f: (string | null | undefined)[],
|
|
220
|
+
): GFF3FeatureLine {
|
|
221
|
+
return parseFieldsArrayImpl(f, true)
|
|
222
|
+
}
|
|
223
|
+
|
|
245
224
|
/**
|
|
246
225
|
* Parse a GFF3 feature from a pre-split fields array without unescaping.
|
|
247
226
|
* Fast path for data known to contain no escaped characters.
|
|
@@ -252,27 +231,7 @@ export function parseFieldsArray(
|
|
|
252
231
|
export function parseFieldsArrayNoUnescape(
|
|
253
232
|
f: (string | null | undefined)[],
|
|
254
233
|
): GFF3FeatureLine {
|
|
255
|
-
|
|
256
|
-
const source = f[1]
|
|
257
|
-
const type = f[2]
|
|
258
|
-
const startStr = f[3]
|
|
259
|
-
const endStr = f[4]
|
|
260
|
-
const scoreStr = f[5]
|
|
261
|
-
const strand = f[6]
|
|
262
|
-
const phase = f[7]
|
|
263
|
-
const attrString = f[8]
|
|
264
|
-
|
|
265
|
-
return {
|
|
266
|
-
seq_id: seq_id && seq_id !== '.' ? seq_id : null,
|
|
267
|
-
source: source && source !== '.' ? source : null,
|
|
268
|
-
type: type && type !== '.' ? type : null,
|
|
269
|
-
start: !startStr || startStr === '.' ? null : +startStr,
|
|
270
|
-
end: !endStr || endStr === '.' ? null : +endStr,
|
|
271
|
-
score: !scoreStr || scoreStr === '.' ? null : +scoreStr,
|
|
272
|
-
strand: strand && strand !== '.' ? strand : null,
|
|
273
|
-
phase: phase && phase !== '.' ? phase : null,
|
|
274
|
-
attributes: !attrString || attrString === '.' ? null : parseAttributesNoUnescape(attrString),
|
|
275
|
-
}
|
|
234
|
+
return parseFieldsArrayImpl(f, false)
|
|
276
235
|
}
|
|
277
236
|
|
|
278
237
|
/**
|
|
@@ -297,27 +256,26 @@ export function parseDirective(
|
|
|
297
256
|
let [, , contents] = match
|
|
298
257
|
|
|
299
258
|
const parsed: GFF3Directive = { directive: name }
|
|
300
|
-
if (contents
|
|
301
|
-
contents = contents
|
|
259
|
+
if (contents.length) {
|
|
260
|
+
contents = contents.replace(lineEndRegex, '')
|
|
302
261
|
parsed.value = contents
|
|
303
262
|
}
|
|
304
263
|
|
|
305
|
-
// do a little additional parsing for sequence-region and genome-build directives
|
|
306
264
|
if (name === 'sequence-region') {
|
|
307
|
-
const c = contents
|
|
265
|
+
const c = contents.split(whitespaceRegex, 3)
|
|
308
266
|
return {
|
|
309
267
|
...parsed,
|
|
310
268
|
seq_id: c[0],
|
|
311
269
|
start: c[1]?.replaceAll(nonDigitRegex, ''),
|
|
312
270
|
end: c[2]?.replaceAll(nonDigitRegex, ''),
|
|
313
|
-
}
|
|
271
|
+
}
|
|
314
272
|
} else if (name === 'genome-build') {
|
|
315
|
-
const [source, buildName] = contents
|
|
273
|
+
const [source, buildName] = contents.split(whitespaceRegex, 2)
|
|
316
274
|
return {
|
|
317
275
|
...parsed,
|
|
318
276
|
source,
|
|
319
277
|
buildName,
|
|
320
|
-
}
|
|
278
|
+
}
|
|
321
279
|
}
|
|
322
280
|
|
|
323
281
|
return parsed
|
|
@@ -425,6 +383,30 @@ const JBROWSE_DEFAULT_FIELDS = new Set([
|
|
|
425
383
|
'strand',
|
|
426
384
|
])
|
|
427
385
|
|
|
386
|
+
// Pre-computed lowercase for common GFF3 spec attribute names to avoid
|
|
387
|
+
// toLowerCase() calls in the hot path
|
|
388
|
+
const COMMON_ATTRS: Record<string, string | undefined> = {
|
|
389
|
+
ID: 'id',
|
|
390
|
+
Name: 'name',
|
|
391
|
+
Parent: 'parent',
|
|
392
|
+
Note: 'note',
|
|
393
|
+
Dbxref: 'dbxref',
|
|
394
|
+
Ontology_term: 'ontology_term',
|
|
395
|
+
Is_circular: 'is_circular',
|
|
396
|
+
Alias: 'alias',
|
|
397
|
+
Target: 'target',
|
|
398
|
+
Gap: 'gap',
|
|
399
|
+
Derives_from: 'derives_from',
|
|
400
|
+
id: 'id',
|
|
401
|
+
name: 'name',
|
|
402
|
+
parent: 'parent',
|
|
403
|
+
note: 'note',
|
|
404
|
+
dbxref: 'dbxref',
|
|
405
|
+
alias: 'alias',
|
|
406
|
+
target: 'target',
|
|
407
|
+
gap: 'gap',
|
|
408
|
+
}
|
|
409
|
+
|
|
428
410
|
export interface JBrowseFeature {
|
|
429
411
|
start: number
|
|
430
412
|
end: number
|
|
@@ -438,22 +420,10 @@ export interface JBrowseFeature {
|
|
|
438
420
|
[key: string]: unknown
|
|
439
421
|
}
|
|
440
422
|
|
|
441
|
-
function
|
|
442
|
-
if (s === '+') {
|
|
443
|
-
return 1
|
|
444
|
-
}
|
|
445
|
-
if (s === '-') {
|
|
446
|
-
return -1
|
|
447
|
-
}
|
|
448
|
-
if (s === '.') {
|
|
449
|
-
return 0
|
|
450
|
-
}
|
|
451
|
-
return undefined
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
export function parseAttributesJBrowse(
|
|
423
|
+
function parseAttributesJBrowseImpl(
|
|
455
424
|
attrString: string,
|
|
456
425
|
result: Record<string, unknown>,
|
|
426
|
+
shouldUnescape: boolean,
|
|
457
427
|
) {
|
|
458
428
|
if (attrString.length === 0 || attrString === '.') {
|
|
459
429
|
return
|
|
@@ -481,9 +451,12 @@ export function parseAttributesJBrowse(
|
|
|
481
451
|
continue
|
|
482
452
|
}
|
|
483
453
|
|
|
484
|
-
let key = tag
|
|
485
|
-
if (
|
|
486
|
-
key
|
|
454
|
+
let key = COMMON_ATTRS[tag]
|
|
455
|
+
if (key === undefined) {
|
|
456
|
+
key = tag.toLowerCase()
|
|
457
|
+
if (JBROWSE_DEFAULT_FIELDS.has(key)) {
|
|
458
|
+
key += '2'
|
|
459
|
+
}
|
|
487
460
|
}
|
|
488
461
|
|
|
489
462
|
const values: string[] = []
|
|
@@ -495,7 +468,7 @@ export function parseAttributesJBrowse(
|
|
|
495
468
|
}
|
|
496
469
|
if (commaIdx > valStart) {
|
|
497
470
|
const val = attrString.slice(valStart, commaIdx)
|
|
498
|
-
values.push(unescape(val))
|
|
471
|
+
values.push(shouldUnescape ? unescape(val) : val)
|
|
499
472
|
}
|
|
500
473
|
valStart = commaIdx + 1
|
|
501
474
|
}
|
|
@@ -507,113 +480,71 @@ export function parseAttributesJBrowse(
|
|
|
507
480
|
}
|
|
508
481
|
}
|
|
509
482
|
|
|
510
|
-
export function
|
|
483
|
+
export function parseAttributesJBrowse(
|
|
511
484
|
attrString: string,
|
|
512
485
|
result: Record<string, unknown>,
|
|
513
486
|
) {
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
}
|
|
517
|
-
|
|
518
|
-
let len = attrString.length
|
|
519
|
-
if (attrString[len - 1] === '\n') {
|
|
520
|
-
len = attrString[len - 2] === '\r' ? len - 2 : len - 1
|
|
521
|
-
attrString = attrString.slice(0, len)
|
|
522
|
-
}
|
|
523
|
-
|
|
524
|
-
let start = 0
|
|
525
|
-
while (start < len) {
|
|
526
|
-
let semiIdx = attrString.indexOf(';', start)
|
|
527
|
-
if (semiIdx === -1) {
|
|
528
|
-
semiIdx = len
|
|
529
|
-
}
|
|
530
|
-
|
|
531
|
-
if (semiIdx > start) {
|
|
532
|
-
const eqIdx = attrString.indexOf('=', start)
|
|
533
|
-
if (eqIdx !== -1 && eqIdx < semiIdx && eqIdx + 1 < semiIdx) {
|
|
534
|
-
const tag = attrString.slice(start, eqIdx)
|
|
535
|
-
if (tag === '_lineHash') {
|
|
536
|
-
start = semiIdx + 1
|
|
537
|
-
continue
|
|
538
|
-
}
|
|
539
|
-
|
|
540
|
-
let key = tag.toLowerCase()
|
|
541
|
-
if (JBROWSE_DEFAULT_FIELDS.has(key)) {
|
|
542
|
-
key += '2'
|
|
543
|
-
}
|
|
544
|
-
|
|
545
|
-
const values: string[] = []
|
|
546
|
-
let valStart = eqIdx + 1
|
|
547
|
-
while (valStart < semiIdx) {
|
|
548
|
-
let commaIdx = attrString.indexOf(',', valStart)
|
|
549
|
-
if (commaIdx === -1 || commaIdx > semiIdx) {
|
|
550
|
-
commaIdx = semiIdx
|
|
551
|
-
}
|
|
552
|
-
if (commaIdx > valStart) {
|
|
553
|
-
values.push(attrString.slice(valStart, commaIdx))
|
|
554
|
-
}
|
|
555
|
-
valStart = commaIdx + 1
|
|
556
|
-
}
|
|
487
|
+
parseAttributesJBrowseImpl(attrString, result, true)
|
|
488
|
+
}
|
|
557
489
|
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
490
|
+
export function parseAttributesJBrowseNoUnescape(
|
|
491
|
+
attrString: string,
|
|
492
|
+
result: Record<string, unknown>,
|
|
493
|
+
) {
|
|
494
|
+
parseAttributesJBrowseImpl(attrString, result, false)
|
|
563
495
|
}
|
|
564
496
|
|
|
565
|
-
|
|
497
|
+
function parseFeatureJBrowseImpl(
|
|
498
|
+
line: string,
|
|
499
|
+
shouldUnescape: boolean,
|
|
500
|
+
): JBrowseFeature {
|
|
566
501
|
const f = line.split('\t')
|
|
567
|
-
const seq_id = f[0]
|
|
568
|
-
const source = f[1]
|
|
569
|
-
const type = f[2]
|
|
570
|
-
const startStr = f[3]
|
|
571
|
-
const endStr = f[4]
|
|
572
|
-
const scoreStr = f[5]
|
|
573
|
-
const strand = f[6]
|
|
574
|
-
const phase = f[7]
|
|
575
|
-
const attrString = f[8]
|
|
502
|
+
const seq_id = f[0]
|
|
503
|
+
const source = f[1]
|
|
504
|
+
const type = f[2]
|
|
505
|
+
const startStr = f[3]
|
|
506
|
+
const endStr = f[4]
|
|
507
|
+
const scoreStr = f[5]
|
|
508
|
+
const strand = f[6]
|
|
509
|
+
const phase = f[7]
|
|
510
|
+
const attrString = f[8]
|
|
576
511
|
|
|
577
512
|
const result: JBrowseFeature = {
|
|
578
|
-
refName:
|
|
579
|
-
|
|
580
|
-
|
|
513
|
+
refName:
|
|
514
|
+
seq_id.length === 0 || seq_id === '.'
|
|
515
|
+
? ''
|
|
516
|
+
: shouldUnescape
|
|
517
|
+
? unescape(seq_id)
|
|
518
|
+
: seq_id,
|
|
519
|
+
source:
|
|
520
|
+
source.length === 0 || source === '.'
|
|
521
|
+
? null
|
|
522
|
+
: shouldUnescape
|
|
523
|
+
? unescape(source)
|
|
524
|
+
: source,
|
|
525
|
+
type:
|
|
526
|
+
type.length === 0 || type === '.'
|
|
527
|
+
? null
|
|
528
|
+
: shouldUnescape
|
|
529
|
+
? unescape(type)
|
|
530
|
+
: type,
|
|
581
531
|
start: startStr.length === 0 || startStr === '.' ? 0 : +startStr - 1,
|
|
582
532
|
end: endStr.length === 0 || endStr === '.' ? 0 : +endStr,
|
|
583
533
|
score: scoreStr.length === 0 || scoreStr === '.' ? undefined : +scoreStr,
|
|
584
|
-
strand:
|
|
534
|
+
strand:
|
|
535
|
+
strand === '+' ? 1 : strand === '-' ? -1 : strand === '.' ? 0 : undefined,
|
|
585
536
|
phase: phase.length === 0 || phase === '.' ? undefined : +phase,
|
|
586
537
|
subfeatures: [],
|
|
587
538
|
}
|
|
588
539
|
|
|
589
|
-
|
|
540
|
+
parseAttributesJBrowseImpl(attrString, result, shouldUnescape)
|
|
590
541
|
return result
|
|
591
542
|
}
|
|
592
543
|
|
|
593
|
-
export function
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
const source = f[1]!
|
|
597
|
-
const type = f[2]!
|
|
598
|
-
const startStr = f[3]!
|
|
599
|
-
const endStr = f[4]!
|
|
600
|
-
const scoreStr = f[5]!
|
|
601
|
-
const strand = f[6]!
|
|
602
|
-
const phase = f[7]!
|
|
603
|
-
const attrString = f[8]!
|
|
604
|
-
|
|
605
|
-
const result: JBrowseFeature = {
|
|
606
|
-
refName: seq_id.length === 0 || seq_id === '.' ? '' : seq_id,
|
|
607
|
-
source: source.length === 0 || source === '.' ? null : source,
|
|
608
|
-
type: type.length === 0 || type === '.' ? null : type,
|
|
609
|
-
start: startStr.length === 0 || startStr === '.' ? 0 : +startStr - 1,
|
|
610
|
-
end: endStr.length === 0 || endStr === '.' ? 0 : +endStr,
|
|
611
|
-
score: scoreStr.length === 0 || scoreStr === '.' ? undefined : +scoreStr,
|
|
612
|
-
strand: parseStrand(strand),
|
|
613
|
-
phase: phase.length === 0 || phase === '.' ? undefined : +phase,
|
|
614
|
-
subfeatures: [],
|
|
615
|
-
}
|
|
544
|
+
export function parseFeatureJBrowse(line: string): JBrowseFeature {
|
|
545
|
+
return parseFeatureJBrowseImpl(line, true)
|
|
546
|
+
}
|
|
616
547
|
|
|
617
|
-
|
|
618
|
-
return
|
|
548
|
+
export function parseFeatureJBrowseNoUnescape(line: string): JBrowseFeature {
|
|
549
|
+
return parseFeatureJBrowseImpl(line, false)
|
|
619
550
|
}
|