gff-nostream 3.0.2 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -72
- package/dist/api.js +14 -21
- package/dist/api.js.map +1 -1
- package/dist/util.js +112 -218
- package/dist/util.js.map +1 -1
- package/esm/api.js +14 -21
- package/esm/api.js.map +1 -1
- package/esm/util.js +112 -218
- package/esm/util.js.map +1 -1
- package/package.json +27 -31
- package/src/api.ts +14 -21
- package/src/util.ts +146 -241
package/src/api.ts
CHANGED
|
@@ -43,15 +43,14 @@ export function parseStringSyncJBrowse(str: string): JBrowseFeature[] {
|
|
|
43
43
|
function stringToRecords(str: string) {
|
|
44
44
|
const lines = str.split(/\r?\n/)
|
|
45
45
|
const records: LineRecord[] = []
|
|
46
|
-
for (
|
|
47
|
-
|
|
48
|
-
if (line.length === 0 || line[0] === '#') {
|
|
46
|
+
for (const line of lines) {
|
|
47
|
+
if (line.length === 0 || line.startsWith('#')) {
|
|
49
48
|
if (line.startsWith('##FASTA')) {
|
|
50
49
|
break
|
|
51
50
|
}
|
|
52
51
|
continue
|
|
53
52
|
}
|
|
54
|
-
if (line
|
|
53
|
+
if (line.startsWith('>')) {
|
|
55
54
|
break
|
|
56
55
|
}
|
|
57
56
|
records.push({
|
|
@@ -76,8 +75,7 @@ export function parseRecords(records: LineRecord[]): GFF3Feature[] {
|
|
|
76
75
|
const byId = new Map<string, GFF3Feature>()
|
|
77
76
|
const orphans = new Map<string, GFF3Feature[]>()
|
|
78
77
|
|
|
79
|
-
for (
|
|
80
|
-
const record = records[i]!
|
|
78
|
+
for (const record of records) {
|
|
81
79
|
const featureLine = (
|
|
82
80
|
record.hasEscapes
|
|
83
81
|
? parseFeature(record.line)
|
|
@@ -87,9 +85,7 @@ export function parseRecords(records: LineRecord[]): GFF3Feature[] {
|
|
|
87
85
|
featureLine.derived_features = []
|
|
88
86
|
|
|
89
87
|
if (record.lineHash !== undefined) {
|
|
90
|
-
|
|
91
|
-
featureLine.attributes = {}
|
|
92
|
-
}
|
|
88
|
+
featureLine.attributes ??= {}
|
|
93
89
|
featureLine.attributes._lineHash = [String(record.lineHash)]
|
|
94
90
|
}
|
|
95
91
|
|
|
@@ -117,8 +113,8 @@ export function parseRecords(records: LineRecord[]): GFF3Feature[] {
|
|
|
117
113
|
byId.set(id, feature)
|
|
118
114
|
const waiting = orphans.get(id)
|
|
119
115
|
if (waiting) {
|
|
120
|
-
for (
|
|
121
|
-
featureLine.child_features.push(
|
|
116
|
+
for (const w of waiting) {
|
|
117
|
+
featureLine.child_features.push(w)
|
|
122
118
|
}
|
|
123
119
|
orphans.delete(id)
|
|
124
120
|
}
|
|
@@ -128,12 +124,11 @@ export function parseRecords(records: LineRecord[]): GFF3Feature[] {
|
|
|
128
124
|
}
|
|
129
125
|
|
|
130
126
|
if (parents) {
|
|
131
|
-
for (
|
|
132
|
-
const parentId = parents[j]!
|
|
127
|
+
for (const parentId of parents) {
|
|
133
128
|
const parent = byId.get(parentId)
|
|
134
129
|
if (parent) {
|
|
135
|
-
for (
|
|
136
|
-
|
|
130
|
+
for (const p of parent) {
|
|
131
|
+
p.child_features.push(feature)
|
|
137
132
|
}
|
|
138
133
|
} else {
|
|
139
134
|
let arr = orphans.get(parentId)
|
|
@@ -162,8 +157,7 @@ export function parseRecordsJBrowse(records: LineRecord[]): JBrowseFeature[] {
|
|
|
162
157
|
const byId = new Map<string, JBrowseFeature>()
|
|
163
158
|
const orphans = new Map<string, JBrowseFeature[]>()
|
|
164
159
|
|
|
165
|
-
for (
|
|
166
|
-
const record = records[i]!
|
|
160
|
+
for (const record of records) {
|
|
167
161
|
const feature = record.hasEscapes
|
|
168
162
|
? parseFeatureJBrowse(record.line)
|
|
169
163
|
: parseFeatureJBrowseNoUnescape(record.line)
|
|
@@ -189,8 +183,8 @@ export function parseRecordsJBrowse(records: LineRecord[]): JBrowseFeature[] {
|
|
|
189
183
|
byId.set(id, feature)
|
|
190
184
|
const waiting = orphans.get(id)
|
|
191
185
|
if (waiting) {
|
|
192
|
-
for (
|
|
193
|
-
feature.subfeatures.push(
|
|
186
|
+
for (const w of waiting) {
|
|
187
|
+
feature.subfeatures.push(w)
|
|
194
188
|
}
|
|
195
189
|
orphans.delete(id)
|
|
196
190
|
}
|
|
@@ -199,8 +193,7 @@ export function parseRecordsJBrowse(records: LineRecord[]): JBrowseFeature[] {
|
|
|
199
193
|
|
|
200
194
|
if (parent) {
|
|
201
195
|
const parents = Array.isArray(parent) ? parent : [parent]
|
|
202
|
-
for (
|
|
203
|
-
const parentId = parents[j]!
|
|
196
|
+
for (const parentId of parents) {
|
|
204
197
|
const parentFeature = byId.get(parentId)
|
|
205
198
|
if (parentFeature) {
|
|
206
199
|
parentFeature.subfeatures.push(feature)
|
package/src/util.ts
CHANGED
|
@@ -1,17 +1,12 @@
|
|
|
1
1
|
// Fast, low-level functions for parsing and formatting GFF3.
|
|
2
2
|
// JavaScript port of Robert Buels's Bio::GFF3::LowLevel Perl module.
|
|
3
3
|
|
|
4
|
-
const escapeRegex = /%([0-9A-Fa-f]{2})/g
|
|
5
4
|
const directiveRegex = /^\s*##\s*(\S+)\s*(.*)/
|
|
6
5
|
const lineEndRegex = /\r?\n$/
|
|
7
6
|
const whitespaceRegex = /\s+/
|
|
8
7
|
const nonDigitRegex = /\D/g
|
|
9
|
-
// eslint-disable-next-line no-control-regex
|
|
10
|
-
const attrEscapeRegex = /[\n;\r\t=%&,\u0000-\u001f\u007f-\u00ff]/g
|
|
11
|
-
// eslint-disable-next-line no-control-regex
|
|
12
|
-
const columnEscapeRegex = /[\n\r\t%\u0000-\u001f\u007f-\u00ff]/g
|
|
13
8
|
|
|
14
|
-
const HEX_LOOKUP: Record<string, string> = {}
|
|
9
|
+
const HEX_LOOKUP: Record<string, string | undefined> = {}
|
|
15
10
|
for (let i = 0; i < 256; i++) {
|
|
16
11
|
const hex = i.toString(16).toUpperCase().padStart(2, '0')
|
|
17
12
|
HEX_LOOKUP[hex] = String.fromCharCode(i)
|
|
@@ -55,13 +50,10 @@ export function unescape(stringVal: string) {
|
|
|
55
50
|
return result + stringVal.slice(lastIdx)
|
|
56
51
|
}
|
|
57
52
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
* @returns Parsed attributes
|
|
63
|
-
*/
|
|
64
|
-
export function parseAttributes(attrString: string): GFF3Attributes {
|
|
53
|
+
function parseAttributesImpl(
|
|
54
|
+
attrString: string,
|
|
55
|
+
shouldUnescape: boolean,
|
|
56
|
+
): GFF3Attributes {
|
|
65
57
|
if (attrString.length === 0 || attrString === '.') {
|
|
66
58
|
return {}
|
|
67
59
|
}
|
|
@@ -99,7 +91,7 @@ export function parseAttributes(attrString: string): GFF3Attributes {
|
|
|
99
91
|
}
|
|
100
92
|
if (commaIdx > valStart) {
|
|
101
93
|
const val = attrString.slice(valStart, commaIdx)
|
|
102
|
-
arec.push(unescape(val))
|
|
94
|
+
arec.push(shouldUnescape ? unescape(val) : val)
|
|
103
95
|
}
|
|
104
96
|
valStart = commaIdx + 1
|
|
105
97
|
}
|
|
@@ -110,6 +102,16 @@ export function parseAttributes(attrString: string): GFF3Attributes {
|
|
|
110
102
|
return attrs
|
|
111
103
|
}
|
|
112
104
|
|
|
105
|
+
/**
|
|
106
|
+
* Parse the 9th column (attributes) of a GFF3 feature line.
|
|
107
|
+
*
|
|
108
|
+
* @param attrString - String of GFF3 9th column
|
|
109
|
+
* @returns Parsed attributes
|
|
110
|
+
*/
|
|
111
|
+
export function parseAttributes(attrString: string): GFF3Attributes {
|
|
112
|
+
return parseAttributesImpl(attrString, true)
|
|
113
|
+
}
|
|
114
|
+
|
|
113
115
|
/**
|
|
114
116
|
* Parse the 9th column (attributes) of a GFF3 feature line without unescaping.
|
|
115
117
|
* Fast path for data known to contain no escaped characters.
|
|
@@ -118,72 +120,21 @@ export function parseAttributes(attrString: string): GFF3Attributes {
|
|
|
118
120
|
* @returns Parsed attributes
|
|
119
121
|
*/
|
|
120
122
|
export function parseAttributesNoUnescape(attrString: string): GFF3Attributes {
|
|
121
|
-
|
|
122
|
-
return {}
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
const attrs: GFF3Attributes = {}
|
|
126
|
-
let len = attrString.length
|
|
127
|
-
|
|
128
|
-
if (attrString[len - 1] === '\n') {
|
|
129
|
-
len = attrString[len - 2] === '\r' ? len - 2 : len - 1
|
|
130
|
-
attrString = attrString.slice(0, len)
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
let start = 0
|
|
134
|
-
while (start < len) {
|
|
135
|
-
let semiIdx = attrString.indexOf(';', start)
|
|
136
|
-
if (semiIdx === -1) {
|
|
137
|
-
semiIdx = len
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
if (semiIdx > start) {
|
|
141
|
-
const eqIdx = attrString.indexOf('=', start)
|
|
142
|
-
if (eqIdx !== -1 && eqIdx < semiIdx && eqIdx + 1 < semiIdx) {
|
|
143
|
-
const tag = attrString.slice(start, eqIdx)
|
|
144
|
-
let arec = attrs[tag]
|
|
145
|
-
if (!arec) {
|
|
146
|
-
arec = []
|
|
147
|
-
attrs[tag] = arec
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
let valStart = eqIdx + 1
|
|
151
|
-
while (valStart < semiIdx) {
|
|
152
|
-
let commaIdx = attrString.indexOf(',', valStart)
|
|
153
|
-
if (commaIdx === -1 || commaIdx > semiIdx) {
|
|
154
|
-
commaIdx = semiIdx
|
|
155
|
-
}
|
|
156
|
-
if (commaIdx > valStart) {
|
|
157
|
-
arec.push(attrString.slice(valStart, commaIdx))
|
|
158
|
-
}
|
|
159
|
-
valStart = commaIdx + 1
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
start = semiIdx + 1
|
|
164
|
-
}
|
|
165
|
-
return attrs
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
function normUnescape(s: string) {
|
|
169
|
-
return s.length === 0 || s === '.' ? null : unescape(s)
|
|
123
|
+
return parseAttributesImpl(attrString, false)
|
|
170
124
|
}
|
|
171
125
|
|
|
172
|
-
function
|
|
173
|
-
|
|
126
|
+
function normImpl(s: string, shouldUnescape: boolean) {
|
|
127
|
+
if (s.length === 0 || s === '.') {
|
|
128
|
+
return null
|
|
129
|
+
}
|
|
130
|
+
return shouldUnescape ? unescape(s) : s
|
|
174
131
|
}
|
|
175
132
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
* @returns The parsed feature
|
|
181
|
-
*/
|
|
182
|
-
export function parseFeature(line: string): GFF3FeatureLine {
|
|
133
|
+
function parseFeatureImpl(
|
|
134
|
+
line: string,
|
|
135
|
+
shouldUnescape: boolean,
|
|
136
|
+
): GFF3FeatureLine {
|
|
183
137
|
const f = line.split('\t')
|
|
184
|
-
const seq_id = f[0]!
|
|
185
|
-
const source = f[1]!
|
|
186
|
-
const type = f[2]!
|
|
187
138
|
const startStr = f[3]!
|
|
188
139
|
const endStr = f[4]!
|
|
189
140
|
const scoreStr = f[5]!
|
|
@@ -192,58 +143,45 @@ export function parseFeature(line: string): GFF3FeatureLine {
|
|
|
192
143
|
const attrString = f[8]!
|
|
193
144
|
|
|
194
145
|
return {
|
|
195
|
-
seq_id:
|
|
196
|
-
source:
|
|
197
|
-
type:
|
|
146
|
+
seq_id: normImpl(f[0]!, shouldUnescape),
|
|
147
|
+
source: normImpl(f[1]!, shouldUnescape),
|
|
148
|
+
type: normImpl(f[2]!, shouldUnescape),
|
|
198
149
|
start: startStr.length === 0 || startStr === '.' ? null : +startStr,
|
|
199
150
|
end: endStr.length === 0 || endStr === '.' ? null : +endStr,
|
|
200
151
|
score: scoreStr.length === 0 || scoreStr === '.' ? null : +scoreStr,
|
|
201
|
-
strand:
|
|
202
|
-
phase:
|
|
203
|
-
attributes:
|
|
152
|
+
strand: normImpl(strand, false),
|
|
153
|
+
phase: normImpl(phase, false),
|
|
154
|
+
attributes:
|
|
155
|
+
attrString.length === 0 || attrString === '.'
|
|
156
|
+
? null
|
|
157
|
+
: parseAttributesImpl(attrString, shouldUnescape),
|
|
204
158
|
}
|
|
205
159
|
}
|
|
206
160
|
|
|
207
161
|
/**
|
|
208
|
-
* Parse a GFF3 feature line
|
|
209
|
-
* Fast path for data known to contain no escaped characters.
|
|
162
|
+
* Parse a GFF3 feature line
|
|
210
163
|
*
|
|
211
164
|
* @param line - GFF3 feature line
|
|
212
165
|
* @returns The parsed feature
|
|
213
166
|
*/
|
|
214
|
-
export function
|
|
215
|
-
|
|
216
|
-
const seq_id = f[0]!
|
|
217
|
-
const source = f[1]!
|
|
218
|
-
const type = f[2]!
|
|
219
|
-
const startStr = f[3]!
|
|
220
|
-
const endStr = f[4]!
|
|
221
|
-
const scoreStr = f[5]!
|
|
222
|
-
const strand = f[6]!
|
|
223
|
-
const phase = f[7]!
|
|
224
|
-
const attrString = f[8]!
|
|
225
|
-
|
|
226
|
-
return {
|
|
227
|
-
seq_id: norm(seq_id),
|
|
228
|
-
source: norm(source),
|
|
229
|
-
type: norm(type),
|
|
230
|
-
start: startStr.length === 0 || startStr === '.' ? null : +startStr,
|
|
231
|
-
end: endStr.length === 0 || endStr === '.' ? null : +endStr,
|
|
232
|
-
score: scoreStr.length === 0 || scoreStr === '.' ? null : +scoreStr,
|
|
233
|
-
strand: norm(strand),
|
|
234
|
-
phase: norm(phase),
|
|
235
|
-
attributes: attrString.length === 0 || attrString === '.' ? null : parseAttributesNoUnescape(attrString),
|
|
236
|
-
}
|
|
167
|
+
export function parseFeature(line: string): GFF3FeatureLine {
|
|
168
|
+
return parseFeatureImpl(line, true)
|
|
237
169
|
}
|
|
238
170
|
|
|
239
171
|
/**
|
|
240
|
-
* Parse a GFF3 feature
|
|
172
|
+
* Parse a GFF3 feature line without unescaping.
|
|
173
|
+
* Fast path for data known to contain no escaped characters.
|
|
241
174
|
*
|
|
242
|
-
* @param
|
|
175
|
+
* @param line - GFF3 feature line
|
|
243
176
|
* @returns The parsed feature
|
|
244
177
|
*/
|
|
245
|
-
export function
|
|
178
|
+
export function parseFeatureNoUnescape(line: string): GFF3FeatureLine {
|
|
179
|
+
return parseFeatureImpl(line, false)
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
function parseFieldsArrayImpl(
|
|
246
183
|
f: (string | null | undefined)[],
|
|
184
|
+
shouldUnescape: boolean,
|
|
247
185
|
): GFF3FeatureLine {
|
|
248
186
|
const seq_id = f[0]
|
|
249
187
|
const source = f[1]
|
|
@@ -256,18 +194,33 @@ export function parseFieldsArray(
|
|
|
256
194
|
const attrString = f[8]
|
|
257
195
|
|
|
258
196
|
return {
|
|
259
|
-
seq_id: seq_id ?
|
|
260
|
-
source: source ?
|
|
261
|
-
type: type ?
|
|
197
|
+
seq_id: seq_id ? normImpl(seq_id, shouldUnescape) : null,
|
|
198
|
+
source: source ? normImpl(source, shouldUnescape) : null,
|
|
199
|
+
type: type ? normImpl(type, shouldUnescape) : null,
|
|
262
200
|
start: !startStr || startStr === '.' ? null : +startStr,
|
|
263
201
|
end: !endStr || endStr === '.' ? null : +endStr,
|
|
264
202
|
score: !scoreStr || scoreStr === '.' ? null : +scoreStr,
|
|
265
203
|
strand: strand && strand !== '.' ? strand : null,
|
|
266
204
|
phase: phase && phase !== '.' ? phase : null,
|
|
267
|
-
attributes:
|
|
205
|
+
attributes:
|
|
206
|
+
!attrString || attrString === '.'
|
|
207
|
+
? null
|
|
208
|
+
: parseAttributesImpl(attrString, shouldUnescape),
|
|
268
209
|
}
|
|
269
210
|
}
|
|
270
211
|
|
|
212
|
+
/**
|
|
213
|
+
* Parse a GFF3 feature from a pre-split fields array
|
|
214
|
+
*
|
|
215
|
+
* @param f - Array of 9 GFF3 column values (use null or '.' for empty values)
|
|
216
|
+
* @returns The parsed feature
|
|
217
|
+
*/
|
|
218
|
+
export function parseFieldsArray(
|
|
219
|
+
f: (string | null | undefined)[],
|
|
220
|
+
): GFF3FeatureLine {
|
|
221
|
+
return parseFieldsArrayImpl(f, true)
|
|
222
|
+
}
|
|
223
|
+
|
|
271
224
|
/**
|
|
272
225
|
* Parse a GFF3 feature from a pre-split fields array without unescaping.
|
|
273
226
|
* Fast path for data known to contain no escaped characters.
|
|
@@ -278,27 +231,7 @@ export function parseFieldsArray(
|
|
|
278
231
|
export function parseFieldsArrayNoUnescape(
|
|
279
232
|
f: (string | null | undefined)[],
|
|
280
233
|
): GFF3FeatureLine {
|
|
281
|
-
|
|
282
|
-
const source = f[1]
|
|
283
|
-
const type = f[2]
|
|
284
|
-
const startStr = f[3]
|
|
285
|
-
const endStr = f[4]
|
|
286
|
-
const scoreStr = f[5]
|
|
287
|
-
const strand = f[6]
|
|
288
|
-
const phase = f[7]
|
|
289
|
-
const attrString = f[8]
|
|
290
|
-
|
|
291
|
-
return {
|
|
292
|
-
seq_id: seq_id && seq_id !== '.' ? seq_id : null,
|
|
293
|
-
source: source && source !== '.' ? source : null,
|
|
294
|
-
type: type && type !== '.' ? type : null,
|
|
295
|
-
start: !startStr || startStr === '.' ? null : +startStr,
|
|
296
|
-
end: !endStr || endStr === '.' ? null : +endStr,
|
|
297
|
-
score: !scoreStr || scoreStr === '.' ? null : +scoreStr,
|
|
298
|
-
strand: strand && strand !== '.' ? strand : null,
|
|
299
|
-
phase: phase && phase !== '.' ? phase : null,
|
|
300
|
-
attributes: !attrString || attrString === '.' ? null : parseAttributesNoUnescape(attrString),
|
|
301
|
-
}
|
|
234
|
+
return parseFieldsArrayImpl(f, false)
|
|
302
235
|
}
|
|
303
236
|
|
|
304
237
|
/**
|
|
@@ -322,28 +255,27 @@ export function parseDirective(
|
|
|
322
255
|
const [, name] = match
|
|
323
256
|
let [, , contents] = match
|
|
324
257
|
|
|
325
|
-
const parsed: GFF3Directive = { directive: name }
|
|
258
|
+
const parsed: GFF3Directive = { directive: name! }
|
|
326
259
|
if (contents!.length) {
|
|
327
260
|
contents = contents!.replace(lineEndRegex, '')
|
|
328
261
|
parsed.value = contents
|
|
329
262
|
}
|
|
330
263
|
|
|
331
|
-
// do a little additional parsing for sequence-region and genome-build directives
|
|
332
264
|
if (name === 'sequence-region') {
|
|
333
265
|
const c = contents!.split(whitespaceRegex, 3)
|
|
334
266
|
return {
|
|
335
267
|
...parsed,
|
|
336
|
-
seq_id: c[0]
|
|
337
|
-
start: c[1]
|
|
338
|
-
end: c[2]
|
|
339
|
-
}
|
|
268
|
+
seq_id: c[0]!,
|
|
269
|
+
start: c[1]!.replaceAll(nonDigitRegex, ''),
|
|
270
|
+
end: c[2]!.replaceAll(nonDigitRegex, ''),
|
|
271
|
+
}
|
|
340
272
|
} else if (name === 'genome-build') {
|
|
341
273
|
const [source, buildName] = contents!.split(whitespaceRegex, 2)
|
|
342
274
|
return {
|
|
343
275
|
...parsed,
|
|
344
|
-
source
|
|
345
|
-
buildName
|
|
346
|
-
}
|
|
276
|
+
source: source!,
|
|
277
|
+
buildName: buildName!,
|
|
278
|
+
}
|
|
347
279
|
}
|
|
348
280
|
|
|
349
281
|
return parsed
|
|
@@ -451,6 +383,30 @@ const JBROWSE_DEFAULT_FIELDS = new Set([
|
|
|
451
383
|
'strand',
|
|
452
384
|
])
|
|
453
385
|
|
|
386
|
+
// Pre-computed lowercase for common GFF3 spec attribute names to avoid
|
|
387
|
+
// toLowerCase() calls in the hot path
|
|
388
|
+
const COMMON_ATTRS: Record<string, string | undefined> = {
|
|
389
|
+
ID: 'id',
|
|
390
|
+
Name: 'name',
|
|
391
|
+
Parent: 'parent',
|
|
392
|
+
Note: 'note',
|
|
393
|
+
Dbxref: 'dbxref',
|
|
394
|
+
Ontology_term: 'ontology_term',
|
|
395
|
+
Is_circular: 'is_circular',
|
|
396
|
+
Alias: 'alias',
|
|
397
|
+
Target: 'target',
|
|
398
|
+
Gap: 'gap',
|
|
399
|
+
Derives_from: 'derives_from',
|
|
400
|
+
id: 'id',
|
|
401
|
+
name: 'name',
|
|
402
|
+
parent: 'parent',
|
|
403
|
+
note: 'note',
|
|
404
|
+
dbxref: 'dbxref',
|
|
405
|
+
alias: 'alias',
|
|
406
|
+
target: 'target',
|
|
407
|
+
gap: 'gap',
|
|
408
|
+
}
|
|
409
|
+
|
|
454
410
|
export interface JBrowseFeature {
|
|
455
411
|
start: number
|
|
456
412
|
end: number
|
|
@@ -464,22 +420,10 @@ export interface JBrowseFeature {
|
|
|
464
420
|
[key: string]: unknown
|
|
465
421
|
}
|
|
466
422
|
|
|
467
|
-
function
|
|
468
|
-
if (s === '+') {
|
|
469
|
-
return 1
|
|
470
|
-
}
|
|
471
|
-
if (s === '-') {
|
|
472
|
-
return -1
|
|
473
|
-
}
|
|
474
|
-
if (s === '.') {
|
|
475
|
-
return 0
|
|
476
|
-
}
|
|
477
|
-
return undefined
|
|
478
|
-
}
|
|
479
|
-
|
|
480
|
-
export function parseAttributesJBrowse(
|
|
423
|
+
function parseAttributesJBrowseImpl(
|
|
481
424
|
attrString: string,
|
|
482
425
|
result: Record<string, unknown>,
|
|
426
|
+
shouldUnescape: boolean,
|
|
483
427
|
) {
|
|
484
428
|
if (attrString.length === 0 || attrString === '.') {
|
|
485
429
|
return
|
|
@@ -507,9 +451,12 @@ export function parseAttributesJBrowse(
|
|
|
507
451
|
continue
|
|
508
452
|
}
|
|
509
453
|
|
|
510
|
-
let key = tag
|
|
511
|
-
if (
|
|
512
|
-
key
|
|
454
|
+
let key = COMMON_ATTRS[tag]
|
|
455
|
+
if (key === undefined) {
|
|
456
|
+
key = tag.toLowerCase()
|
|
457
|
+
if (JBROWSE_DEFAULT_FIELDS.has(key)) {
|
|
458
|
+
key += '2'
|
|
459
|
+
}
|
|
513
460
|
}
|
|
514
461
|
|
|
515
462
|
const values: string[] = []
|
|
@@ -521,7 +468,7 @@ export function parseAttributesJBrowse(
|
|
|
521
468
|
}
|
|
522
469
|
if (commaIdx > valStart) {
|
|
523
470
|
const val = attrString.slice(valStart, commaIdx)
|
|
524
|
-
values.push(unescape(val))
|
|
471
|
+
values.push(shouldUnescape ? unescape(val) : val)
|
|
525
472
|
}
|
|
526
473
|
valStart = commaIdx + 1
|
|
527
474
|
}
|
|
@@ -533,62 +480,24 @@ export function parseAttributesJBrowse(
|
|
|
533
480
|
}
|
|
534
481
|
}
|
|
535
482
|
|
|
536
|
-
export function
|
|
483
|
+
export function parseAttributesJBrowse(
|
|
537
484
|
attrString: string,
|
|
538
485
|
result: Record<string, unknown>,
|
|
539
486
|
) {
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
}
|
|
543
|
-
|
|
544
|
-
let len = attrString.length
|
|
545
|
-
if (attrString[len - 1] === '\n') {
|
|
546
|
-
len = attrString[len - 2] === '\r' ? len - 2 : len - 1
|
|
547
|
-
attrString = attrString.slice(0, len)
|
|
548
|
-
}
|
|
549
|
-
|
|
550
|
-
let start = 0
|
|
551
|
-
while (start < len) {
|
|
552
|
-
let semiIdx = attrString.indexOf(';', start)
|
|
553
|
-
if (semiIdx === -1) {
|
|
554
|
-
semiIdx = len
|
|
555
|
-
}
|
|
556
|
-
|
|
557
|
-
if (semiIdx > start) {
|
|
558
|
-
const eqIdx = attrString.indexOf('=', start)
|
|
559
|
-
if (eqIdx !== -1 && eqIdx < semiIdx && eqIdx + 1 < semiIdx) {
|
|
560
|
-
const tag = attrString.slice(start, eqIdx)
|
|
561
|
-
if (tag === '_lineHash') {
|
|
562
|
-
start = semiIdx + 1
|
|
563
|
-
continue
|
|
564
|
-
}
|
|
565
|
-
|
|
566
|
-
let key = tag.toLowerCase()
|
|
567
|
-
if (JBROWSE_DEFAULT_FIELDS.has(key)) {
|
|
568
|
-
key += '2'
|
|
569
|
-
}
|
|
570
|
-
|
|
571
|
-
const values: string[] = []
|
|
572
|
-
let valStart = eqIdx + 1
|
|
573
|
-
while (valStart < semiIdx) {
|
|
574
|
-
let commaIdx = attrString.indexOf(',', valStart)
|
|
575
|
-
if (commaIdx === -1 || commaIdx > semiIdx) {
|
|
576
|
-
commaIdx = semiIdx
|
|
577
|
-
}
|
|
578
|
-
if (commaIdx > valStart) {
|
|
579
|
-
values.push(attrString.slice(valStart, commaIdx))
|
|
580
|
-
}
|
|
581
|
-
valStart = commaIdx + 1
|
|
582
|
-
}
|
|
487
|
+
parseAttributesJBrowseImpl(attrString, result, true)
|
|
488
|
+
}
|
|
583
489
|
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
490
|
+
export function parseAttributesJBrowseNoUnescape(
|
|
491
|
+
attrString: string,
|
|
492
|
+
result: Record<string, unknown>,
|
|
493
|
+
) {
|
|
494
|
+
parseAttributesJBrowseImpl(attrString, result, false)
|
|
589
495
|
}
|
|
590
496
|
|
|
591
|
-
|
|
497
|
+
function parseFeatureJBrowseImpl(
|
|
498
|
+
line: string,
|
|
499
|
+
shouldUnescape: boolean,
|
|
500
|
+
): JBrowseFeature {
|
|
592
501
|
const f = line.split('\t')
|
|
593
502
|
const seq_id = f[0]!
|
|
594
503
|
const source = f[1]!
|
|
@@ -601,45 +510,41 @@ export function parseFeatureJBrowse(line: string): JBrowseFeature {
|
|
|
601
510
|
const attrString = f[8]!
|
|
602
511
|
|
|
603
512
|
const result: JBrowseFeature = {
|
|
604
|
-
refName:
|
|
605
|
-
|
|
606
|
-
|
|
513
|
+
refName:
|
|
514
|
+
seq_id.length === 0 || seq_id === '.'
|
|
515
|
+
? ''
|
|
516
|
+
: shouldUnescape
|
|
517
|
+
? unescape(seq_id)
|
|
518
|
+
: seq_id,
|
|
519
|
+
source:
|
|
520
|
+
source.length === 0 || source === '.'
|
|
521
|
+
? null
|
|
522
|
+
: shouldUnescape
|
|
523
|
+
? unescape(source)
|
|
524
|
+
: source,
|
|
525
|
+
type:
|
|
526
|
+
type.length === 0 || type === '.'
|
|
527
|
+
? null
|
|
528
|
+
: shouldUnescape
|
|
529
|
+
? unescape(type)
|
|
530
|
+
: type,
|
|
607
531
|
start: startStr.length === 0 || startStr === '.' ? 0 : +startStr - 1,
|
|
608
532
|
end: endStr.length === 0 || endStr === '.' ? 0 : +endStr,
|
|
609
533
|
score: scoreStr.length === 0 || scoreStr === '.' ? undefined : +scoreStr,
|
|
610
|
-
strand:
|
|
534
|
+
strand:
|
|
535
|
+
strand === '+' ? 1 : strand === '-' ? -1 : strand === '.' ? 0 : undefined,
|
|
611
536
|
phase: phase.length === 0 || phase === '.' ? undefined : +phase,
|
|
612
537
|
subfeatures: [],
|
|
613
538
|
}
|
|
614
539
|
|
|
615
|
-
|
|
540
|
+
parseAttributesJBrowseImpl(attrString, result, shouldUnescape)
|
|
616
541
|
return result
|
|
617
542
|
}
|
|
618
543
|
|
|
619
|
-
export function
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
const source = f[1]!
|
|
623
|
-
const type = f[2]!
|
|
624
|
-
const startStr = f[3]!
|
|
625
|
-
const endStr = f[4]!
|
|
626
|
-
const scoreStr = f[5]!
|
|
627
|
-
const strand = f[6]!
|
|
628
|
-
const phase = f[7]!
|
|
629
|
-
const attrString = f[8]!
|
|
630
|
-
|
|
631
|
-
const result: JBrowseFeature = {
|
|
632
|
-
refName: seq_id.length === 0 || seq_id === '.' ? '' : seq_id,
|
|
633
|
-
source: source.length === 0 || source === '.' ? null : source,
|
|
634
|
-
type: type.length === 0 || type === '.' ? null : type,
|
|
635
|
-
start: startStr.length === 0 || startStr === '.' ? 0 : +startStr - 1,
|
|
636
|
-
end: endStr.length === 0 || endStr === '.' ? 0 : +endStr,
|
|
637
|
-
score: scoreStr.length === 0 || scoreStr === '.' ? undefined : +scoreStr,
|
|
638
|
-
strand: parseStrand(strand),
|
|
639
|
-
phase: phase.length === 0 || phase === '.' ? undefined : +phase,
|
|
640
|
-
subfeatures: [],
|
|
641
|
-
}
|
|
544
|
+
export function parseFeatureJBrowse(line: string): JBrowseFeature {
|
|
545
|
+
return parseFeatureJBrowseImpl(line, true)
|
|
546
|
+
}
|
|
642
547
|
|
|
643
|
-
|
|
644
|
-
return
|
|
548
|
+
export function parseFeatureJBrowseNoUnescape(line: string): JBrowseFeature {
|
|
549
|
+
return parseFeatureJBrowseImpl(line, false)
|
|
645
550
|
}
|