gff-nostream 3.0.4 → 3.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -4
- package/dist/api.d.ts +4 -2
- package/dist/api.js +66 -64
- package/dist/api.js.map +1 -1
- package/dist/util.d.ts +0 -15
- package/dist/util.js +37 -101
- package/dist/util.js.map +1 -1
- package/esm/api.d.ts +4 -2
- package/esm/api.js +65 -63
- package/esm/api.js.map +1 -1
- package/esm/util.d.ts +0 -15
- package/esm/util.js +37 -99
- package/esm/util.js.map +1 -1
- package/package.json +19 -16
- package/src/api.ts +67 -67
- package/src/util.ts +53 -127
package/src/api.ts
CHANGED
|
@@ -14,8 +14,10 @@ import type {
|
|
|
14
14
|
export interface LineRecord {
|
|
15
15
|
line: string
|
|
16
16
|
lineHash?: string | number
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
/** Optional passthrough byte offsets — not used by the parser */
|
|
18
|
+
start?: number
|
|
19
|
+
/** Optional passthrough byte offsets — not used by the parser */
|
|
20
|
+
end?: number
|
|
19
21
|
hasEscapes: boolean
|
|
20
22
|
}
|
|
21
23
|
|
|
@@ -44,19 +46,14 @@ function stringToRecords(str: string) {
|
|
|
44
46
|
const lines = str.split(/\r?\n/)
|
|
45
47
|
const records: LineRecord[] = []
|
|
46
48
|
for (const line of lines) {
|
|
49
|
+
if (line.startsWith('##FASTA') || line.startsWith('>')) {
|
|
50
|
+
break
|
|
51
|
+
}
|
|
47
52
|
if (line.length === 0 || line.startsWith('#')) {
|
|
48
|
-
if (line.startsWith('##FASTA')) {
|
|
49
|
-
break
|
|
50
|
-
}
|
|
51
53
|
continue
|
|
52
54
|
}
|
|
53
|
-
if (line.startsWith('>')) {
|
|
54
|
-
break
|
|
55
|
-
}
|
|
56
55
|
records.push({
|
|
57
56
|
line,
|
|
58
|
-
start: 0,
|
|
59
|
-
end: 0,
|
|
60
57
|
hasEscapes: line.includes('%'),
|
|
61
58
|
})
|
|
62
59
|
}
|
|
@@ -95,48 +92,52 @@ export function parseRecords(records: LineRecord[]): GFF3Feature[] {
|
|
|
95
92
|
|
|
96
93
|
if (!ids && !parents) {
|
|
97
94
|
items.push([featureLine])
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
95
|
+
} else {
|
|
96
|
+
let feature: GFF3Feature
|
|
97
|
+
if (ids) {
|
|
98
|
+
const id = ids[0]!
|
|
99
|
+
const existing = byId.get(id)
|
|
100
|
+
if (existing) {
|
|
101
|
+
// Multi-location continuation: share child_features/derived_features
|
|
102
|
+
// with the first line so children remain visible across all lines
|
|
103
|
+
// regardless of arrival order.
|
|
104
|
+
featureLine.child_features = existing[0]!.child_features
|
|
105
|
+
featureLine.derived_features = existing[0]!.derived_features
|
|
106
|
+
existing.push(featureLine)
|
|
107
|
+
feature = existing
|
|
108
|
+
} else {
|
|
109
|
+
feature = [featureLine]
|
|
110
|
+
if (!parents) {
|
|
111
|
+
items.push(feature)
|
|
112
|
+
}
|
|
113
|
+
byId.set(id, feature)
|
|
114
|
+
const waiting = orphans.get(id)
|
|
115
|
+
if (waiting) {
|
|
116
|
+
for (const w of waiting) {
|
|
117
|
+
featureLine.child_features.push(w)
|
|
118
|
+
}
|
|
119
|
+
orphans.delete(id)
|
|
118
120
|
}
|
|
119
|
-
orphans.delete(id)
|
|
120
121
|
}
|
|
122
|
+
} else {
|
|
123
|
+
feature = [featureLine]
|
|
121
124
|
}
|
|
122
|
-
} else {
|
|
123
|
-
feature = [featureLine]
|
|
124
|
-
}
|
|
125
125
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
126
|
+
if (parents) {
|
|
127
|
+
for (const parentId of parents) {
|
|
128
|
+
const parent = byId.get(parentId)
|
|
129
|
+
if (parent) {
|
|
130
|
+
// child_features is shared across all parent feature lines,
|
|
131
|
+
// so push once via the first line.
|
|
132
|
+
parent[0]!.child_features.push(feature)
|
|
133
|
+
} else {
|
|
134
|
+
let arr = orphans.get(parentId)
|
|
135
|
+
if (!arr) {
|
|
136
|
+
arr = []
|
|
137
|
+
orphans.set(parentId, arr)
|
|
138
|
+
}
|
|
139
|
+
arr.push(feature)
|
|
132
140
|
}
|
|
133
|
-
} else {
|
|
134
|
-
let arr = orphans.get(parentId)
|
|
135
|
-
if (!arr) {
|
|
136
|
-
arr = []
|
|
137
|
-
orphans.set(parentId, arr)
|
|
138
|
-
}
|
|
139
|
-
arr.push(feature)
|
|
140
141
|
}
|
|
141
142
|
}
|
|
142
143
|
}
|
|
@@ -166,17 +167,16 @@ export function parseRecordsJBrowse(records: LineRecord[]): JBrowseFeature[] {
|
|
|
166
167
|
feature._lineHash = String(record.lineHash)
|
|
167
168
|
}
|
|
168
169
|
|
|
169
|
-
|
|
170
|
+
// attribute parsing collapses single-element arrays to scalars, so id can
|
|
171
|
+
// be string | string[]; defensively take the first if multi-valued.
|
|
172
|
+
const rawId = feature.id as string | string[] | undefined
|
|
173
|
+
const id = Array.isArray(rawId) ? rawId[0] : rawId
|
|
170
174
|
const parent = feature.parent as string | string[] | undefined
|
|
171
175
|
|
|
172
176
|
if (!id && !parent) {
|
|
173
177
|
items.push(feature)
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
if (id) {
|
|
178
|
-
const existing = byId.get(id)
|
|
179
|
-
if (!existing) {
|
|
178
|
+
} else if (!id || !byId.has(id)) {
|
|
179
|
+
if (id) {
|
|
180
180
|
if (!parent) {
|
|
181
181
|
items.push(feature)
|
|
182
182
|
}
|
|
@@ -189,21 +189,21 @@ export function parseRecordsJBrowse(records: LineRecord[]): JBrowseFeature[] {
|
|
|
189
189
|
orphans.delete(id)
|
|
190
190
|
}
|
|
191
191
|
}
|
|
192
|
-
}
|
|
193
192
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
193
|
+
if (parent) {
|
|
194
|
+
const parents = Array.isArray(parent) ? parent : [parent]
|
|
195
|
+
for (const parentId of parents) {
|
|
196
|
+
const parentFeature = byId.get(parentId)
|
|
197
|
+
if (parentFeature) {
|
|
198
|
+
parentFeature.subfeatures.push(feature)
|
|
199
|
+
} else {
|
|
200
|
+
let arr = orphans.get(parentId)
|
|
201
|
+
if (!arr) {
|
|
202
|
+
arr = []
|
|
203
|
+
orphans.set(parentId, arr)
|
|
204
|
+
}
|
|
205
|
+
arr.push(feature)
|
|
205
206
|
}
|
|
206
|
-
arr.push(feature)
|
|
207
207
|
}
|
|
208
208
|
}
|
|
209
209
|
}
|
package/src/util.ts
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
// JavaScript port of Robert Buels's Bio::GFF3::LowLevel Perl module.
|
|
3
3
|
|
|
4
4
|
const directiveRegex = /^\s*##\s*(\S+)\s*(.*)/
|
|
5
|
-
const lineEndRegex = /\r?\n$/
|
|
6
5
|
const whitespaceRegex = /\s+/
|
|
7
6
|
const nonDigitRegex = /\D/g
|
|
8
7
|
|
|
@@ -123,11 +122,20 @@ export function parseAttributesNoUnescape(attrString: string): GFF3Attributes {
|
|
|
123
122
|
return parseAttributesImpl(attrString, false)
|
|
124
123
|
}
|
|
125
124
|
|
|
126
|
-
function
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
125
|
+
function isEmpty(s: string) {
|
|
126
|
+
return s.length === 0 || s === '.'
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
function strField<E extends null | ''>(
|
|
130
|
+
s: string,
|
|
131
|
+
shouldUnescape: boolean,
|
|
132
|
+
empty: E,
|
|
133
|
+
) {
|
|
134
|
+
return isEmpty(s) ? empty : shouldUnescape ? unescape(s) : s
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
function numField(s: string) {
|
|
138
|
+
return isEmpty(s) ? null : +s
|
|
131
139
|
}
|
|
132
140
|
|
|
133
141
|
function parseFeatureImpl(
|
|
@@ -135,26 +143,19 @@ function parseFeatureImpl(
|
|
|
135
143
|
shouldUnescape: boolean,
|
|
136
144
|
): GFF3FeatureLine {
|
|
137
145
|
const f = line.split('\t')
|
|
138
|
-
const
|
|
139
|
-
const endStr = f[4]
|
|
140
|
-
const scoreStr = f[5]
|
|
141
|
-
const strand = f[6]
|
|
142
|
-
const phase = f[7]
|
|
143
|
-
const attrString = f[8]
|
|
144
|
-
|
|
146
|
+
const attrString = f[8]!
|
|
145
147
|
return {
|
|
146
|
-
seq_id:
|
|
147
|
-
source:
|
|
148
|
-
type:
|
|
149
|
-
start:
|
|
150
|
-
end:
|
|
151
|
-
score:
|
|
152
|
-
strand:
|
|
153
|
-
phase:
|
|
154
|
-
attributes:
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
: parseAttributesImpl(attrString, shouldUnescape),
|
|
148
|
+
seq_id: strField(f[0]!, shouldUnescape, null),
|
|
149
|
+
source: strField(f[1]!, shouldUnescape, null),
|
|
150
|
+
type: strField(f[2]!, shouldUnescape, null),
|
|
151
|
+
start: numField(f[3]!),
|
|
152
|
+
end: numField(f[4]!),
|
|
153
|
+
score: numField(f[5]!),
|
|
154
|
+
strand: strField(f[6]!, false, null),
|
|
155
|
+
phase: strField(f[7]!, false, null),
|
|
156
|
+
attributes: isEmpty(attrString)
|
|
157
|
+
? null
|
|
158
|
+
: parseAttributesImpl(attrString, shouldUnescape),
|
|
158
159
|
}
|
|
159
160
|
}
|
|
160
161
|
|
|
@@ -179,61 +180,6 @@ export function parseFeatureNoUnescape(line: string): GFF3FeatureLine {
|
|
|
179
180
|
return parseFeatureImpl(line, false)
|
|
180
181
|
}
|
|
181
182
|
|
|
182
|
-
function parseFieldsArrayImpl(
|
|
183
|
-
f: (string | null | undefined)[],
|
|
184
|
-
shouldUnescape: boolean,
|
|
185
|
-
): GFF3FeatureLine {
|
|
186
|
-
const seq_id = f[0]
|
|
187
|
-
const source = f[1]
|
|
188
|
-
const type = f[2]
|
|
189
|
-
const startStr = f[3]
|
|
190
|
-
const endStr = f[4]
|
|
191
|
-
const scoreStr = f[5]
|
|
192
|
-
const strand = f[6]
|
|
193
|
-
const phase = f[7]
|
|
194
|
-
const attrString = f[8]
|
|
195
|
-
|
|
196
|
-
return {
|
|
197
|
-
seq_id: seq_id ? normImpl(seq_id, shouldUnescape) : null,
|
|
198
|
-
source: source ? normImpl(source, shouldUnescape) : null,
|
|
199
|
-
type: type ? normImpl(type, shouldUnescape) : null,
|
|
200
|
-
start: !startStr || startStr === '.' ? null : +startStr,
|
|
201
|
-
end: !endStr || endStr === '.' ? null : +endStr,
|
|
202
|
-
score: !scoreStr || scoreStr === '.' ? null : +scoreStr,
|
|
203
|
-
strand: strand && strand !== '.' ? strand : null,
|
|
204
|
-
phase: phase && phase !== '.' ? phase : null,
|
|
205
|
-
attributes:
|
|
206
|
-
!attrString || attrString === '.'
|
|
207
|
-
? null
|
|
208
|
-
: parseAttributesImpl(attrString, shouldUnescape),
|
|
209
|
-
}
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
/**
|
|
213
|
-
* Parse a GFF3 feature from a pre-split fields array
|
|
214
|
-
*
|
|
215
|
-
* @param f - Array of 9 GFF3 column values (use null or '.' for empty values)
|
|
216
|
-
* @returns The parsed feature
|
|
217
|
-
*/
|
|
218
|
-
export function parseFieldsArray(
|
|
219
|
-
f: (string | null | undefined)[],
|
|
220
|
-
): GFF3FeatureLine {
|
|
221
|
-
return parseFieldsArrayImpl(f, true)
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
/**
|
|
225
|
-
* Parse a GFF3 feature from a pre-split fields array without unescaping.
|
|
226
|
-
* Fast path for data known to contain no escaped characters.
|
|
227
|
-
*
|
|
228
|
-
* @param f - Array of 9 GFF3 column values (use null or '.' for empty values)
|
|
229
|
-
* @returns The parsed feature
|
|
230
|
-
*/
|
|
231
|
-
export function parseFieldsArrayNoUnescape(
|
|
232
|
-
f: (string | null | undefined)[],
|
|
233
|
-
): GFF3FeatureLine {
|
|
234
|
-
return parseFieldsArrayImpl(f, false)
|
|
235
|
-
}
|
|
236
|
-
|
|
237
183
|
/**
|
|
238
184
|
* Parse a GFF3 directive line.
|
|
239
185
|
*
|
|
@@ -252,29 +198,28 @@ export function parseDirective(
|
|
|
252
198
|
return null
|
|
253
199
|
}
|
|
254
200
|
|
|
255
|
-
const
|
|
256
|
-
|
|
201
|
+
const name = match[1]!
|
|
202
|
+
const contents = match[2]!
|
|
257
203
|
|
|
258
204
|
const parsed: GFF3Directive = { directive: name }
|
|
259
205
|
if (contents.length) {
|
|
260
|
-
|
|
261
|
-
parsed.value = contents
|
|
206
|
+
parsed.value = contents.trimEnd()
|
|
262
207
|
}
|
|
263
208
|
|
|
264
209
|
if (name === 'sequence-region') {
|
|
265
210
|
const c = contents.split(whitespaceRegex, 3)
|
|
266
211
|
return {
|
|
267
212
|
...parsed,
|
|
268
|
-
seq_id: c[0]
|
|
269
|
-
start: c[1]
|
|
270
|
-
end: c[2]
|
|
213
|
+
seq_id: c[0]!,
|
|
214
|
+
start: c[1]!.replaceAll(nonDigitRegex, ''),
|
|
215
|
+
end: c[2]!.replaceAll(nonDigitRegex, ''),
|
|
271
216
|
}
|
|
272
217
|
} else if (name === 'genome-build') {
|
|
273
218
|
const [source, buildName] = contents.split(whitespaceRegex, 2)
|
|
274
219
|
return {
|
|
275
220
|
...parsed,
|
|
276
|
-
source
|
|
277
|
-
buildName
|
|
221
|
+
source: source!,
|
|
222
|
+
buildName: buildName!,
|
|
278
223
|
}
|
|
279
224
|
}
|
|
280
225
|
|
|
@@ -446,11 +391,6 @@ function parseAttributesJBrowseImpl(
|
|
|
446
391
|
const eqIdx = attrString.indexOf('=', start)
|
|
447
392
|
if (eqIdx !== -1 && eqIdx < semiIdx && eqIdx + 1 < semiIdx) {
|
|
448
393
|
const tag = attrString.slice(start, eqIdx)
|
|
449
|
-
if (tag === '_lineHash') {
|
|
450
|
-
start = semiIdx + 1
|
|
451
|
-
continue
|
|
452
|
-
}
|
|
453
|
-
|
|
454
394
|
let key = COMMON_ATTRS[tag]
|
|
455
395
|
if (key === undefined) {
|
|
456
396
|
key = tag.toLowerCase()
|
|
@@ -494,46 +434,32 @@ export function parseAttributesJBrowseNoUnescape(
|
|
|
494
434
|
parseAttributesJBrowseImpl(attrString, result, false)
|
|
495
435
|
}
|
|
496
436
|
|
|
437
|
+
const STRAND_MAP: Record<string, number | undefined> = {
|
|
438
|
+
'+': 1,
|
|
439
|
+
'-': -1,
|
|
440
|
+
'.': 0,
|
|
441
|
+
}
|
|
442
|
+
|
|
497
443
|
function parseFeatureJBrowseImpl(
|
|
498
444
|
line: string,
|
|
499
445
|
shouldUnescape: boolean,
|
|
500
446
|
): JBrowseFeature {
|
|
501
447
|
const f = line.split('\t')
|
|
502
|
-
const
|
|
503
|
-
const
|
|
504
|
-
const
|
|
505
|
-
const
|
|
506
|
-
const
|
|
507
|
-
const scoreStr = f[5]
|
|
508
|
-
const strand = f[6]
|
|
509
|
-
const phase = f[7]
|
|
510
|
-
const attrString = f[8]
|
|
448
|
+
const startStr = f[3]!
|
|
449
|
+
const endStr = f[4]!
|
|
450
|
+
const scoreStr = f[5]!
|
|
451
|
+
const phase = f[7]!
|
|
452
|
+
const attrString = f[8]!
|
|
511
453
|
|
|
512
454
|
const result: JBrowseFeature = {
|
|
513
|
-
refName:
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
? null
|
|
522
|
-
: shouldUnescape
|
|
523
|
-
? unescape(source)
|
|
524
|
-
: source,
|
|
525
|
-
type:
|
|
526
|
-
type.length === 0 || type === '.'
|
|
527
|
-
? null
|
|
528
|
-
: shouldUnescape
|
|
529
|
-
? unescape(type)
|
|
530
|
-
: type,
|
|
531
|
-
start: startStr.length === 0 || startStr === '.' ? 0 : +startStr - 1,
|
|
532
|
-
end: endStr.length === 0 || endStr === '.' ? 0 : +endStr,
|
|
533
|
-
score: scoreStr.length === 0 || scoreStr === '.' ? undefined : +scoreStr,
|
|
534
|
-
strand:
|
|
535
|
-
strand === '+' ? 1 : strand === '-' ? -1 : strand === '.' ? 0 : undefined,
|
|
536
|
-
phase: phase.length === 0 || phase === '.' ? undefined : +phase,
|
|
455
|
+
refName: strField(f[0]!, shouldUnescape, ''),
|
|
456
|
+
source: strField(f[1]!, shouldUnescape, null),
|
|
457
|
+
type: strField(f[2]!, shouldUnescape, null),
|
|
458
|
+
start: isEmpty(startStr) ? 0 : +startStr - 1,
|
|
459
|
+
end: isEmpty(endStr) ? 0 : +endStr,
|
|
460
|
+
score: isEmpty(scoreStr) ? undefined : +scoreStr,
|
|
461
|
+
strand: STRAND_MAP[f[6]!],
|
|
462
|
+
phase: isEmpty(phase) ? undefined : +phase,
|
|
537
463
|
subfeatures: [],
|
|
538
464
|
}
|
|
539
465
|
|