gff-nostream 3.0.10 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -89
- package/dist/api.d.ts +6 -21
- package/dist/api.js +29 -122
- package/dist/api.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -3
- package/dist/index.js.map +1 -1
- package/dist/util.d.ts +20 -120
- package/dist/util.js +29 -179
- package/dist/util.js.map +1 -1
- package/esm/api.d.ts +6 -21
- package/esm/api.js +30 -121
- package/esm/api.js.map +1 -1
- package/esm/index.d.ts +2 -2
- package/esm/index.js +1 -1
- package/esm/index.js.map +1 -1
- package/esm/util.d.ts +20 -120
- package/esm/util.js +29 -172
- package/esm/util.js.map +1 -1
- package/package.json +5 -5
- package/src/api.ts +39 -150
- package/src/index.ts +2 -18
- package/src/util.ts +39 -308
package/src/api.ts
CHANGED
|
@@ -1,15 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
parseFeatureJBrowseNoUnescape,
|
|
5
|
-
parseFeatureNoUnescape,
|
|
6
|
-
} from './util.ts'
|
|
7
|
-
|
|
8
|
-
import type {
|
|
9
|
-
GFF3Feature,
|
|
10
|
-
GFF3FeatureLineWithRefs,
|
|
11
|
-
JBrowseFeature,
|
|
12
|
-
} from './util.ts'
|
|
1
|
+
import { parseFeature } from './util.ts'
|
|
2
|
+
|
|
3
|
+
import type { GffFeature } from './util.ts'
|
|
13
4
|
|
|
14
5
|
interface ParseInput {
|
|
15
6
|
line: string
|
|
@@ -45,8 +36,8 @@ function appendOrphan<T>(orphans: Map<string, T[]>, key: string, value: T) {
|
|
|
45
36
|
}
|
|
46
37
|
|
|
47
38
|
/**
|
|
48
|
-
* The
|
|
49
|
-
*
|
|
39
|
+
* The parser collapses single-element attribute arrays to scalars, so a raw
|
|
40
|
+
* ID/Parent value can be a string, a string array, or absent. These coerce
|
|
50
41
|
* those `unknown` values without typecasts.
|
|
51
42
|
*/
|
|
52
43
|
function firstString(value: unknown): string | undefined {
|
|
@@ -63,25 +54,15 @@ function toStringArray(value: unknown): string[] {
|
|
|
63
54
|
|
|
64
55
|
/**
|
|
65
56
|
* Synchronously parse a string containing GFF3 and return an array of the
|
|
66
|
-
* parsed
|
|
57
|
+
* parsed features. Comments, directives, and `##FASTA` sections are ignored.
|
|
67
58
|
*
|
|
68
59
|
* @param str - GFF3 string
|
|
69
60
|
* @returns array of parsed features
|
|
70
61
|
*/
|
|
71
|
-
export function parseStringSync(str: string):
|
|
62
|
+
export function parseStringSync(str: string): GffFeature[] {
|
|
72
63
|
return parseRecords(stringToRecords(str))
|
|
73
64
|
}
|
|
74
65
|
|
|
75
|
-
/**
|
|
76
|
-
* Synchronously parse a string containing GFF3 directly into JBrowse format.
|
|
77
|
-
*
|
|
78
|
-
* @param str - GFF3 string
|
|
79
|
-
* @returns array of JBrowse-format features
|
|
80
|
-
*/
|
|
81
|
-
export function parseStringSyncJBrowse(str: string): JBrowseFeature[] {
|
|
82
|
-
return parseRecordsJBrowse(stringToRecords(str))
|
|
83
|
-
}
|
|
84
|
-
|
|
85
66
|
function stringToRecords(str: string) {
|
|
86
67
|
const lines = str.split(/\r?\n/)
|
|
87
68
|
const records: ParseInput[] = []
|
|
@@ -102,102 +83,18 @@ function stringToRecords(str: string) {
|
|
|
102
83
|
|
|
103
84
|
/**
|
|
104
85
|
* Parse an array of LineRecord objects containing raw GFF3 lines.
|
|
105
|
-
* Supports parent/child relationships.
|
|
106
|
-
*
|
|
107
|
-
* @param records - Array of LineRecord objects with raw line and metadata
|
|
108
|
-
* @returns array of parsed features
|
|
109
|
-
*/
|
|
110
|
-
export function parseRecords(records: ParseInput[]): GFF3Feature[] {
|
|
111
|
-
const items: GFF3Feature[] = []
|
|
112
|
-
const byId = new Map<string, GFF3Feature>()
|
|
113
|
-
const orphans = new Map<string, GFF3Feature[]>()
|
|
114
|
-
|
|
115
|
-
for (const record of records) {
|
|
116
|
-
const parsed = record.hasEscapes
|
|
117
|
-
? parseFeature(record.line)
|
|
118
|
-
: parseFeatureNoUnescape(record.line)
|
|
119
|
-
const featureLine: GFF3FeatureLineWithRefs = {
|
|
120
|
-
...parsed,
|
|
121
|
-
child_features: [],
|
|
122
|
-
derived_features: [],
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
if (record.lineHash !== undefined) {
|
|
126
|
-
featureLine.attributes ??= {}
|
|
127
|
-
featureLine.attributes._lineHash = [String(record.lineHash)]
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
const attrs = featureLine.attributes
|
|
131
|
-
const ids = attrs?.ID
|
|
132
|
-
const parents = attrs?.Parent
|
|
133
|
-
|
|
134
|
-
if (!ids && !parents) {
|
|
135
|
-
items.push([featureLine])
|
|
136
|
-
} else {
|
|
137
|
-
let feature: GFF3Feature
|
|
138
|
-
if (ids) {
|
|
139
|
-
const id = ids[0]!
|
|
140
|
-
const existing = byId.get(id)
|
|
141
|
-
if (existing) {
|
|
142
|
-
// Multi-location continuation: share child_features/derived_features
|
|
143
|
-
// with the first line so children remain visible across all lines
|
|
144
|
-
// regardless of arrival order.
|
|
145
|
-
featureLine.child_features = existing[0]!.child_features
|
|
146
|
-
featureLine.derived_features = existing[0]!.derived_features
|
|
147
|
-
existing.push(featureLine)
|
|
148
|
-
feature = existing
|
|
149
|
-
} else {
|
|
150
|
-
feature = [featureLine]
|
|
151
|
-
if (!parents) {
|
|
152
|
-
items.push(feature)
|
|
153
|
-
}
|
|
154
|
-
byId.set(id, feature)
|
|
155
|
-
const waiting = orphans.get(id)
|
|
156
|
-
if (waiting) {
|
|
157
|
-
for (const w of waiting) {
|
|
158
|
-
featureLine.child_features.push(w)
|
|
159
|
-
}
|
|
160
|
-
orphans.delete(id)
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
} else {
|
|
164
|
-
feature = [featureLine]
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
if (parents) {
|
|
168
|
-
for (const parentId of parents) {
|
|
169
|
-
const parent = byId.get(parentId)
|
|
170
|
-
if (parent) {
|
|
171
|
-
// child_features is shared across all parent feature lines,
|
|
172
|
-
// so push once via the first line.
|
|
173
|
-
parent[0]!.child_features.push(feature)
|
|
174
|
-
} else {
|
|
175
|
-
appendOrphan(orphans, parentId, feature)
|
|
176
|
-
}
|
|
177
|
-
}
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
return items
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
/**
|
|
186
|
-
* Parse an array of LineRecord objects directly into JBrowse feature format.
|
|
187
86
|
* Supports parent/child relationships via subfeatures.
|
|
188
87
|
*
|
|
189
88
|
* @param records - Array of LineRecord objects with raw line and metadata
|
|
190
|
-
* @returns array of
|
|
89
|
+
* @returns array of parsed features
|
|
191
90
|
*/
|
|
192
|
-
export function
|
|
193
|
-
const items:
|
|
194
|
-
const byId = new Map<string,
|
|
195
|
-
const orphans = new Map<string,
|
|
91
|
+
export function parseRecords(records: ParseInput[]): GffFeature[] {
|
|
92
|
+
const items: GffFeature[] = []
|
|
93
|
+
const byId = new Map<string, GffFeature>()
|
|
94
|
+
const orphans = new Map<string, GffFeature[]>()
|
|
196
95
|
|
|
197
96
|
for (const record of records) {
|
|
198
|
-
const feature = record.hasEscapes
|
|
199
|
-
? parseFeatureJBrowse(record.line)
|
|
200
|
-
: parseFeatureJBrowseNoUnescape(record.line)
|
|
97
|
+
const feature = parseFeature(record.line, record.hasEscapes)
|
|
201
98
|
|
|
202
99
|
if (record.lineHash !== undefined) {
|
|
203
100
|
feature._lineHash = String(record.lineHash)
|
|
@@ -206,34 +103,35 @@ export function parseRecordsJBrowse(records: ParseInput[]): JBrowseFeature[] {
|
|
|
206
103
|
const id = firstString(feature.id)
|
|
207
104
|
const parents = toStringArray(feature.parent)
|
|
208
105
|
|
|
209
|
-
|
|
106
|
+
// A parentless line is a top-level item. Every line of a top-level
|
|
107
|
+
// discontinuous feature (e.g. cDNA_match/EST_match spanning several
|
|
108
|
+
// segments under one shared ID, with no Parent) is its own top-level
|
|
109
|
+
// item, so push regardless of whether the id is already registered.
|
|
110
|
+
if (parents.length === 0) {
|
|
210
111
|
items.push(feature)
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
const
|
|
222
|
-
|
|
223
|
-
for (const w of waiting) {
|
|
224
|
-
feature.subfeatures.push(w)
|
|
225
|
-
}
|
|
226
|
-
orphans.delete(id)
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// Register the id only the first time it is seen. Continuation lines
|
|
115
|
+
// (multi-location features such as a CDS spanning several segments share
|
|
116
|
+
// one ID across lines) skip registration but must still be attached to
|
|
117
|
+
// their parent below, so this is independent of the parent handling.
|
|
118
|
+
if (id && !byId.has(id)) {
|
|
119
|
+
byId.set(id, feature)
|
|
120
|
+
const waiting = orphans.get(id)
|
|
121
|
+
if (waiting) {
|
|
122
|
+
for (const w of waiting) {
|
|
123
|
+
feature.subfeatures.push(w)
|
|
227
124
|
}
|
|
125
|
+
orphans.delete(id)
|
|
228
126
|
}
|
|
127
|
+
}
|
|
229
128
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
}
|
|
129
|
+
for (const parentId of parents) {
|
|
130
|
+
const parentFeature = byId.get(parentId)
|
|
131
|
+
if (parentFeature) {
|
|
132
|
+
parentFeature.subfeatures.push(feature)
|
|
133
|
+
} else {
|
|
134
|
+
appendOrphan(orphans, parentId, feature)
|
|
237
135
|
}
|
|
238
136
|
}
|
|
239
137
|
}
|
|
@@ -241,13 +139,4 @@ export function parseRecordsJBrowse(records: ParseInput[]): JBrowseFeature[] {
|
|
|
241
139
|
return items
|
|
242
140
|
}
|
|
243
141
|
|
|
244
|
-
export type {
|
|
245
|
-
GFF3Comment,
|
|
246
|
-
GFF3Directive,
|
|
247
|
-
GFF3Feature,
|
|
248
|
-
GFF3FeatureLine,
|
|
249
|
-
GFF3FeatureLineWithRefs,
|
|
250
|
-
GFF3Item,
|
|
251
|
-
GFF3Sequence,
|
|
252
|
-
JBrowseFeature,
|
|
253
|
-
} from './util.ts'
|
|
142
|
+
export type { GffFeature } from './util.ts'
|
package/src/index.ts
CHANGED
|
@@ -1,19 +1,3 @@
|
|
|
1
|
-
export {
|
|
2
|
-
extractType,
|
|
3
|
-
parseRecords,
|
|
4
|
-
parseRecordsJBrowse,
|
|
5
|
-
parseStringSync,
|
|
6
|
-
parseStringSyncJBrowse,
|
|
7
|
-
} from './api.ts'
|
|
1
|
+
export { extractType, parseRecords, parseStringSync } from './api.ts'
|
|
8
2
|
|
|
9
|
-
export type {
|
|
10
|
-
GFF3Comment,
|
|
11
|
-
GFF3Directive,
|
|
12
|
-
GFF3Feature,
|
|
13
|
-
GFF3FeatureLine,
|
|
14
|
-
GFF3FeatureLineWithRefs,
|
|
15
|
-
GFF3Item,
|
|
16
|
-
GFF3Sequence,
|
|
17
|
-
JBrowseFeature,
|
|
18
|
-
LineRecord,
|
|
19
|
-
} from './api.ts'
|
|
3
|
+
export type { GffFeature, LineRecord } from './api.ts'
|
package/src/util.ts
CHANGED
|
@@ -1,10 +1,6 @@
|
|
|
1
|
-
// Fast, low-level functions for parsing
|
|
1
|
+
// Fast, low-level functions for parsing GFF3.
|
|
2
2
|
// JavaScript port of Robert Buels's Bio::GFF3::LowLevel Perl module.
|
|
3
3
|
|
|
4
|
-
const directiveRegex = /^\s*##\s*(\S+)\s*(.*)/
|
|
5
|
-
const whitespaceRegex = /\s+/
|
|
6
|
-
const nonDigitRegex = /\D/g
|
|
7
|
-
|
|
8
4
|
const HEX_LOOKUP: Record<string, string | undefined> = {}
|
|
9
5
|
for (let i = 0; i < 256; i++) {
|
|
10
6
|
const hex = i.toString(16).toUpperCase().padStart(2, '0')
|
|
@@ -18,7 +14,6 @@ for (let i = 0; i < 256; i++) {
|
|
|
18
14
|
* @param stringVal - Escaped GFF3 string value
|
|
19
15
|
* @returns An unescaped string value
|
|
20
16
|
*/
|
|
21
|
-
|
|
22
17
|
export function unescape(stringVal: string) {
|
|
23
18
|
const idx = stringVal.indexOf('%')
|
|
24
19
|
if (idx === -1) {
|
|
@@ -30,18 +25,17 @@ export function unescape(stringVal: string) {
|
|
|
30
25
|
let i = idx
|
|
31
26
|
|
|
32
27
|
while (i < stringVal.length) {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
} else {
|
|
40
|
-
result += stringVal.slice(i, i + 3)
|
|
41
|
-
}
|
|
28
|
+
const char =
|
|
29
|
+
stringVal[i] === '%' && i + 2 < stringVal.length
|
|
30
|
+
? HEX_LOOKUP[stringVal.slice(i + 1, i + 3)]
|
|
31
|
+
: undefined
|
|
32
|
+
if (char !== undefined) {
|
|
33
|
+
result += stringVal.slice(lastIdx, i) + char
|
|
42
34
|
i += 3
|
|
43
35
|
lastIdx = i
|
|
44
36
|
} else {
|
|
37
|
+
// Not a valid escape: advance one char so a '%' that begins a real
|
|
38
|
+
// escape immediately after isn't swallowed (e.g. the %20 in "a%b%20c").
|
|
45
39
|
i++
|
|
46
40
|
}
|
|
47
41
|
}
|
|
@@ -49,79 +43,6 @@ export function unescape(stringVal: string) {
|
|
|
49
43
|
return result + stringVal.slice(lastIdx)
|
|
50
44
|
}
|
|
51
45
|
|
|
52
|
-
function parseAttributesImpl(
|
|
53
|
-
attrString: string,
|
|
54
|
-
shouldUnescape: boolean,
|
|
55
|
-
): GFF3Attributes {
|
|
56
|
-
if (attrString.length === 0 || attrString === '.') {
|
|
57
|
-
return {}
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
const attrs: GFF3Attributes = {}
|
|
61
|
-
let len = attrString.length
|
|
62
|
-
|
|
63
|
-
if (attrString[len - 1] === '\n') {
|
|
64
|
-
len = attrString[len - 2] === '\r' ? len - 2 : len - 1
|
|
65
|
-
attrString = attrString.slice(0, len)
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
let start = 0
|
|
69
|
-
while (start < len) {
|
|
70
|
-
let semiIdx = attrString.indexOf(';', start)
|
|
71
|
-
if (semiIdx === -1) {
|
|
72
|
-
semiIdx = len
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
if (semiIdx > start) {
|
|
76
|
-
const eqIdx = attrString.indexOf('=', start)
|
|
77
|
-
if (eqIdx !== -1 && eqIdx < semiIdx && eqIdx + 1 < semiIdx) {
|
|
78
|
-
const tag = attrString.slice(start, eqIdx)
|
|
79
|
-
let arec = attrs[tag]
|
|
80
|
-
if (!arec) {
|
|
81
|
-
arec = []
|
|
82
|
-
attrs[tag] = arec
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
let valStart = eqIdx + 1
|
|
86
|
-
while (valStart < semiIdx) {
|
|
87
|
-
let commaIdx = attrString.indexOf(',', valStart)
|
|
88
|
-
if (commaIdx === -1 || commaIdx > semiIdx) {
|
|
89
|
-
commaIdx = semiIdx
|
|
90
|
-
}
|
|
91
|
-
if (commaIdx > valStart) {
|
|
92
|
-
const val = attrString.slice(valStart, commaIdx)
|
|
93
|
-
arec.push(shouldUnescape ? unescape(val) : val)
|
|
94
|
-
}
|
|
95
|
-
valStart = commaIdx + 1
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
start = semiIdx + 1
|
|
100
|
-
}
|
|
101
|
-
return attrs
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
/**
|
|
105
|
-
* Parse the 9th column (attributes) of a GFF3 feature line.
|
|
106
|
-
*
|
|
107
|
-
* @param attrString - String of GFF3 9th column
|
|
108
|
-
* @returns Parsed attributes
|
|
109
|
-
*/
|
|
110
|
-
export function parseAttributes(attrString: string): GFF3Attributes {
|
|
111
|
-
return parseAttributesImpl(attrString, true)
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
/**
|
|
115
|
-
* Parse the 9th column (attributes) of a GFF3 feature line without unescaping.
|
|
116
|
-
* Fast path for data known to contain no escaped characters.
|
|
117
|
-
*
|
|
118
|
-
* @param attrString - String of GFF3 9th column
|
|
119
|
-
* @returns Parsed attributes
|
|
120
|
-
*/
|
|
121
|
-
export function parseAttributesNoUnescape(attrString: string): GFF3Attributes {
|
|
122
|
-
return parseAttributesImpl(attrString, false)
|
|
123
|
-
}
|
|
124
|
-
|
|
125
46
|
function isEmpty(s: string) {
|
|
126
47
|
return s.length === 0 || s === '.'
|
|
127
48
|
}
|
|
@@ -134,189 +55,6 @@ function strField<E extends null | ''>(
|
|
|
134
55
|
return isEmpty(s) ? empty : shouldUnescape ? unescape(s) : s
|
|
135
56
|
}
|
|
136
57
|
|
|
137
|
-
function numField(s: string) {
|
|
138
|
-
return isEmpty(s) ? null : +s
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
function parseFeatureImpl(
|
|
142
|
-
line: string,
|
|
143
|
-
shouldUnescape: boolean,
|
|
144
|
-
): GFF3FeatureLine {
|
|
145
|
-
const f = line.split('\t')
|
|
146
|
-
const attrString = f[8]!
|
|
147
|
-
return {
|
|
148
|
-
seq_id: strField(f[0]!, shouldUnescape, null),
|
|
149
|
-
source: strField(f[1]!, shouldUnescape, null),
|
|
150
|
-
type: strField(f[2]!, shouldUnescape, null),
|
|
151
|
-
start: numField(f[3]!),
|
|
152
|
-
end: numField(f[4]!),
|
|
153
|
-
score: numField(f[5]!),
|
|
154
|
-
strand: strField(f[6]!, false, null),
|
|
155
|
-
phase: strField(f[7]!, false, null),
|
|
156
|
-
attributes: isEmpty(attrString)
|
|
157
|
-
? null
|
|
158
|
-
: parseAttributesImpl(attrString, shouldUnescape),
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
/**
|
|
163
|
-
* Parse a GFF3 feature line
|
|
164
|
-
*
|
|
165
|
-
* @param line - GFF3 feature line
|
|
166
|
-
* @returns The parsed feature
|
|
167
|
-
*/
|
|
168
|
-
export function parseFeature(line: string): GFF3FeatureLine {
|
|
169
|
-
return parseFeatureImpl(line, true)
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
/**
|
|
173
|
-
* Parse a GFF3 feature line without unescaping.
|
|
174
|
-
* Fast path for data known to contain no escaped characters.
|
|
175
|
-
*
|
|
176
|
-
* @param line - GFF3 feature line
|
|
177
|
-
* @returns The parsed feature
|
|
178
|
-
*/
|
|
179
|
-
export function parseFeatureNoUnescape(line: string): GFF3FeatureLine {
|
|
180
|
-
return parseFeatureImpl(line, false)
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
/**
|
|
184
|
-
* Parse a GFF3 directive line.
|
|
185
|
-
*
|
|
186
|
-
* @param line - GFF3 directive line
|
|
187
|
-
* @returns The parsed directive
|
|
188
|
-
*/
|
|
189
|
-
export function parseDirective(
|
|
190
|
-
line: string,
|
|
191
|
-
):
|
|
192
|
-
| GFF3Directive
|
|
193
|
-
| GFF3SequenceRegionDirective
|
|
194
|
-
| GFF3GenomeBuildDirective
|
|
195
|
-
| null {
|
|
196
|
-
const match = directiveRegex.exec(line)
|
|
197
|
-
if (!match) {
|
|
198
|
-
return null
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
const name = match[1]!
|
|
202
|
-
const contents = match[2]!
|
|
203
|
-
|
|
204
|
-
const parsed: GFF3Directive = { directive: name }
|
|
205
|
-
if (contents.length) {
|
|
206
|
-
parsed.value = contents.trimEnd()
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
if (name === 'sequence-region') {
|
|
210
|
-
const c = contents.split(whitespaceRegex, 3)
|
|
211
|
-
return {
|
|
212
|
-
...parsed,
|
|
213
|
-
seq_id: c[0]!,
|
|
214
|
-
start: c[1]!.replaceAll(nonDigitRegex, ''),
|
|
215
|
-
end: c[2]!.replaceAll(nonDigitRegex, ''),
|
|
216
|
-
}
|
|
217
|
-
} else if (name === 'genome-build') {
|
|
218
|
-
const [source, buildName] = contents.split(whitespaceRegex, 2)
|
|
219
|
-
return {
|
|
220
|
-
...parsed,
|
|
221
|
-
source: source!,
|
|
222
|
-
buildName: buildName!,
|
|
223
|
-
}
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
return parsed
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
/** A record of GFF3 attribute identifiers and the values of those identifiers */
|
|
230
|
-
export type GFF3Attributes = Record<string, string[] | undefined>
|
|
231
|
-
|
|
232
|
-
/** A representation of a single line of a GFF3 file */
|
|
233
|
-
export interface GFF3FeatureLine {
|
|
234
|
-
/** The ID of the landmark used to establish the coordinate system for the current feature */
|
|
235
|
-
seq_id: string | null
|
|
236
|
-
/** A free text qualifier intended to describe the algorithm or operating procedure that generated this feature */
|
|
237
|
-
source: string | null
|
|
238
|
-
/** The type of the feature */
|
|
239
|
-
type: string | null
|
|
240
|
-
/** The start coordinates of the feature */
|
|
241
|
-
start: number | null
|
|
242
|
-
/** The end coordinates of the feature */
|
|
243
|
-
end: number | null
|
|
244
|
-
/** The score of the feature */
|
|
245
|
-
score: number | null
|
|
246
|
-
/** The strand of the feature */
|
|
247
|
-
strand: string | null
|
|
248
|
-
/** For features of type "CDS", the phase indicates where the next codon begins relative to the 5' end of the current CDS feature */
|
|
249
|
-
phase: string | null
|
|
250
|
-
/** Feature attributes */
|
|
251
|
-
attributes: GFF3Attributes | null
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
/**
|
|
255
|
-
* A GFF3 Feature line that includes references to other features defined in
|
|
256
|
-
* their "Parent" or "Derives_from" attributes
|
|
257
|
-
*/
|
|
258
|
-
export interface GFF3FeatureLineWithRefs extends GFF3FeatureLine {
|
|
259
|
-
/** An array of child features */
|
|
260
|
-
child_features: GFF3Feature[]
|
|
261
|
-
/** An array of features derived from this feature */
|
|
262
|
-
derived_features: GFF3Feature[]
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
/**
|
|
266
|
-
* A GFF3 feature, which may include multiple individual feature lines
|
|
267
|
-
*/
|
|
268
|
-
export type GFF3Feature = GFF3FeatureLineWithRefs[]
|
|
269
|
-
|
|
270
|
-
/** A GFF3 directive */
|
|
271
|
-
export interface GFF3Directive {
|
|
272
|
-
/** The name of the directive */
|
|
273
|
-
directive: string
|
|
274
|
-
/** The string value of the directive */
|
|
275
|
-
value?: string
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
/** A GFF3 sequence-region directive */
|
|
279
|
-
export interface GFF3SequenceRegionDirective extends GFF3Directive {
|
|
280
|
-
/** The string value of the directive */
|
|
281
|
-
value: string
|
|
282
|
-
/** The sequence ID parsed from the directive */
|
|
283
|
-
seq_id: string
|
|
284
|
-
/** The sequence start parsed from the directive */
|
|
285
|
-
start: string
|
|
286
|
-
/** The sequence end parsed from the directive */
|
|
287
|
-
end: string
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
/** A GFF3 genome-build directive */
|
|
291
|
-
export interface GFF3GenomeBuildDirective extends GFF3Directive {
|
|
292
|
-
/** The string value of the directive */
|
|
293
|
-
value: string
|
|
294
|
-
/** The genome build source parsed from the directive */
|
|
295
|
-
source: string
|
|
296
|
-
/** The genome build name parsed from the directive */
|
|
297
|
-
buildName: string
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
/** A GFF3 comment */
|
|
301
|
-
export interface GFF3Comment {
|
|
302
|
-
/** The text of the comment */
|
|
303
|
-
comment: string
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
/** A GFF3 FASTA single sequence */
|
|
307
|
-
export interface GFF3Sequence {
|
|
308
|
-
/** The ID of the sequence */
|
|
309
|
-
id: string
|
|
310
|
-
/** The description of the sequence */
|
|
311
|
-
description?: string
|
|
312
|
-
/** The sequence */
|
|
313
|
-
sequence: string
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
export type GFF3Item = GFF3Feature | GFF3Directive | GFF3Comment | GFF3Sequence
|
|
317
|
-
|
|
318
|
-
// JBrowse format types and parsing functions
|
|
319
|
-
|
|
320
58
|
const JBROWSE_DEFAULT_FIELDS = new Set([
|
|
321
59
|
'start',
|
|
322
60
|
'end',
|
|
@@ -352,7 +90,18 @@ const COMMON_ATTRS: Record<string, string | undefined> = {
|
|
|
352
90
|
gap: 'gap',
|
|
353
91
|
}
|
|
354
92
|
|
|
355
|
-
|
|
93
|
+
const STRAND_MAP: Record<string, number | undefined> = {
|
|
94
|
+
'+': 1,
|
|
95
|
+
'-': -1,
|
|
96
|
+
'.': 0,
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* A parsed GFF3 feature: a flat object with 0-based half-open coordinates,
|
|
101
|
+
* numeric strand (`1`/`-1`/`0`), attributes spread as lowercase top-level keys,
|
|
102
|
+
* and child features nested under `subfeatures`.
|
|
103
|
+
*/
|
|
104
|
+
export interface GffFeature {
|
|
356
105
|
start: number
|
|
357
106
|
end: number
|
|
358
107
|
strand?: number
|
|
@@ -361,11 +110,16 @@ export interface JBrowseFeature {
|
|
|
361
110
|
refName: string
|
|
362
111
|
phase?: number
|
|
363
112
|
score?: number
|
|
364
|
-
subfeatures:
|
|
113
|
+
subfeatures: GffFeature[]
|
|
365
114
|
[key: string]: unknown
|
|
366
115
|
}
|
|
367
116
|
|
|
368
|
-
|
|
117
|
+
/**
|
|
118
|
+
* Parse the 9th column (attributes) of a GFF3 feature line into `result`,
|
|
119
|
+
* lowercasing keys and suffixing any that collide with a default field name.
|
|
120
|
+
* Pass shouldUnescape=false as a fast path for data with no escaped characters.
|
|
121
|
+
*/
|
|
122
|
+
export function parseAttributes(
|
|
369
123
|
attrString: string,
|
|
370
124
|
result: Record<string, unknown>,
|
|
371
125
|
shouldUnescape: boolean,
|
|
@@ -420,30 +174,15 @@ function parseAttributesJBrowseImpl(
|
|
|
420
174
|
}
|
|
421
175
|
}
|
|
422
176
|
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
result: Record<string, unknown>,
|
|
433
|
-
) {
|
|
434
|
-
parseAttributesJBrowseImpl(attrString, result, false)
|
|
435
|
-
}
|
|
436
|
-
|
|
437
|
-
const STRAND_MAP: Record<string, number | undefined> = {
|
|
438
|
-
'+': 1,
|
|
439
|
-
'-': -1,
|
|
440
|
-
'.': 0,
|
|
441
|
-
}
|
|
442
|
-
|
|
443
|
-
function parseFeatureJBrowseImpl(
|
|
444
|
-
line: string,
|
|
445
|
-
shouldUnescape: boolean,
|
|
446
|
-
): JBrowseFeature {
|
|
177
|
+
/**
|
|
178
|
+
* Parse a GFF3 feature line. Pass shouldUnescape=false as a fast path for data
|
|
179
|
+
* known to contain no escaped characters.
|
|
180
|
+
*
|
|
181
|
+
* @param line - GFF3 feature line
|
|
182
|
+
* @param shouldUnescape - whether to unescape percent-encoded values
|
|
183
|
+
* @returns The parsed feature
|
|
184
|
+
*/
|
|
185
|
+
export function parseFeature(line: string, shouldUnescape: boolean): GffFeature {
|
|
447
186
|
const f = line.split('\t')
|
|
448
187
|
const startStr = f[3]!
|
|
449
188
|
const endStr = f[4]!
|
|
@@ -451,7 +190,7 @@ function parseFeatureJBrowseImpl(
|
|
|
451
190
|
const phase = f[7]!
|
|
452
191
|
const attrString = f[8]!
|
|
453
192
|
|
|
454
|
-
const result:
|
|
193
|
+
const result: GffFeature = {
|
|
455
194
|
refName: strField(f[0]!, shouldUnescape, ''),
|
|
456
195
|
source: strField(f[1]!, shouldUnescape, null),
|
|
457
196
|
type: strField(f[2]!, shouldUnescape, null),
|
|
@@ -463,14 +202,6 @@ function parseFeatureJBrowseImpl(
|
|
|
463
202
|
subfeatures: [],
|
|
464
203
|
}
|
|
465
204
|
|
|
466
|
-
|
|
205
|
+
parseAttributes(attrString, result, shouldUnescape)
|
|
467
206
|
return result
|
|
468
207
|
}
|
|
469
|
-
|
|
470
|
-
export function parseFeatureJBrowse(line: string): JBrowseFeature {
|
|
471
|
-
return parseFeatureJBrowseImpl(line, true)
|
|
472
|
-
}
|
|
473
|
-
|
|
474
|
-
export function parseFeatureJBrowseNoUnescape(line: string): JBrowseFeature {
|
|
475
|
-
return parseFeatureJBrowseImpl(line, false)
|
|
476
|
-
}
|