@gmod/bed 2.0.7 → 2.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +5 -0
- package/dist/defaultTypes.js +5 -1
- package/dist/defaultTypes.js.map +1 -1
- package/package.json +4 -3
- package/src/as/README.md +7 -0
- package/src/as/autoSqlSchemas.js +176 -0
- package/src/autoSql.js +1501 -0
- package/src/defaultTypes.js +8 -0
- package/src/index.js +3 -0
- package/src/parser.js +104 -0
- package/src/util.js +26 -0
package/src/index.js
ADDED
package/src/parser.js
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import parser from './autoSql'
|
|
2
|
+
import types from './defaultTypes'
|
|
3
|
+
import { detectTypes } from './util'
|
|
4
|
+
|
|
5
|
+
// lookup from a BED strand symbol to the numeric strand that parseLine stores
// on the feature (anything not listed here ends up as 0 there)
const strandMap = { '.': 0, '-': -1, '+': 1 }
|
|
6
|
+
|
|
7
|
+
/**
 * Heuristic for deciding that a split BED line is BED12-like: the value in
 * column 10 (index 9) must parse as an integer, and column 11 (index 10) must
 * contain exactly that many non-empty comma-separated entries.
 *
 * @param fields - the columns of a single BED line
 * @return true when there are at least 12 columns and the count in column 10
 *   matches the number of entries in column 11
 */
function isBed12Like(fields) {
  if (fields.length < 12) {
    return false
  }

  const blockCount = parseInt(fields[9], 10)
  if (Number.isNaN(blockCount)) {
    return false
  }

  // a trailing comma yields an empty entry, which must not be counted
  const entries = fields[10]?.split(',').filter(entry => !!entry)
  return entries?.length === blockCount
}
|
|
15
|
+
|
|
16
|
+
/**
 * Parser for BED lines. The column layout is taken from an autoSql schema:
 * either autoSql text supplied by the caller, a named predefined schema, or
 * the default BED schema.
 */
export default class BED {
  /**
   * @param args - optional settings
   * @param args.autoSql - autoSql schema text, parsed and used as the column
   *   layout
   * @param args.type - key of a predefined schema in the default types; only
   *   consulted when args.autoSql is not given
   * @throws Error when args.type does not name a predefined schema
   */
  constructor(args = {}) {
    if (args.autoSql) {
      this.autoSql = detectTypes(parser.parse(args.autoSql))
    } else if (args.type) {
      if (!types[args.type]) {
        throw new Error('Type not found')
      }
      this.autoSql = detectTypes(types[args.type])
    } else {
      this.autoSql = detectTypes(types.defaultBedSchema)
      // no schema was requested: parseLine applies the default schema only to
      // lines that pass the BED12 heuristic and falls back to generic column
      // names otherwise
      this.attemptDefaultBed = true
    }
  }

  /**
   * Parses a line of text as a BED line with the loaded autoSql schema.
   *
   * @param line - a BED line as tab delimited text or as an array of columns
   * @param opts - supply opts.uniqueId to have it copied onto the feature
   * @return an object representing a feature; strand is always numeric
   *   (1, -1 or 0) and chrom is URI-decoded
   * @throws Error when a text line starts with "track" or "browser"
   */
  parseLine(line, opts = {}) {
    const { autoSql } = this
    const { uniqueId } = opts
    let fields = line
    if (!Array.isArray(line)) {
      if (line.startsWith('track') || line.startsWith('browser')) {
        throw new Error(
          `track and browser line parsing is not supported, please filter:\n${line}`,
        )
      }
      fields = line.split('\t')
    }

    let feature = {}
    if (!this.attemptDefaultBed || isBed12Like(fields)) {
      for (let i = 0; i < autoSql.fields.length; i++) {
        const autoField = autoSql.fields[i]
        let columnVal = fields[i]
        const { isNumeric, isArray, arrayIsNumeric, name } = autoField
        // the line has fewer columns than the schema: stop at the last one
        if (columnVal === null || columnVal === undefined) {
          break
        }
        // "." is treated as "no value" and the field is left off the feature
        if (columnVal !== '.') {
          if (isNumeric) {
            // keep the raw text when it does not convert to a number
            const num = Number(columnVal)
            columnVal = Number.isNaN(num) ? columnVal : num
          } else if (isArray) {
            columnVal = columnVal.split(',')
            // a trailing comma produces one empty entry at the end
            if (columnVal[columnVal.length - 1] === '') {
              columnVal.pop()
            }
            if (arrayIsNumeric) {
              columnVal = columnVal.map(str => Number(str))
            }
          }

          feature[name] = columnVal
        }
      }
    } else {
      // default schema requested but the line is not BED12-like: name the
      // first four columns and call the rest field4, field5, ...
      const fieldNames = ['chrom', 'chromStart', 'chromEnd', 'name']
      feature = Object.fromEntries(
        fields.map((f, i) => [fieldNames[i] ?? `field${i}`, f]),
      )
      feature.chromStart = +feature.chromStart
      feature.chromEnd = +feature.chromEnd

      // Promote column 5 to a score only when the whole value converts to a
      // number. The parseFloat test rejects empty/blank text (which Number()
      // would turn into 0); the Number() test rejects values with trailing
      // junk such as "12abc", which would otherwise become a NaN score and
      // lose the raw column.
      const score = Number(feature.field4)
      if (
        !Number.isNaN(Number.parseFloat(feature.field4)) &&
        !Number.isNaN(score)
      ) {
        feature.score = score
        delete feature.field4
      }
      if (feature.field5 === '+' || feature.field5 === '-') {
        feature.strand = feature.field5
        delete feature.field5
      }
    }
    if (uniqueId) {
      feature.uniqueId = uniqueId
    }
    feature.strand = strandMap[feature.strand] || 0

    // decodeURIComponent stringifies its argument, so a missing chrom would
    // be stored as the literal text "undefined"/"null"; leave it untouched
    if (feature.chrom !== undefined && feature.chrom !== null) {
      feature.chrom = decodeURIComponent(feature.chrom)
    }
    return feature
  }
}
|
package/src/util.js
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * Returns a copy of an autoSql schema whose fields carry type flags:
 *
 * - "isNumeric" on fields without a size whose type is one of
 *   'uint', 'int', 'float', 'long'
 * - "isArray" on fields with a size whose type is not 'char'
 * - "arrayIsNumeric" on fields with a size whose type is one of the numeric
 *   types above
 *
 * Neither the schema nor its field objects are modified.
 *
 * @param autoSql - an autoSql schema from the peg parser
 * @return autoSql with type annotations added
 */
export function detectTypes(autoSql) {
  const numericTypes = new Set(['uint', 'int', 'float', 'long'])

  const annotate = autoField => {
    const annotated = { ...autoField }
    const numeric = numericTypes.has(autoField.type)
    if (autoField.size) {
      if (autoField.type !== 'char') {
        annotated.isArray = true
      }
      if (numeric) {
        annotated.arrayIsNumeric = true
      }
    } else if (numeric) {
      annotated.isNumeric = true
    }
    return annotated
  }

  const fields = autoSql.fields.map(autoField => annotate(autoField))
  return { ...autoSql, fields }
}
|