@teselagen/bio-parsers 0.1.26 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +24219 -39924
- package/index.mjs +24238 -39921
- package/index.umd.js +32684 -48391
- package/package.json +3 -7
- package/src/ab1ToJson.js +177 -0
- package/src/anyToJson.js +225 -0
- package/src/fastaToJson.js +101 -0
- package/src/genbankToJson.d.__ts +20 -0
- package/src/genbankToJson.js +688 -0
- package/src/geneiousXmlToJson.js +147 -0
- package/src/gffToJson.js +43 -0
- package/src/index.js +23 -0
- package/src/jbeiXmlToJson.js +109 -0
- package/src/jsonToBed.js +39 -0
- package/src/jsonToFasta.js +33 -0
- package/src/jsonToGenbank.js +423 -0
- package/src/jsonToJsonString.js +26 -0
- package/src/sbolXmlToJson.js +135 -0
- package/src/snapgeneToJson.js +245 -0
- package/src/utils/NameUtils.js +10 -0
- package/src/utils/ParserUtil.js +93 -0
- package/src/utils/cleanUpTeselagenJsonForExport.js +13 -0
- package/src/utils/constants.js +24 -0
- package/src/utils/convertOldSequenceDataToNewDataType.js +64 -0
- package/src/utils/createInitialSequence.js +14 -0
- package/src/utils/extractFileExtension.js +14 -0
- package/src/utils/flattenSequenceArray.js +17 -0
- package/src/utils/getArrayBufferFromFile.js +32 -0
- package/src/utils/isBrowser.js +1 -0
- package/src/utils/parseUracilFeatures.js +13 -0
- package/src/utils/pragmasAndTypes.js +21 -0
- package/src/utils/searchWholeObjByName.js +98 -0
- package/src/utils/splitStringIntoLines.js +13 -0
- package/src/utils/unmangleUrls.js +34 -0
- package/src/utils/validateSequence.js +349 -0
- package/src/utils/validateSequenceArray.js +20 -0
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/* eslint-disable no-var*/
|
|
2
|
+
|
|
3
|
+
import validateSequenceArray from "./utils/validateSequenceArray";
|
|
4
|
+
import {
|
|
5
|
+
searchWholeObjByNameSimple,
|
|
6
|
+
searchWholeObjByNameSimpleArray
|
|
7
|
+
} from "./utils/searchWholeObjByName";
|
|
8
|
+
|
|
9
|
+
import { XMLParser } from "fast-xml-parser";
|
|
10
|
+
import { forEach, flatMap } from "lodash";
|
|
11
|
+
import { filter } from "lodash";
|
|
12
|
+
|
|
13
|
+
//Here's what should be in the callback:
|
|
14
|
+
// {
|
|
15
|
+
// parsedSequence:
|
|
16
|
+
// messages:
|
|
17
|
+
// success:
|
|
18
|
+
// }
|
|
19
|
+
/**
 * Parse the text of a Geneious XML export into an array of parse results.
 *
 * Each result has the shape `{ parsedSequence, messages, success }`.
 *
 * @param {string} string - XML text to parse.
 * @param {object} [options] - Forwarded to `validateSequenceArray` and to
 *   `parseGeneiousJson`.
 * @returns {Promise<*>} Results that carry a non-empty `sequence` are returned
 *   directly (they are NOT passed through `validateSequenceArray`); in every
 *   other case the value returned by `validateSequenceArray` is returned.
 */
async function geneiousXmlToJson(string, options) {
  options = options || {};

  // Funnel for every result that should be validated before being returned.
  const validated = (sequences) => validateSequenceArray(sequences, options);
  // Build a failed parse result carrying a single message.
  const failure = (message) => ({ success: false, messages: [message] });

  try {
    // `isArray: () => true` makes the XML parser wrap every tag in an array.
    const xmlParser = new XMLParser({ isArray: () => true });
    const parsedXml = xmlParser.parse(string);
    const documents = searchWholeObjByNameSimpleArray(
      "geneiousDocument",
      parsedXml
    );

    if (!documents?.length) {
      return validated(failure("Error: XML is not valid geneious format"));
    }

    const outcomes = [];
    forEach(documents, (doc) => {
      let outcome;
      try {
        outcome = {
          parsedSequence: parseGeneiousJson(doc, options),
          messages: [],
          success: true
        };
      } catch (e) {
        // One broken document must not prevent the others from being parsed.
        console.error("error:", e);
        console.error("error.stack: ", e.stack);
        outcome = failure("Error while parsing Geneious format");
      }
      outcomes.push(outcome);
    });

    // Prefer the results that actually contain sequence data.
    const withSequence = filter(
      outcomes,
      (r) => r?.parsedSequence?.sequence?.length
    );
    return withSequence.length ? withSequence : validated(outcomes);
  } catch (e) {
    console.error(`e:`, e);
    return validated(failure("Error parsing geneious to JSON"));
  }
}
|
|
75
|
+
/**
 * Pull the sequence, name, circularity flag and features out of one parsed
 * `geneiousDocument` node.
 *
 * @param {object} geneiousJson - One `geneiousDocument` node from the parsed XML.
 * @returns {{sequence: *, circular: *, name: *, features: Array}} The values
 *   found by the name-based searches, plus one feature object per annotation.
 */
function parseGeneiousJson(geneiousJson) {
  const circular = searchWholeObjByNameSimple("isCircular", geneiousJson);

  // The sequence payload is looked up inside
  // originalElement > XMLSerialisableRootElement.
  const originalElement = searchWholeObjByNameSimple(
    "originalElement",
    geneiousJson
  );
  const rootElement = searchWholeObjByNameSimple(
    "XMLSerialisableRootElement",
    originalElement
  );

  const sequence = searchWholeObjByNameSimple("charSequence", rootElement);

  // Read the index stored under `key` and shift it down by one
  // (presumably Geneious indices are 1-based -- TODO confirm).
  const shiftedIndex = (key, node) => searchWholeObjByNameSimple(key, node) - 1;

  // Translate one Geneious annotation into a feature object.
  const toFeature = (annotation) => {
    if (!annotation) return undefined;

    // Feature names are capped at 255 characters.
    const name = (
      searchWholeObjByNameSimple("description", annotation) || ""
    ).substring(0, 255);
    const intervals = searchWholeObjByNameSimpleArray("interval", annotation);
    const type = searchWholeObjByNameSimple("type", annotation);

    // The overall span runs from the first interval's minimum to the last
    // interval's maximum.
    const headInterval = intervals[0];
    const tailInterval = intervals[intervals.length - 1];
    const start = shiftedIndex("minimumIndex", headInterval);
    const end = shiftedIndex("maximumIndex", tailInterval);

    // Individual locations are only reported for multi-interval annotations.
    const locations =
      intervals.length > 1
        ? intervals.map((interval) => ({
            start: shiftedIndex("minimumIndex", interval),
            end: shiftedIndex("maximumIndex", interval)
          }))
        : undefined;

    // Strand and arrowhead both derive from the first interval's direction;
    // anything other than "leftToRight" is reported as strand -1.
    const direction = searchWholeObjByNameSimple("direction", headInterval);

    return {
      name,
      type,
      locations,
      arrowheadType: direction === "none" ? "NONE" : undefined,
      start,
      end,
      strand: direction === "leftToRight" ? 1 : -1
    };
  };

  const features = flatMap(
    searchWholeObjByNameSimpleArray("annotation", rootElement),
    (annotation) => toFeature(annotation)
  );

  const name = searchWholeObjByNameSimple("name", rootElement);

  return { sequence, circular, name, features };
}
|
|
146
|
+
|
|
147
|
+
export default geneiousXmlToJson;
|
package/src/gffToJson.js
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import gff from "@gmod/gff";
|
|
2
|
+
import _ from "lodash";
|
|
3
|
+
|
|
4
|
+
/**
 * Convert GFF text into an array of parse results, one per sequence record
 * returned by the GFF parser.
 *
 * Features are attached to the sequence whose `id` equals the feature's
 * `seq_id`. Features that reference no parsed sequence are not returned.
 *
 * @param {string} string - GFF text (features plus embedded sequences).
 * @returns {Array<{messages: Array, success: boolean, parsedSequence: object}>}
 *   One result per sequence; `parsedSequence.features` is always an array
 *   (empty when the sequence has no features).
 */
function gffToJson(string) {
  const parsedItems = gff.parseStringSync(string);
  const sequences = [];
  // Null-prototype lookup: a seq_id such as "constructor" or "__proto__" must
  // not resolve to an inherited Object.prototype member.
  const featuresBySeqId = Object.create(null);

  parsedItems.forEach((featureOrSeq) => {
    if (featureOrSeq.sequence) {
      sequences.push(featureOrSeq);
      return;
    }
    // Only the first entry of a non-sequence item is used.
    // NOTE(review): presumably the parser returns each feature as an array of
    // feature lines -- confirm whether the remaining lines should be kept.
    const feature = featureOrSeq[0];
    if (!feature) return;

    const attributes = feature.attributes || {};
    const forSequence =
      featuresBySeqId[feature.seq_id] ||
      (featuresBySeqId[feature.seq_id] = []);
    forSequence.push({
      name: _.get(attributes, "ID[0]"),
      start: feature.start,
      end: feature.end,
      // Anything other than "+" is reported as the reverse strand.
      strand: feature.strand === "+" ? 1 : -1,
      type: feature.type
    });
  });

  return sequences.map((sequence) => ({
    messages: [],
    success: true,
    parsedSequence: {
      name: sequence.id,
      sequence: sequence.sequence,
      circular: false,
      features: featuresBySeqId[sequence.id] || []
    }
  }));
}
|
|
42
|
+
|
|
43
|
+
export default gffToJson;
|
package/src/index.js
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
export { default as anyToJson } from "./anyToJson";
|
|
2
|
+
export { default as fastaToJson } from "./fastaToJson";
|
|
3
|
+
export { default as genbankToJson } from "./genbankToJson";
|
|
4
|
+
export { default as sbolXmlToJson } from "./sbolXmlToJson";
|
|
5
|
+
export { default as geneiousXmlToJson } from "./geneiousXmlToJson";
|
|
6
|
+
export { default as jbeiXmlToJson } from "./jbeiXmlToJson";
|
|
7
|
+
export { default as jsonToGenbank } from "./jsonToGenbank";
|
|
8
|
+
export {
|
|
9
|
+
default as ab1ToJson,
|
|
10
|
+
convertBasePosTraceToPerBpTrace
|
|
11
|
+
} from "./ab1ToJson";
|
|
12
|
+
export { default as jsonToFasta } from "./jsonToFasta";
|
|
13
|
+
export { default as snapgeneToJson } from "./snapgeneToJson";
|
|
14
|
+
export { default as jsonToBed } from "./jsonToBed";
|
|
15
|
+
export { default as cleanUpTeselagenJsonForExport } from "./utils/cleanUpTeselagenJsonForExport";
|
|
16
|
+
export {
|
|
17
|
+
default as searchWholeObjByName,
|
|
18
|
+
searchWholeObjByNameSimple,
|
|
19
|
+
searchWholeObjByNameSimpleArray
|
|
20
|
+
} from "./utils/searchWholeObjByName";
|
|
21
|
+
export { default as parseUracilFeatures } from "./utils/parseUracilFeatures";
|
|
22
|
+
export { default as jsonToJsonString } from "./jsonToJsonString";
|
|
23
|
+
export { default as validateSequenceArray } from "./utils/validateSequenceArray";
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/* eslint-disable no-var*/
|
|
2
|
+
|
|
3
|
+
import validateSequenceArray from "./utils/validateSequenceArray";
|
|
4
|
+
import { XMLParser } from "fast-xml-parser";
|
|
5
|
+
import { filter } from "lodash";
|
|
6
|
+
|
|
7
|
+
//Here's what should be in the callback:
|
|
8
|
+
// {
|
|
9
|
+
// parsedSequence:
|
|
10
|
+
// messages:
|
|
11
|
+
// success:
|
|
12
|
+
// }
|
|
13
|
+
/**
 * Parse the text of a JBEI sequence XML file into an array of parse results.
 *
 * Each result has the shape `{ parsedSequence, messages, success }`.
 *
 * @param {string} string - XML text to parse.
 * @param {object} [options] - Forwarded to `validateSequenceArray` and to
 *   `parseJbeiXml`.
 * @returns {Promise<*>} A result that carries a non-empty `sequence` is
 *   returned directly (NOT passed through `validateSequenceArray`); in every
 *   other case the value returned by `validateSequenceArray` is returned.
 */
async function jbeiXmlToJson(string, options) {
  options = options || {};

  // Funnel for every result that should be validated before being returned.
  const validated = (sequences) => validateSequenceArray(sequences, options);
  // Build a failed parse result carrying a single message.
  const failure = (message) => ({ success: false, messages: [message] });

  try {
    const parsedXml = new XMLParser({}).parse(string);
    // The whole record lives under the root <seq:seq> element.
    const jbeiSeq = parsedXml["seq:seq"];
    if (!jbeiSeq) {
      return validated(failure("Error: XML is not valid jbei format"));
    }

    let outcome;
    try {
      outcome = {
        parsedSequence: parseJbeiXml(jbeiSeq, options),
        messages: [],
        success: true
      };
    } catch (e) {
      console.error("error:", e);
      console.error("error.stack: ", e.stack);
      outcome = failure("Error while parsing JBEI format");
    }
    const outcomes = [outcome];

    // Prefer the result when it actually contains sequence data.
    const withSequence = filter(
      outcomes,
      (r) => r?.parsedSequence?.sequence?.length
    );
    return withSequence.length ? withSequence : validated(outcomes);
  } catch (e) {
    console.error(`e:`, e);
    return validated(failure("Error parsing jbei to JSON"));
  }
}
|
|
63
|
+
/**
 * Normalise a parsed XML child into an array of object nodes.
 * A missing or empty value yields `[]`, a single node is wrapped in an array,
 * and non-object entries (e.g. the empty string left by an empty tag) are
 * dropped.
 */
function jbeiNodeList(value) {
  if (value === undefined || value === null || value === "") return [];
  const list = Array.isArray(value) ? value : [value];
  return list.filter((node) => node !== null && typeof node === "object");
}

/**
 * Convert the parsed `seq:seq` node of a JBEI XML file into sequence data.
 *
 * @param {object} jbeiJson - Parsed `seq:seq` node.
 * @returns {{sequence: *, circular: *, name: *, features: Array}} Sequence
 *   data. `features` is always an array; it is empty when the record has no
 *   `seq:features` / `seq:feature` content. Features without any
 *   `seq:location` are skipped, since they cannot be positioned.
 */
function parseJbeiXml(jbeiJson) {
  const {
    "seq:sequence": sequence,
    "seq:name": name,
    "seq:circular": circular,
    "seq:features": featuresNode
  } = jbeiJson;

  // `seq:features` may be absent, or an empty string for an empty tag.
  const rawFeatures = jbeiNodeList(featuresNode?.["seq:feature"]);

  const features = [];
  rawFeatures.forEach((rawFeature) => {
    const {
      "seq:complement": complement,
      "seq:label": label,
      "seq:type": type,
      "seq:location": jbeiLocations
    } = rawFeature;

    // Every location is shifted down by one on both ends.
    const locations = jbeiNodeList(jbeiLocations).map(
      ({ "seq:genbankStart": gbStart, "seq:end": normEnd }) => ({
        start: gbStart - 1,
        end: normEnd - 1
      })
    );
    if (!locations.length) return;

    features.push({
      // Overall span: start of the first location to end of the last one.
      start: locations[0].start,
      end: locations[locations.length - 1].end,
      // Individual locations are only reported for multi-location features.
      locations: locations.length > 1 ? locations : undefined,
      name: label,
      type,
      strand: complement ? -1 : 1
    });
  });

  return {
    sequence,
    circular,
    name: name,
    features
  };
}
|
|
108
|
+
|
|
109
|
+
export default jbeiXmlToJson;
|
package/src/jsonToBed.js
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { tidyUpSequenceData } from "@teselagen/sequence-utils";
|
|
2
|
+
|
|
3
|
+
/**
 * Serialise the features of a sequence as BED text.
 *
 * @param {object} jsonSequence - Sequence data (name, features, size,
 *   description, circular).
 * @param {object} [options]
 * @param {boolean} [options.featuresOnly] - When truthy, `jsonSequence` is used
 *   as given instead of being passed through `tidyUpSequenceData` first.
 * @param {string} [options.sequenceName] - Name used for the track and for the
 *   first column; defaults to a FASTA-style header built from the sequence.
 * @returns {string} A `track` line followed by one tab-separated line per
 *   feature.
 */
function jsonToBed(jsonSequence, options = {}) {
  const sequenceInfo = options.featuresOnly
    ? jsonSequence
    : tidyUpSequenceData(jsonSequence);
  const { name, features, size, description, circular } = sequenceInfo;
  // Same fallback for the generated name and the track description, so the
  // description never reads "undefined Annotations".
  const displayName = name || "Untitled Sequence";

  // Mirrors the header layout used for FASTA export (note the doubled "|").
  let sequenceNameToMatchFasta = `${displayName}|`;
  sequenceNameToMatchFasta += "|" + size;
  sequenceNameToMatchFasta += description ? "|" + description : "";
  sequenceNameToMatchFasta += "|" + (circular ? "circular" : "linear");
  const sequenceNameToUse = options.sequenceName || sequenceNameToMatchFasta;

  let outString = `track name="${sequenceNameToUse}" description="${displayName} Annotations" itemRgb="On"\n`;

  // Tolerate sequences that carry no feature list at all.
  (features || []).forEach((feat) => {
    const { start, end, name: featureName, type, forward, strand } = feat;
    const label = featureName ? featureName : type;

    let orientation;
    if (forward || strand === 1) {
      orientation = "+";
    } else if (forward === false || strand === -1) {
      orientation = "-";
    } else {
      // "." = no strand: neither `forward` nor `strand` gives a direction.
      orientation = ".";
    }

    const color = type === "CDS" ? "230,88,0" : "";
    // chromStart is 0-based; chromEnd is not included in the feature, hence
    // `end + 1`. Score, thickStart, thickEnd and the block columns stay empty.
    outString += `${sequenceNameToUse}\t${start}\t${
      end + 1
    }\t${label}\t\t${orientation}\t\t\t${color}\n`;
  });
  return outString;
}
|
|
38
|
+
|
|
39
|
+
export default jsonToBed;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { tidyUpSequenceData } from "@teselagen/sequence-utils";
|
|
2
|
+
import { mangleOrStripUrls } from "./utils/unmangleUrls";
|
|
3
|
+
|
|
4
|
+
/**
 * Serialise a sequence as FASTA text.
 *
 * The header line is `>name||size[|description]|circular-or-linear` and the
 * sequence body is wrapped at 80 characters per line.
 *
 * @param {object} jsonSequence - Sequence data; it is passed through
 *   `tidyUpSequenceData` before use.
 * @param {object} [options] - Forwarded to `mangleOrStripUrls` when a
 *   description is present.
 * @returns {string} The FASTA text.
 */
export default function jsonToFasta(jsonSequence, options) {
  const tidied = tidyUpSequenceData(jsonSequence);
  options = options || {};

  // Protein records export the amino-acid sequence and its size instead.
  const exportProtein = tidied.isProtein && tidied.proteinSequence;
  const residues = exportProtein ? tidied.proteinSequence : tidied.sequence;
  const length = exportProtein ? tidied.proteinSize : tidied.size;

  const header = [
    `>${tidied.name || "Untitled Sequence"}|`,
    "|" + length,
    tidied.description
      ? "|" + mangleOrStripUrls(tidied.description, options)
      : "",
    "|" + (tidied.circular ? "circular" : "linear")
  ].join("");

  // Break the sequence into lines of at most 80 characters.
  const wrappedLines = residues.match(/.{1,80}/g) || [];

  return header + "\n" + wrappedLines.join("\n");
}
|