@teselagen/bio-parsers 0.1.27 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,147 @@
1
+ /* eslint-disable no-var*/
2
+
3
+ import validateSequenceArray from "./utils/validateSequenceArray";
4
+ import {
5
+ searchWholeObjByNameSimple,
6
+ searchWholeObjByNameSimpleArray
7
+ } from "./utils/searchWholeObjByName";
8
+
9
+ import { XMLParser } from "fast-xml-parser";
10
+ import { forEach, flatMap } from "lodash";
11
+ import { filter } from "lodash";
12
+
13
+ //Here's what should be in the callback:
14
+ // {
15
+ // parsedSequence:
16
+ // messages:
17
+ // success:
18
+ // }
19
/**
 * Parse a Geneious XML document into an array of parse results.
 *
 * Each result has the shape `{ parsedSequence, messages, success }`.
 * When at least one document yields a non-empty `sequence`, only those
 * results are returned, directly (without going through
 * `validateSequenceArray`). In every other case the outcome is passed to
 * `validateSequenceArray(…, options)` and its return value is returned.
 *
 * @param {string} string - Raw XML text.
 * @param {object} [options] - Forwarded to `validateSequenceArray` and to
 *   `parseGeneiousJson`; a falsy value is replaced by `{}`.
 * @returns {Promise<*>} Resolves to the filtered results or to whatever
 *   `validateSequenceArray` returns.
 */
async function geneiousXmlToJson(string, options) {
  const opts = options || {};

  // All failure / empty outcomes are funnelled through the validator.
  const finish = (sequences) => validateSequenceArray(sequences, opts);

  // Turn one matched "geneiousDocument" node into a result record.
  // Parsing errors are logged and converted into a failed record.
  const toResult = (geneiousJson) => {
    try {
      return {
        parsedSequence: parseGeneiousJson(geneiousJson, opts),
        messages: [],
        success: true
      };
    } catch (e) {
      console.error("error:", e);
      console.error("error.stack: ", e.stack);
      return {
        success: false,
        messages: ["Error while parsing Geneious format"]
      };
    }
  };

  try {
    // The parser is configured so that isArray always answers true.
    const parser = new XMLParser({ isArray: () => true });
    const parsedXml = parser.parse(string);
    const documents = searchWholeObjByNameSimpleArray(
      "geneiousDocument",
      parsedXml
    );

    if (!documents?.length) {
      return finish({
        success: false,
        messages: ["Error: XML is not valid geneious format"]
      });
    }

    const resultArray = [];
    forEach(documents, (doc) => {
      resultArray.push(toResult(doc));
    });

    // Prefer results that actually carry sequence data.
    const withSequence = filter(
      resultArray,
      (entry) => entry?.parsedSequence?.sequence?.length
    );
    return withSequence.length ? withSequence : finish(resultArray);
  } catch (e) {
    console.error(`e:`, e);
    return finish({
      success: false,
      messages: ["Error parsing geneious to JSON"]
    });
  }
}
75
/**
 * Extract sequence data from one parsed "geneiousDocument" node.
 *
 * Values are located by name with `searchWholeObjByNameSimple` /
 * `searchWholeObjByNameSimpleArray`. Sequence, annotations and name are
 * looked up inside the "XMLSerialisableRootElement" found under
 * "originalElement"; `isCircular` is looked up on the whole document.
 *
 * @param {object} geneiousJson - One parsed geneiousDocument node.
 * @returns {{sequence: *, circular: *, name: *, features: Array}}
 */
function parseGeneiousJson(geneiousJson) {
  const circular = searchWholeObjByNameSimple("isCircular", geneiousJson);

  const originalElement = searchWholeObjByNameSimple(
    "originalElement",
    geneiousJson
  );
  const rootElement = searchWholeObjByNameSimple(
    "XMLSerialisableRootElement",
    originalElement
  );

  const sequence = searchWholeObjByNameSimple("charSequence", rootElement);

  // Index values found in the XML are shifted down by one.
  const indexMinusOne = (key, node) =>
    searchWholeObjByNameSimple(key, node) - 1;

  // Convert one annotation node into a feature; falsy nodes yield undefined.
  const toFeature = (annotation) => {
    if (!annotation) {
      return undefined;
    }
    const description =
      searchWholeObjByNameSimple("description", annotation) || "";
    const name = description.substring(0, 255);
    const intervals = searchWholeObjByNameSimpleArray("interval", annotation);
    const type = searchWholeObjByNameSimple("type", annotation);

    const firstInterval = intervals[0];
    const lastInterval = intervals[intervals.length - 1];
    const start = indexMinusOne("minimumIndex", firstInterval);
    const end = indexMinusOne("maximumIndex", lastInterval);

    // Per-interval locations are only reported for multi-interval features.
    const locations =
      intervals.length > 1
        ? intervals.map((interval) => ({
            start: indexMinusOne("minimumIndex", interval),
            end: indexMinusOne("maximumIndex", interval)
          }))
        : undefined;

    // Direction of the first interval decides strand and arrowhead.
    let strand = -1;
    if (
      searchWholeObjByNameSimple("direction", firstInterval) === "leftToRight"
    ) {
      strand = 1;
    }
    let arrowheadType;
    if (searchWholeObjByNameSimple("direction", firstInterval) === "none") {
      arrowheadType = "NONE";
    }

    return { name, type, locations, arrowheadType, start, end, strand };
  };

  const features = flatMap(
    searchWholeObjByNameSimpleArray("annotation", rootElement),
    toFeature
  );
  const name = searchWholeObjByNameSimple("name", rootElement);

  return { sequence, circular, name, features };
}
146
+
147
+ export default geneiousXmlToJson;
@@ -0,0 +1,43 @@
1
+ import gff from "@gmod/gff";
2
+ import _ from "lodash";
3
+
4
/**
 * Convert GFF text into an array of `{ messages, success, parsedSequence }`
 * results, one per sequence record returned by `gff.parseStringSync`.
 *
 * Parsed items that carry a truthy `sequence` are treated as sequence
 * records; array items are treated as features, of which only the first
 * element is used. Features are attached to the sequence whose `id`
 * equals the feature's `seq_id`.
 *
 * @param {string} string - GFF file contents.
 * @returns {Array<{messages: Array, success: boolean, parsedSequence: object}>}
 *   One entry per sequence; `parsedSequence.features` is always an array
 *   (empty when the sequence has no features).
 */
function gffToJson(string) {
  const parsedItems = gff.parseStringSync(string);
  const sequences = [];
  // A Map keeps file-supplied ids such as "__proto__" or "constructor"
  // from colliding with properties inherited from Object.prototype.
  const featuresBySeqId = new Map();

  parsedItems.forEach((featureOrSeq) => {
    if (featureOrSeq.sequence) {
      sequences.push(featureOrSeq);
      return;
    }
    // Only the first entry of a feature array is read.
    const feature = Array.isArray(featureOrSeq) ? featureOrSeq[0] : undefined;
    if (!feature) {
      // Neither a sequence nor a usable feature: skip instead of throwing.
      return;
    }
    if (!featuresBySeqId.has(feature.seq_id)) {
      featuresBySeqId.set(feature.seq_id, []);
    }
    featuresBySeqId.get(feature.seq_id).push({
      name: feature.attributes?.ID?.[0],
      start: feature.start,
      end: feature.end,
      // NOTE(review): any strand other than "+" (including an unstranded
      // value) maps to -1 — confirm this is intended.
      strand: feature.strand === "+" ? 1 : -1,
      type: feature.type,
    });
  });

  return sequences.map((sequence) => ({
    messages: [],
    success: true,
    parsedSequence: {
      name: sequence.id,
      sequence: sequence.sequence,
      circular: false,
      features: featuresBySeqId.get(sequence.id) || [],
    },
  }));
}
42
+
43
+ export default gffToJson;
package/src/index.js ADDED
@@ -0,0 +1,23 @@
1
+ export { default as anyToJson } from "./anyToJson";
2
+ export { default as fastaToJson } from "./fastaToJson";
3
+ export { default as genbankToJson } from "./genbankToJson";
4
+ export { default as sbolXmlToJson } from "./sbolXmlToJson";
5
+ export { default as geneiousXmlToJson } from "./geneiousXmlToJson";
6
+ export { default as jbeiXmlToJson } from "./jbeiXmlToJson";
7
+ export { default as jsonToGenbank } from "./jsonToGenbank";
8
+ export {
9
+ default as ab1ToJson,
10
+ convertBasePosTraceToPerBpTrace
11
+ } from "./ab1ToJson";
12
+ export { default as jsonToFasta } from "./jsonToFasta";
13
+ export { default as snapgeneToJson } from "./snapgeneToJson";
14
+ export { default as jsonToBed } from "./jsonToBed";
15
+ export { default as cleanUpTeselagenJsonForExport } from "./utils/cleanUpTeselagenJsonForExport";
16
+ export {
17
+ default as searchWholeObjByName,
18
+ searchWholeObjByNameSimple,
19
+ searchWholeObjByNameSimpleArray
20
+ } from "./utils/searchWholeObjByName";
21
+ export { default as parseUracilFeatures } from "./utils/parseUracilFeatures";
22
+ export { default as jsonToJsonString } from "./jsonToJsonString";
23
+ export { default as validateSequenceArray } from "./utils/validateSequenceArray";
@@ -0,0 +1,109 @@
1
+ /* eslint-disable no-var*/
2
+
3
+ import validateSequenceArray from "./utils/validateSequenceArray";
4
+ import { XMLParser } from "fast-xml-parser";
5
+ import { filter } from "lodash";
6
+
7
+ //Here's what should be in the callback:
8
+ // {
9
+ // parsedSequence:
10
+ // messages:
11
+ // success:
12
+ // }
13
/**
 * Parse a JBEI XML document into an array of parse results.
 *
 * The single result has the shape `{ parsedSequence, messages, success }`.
 * If the parsed record has a non-empty `sequence`, the result array is
 * returned directly; otherwise (missing `seq:seq` root, parse failure,
 * empty sequence) the outcome is handed to
 * `validateSequenceArray(…, options)` and its return value is returned.
 *
 * @param {string} string - Raw XML text.
 * @param {object} [options] - Forwarded to `validateSequenceArray` and to
 *   `parseJbeiXml`; a falsy value is replaced by `{}`.
 * @returns {Promise<*>} Resolves to the results or to whatever
 *   `validateSequenceArray` returns.
 */
async function jbeiXmlToJson(string, options) {
  const opts = options || {};

  // All failure / empty outcomes are funnelled through the validator.
  const finish = (sequences) => validateSequenceArray(sequences, opts);

  try {
    const parsedXml = new XMLParser({}).parse(string);
    const jbeiSeq = parsedXml["seq:seq"];
    if (!jbeiSeq) {
      return finish({
        success: false,
        messages: ["Error: XML is not valid jbei format"],
      });
    }

    // Conversion errors are logged and recorded as a failed result.
    let outcome;
    try {
      outcome = {
        parsedSequence: parseJbeiXml(jbeiSeq, opts),
        messages: [],
        success: true,
      };
    } catch (e) {
      console.error("error:", e);
      console.error("error.stack: ", e.stack);
      outcome = {
        success: false,
        messages: ["Error while parsing JBEI format"],
      };
    }
    const resultArray = [outcome];

    // Prefer results that actually carry sequence data.
    const withSequence = filter(
      resultArray,
      (entry) => entry?.parsedSequence?.sequence?.length
    );
    return withSequence.length ? withSequence : finish(resultArray);
  } catch (e) {
    console.error(`e:`, e);
    return finish({
      success: false,
      messages: ["Error parsing jbei to JSON"],
    });
  }
}
63
/**
 * Convert the parsed `seq:seq` node of a JBEI XML file into
 * `{ sequence, circular, name, features }`.
 *
 * Each feature's `start` is the first location's `seq:genbankStart - 1`
 * and its `end` is the last location's `seq:end - 1`. `locations` is only
 * set when a feature has more than one location. `strand` is -1 when
 * `seq:complement` is truthy, otherwise 1.
 *
 * A record without features, or a feature without locations, no longer
 * throws: a missing or empty `seq:features` yields `features: []`, and a
 * feature without `seq:location` yields undefined `start` / `end`.
 *
 * @param {object} jbeiJson - Parsed `seq:seq` node.
 * @returns {{sequence: *, circular: *, name: *, features: Array<object>}}
 */
function parseJbeiXml(jbeiJson) {
  // Normalise "absent", "single item" and "list of items" to an array.
  // An empty string is treated as absent — presumably how the XML parser
  // reports an empty element; verify against the parser configuration.
  const toArray = (value) => {
    if (value === undefined || value === null || value === "") {
      return [];
    }
    return Array.isArray(value) ? value : [value];
  };

  const {
    "seq:sequence": sequence,
    "seq:name": name,
    "seq:circular": circular,
    "seq:features": featuresNode,
  } = jbeiJson;

  const features = toArray(featuresNode?.["seq:feature"]).map(
    ({
      "seq:complement": complement,
      "seq:label": label,
      "seq:type": type,
      "seq:location": jbeiLocations,
    }) => {
      const locations = toArray(jbeiLocations).map(
        ({ "seq:genbankStart": gbStart, "seq:end": normEnd }) => ({
          start: gbStart - 1,
          end: normEnd - 1,
        })
      );
      // Overall span runs from the first location's start to the last
      // location's end.
      const start = locations.length ? locations[0].start : undefined;
      const end = locations.length
        ? locations[locations.length - 1].end
        : undefined;

      return {
        start,
        end,
        locations: locations.length > 1 ? locations : undefined,
        name: label,
        type,
        strand: complement ? -1 : 1,
      };
    }
  );

  return { sequence, circular, name, features };
}
108
+
109
+ export default jbeiXmlToJson;
@@ -0,0 +1,39 @@
1
+ import { tidyUpSequenceData } from "@teselagen/sequence-utils";
2
+
3
/**
 * Serialise the features of a sequence as BED text.
 *
 * The output starts with a `track` line followed by one tab-separated line
 * per feature: name, start, end + 1, label, an empty score, strand,
 * two empty thick columns, a third empty column and the colour.
 *
 * @param {object} jsonSequence - Sequence data. Passed through
 *   `tidyUpSequenceData` unless `options.featuresOnly` is set.
 * @param {object} [options]
 * @param {boolean} [options.featuresOnly] - Use `jsonSequence` as-is.
 * @param {string} [options.sequenceName] - Name used in the track line and
 *   in the first column; defaults to a FASTA-style header built from the
 *   sequence's name, size, description and topology.
 * @returns {string} BED text; every line ends with "\n".
 */
function jsonToBed(jsonSequence, options = {}) {
  const sequenceInfo = options.featuresOnly
    ? jsonSequence
    : tidyUpSequenceData(jsonSequence);
  const { name, features, size, description, circular } = sequenceInfo;

  let sequenceNameToMatchFasta = "";
  sequenceNameToMatchFasta += `${name || "Untitled Sequence"}|`;
  sequenceNameToMatchFasta += "|" + size;
  sequenceNameToMatchFasta += description ? "|" + description : "";
  sequenceNameToMatchFasta += "|" + (circular ? "circular" : "linear");
  const sequenceNameToUse = options.sequenceName || sequenceNameToMatchFasta;

  let outString = `track name="${sequenceNameToUse}" description="${name} Annotations" itemRgb="On"\n`;

  // A sequence without a features list simply produces no feature lines.
  (features || []).forEach((feat) => {
    const { start, end, name: featName, type, forward, strand } = feat;
    const label = featName ? featName : type;

    let orientation;
    if (forward || strand === 1) {
      orientation = "+";
    } else if (forward === false || strand === -1) {
      // Only an explicit reverse marker counts; an unset `forward` must
      // not be read as "reverse", otherwise "." could never be emitted.
      orientation = "-";
    } else {
      // "." = no strand
      orientation = ".";
    }

    const color = type === "CDS" ? "230,88,0" : "";
    // chromStart is 0-based, chromEnd of the BED file format is not included in the feature
    // when there is no thick part, thickStart and thickEnd are usually set to the chromStart position
    outString += `${sequenceNameToUse}\t${start}\t${
      end + 1
    }\t${label}\t\t${orientation}\t\t\t${color}\n`;
  });
  return outString;
}
38
+
39
+ export default jsonToBed;
@@ -0,0 +1,33 @@
1
+ import { tidyUpSequenceData } from "@teselagen/sequence-utils";
2
+ import { mangleOrStripUrls } from "./utils/unmangleUrls";
3
+
4
/**
 * Serialise a sequence as FASTA text.
 *
 * The input is first normalised with `tidyUpSequenceData`. The header line
 * is `>` + name (or "Untitled Sequence") + `||` + size, optionally followed
 * by `|` + description (passed through `mangleOrStripUrls`), then `|` +
 * "circular" or "linear". The sequence body is wrapped at 80 characters.
 * When the data is flagged `isProtein` and has a `proteinSequence`, the
 * protein sequence and `proteinSize` are used instead.
 *
 * @param {object} jsonSequence - Sequence data to serialise.
 * @param {object} [options] - Forwarded to `mangleOrStripUrls`; a falsy
 *   value is replaced by `{}`.
 * @returns {string} FASTA text (no trailing newline after the sequence).
 */
export default function jsonToFasta(jsonSequence, options) {
  const {
    name,
    circular,
    description,
    size,
    sequence,
    isProtein,
    proteinSize,
    proteinSequence,
  } = tidyUpSequenceData(jsonSequence);

  const opts = options || {};

  // Protein data wins only when the protein sequence is actually present.
  const useProtein = Boolean(isProtein && proteinSequence);
  const seqToUse = useProtein ? proteinSequence : sequence;
  const sizeToUse = useProtein ? proteinSize : size;

  const headerParts = [
    `>${name || "Untitled Sequence"}|`,
    "|" + sizeToUse,
    description ? "|" + mangleOrStripUrls(description, opts) : "",
    "|" + (circular ? "circular" : "linear"),
  ];

  // Break the sequence into lines of at most 80 characters.
  const wrappedLines = seqToUse.match(/.{1,80}/g) || [];

  return headerParts.join("") + "\n" + wrappedLines.join("\n");
}