@teselagen/bio-parsers 0.3.8 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +330 -0
- package/index.js +70 -68
- package/index.mjs +70 -68
- package/index.umd.js +70 -68
- package/package.json +2 -3
- package/src/ab1ToJson.js +13 -18
- package/src/anyToJson.js +6 -6
- package/src/genbankToJson.js +21 -20
- package/src/geneiousXmlToJson.js +3 -6
- package/src/gffToJson.js +5 -5
- package/src/jbeiXmlToJson.js +10 -13
- package/src/jsonToBed.js +4 -3
- package/src/jsonToFasta.js +4 -2
- package/src/jsonToGenbank.js +13 -12
- package/src/jsonToJsonString.js +1 -1
- package/src/sbolXmlToJson.js +9 -9
- package/src/snapgeneToJson.js +14 -12
- package/src/utils/NameUtils.js +1 -1
- package/src/utils/ParserUtil.js +81 -83
- package/src/utils/cleanUpTeselagenJsonForExport.js +8 -9
- package/src/utils/constants.js +22 -22
- package/src/utils/convertOldSequenceDataToNewDataType.js +5 -6
- package/src/utils/createInitialSequence.js +13 -11
- package/src/utils/extractFileExtension.js +11 -13
- package/src/utils/flattenSequenceArray.js +14 -14
- package/src/utils/getArrayBufferFromFile.js +5 -5
- package/src/utils/isBrowser.js +2 -1
- package/src/utils/parseUracilFeatures.js +2 -2
- package/src/utils/pragmasAndTypes.js +3 -2
- package/src/utils/searchWholeObjByName.js +3 -3
- package/src/utils/splitStringIntoLines.js +13 -12
- package/src/utils/validateSequence.js +9 -9
- package/src/utils/validateSequenceArray.js +17 -17
- package/utils/getArrayBufferFromFile.d.ts +1 -1
package/src/gffToJson.js
CHANGED
|
@@ -6,7 +6,7 @@ function gffToJson(string) {
|
|
|
6
6
|
const results = [];
|
|
7
7
|
const sequences = [];
|
|
8
8
|
const features = {};
|
|
9
|
-
arrayOfThings.forEach(
|
|
9
|
+
arrayOfThings.forEach(featureOrSeq => {
|
|
10
10
|
if (featureOrSeq.sequence) {
|
|
11
11
|
sequences.push(featureOrSeq);
|
|
12
12
|
} else {
|
|
@@ -19,11 +19,11 @@ function gffToJson(string) {
|
|
|
19
19
|
start: feature.start,
|
|
20
20
|
end: feature.end,
|
|
21
21
|
strand: feature.strand === "+" ? 1 : -1,
|
|
22
|
-
type: feature.type
|
|
22
|
+
type: feature.type
|
|
23
23
|
});
|
|
24
24
|
}
|
|
25
25
|
});
|
|
26
|
-
sequences.forEach(
|
|
26
|
+
sequences.forEach(sequence => {
|
|
27
27
|
const sequenceId = sequence.id;
|
|
28
28
|
const result = {
|
|
29
29
|
messages: [],
|
|
@@ -32,8 +32,8 @@ function gffToJson(string) {
|
|
|
32
32
|
name: sequenceId,
|
|
33
33
|
sequence: sequence.sequence,
|
|
34
34
|
circular: false,
|
|
35
|
-
features: features[sequence.id]
|
|
36
|
-
}
|
|
35
|
+
features: features[sequence.id]
|
|
36
|
+
}
|
|
37
37
|
};
|
|
38
38
|
results.push(result);
|
|
39
39
|
});
|
package/src/jbeiXmlToJson.js
CHANGED
|
@@ -25,14 +25,14 @@ async function jbeiXmlToJson(string, options) {
|
|
|
25
25
|
if (!jbeiSeq) {
|
|
26
26
|
return onFileParsed({
|
|
27
27
|
success: false,
|
|
28
|
-
messages: ["Error: XML is not valid jbei format"]
|
|
28
|
+
messages: ["Error: XML is not valid jbei format"]
|
|
29
29
|
});
|
|
30
30
|
}
|
|
31
31
|
|
|
32
32
|
const response = {
|
|
33
33
|
parsedSequence: null,
|
|
34
34
|
messages: [],
|
|
35
|
-
success: true
|
|
35
|
+
success: true
|
|
36
36
|
};
|
|
37
37
|
try {
|
|
38
38
|
response.parsedSequence = parseJbeiXml(jbeiSeq, options);
|
|
@@ -42,21 +42,18 @@ async function jbeiXmlToJson(string, options) {
|
|
|
42
42
|
console.error("error.stack: ", e.stack);
|
|
43
43
|
resultArray.push({
|
|
44
44
|
success: false,
|
|
45
|
-
messages: ["Error while parsing JBEI format"]
|
|
45
|
+
messages: ["Error while parsing JBEI format"]
|
|
46
46
|
});
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
const toRet = filter(
|
|
50
|
-
resultArray,
|
|
51
|
-
(r) => r?.parsedSequence?.sequence?.length
|
|
52
|
-
);
|
|
49
|
+
const toRet = filter(resultArray, r => r?.parsedSequence?.sequence?.length);
|
|
53
50
|
if (toRet.length) return toRet;
|
|
54
51
|
return onFileParsed(resultArray);
|
|
55
52
|
} catch (e) {
|
|
56
53
|
console.error(`e:`, e);
|
|
57
54
|
return onFileParsed({
|
|
58
55
|
success: false,
|
|
59
|
-
messages: ["Error parsing jbei to JSON"]
|
|
56
|
+
messages: ["Error parsing jbei to JSON"]
|
|
60
57
|
});
|
|
61
58
|
}
|
|
62
59
|
}
|
|
@@ -65,7 +62,7 @@ function parseJbeiXml(jbeiJson) {
|
|
|
65
62
|
"seq:sequence": sequence,
|
|
66
63
|
"seq:name": name,
|
|
67
64
|
"seq:circular": circular,
|
|
68
|
-
"seq:features": { "seq:feature": features }
|
|
65
|
+
"seq:features": { "seq:feature": features }
|
|
69
66
|
} = jbeiJson;
|
|
70
67
|
return {
|
|
71
68
|
sequence,
|
|
@@ -76,7 +73,7 @@ function parseJbeiXml(jbeiJson) {
|
|
|
76
73
|
"seq:complement": complement,
|
|
77
74
|
"seq:label": label,
|
|
78
75
|
"seq:type": type,
|
|
79
|
-
"seq:location": jbeiLocations
|
|
76
|
+
"seq:location": jbeiLocations
|
|
80
77
|
}) => {
|
|
81
78
|
let start, end;
|
|
82
79
|
const locs = Array.isArray(jbeiLocations)
|
|
@@ -88,7 +85,7 @@ function parseJbeiXml(jbeiJson) {
|
|
|
88
85
|
if (i === locs.length - 1) end = normEnd - 1;
|
|
89
86
|
return {
|
|
90
87
|
start: gbStart - 1,
|
|
91
|
-
end: normEnd - 1
|
|
88
|
+
end: normEnd - 1
|
|
92
89
|
};
|
|
93
90
|
}
|
|
94
91
|
);
|
|
@@ -99,10 +96,10 @@ function parseJbeiXml(jbeiJson) {
|
|
|
99
96
|
locations: locations.length > 1 ? locations : undefined,
|
|
100
97
|
name: label,
|
|
101
98
|
type,
|
|
102
|
-
strand: complement ? -1 : 1
|
|
99
|
+
strand: complement ? -1 : 1
|
|
103
100
|
};
|
|
104
101
|
}
|
|
105
|
-
)
|
|
102
|
+
)
|
|
106
103
|
};
|
|
107
104
|
}
|
|
108
105
|
|
package/src/jsonToBed.js
CHANGED
|
@@ -15,7 +15,7 @@ function jsonToBed(jsonSequence, options = {}) {
|
|
|
15
15
|
let outString = "";
|
|
16
16
|
outString += `track name="${sequenceNameToUse}" description="${name} Annotations" itemRgb="On"\n`;
|
|
17
17
|
|
|
18
|
-
features.forEach(function(feat) {
|
|
18
|
+
features.forEach(function (feat) {
|
|
19
19
|
const { start, end, name, type, forward, strand } = feat;
|
|
20
20
|
const label = name ? name : type;
|
|
21
21
|
let orientation;
|
|
@@ -30,8 +30,9 @@ function jsonToBed(jsonSequence, options = {}) {
|
|
|
30
30
|
const color = type === "CDS" ? "230,88,0" : "";
|
|
31
31
|
// chromStart is 0-based, chromEnd of the BED file format is not included in the feature
|
|
32
32
|
// when there is no thick part, thickStart and thickEnd are usually set to the chromStart position
|
|
33
|
-
outString += `${sequenceNameToUse}\t${start}\t${
|
|
34
|
-
1
|
|
33
|
+
outString += `${sequenceNameToUse}\t${start}\t${
|
|
34
|
+
end + 1
|
|
35
|
+
}\t${label}\t\t${orientation}\t\t\t${color}\n`;
|
|
35
36
|
});
|
|
36
37
|
return outString;
|
|
37
38
|
}
|
package/src/jsonToFasta.js
CHANGED
|
@@ -11,7 +11,7 @@ export default function jsonToFasta(jsonSequence, options) {
|
|
|
11
11
|
sequence,
|
|
12
12
|
isProtein,
|
|
13
13
|
proteinSize,
|
|
14
|
-
proteinSequence
|
|
14
|
+
proteinSequence
|
|
15
15
|
} = cleanedData;
|
|
16
16
|
|
|
17
17
|
options = options || {};
|
|
@@ -25,7 +25,9 @@ export default function jsonToFasta(jsonSequence, options) {
|
|
|
25
25
|
let fastaString = "";
|
|
26
26
|
fastaString += `>${name || "Untitled Sequence"}|`;
|
|
27
27
|
fastaString += "|" + sizeToUse;
|
|
28
|
-
fastaString += description
|
|
28
|
+
fastaString += description
|
|
29
|
+
? "|" + mangleOrStripUrls(description, options)
|
|
30
|
+
: "";
|
|
29
31
|
fastaString += "|" + (circular ? "circular" : "linear");
|
|
30
32
|
fastaString += "\n";
|
|
31
33
|
fastaString += (seqToUse.match(/.{1,80}/g) || []).join("\n");
|
package/src/jsonToGenbank.js
CHANGED
|
@@ -49,7 +49,7 @@ const StringUtil = {
|
|
|
49
49
|
let str = line;
|
|
50
50
|
while (str.length < length) str = str + padString;
|
|
51
51
|
return str;
|
|
52
|
-
}
|
|
52
|
+
}
|
|
53
53
|
};
|
|
54
54
|
|
|
55
55
|
const DIGEST_PART_EXPORT_FIELD_MAP = {
|
|
@@ -64,7 +64,7 @@ const DIGEST_PART_EXPORT_FIELD_MAP = {
|
|
|
64
64
|
"re3Prime.recognitionRegex": "re3PrimePattern",
|
|
65
65
|
re3PrimeOverhang: "re3PrimeOverhang",
|
|
66
66
|
re3PrimeOverhangStrand: "re3PrimeOverhangStrand",
|
|
67
|
-
re3PrimeRecognitionTypeCode: "re3PrimeRecognitionTypeCode"
|
|
67
|
+
re3PrimeRecognitionTypeCode: "re3PrimeRecognitionTypeCode"
|
|
68
68
|
};
|
|
69
69
|
|
|
70
70
|
function cutUpArray(val, start, end) {
|
|
@@ -129,7 +129,7 @@ export default function (_serSeq, options) {
|
|
|
129
129
|
|
|
130
130
|
serSeq.features = map(serSeq.features).concat(
|
|
131
131
|
flatMap(pragmasAndTypes, ({ pragma, type }) => {
|
|
132
|
-
return flatMap(serSeq[type],
|
|
132
|
+
return flatMap(serSeq[type], ann => {
|
|
133
133
|
if (!isObject(ann)) {
|
|
134
134
|
return [];
|
|
135
135
|
}
|
|
@@ -142,7 +142,7 @@ export default function (_serSeq, options) {
|
|
|
142
142
|
ann.notes = pragma
|
|
143
143
|
? {
|
|
144
144
|
...ann.notes,
|
|
145
|
-
pragma: [pragma]
|
|
145
|
+
pragma: [pragma]
|
|
146
146
|
}
|
|
147
147
|
: ann.notes;
|
|
148
148
|
return ann;
|
|
@@ -164,7 +164,7 @@ export default function (_serSeq, options) {
|
|
|
164
164
|
lines.push(
|
|
165
165
|
featureToGenbankString(feat, {
|
|
166
166
|
...options,
|
|
167
|
-
featurePadLength: longestFeatureTypeLength + 1
|
|
167
|
+
featurePadLength: longestFeatureTypeLength + 1
|
|
168
168
|
})
|
|
169
169
|
);
|
|
170
170
|
});
|
|
@@ -200,22 +200,23 @@ function createGenbankLocus(serSeq, options) {
|
|
|
200
200
|
serSeq.sequence = serSeq.sequence.symbols.split("");
|
|
201
201
|
}
|
|
202
202
|
|
|
203
|
-
let tmp;
|
|
204
203
|
let dnaType;
|
|
205
204
|
if (serSeq.isProtein) {
|
|
206
205
|
dnaType = "";
|
|
207
206
|
} else if (serSeq.type === "RNA") {
|
|
208
|
-
dnaType = serSeq?.doubleStranded
|
|
207
|
+
dnaType = serSeq?.doubleStranded
|
|
208
|
+
? "RNA"
|
|
209
|
+
: serSeq?.sequenceTypeFromLocus ?? "ss-RNA";
|
|
209
210
|
} else {
|
|
210
|
-
dnaType = serSeq?.doubleStranded
|
|
211
|
+
dnaType = serSeq?.doubleStranded
|
|
212
|
+
? "DNA"
|
|
213
|
+
: serSeq?.sequenceTypeFromLocus ?? "DNA";
|
|
211
214
|
}
|
|
212
215
|
const date = getCurrentDateString();
|
|
213
216
|
|
|
214
217
|
let line = StringUtil.rpad("LOCUS", " ", 12);
|
|
215
218
|
let nameToUse = serSeq.name || "Untitled_Sequence";
|
|
216
|
-
nameToUse = options.reformatSeqName
|
|
217
|
-
? reformatName(nameToUse)
|
|
218
|
-
: nameToUse;
|
|
219
|
+
nameToUse = options.reformatSeqName ? reformatName(nameToUse) : nameToUse;
|
|
219
220
|
line += StringUtil.rpad(nameToUse, " ", 16);
|
|
220
221
|
line += " "; // T.H line 2778 of GenbankFormat.as col 29 space
|
|
221
222
|
line += StringUtil.lpad(String(serSeq.sequence.length), " ", 11);
|
|
@@ -223,7 +224,7 @@ function createGenbankLocus(serSeq, options) {
|
|
|
223
224
|
// if (strandType !== "") {
|
|
224
225
|
// tmp = strandType + "-";
|
|
225
226
|
// } else {
|
|
226
|
-
tmp = "";
|
|
227
|
+
const tmp = "";
|
|
227
228
|
// }
|
|
228
229
|
line += StringUtil.lpad(tmp, " ", 3);
|
|
229
230
|
line += StringUtil.rpad(dnaType, " ", 6);
|
package/src/jsonToJsonString.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { omit } from "lodash";
|
|
2
2
|
|
|
3
|
-
import cleanUpTeselagenJsonForExport from "./utils/cleanUpTeselagenJsonForExport"
|
|
3
|
+
import cleanUpTeselagenJsonForExport from "./utils/cleanUpTeselagenJsonForExport";
|
|
4
4
|
import { tidyUpSequenceData } from "@teselagen/sequence-utils";
|
|
5
5
|
|
|
6
6
|
/**
|
package/src/sbolXmlToJson.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import validateSequenceArray from "./utils/validateSequenceArray";
|
|
3
3
|
import searchWholeObjByName, {
|
|
4
4
|
searchWholeObjByNameSimple,
|
|
5
|
-
searchWholeObjByNameSimpleArray
|
|
5
|
+
searchWholeObjByNameSimpleArray
|
|
6
6
|
} from "./utils/searchWholeObjByName";
|
|
7
7
|
|
|
8
8
|
import { XMLParser } from "fast-xml-parser";
|
|
@@ -23,12 +23,12 @@ async function sbolXmlToJson(string, options) {
|
|
|
23
23
|
let response = {
|
|
24
24
|
parsedSequence: null,
|
|
25
25
|
messages: [],
|
|
26
|
-
success: true
|
|
26
|
+
success: true
|
|
27
27
|
};
|
|
28
28
|
try {
|
|
29
29
|
const result = new XMLParser({
|
|
30
30
|
isArray: () => true,
|
|
31
|
-
ignoreAttributes: false
|
|
31
|
+
ignoreAttributes: false
|
|
32
32
|
}).parse(string);
|
|
33
33
|
const sbolJsonMatches = searchWholeObjByName("DnaComponent", result);
|
|
34
34
|
if (sbolJsonMatches[0]) {
|
|
@@ -38,7 +38,7 @@ async function sbolXmlToJson(string, options) {
|
|
|
38
38
|
response = {
|
|
39
39
|
parsedSequence: null,
|
|
40
40
|
messages: [],
|
|
41
|
-
success: true
|
|
41
|
+
success: true
|
|
42
42
|
};
|
|
43
43
|
response.parsedSequence = parseSbolJson(
|
|
44
44
|
sbolJsonMatches[0].value[i],
|
|
@@ -49,7 +49,7 @@ async function sbolXmlToJson(string, options) {
|
|
|
49
49
|
console.error("error.stack: ", e.stack);
|
|
50
50
|
resultArray.push({
|
|
51
51
|
success: false,
|
|
52
|
-
messages: ["Error while parsing Sbol format"]
|
|
52
|
+
messages: ["Error while parsing Sbol format"]
|
|
53
53
|
});
|
|
54
54
|
}
|
|
55
55
|
if (response.parsedSequence.features.length > 0) {
|
|
@@ -63,13 +63,13 @@ async function sbolXmlToJson(string, options) {
|
|
|
63
63
|
} else {
|
|
64
64
|
return onFileParsed({
|
|
65
65
|
success: false,
|
|
66
|
-
messages: ["Error: XML is not valid Jbei or Sbol format"]
|
|
66
|
+
messages: ["Error: XML is not valid Jbei or Sbol format"]
|
|
67
67
|
});
|
|
68
68
|
}
|
|
69
69
|
} catch (e) {
|
|
70
70
|
return onFileParsed({
|
|
71
71
|
success: false,
|
|
72
|
-
messages: ["Error parsing XML to JSON"]
|
|
72
|
+
messages: ["Error parsing XML to JSON"]
|
|
73
73
|
});
|
|
74
74
|
}
|
|
75
75
|
}
|
|
@@ -124,11 +124,11 @@ function parseSbolJson(sbolJson, options) {
|
|
|
124
124
|
end: parseInt(
|
|
125
125
|
get(feature, "bioEnd[0]") - (options.inclusive1BasedEnd ? 0 : 1)
|
|
126
126
|
),
|
|
127
|
-
strand: get(feature, "strand[0]")
|
|
127
|
+
strand: get(feature, "strand[0]") //+ or -
|
|
128
128
|
// notes: feature['seq:label'],
|
|
129
129
|
};
|
|
130
130
|
}
|
|
131
|
-
})
|
|
131
|
+
})
|
|
132
132
|
};
|
|
133
133
|
}
|
|
134
134
|
|
package/src/snapgeneToJson.js
CHANGED
|
@@ -13,7 +13,7 @@ import { get } from "lodash";
|
|
|
13
13
|
import { XMLParser } from "fast-xml-parser";
|
|
14
14
|
import extractFileExtension from "./utils/extractFileExtension";
|
|
15
15
|
|
|
16
|
-
const Buffer = buffer.Buffer
|
|
16
|
+
const Buffer = buffer.Buffer;
|
|
17
17
|
|
|
18
18
|
async function snapgeneToJson(fileObj, options = {}) {
|
|
19
19
|
try {
|
|
@@ -59,7 +59,7 @@ async function snapgeneToJson(fileObj, options = {}) {
|
|
|
59
59
|
isDNA: !!(await unpack(2, "H")) && !isProtein,
|
|
60
60
|
exportVersion: await unpack(2, "H"),
|
|
61
61
|
importVersion: await unpack(2, "H"),
|
|
62
|
-
features: []
|
|
62
|
+
features: []
|
|
63
63
|
};
|
|
64
64
|
while (offset <= arrayBuffer.byteLength) {
|
|
65
65
|
// # READ THE WHOLE FILE, BLOCK BY BLOCK, UNTIL THE END
|
|
@@ -109,24 +109,24 @@ async function snapgeneToJson(fileObj, options = {}) {
|
|
|
109
109
|
0: [1, "NONE"], // non-directional feature (in that case, the attribute is generally absent altogether)
|
|
110
110
|
1: [1, "TOP"], // forward strand
|
|
111
111
|
2: [-1, "BOTTOM"], // reverse strand
|
|
112
|
-
3: [1, "BOTH"]
|
|
112
|
+
3: [1, "BOTH"] // bi-directional feature
|
|
113
113
|
};
|
|
114
114
|
const xml = await read(block_size, "utf8");
|
|
115
115
|
const b = new XMLParser({
|
|
116
116
|
ignoreAttributes: false,
|
|
117
117
|
attributeNamePrefix: "",
|
|
118
|
-
isArray:
|
|
118
|
+
isArray: name => name === "Feature" || name === "Segment"
|
|
119
119
|
}).parse(xml);
|
|
120
120
|
const { Features: { Feature = [] } = {} } = b;
|
|
121
121
|
data.features = [];
|
|
122
|
-
Feature.forEach(
|
|
122
|
+
Feature.forEach(feat => {
|
|
123
123
|
const { directionality, Segment = [], name, type } = feat;
|
|
124
124
|
// let color;
|
|
125
125
|
let maxStart = 0;
|
|
126
126
|
let maxEnd = 0;
|
|
127
127
|
const locations =
|
|
128
128
|
Segment &&
|
|
129
|
-
Segment.map(
|
|
129
|
+
Segment.map(seg => {
|
|
130
130
|
if (!seg) throw new Error("invalid feature definition");
|
|
131
131
|
const { range } = seg;
|
|
132
132
|
// color = seg.color;
|
|
@@ -137,7 +137,7 @@ async function snapgeneToJson(fileObj, options = {}) {
|
|
|
137
137
|
maxEnd = Math.max(maxEnd, end);
|
|
138
138
|
return {
|
|
139
139
|
start,
|
|
140
|
-
end
|
|
140
|
+
end
|
|
141
141
|
};
|
|
142
142
|
});
|
|
143
143
|
|
|
@@ -146,9 +146,11 @@ async function snapgeneToJson(fileObj, options = {}) {
|
|
|
146
146
|
type,
|
|
147
147
|
...(locations?.length > 1 && { locations }),
|
|
148
148
|
strand: directionality ? strand_dict[directionality][0] : 1,
|
|
149
|
-
arrowheadType: directionality
|
|
149
|
+
arrowheadType: directionality
|
|
150
|
+
? strand_dict[directionality][1]
|
|
151
|
+
: "NONE",
|
|
150
152
|
start: maxStart,
|
|
151
|
-
end: maxEnd
|
|
153
|
+
end: maxEnd
|
|
152
154
|
// color,
|
|
153
155
|
});
|
|
154
156
|
});
|
|
@@ -183,8 +185,8 @@ async function snapgeneToJson(fileObj, options = {}) {
|
|
|
183
185
|
return [
|
|
184
186
|
{
|
|
185
187
|
success: false,
|
|
186
|
-
messages: ["Import Error: Invalid File"]
|
|
187
|
-
}
|
|
188
|
+
messages: ["Import Error: Invalid File"]
|
|
189
|
+
}
|
|
188
190
|
];
|
|
189
191
|
}
|
|
190
192
|
}
|
|
@@ -193,7 +195,7 @@ function getStartAndEndFromRangeString(rangestring) {
|
|
|
193
195
|
const [start, end] = rangestring.split("-");
|
|
194
196
|
return {
|
|
195
197
|
start: start - 1,
|
|
196
|
-
end: end - 1
|
|
198
|
+
end: end - 1
|
|
197
199
|
};
|
|
198
200
|
}
|
|
199
201
|
|
package/src/utils/NameUtils.js
CHANGED
package/src/utils/ParserUtil.js
CHANGED
|
@@ -1,93 +1,91 @@
|
|
|
1
1
|
const ParserUtil = {};
|
|
2
|
-
ParserUtil.postProcessGenbankFeature = function(feat) {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
// feat.start = null;
|
|
70
|
-
// feat.end = null;
|
|
71
|
-
// }
|
|
72
|
-
|
|
73
|
-
return feat;
|
|
2
|
+
ParserUtil.postProcessGenbankFeature = function (feat) {
|
|
3
|
+
let name = null;
|
|
4
|
+
// let nameIndex = null;
|
|
5
|
+
|
|
6
|
+
// let hasName = false;
|
|
7
|
+
let usingLabel = false;
|
|
8
|
+
let usingGene = false;
|
|
9
|
+
|
|
10
|
+
for (let j = 0; j < feat.notes.length; j++) {
|
|
11
|
+
const note = feat.notes[j];
|
|
12
|
+
const key = note.name;
|
|
13
|
+
const value = note.value;
|
|
14
|
+
|
|
15
|
+
// SET THE LABEL FIELD. DO NOT STORE AS AN ATTRIBUTE
|
|
16
|
+
|
|
17
|
+
if (this.isAGenbankFeatureLabel(key)) {
|
|
18
|
+
// Priority for name attributes is: 'label' > 'gene' > 'organism'.
|
|
19
|
+
// We check to see if the current name is from a lower-priority
|
|
20
|
+
// attribute. If it is, we store it as an attribute and then
|
|
21
|
+
// replace it with the current higher-priority attribute.
|
|
22
|
+
|
|
23
|
+
if (key === "label") {
|
|
24
|
+
// Label has top priority.
|
|
25
|
+
|
|
26
|
+
name = value;
|
|
27
|
+
// nameIndex = j;
|
|
28
|
+
|
|
29
|
+
usingLabel = true;
|
|
30
|
+
} else if (key === "gene") {
|
|
31
|
+
// If we're not using the label for the name, use the
|
|
32
|
+
// current 'gene' attribute. If we are using label for
|
|
33
|
+
// the name, just save the current attribute as a normal
|
|
34
|
+
// attribute.
|
|
35
|
+
if (!usingLabel) {
|
|
36
|
+
name = value;
|
|
37
|
+
// nameIndex = j;
|
|
38
|
+
|
|
39
|
+
usingGene = true;
|
|
40
|
+
}
|
|
41
|
+
} else if (!usingLabel && !usingGene) {
|
|
42
|
+
// If we don't have a label from either a 'gene' or a
|
|
43
|
+
// 'label' field, use the current field as the name.
|
|
44
|
+
|
|
45
|
+
name = value;
|
|
46
|
+
// nameIndex = j;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// hasName = true;
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
feat.name = name || "";
|
|
54
|
+
// if(nameIndex !== null) {
|
|
55
|
+
// feat.notes.splice(nameIndex, 1);
|
|
56
|
+
// }
|
|
57
|
+
//
|
|
58
|
+
// if(feat.locations.length > 0) {
|
|
59
|
+
// var loc = feat.locations[0];
|
|
60
|
+
// feat.start = loc.start;
|
|
61
|
+
// feat.end = loc.end;
|
|
62
|
+
// }
|
|
63
|
+
// else {
|
|
64
|
+
// feat.start = null;
|
|
65
|
+
// feat.end = null;
|
|
66
|
+
// }
|
|
67
|
+
|
|
68
|
+
return feat;
|
|
74
69
|
};
|
|
75
70
|
|
|
76
|
-
|
|
77
|
-
|
|
78
71
|
/**
|
|
79
72
|
* isAFeatureLabel
|
|
80
73
|
* @param {string} name Name of a attribute or qualifier
|
|
81
74
|
* @return {boolean} isALabel
|
|
82
75
|
*/
|
|
83
|
-
ParserUtil.isAGenbankFeatureLabel = function(name) {
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
76
|
+
ParserUtil.isAGenbankFeatureLabel = function (name) {
|
|
77
|
+
if (
|
|
78
|
+
name === "label" ||
|
|
79
|
+
name === "name" ||
|
|
80
|
+
name === "ApEinfo_label" ||
|
|
81
|
+
name === "note" ||
|
|
82
|
+
name === "gene" ||
|
|
83
|
+
name === "organism" ||
|
|
84
|
+
name === "locus_tag"
|
|
85
|
+
) {
|
|
86
|
+
return true;
|
|
87
|
+
} else {
|
|
88
|
+
return false;
|
|
89
|
+
}
|
|
92
90
|
};
|
|
93
91
|
export default ParserUtil;
|
package/src/utils/cleanUpTeselagenJsonForExport.js
CHANGED
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
import { cloneDeep, forEach } from "lodash";
|
|
2
2
|
|
|
3
3
|
export default function cleanUpTeselagenJsonForExport(tgJson) {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
4
|
+
const seqData = cloneDeep(tgJson);
|
|
5
|
+
if (!seqData) return seqData;
|
|
6
|
+
delete seqData.cutsites;
|
|
7
|
+
delete seqData.orfs;
|
|
8
|
+
forEach(seqData.translations, t => {
|
|
9
|
+
delete t.aminoAcids;
|
|
10
|
+
});
|
|
11
|
+
return seqData;
|
|
12
12
|
}
|
|
13
|
-
|
package/src/utils/constants.js
CHANGED
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
export const untitledSequenceName =
|
|
1
|
+
export const untitledSequenceName = "Untitled Sequence";
|
|
2
2
|
|
|
3
3
|
export const gbDivisions = {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
4
|
+
// https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html#GenBankDivisionB
|
|
5
|
+
PRI: true, //- primate sequences
|
|
6
|
+
ROD: true, //- rodent sequences
|
|
7
|
+
MAM: true, //- other mammalian sequences
|
|
8
|
+
VRT: true, //- other vertebrate sequences
|
|
9
|
+
INV: true, //- invertebrate sequences
|
|
10
|
+
PLN: true, //- plant, fungal, and algal sequences
|
|
11
|
+
BCT: true, //- bacterial sequences
|
|
12
|
+
VRL: true, //- viral sequences
|
|
13
|
+
PHG: true, //- bacteriophage sequences
|
|
14
|
+
SYN: true, //- synthetic sequences
|
|
15
|
+
UNA: true, //- unannotated sequences
|
|
16
|
+
EST: true, //- EST sequences (expressed sequence tags)
|
|
17
|
+
PAT: true, //- patent sequences
|
|
18
|
+
STS: true, //- STS sequences (sequence tagged sites)
|
|
19
|
+
GSS: true, //- GSS sequences (genome survey sequences)
|
|
20
|
+
HTG: true, //- HTG sequences (high-throughput genomic sequences)
|
|
21
|
+
HTC: true, //- unfinished high-throughput cDNA sequencing
|
|
22
|
+
ENV: true, //- environmental sampling sequences
|
|
23
|
+
CON: true //- sequence assembly instructions on how to construct contigs from multiple GenBank records.
|
|
24
|
+
};
|