@teselagen/bio-parsers 0.1.26 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +24219 -39924
- package/index.mjs +24238 -39921
- package/index.umd.js +32684 -48391
- package/package.json +3 -7
- package/src/ab1ToJson.js +177 -0
- package/src/anyToJson.js +225 -0
- package/src/fastaToJson.js +101 -0
- package/src/genbankToJson.d.__ts +20 -0
- package/src/genbankToJson.js +688 -0
- package/src/geneiousXmlToJson.js +147 -0
- package/src/gffToJson.js +43 -0
- package/src/index.js +23 -0
- package/src/jbeiXmlToJson.js +109 -0
- package/src/jsonToBed.js +39 -0
- package/src/jsonToFasta.js +33 -0
- package/src/jsonToGenbank.js +423 -0
- package/src/jsonToJsonString.js +26 -0
- package/src/sbolXmlToJson.js +135 -0
- package/src/snapgeneToJson.js +245 -0
- package/src/utils/NameUtils.js +10 -0
- package/src/utils/ParserUtil.js +93 -0
- package/src/utils/cleanUpTeselagenJsonForExport.js +13 -0
- package/src/utils/constants.js +24 -0
- package/src/utils/convertOldSequenceDataToNewDataType.js +64 -0
- package/src/utils/createInitialSequence.js +14 -0
- package/src/utils/extractFileExtension.js +14 -0
- package/src/utils/flattenSequenceArray.js +17 -0
- package/src/utils/getArrayBufferFromFile.js +32 -0
- package/src/utils/isBrowser.js +1 -0
- package/src/utils/parseUracilFeatures.js +13 -0
- package/src/utils/pragmasAndTypes.js +21 -0
- package/src/utils/searchWholeObjByName.js +98 -0
- package/src/utils/splitStringIntoLines.js +13 -0
- package/src/utils/unmangleUrls.js +34 -0
- package/src/utils/validateSequence.js +349 -0
- package/src/utils/validateSequenceArray.js +20 -0
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
/* eslint-disable no-var*/
|
|
2
|
+
import { get, cloneDeep, map, each, isObject, flatMap } from "lodash";
|
|
3
|
+
import color from "color";
|
|
4
|
+
|
|
5
|
+
import pragmasAndTypes from "./utils/pragmasAndTypes.js";
|
|
6
|
+
import { mangleOrStripUrls } from "./utils/unmangleUrls.js";
|
|
7
|
+
import { reformatName } from "./utils/NameUtils.js";
|
|
8
|
+
import { getFeatureToColorMap } from "@teselagen/sequence-utils";
|
|
9
|
+
const StringUtil = {
  /** Trims white space at beginning and end of string
   * @param {string} line
   * @returns {string} line
   */
  trim: function (line) {
    return line.replace(/^\s+|\s+$/g, "");
  },

  /** Trims white space at beginning of string
   * @param {string} line
   * @returns {string} line
   */
  ltrim: function (line) {
    return line.replace(/^\s+/, "");
  },

  /** Trims white space at end of string
   * @param {string} line
   * @returns {string} line
   */
  rtrim: function (line) {
    return line.replace(/\s+$/, "");
  },

  /** Prepends padString until the string is at least `length` long
   * @param {string} line
   * @param {string} padString text prepended on every iteration; a
   *   multi-character pad may overshoot `length`
   * @param {number} length minimum length of the result
   * @returns {string} line, unchanged when already long enough
   */
  lpad: function (line, padString, length) {
    let str = line;
    // An empty pad can never lengthen the string; return instead of looping forever.
    if (String(padString) === "") return str;
    while (str.length < length) str = padString + str;
    return str;
  },

  /** Appends padString until the string is at least `length` long
   * @param {string} line
   * @param {string} padString text appended on every iteration; a
   *   multi-character pad may overshoot `length`
   * @param {number} length minimum length of the result
   * @returns {string} line, unchanged when already long enough
   */
  rpad: function (line, padString, length) {
    let str = line;
    // An empty pad can never lengthen the string; return instead of looping forever.
    if (String(padString) === "") return str;
    while (str.length < length) str = str + padString;
    return str;
  },
};
|
|
54
|
+
|
|
55
|
+
// Pairs of [path read from a digest part annotation, note key it is exported under].
// Entry order is the order in which the notes are added.
const DIGEST_PART_EXPORT_FIELD_MAP = Object.fromEntries([
  ["isDigestPart", "isDigestPart"],
  ["isDigestValid", "isDigestValid"],
  ["re5Prime.name", "re5PrimeName"],
  ["re5Prime.recognitionRegex", "re5PrimePattern"],
  ["re5PrimeOverhang", "re5PrimeOverhang"],
  ["re5PrimeOverhangStrand", "re5PrimeOverhangStrand"],
  ["re5PrimeRecognitionTypeCode", "re5PrimeRecognitionTypeCode"],
  ["re3Prime.name", "re3PrimeName"],
  ["re3Prime.recognitionRegex", "re3PrimePattern"],
  ["re3PrimeOverhang", "re3PrimeOverhang"],
  ["re3PrimeOverhangStrand", "re3PrimeOverhangStrand"],
  ["re3PrimeRecognitionTypeCode", "re3PrimeRecognitionTypeCode"],
]);
|
|
69
|
+
|
|
70
|
+
/**
 * Returns elements [start, end) of an array, concatenated into one string.
 * @param {Array} val
 * @param {number} start
 * @param {number} end
 * @returns {string}
 */
function cutUpArray(val, start, end) {
  const piece = val.slice(start, end);
  return piece.join("");
}
|
|
73
|
+
|
|
74
|
+
/**
 * Returns characters [start, end) of a string.
 * @param {string} val
 * @param {number} start
 * @param {number} end
 * @returns {string}
 */
function cutUpStr(val, start, end) {
  const piece = val.slice(start, end);
  return piece;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Serializes a sequence JSON object into GenBank-style text.
 *
 * The input is deep-cloned before any processing and the options are shallow
 * copied, so neither of the caller's objects is modified.
 *
 * @param {object} _serSeq sequence data (name, sequence, features, ...)
 * @param {object} [options] forwarded to the locus/feature helpers
 * @param {boolean} [options.reformatSeqName=true] pass false to skip reformatName
 * @returns {string|false} the text with lines joined by "\r\n"; false when no
 *   sequence was passed or when serialization threw (the error is logged)
 */
export default function (_serSeq, options) {
  // Copy first: reformatSeqName/isProtein are set below and must not leak
  // back into an options object the caller may reuse for another sequence.
  options = { ...options };
  options.reformatSeqName = options.reformatSeqName !== false;
  const serSeq = cloneDeep(_serSeq);
  if (!serSeq) return false;

  try {
    if (serSeq.isProtein || serSeq.type === "protein" || serSeq.type === "AA") {
      serSeq.isProtein = true;
      serSeq.sequence = serSeq.proteinSequence || serSeq.sequence;
      options.isProtein = true;
    }
    // Picked before the sequence is defaulted/normalised: anything that is not
    // a string is treated as an array of symbols.
    const cutUp = typeof serSeq.sequence === "string" ? cutUpStr : cutUpArray;
    if (!serSeq.sequence) serSeq.sequence = "";

    // NOTE(review): the keyword literals below are reproduced exactly as they
    // appear in the reviewed text; confirm their internal whitespace against
    // the published package before relying on column alignment.
    let lines = [];
    lines.push(createGenbankLocus(serSeq, options));
    if (serSeq.definition || serSeq.description) {
      lines.push(
        "DEFINITION " +
          mangleOrStripUrls(serSeq.definition || serSeq.description, options)
      );
    }

    if (serSeq.accession) {
      lines.push("ACCESSION " + serSeq.accession);
    }

    if (serSeq.version) {
      lines.push("VERSION " + serSeq.version);
    }

    if (serSeq.extraLines) {
      lines = lines.concat(serSeq.extraLines);
    }
    if (serSeq.comments) {
      serSeq.comments.forEach(function (comment) {
        lines.push("COMMENT " + comment);
      });
    }
    if (serSeq.teselagen_unique_id) {
      lines.push(
        "COMMENT teselagen_unique_id: " + serSeq.teselagen_unique_id
      );
    }
    if (serSeq.library) {
      lines.push("COMMENT library: " + serSeq.library);
    }

    let longestFeatureTypeLength = 15;

    // Fold every other annotation collection listed in pragmasAndTypes into
    // the feature list, tagging each annotation with its pragma note.
    serSeq.features = map(serSeq.features).concat(
      flatMap(pragmasAndTypes, ({ pragma, type }) => {
        return flatMap(serSeq[type], (ann) => {
          if (!isObject(ann)) {
            return [];
          }
          if (type === "primers") {
            ann.type = "primer_bind";
          }
          if (type === "parts" && ann.isDigestPart) {
            addDigestPartFieldsToNotes(ann);
          }
          ann.notes = pragma
            ? {
                ...ann.notes,
                pragma: [pragma],
              }
            : ann.notes;
          return ann;
        });
      })
    );
    // The feature-key column is widened to fit the longest type name.
    serSeq.features.forEach(({ type }) => {
      if (type && type.length > longestFeatureTypeLength) {
        longestFeatureTypeLength = type.length;
      }
    });

    let printedFeatureHeader;
    each(serSeq.features, function (feat) {
      // The header is only emitted when there is at least one feature.
      if (!printedFeatureHeader) {
        printedFeatureHeader = true;
        lines.push("FEATURES Location/Qualifiers");
      }
      lines.push(
        featureToGenbankString(feat, {
          ...options,
          featurePadLength: longestFeatureTypeLength + 1,
        })
      );
    });

    lines.push("ORIGIN ");
    // 60 symbols per line, in six groups of 10, prefixed by the 1-based index.
    for (let i = 0; i < serSeq.sequence.length; i = i + 60) {
      const line = [];
      const ind = StringUtil.lpad("" + (i + 1), " ", 9);
      line.push(ind);

      for (let j = i; j < i + 60; j = j + 10) {
        line.push(cutUp(serSeq.sequence, j, j + 10));
      }
      lines.push(line.join(" "));
    }

    lines.push("//");

    return lines.join("\r\n");
  } catch (e) {
    console.warn("Error processing sequence << Check jsonToGenbank.js");
    console.warn(serSeq);
    console.warn(e.stack);
    return false;
  }
}
|
|
197
|
+
|
|
198
|
+
/**
 * Builds the LOCUS header line for a sequence.
 *
 * Side effect: a sequence given as `{ symbols: "..." }` is replaced on serSeq
 * by an array of its characters.
 *
 * @param {object} serSeq sequence data; `sequence` must already be set
 * @param {object} options only `reformatSeqName` is read here
 * @returns {string} the LOCUS line, without a trailing newline
 */
function createGenbankLocus(serSeq, options) {
  if (serSeq.sequence.symbols) {
    serSeq.sequence = serSeq.sequence.symbols.split("");
  }

  // Molecule type column: empty for proteins; otherwise a double-stranded
  // sequence gets the bare type, and a single-stranded one prefers whatever
  // was recorded in sequenceTypeFromLocus.
  let moleculeType;
  if (serSeq.isProtein) {
    moleculeType = "";
  } else {
    const isRna = serSeq.type === "RNA";
    if (serSeq.doubleStranded) {
      moleculeType = isRna ? "RNA" : "DNA";
    } else {
      moleculeType = serSeq.sequenceTypeFromLocus ?? (isRna ? "ss-RNA" : "DNA");
    }
  }

  const stamp = getCurrentDateString();

  const rawName = serSeq.name || "Untitled_Sequence";
  const locusName = options.reformatSeqName ? reformatName(rawName) : rawName;

  const isLinear = !serSeq.circular || serSeq.circular === "0";
  // NOTE(review): the literals here are copied exactly as they appear in the
  // reviewed text; "linear " is one character shorter than "circular", so
  // confirm the intended padding against the published package.
  const topology = isLinear ? "linear " : "circular";

  return (
    StringUtil.rpad("LOCUS", " ", 12) +
    StringUtil.rpad(locusName, " ", 16) +
    " " +
    StringUtil.lpad(String(serSeq.sequence.length), " ", 11) +
    (serSeq.isProtein ? " aa " : " bp ") +
    // strand-type prefix is always blank
    StringUtil.lpad("", " ", 3) +
    StringUtil.rpad(moleculeType, " ", 6) +
    " " +
    topology +
    " " +
    StringUtil.rpad(serSeq.gbDivision || "SYN", " ", 1) +
    " " +
    stamp
  );
}
|
|
249
|
+
|
|
250
|
+
/**
 * Formats the current local date as DD-MON-YYYY, using the space-separated
 * fields of Date#toString (weekday, month, day, year, ...).
 * @returns {string} e.g. "05-JAN-2024"
 */
function getCurrentDateString() {
  const [, monthAbbrev, dayOfMonth, fullYear] = new Date()
    .toString()
    .split(" ");
  return [dayOfMonth, monthAbbrev.toUpperCase(), fullYear].join("-");
}
|
|
258
|
+
|
|
259
|
+
/**
 * Renders one feature qualifier line of the form `/name="value"`.
 * The value is passed through mangleOrStripUrls before being quoted.
 * @param {string} name qualifier key
 * @param {*} value qualifier value
 * @param {object} options forwarded to mangleOrStripUrls
 * @returns {string}
 */
function featureNoteInDataToGenbankString(name, value, options) {
  const slashColumn = StringUtil.lpad("/", " ", 22);
  const printable = mangleOrStripUrls(value, options);
  return slashColumn + name + '="' + printable + '"';
}
|
|
268
|
+
|
|
269
|
+
/**
 * Renders one feature as text: a key/location line, a /label qualifier, one
 * qualifier per note value, then optional /color and /labelColor qualifiers.
 *
 * Side effects on `feat`: type "primer" is rewritten to "primer_bind", derived
 * notes are added via addToNotes, and color/labelColor are overwritten from
 * the notes when present there.
 *
 * @param {object} feat the feature to render
 * @param {object} options reads featurePadLength, inclusive1BasedStart,
 *   inclusive1BasedEnd and isProtein; also forwarded to the qualifier renderer
 * @returns {string} the feature's lines joined with "\r\n"
 */
function featureToGenbankString(feat, options) {
  if (feat.type === "primer") {
    feat.type = "primer_bind";
  }
  const keyColumn =
    " " +
    StringUtil.rpad(feat.type || "misc_feature", " ", options.featurePadLength);

  // Shifts start/end by one unless the caller says they are already
  // 1-based inclusive, then renders "start..end".
  const renderRange = (range) =>
    getProteinStart(
      parseInt(range.start, 10) + (options.inclusive1BasedStart ? 0 : 1),
      options.isProtein
    ) +
    ".." +
    getProteinEnd(
      parseInt(range.end, 10) + (options.inclusive1BasedEnd ? 0 : 1),
      options.isProtein
    );

  let location;
  if (feat.locations && feat.locations.length > 1) {
    const ranges = feat.locations.map((loc) => renderRange(loc));
    location = "join(" + ranges.join(",") + ")";
  } else {
    // zero or one sub-location: the feature's own start/end are used
    location = "" + renderRange(feat);
  }
  if (feat.strand === -1) {
    location = "complement(" + location + ")";
  }

  const out = [
    keyColumn + location,
    featureNoteInDataToGenbankString(
      "label",
      feat.name || "Untitled Feature",
      options
    ),
  ];

  // Derived notes are stored on the feature so they are printed by the note
  // loop below.
  const isPrimerBind = feat.type === "primer_bind";
  if (feat.bases && feat.bases.length && isPrimerBind) {
    addToNotes(feat, "note", `sequence: ${feat.bases}`);
  }
  if (feat.primerBindsOn && isPrimerBind) {
    addToNotes(feat, "primerBindsOn", feat.primerBindsOn);
  }
  if (feat.overlapsSelf) {
    addToNotes(feat, "pragma", "overlapsSelf");
  }
  if (feat.arrowheadType) {
    // only BOTH and NONE (any letter case) are exported
    const direction = feat.arrowheadType.toUpperCase();
    if (direction === "BOTH" || direction === "NONE") {
      addToNotes(feat, "direction", direction);
    }
  }

  let noteMap = feat.notes;
  if (noteMap) {
    try {
      if (typeof noteMap === "string") {
        try {
          noteMap = JSON.parse(noteMap);
        } catch (e) {
          console.warn("Warning: Note incorrectly sent as a string.");
          noteMap = {}; // fall back to an empty note set
        }
      }
      for (const key of Object.keys(noteMap)) {
        // colors are emitted separately after the loop
        if (key === "color" || key === "labelColor") continue;
        const values = noteMap[key];
        if (!(values instanceof Array)) {
          console.warn("Warning: Note object expected array values");
          console.warn(noteMap);
          continue;
        }
        values.forEach((value) => {
          out.push(featureNoteInDataToGenbankString(key, value, options));
        });
      }
    } catch (e) {
      console.warn("Warning: Note cannot be processed");
    }
  }

  feat.color = feat.notes?.color || feat.color;
  feat.labelColor = feat.notes?.labelColor || feat.labelColor;

  if (feat.color) {
    const chosen = color.rgb(feat.color).string();
    const typeDefault = color
      .rgb(getFeatureToColorMap({ includeHidden: true })[feat.type])
      .string();
    // skip the color note when it matches the default for this feature type
    if (chosen !== typeDefault) {
      out.push(featureNoteInDataToGenbankString("color", feat.color, options));
    }
  }
  if (feat.labelColor) {
    out.push(
      featureNoteInDataToGenbankString("labelColor", feat.labelColor, options)
    );
  }

  return out.join("\r\n");
}
|
|
397
|
+
|
|
398
|
+
/**
 * Returns a start position unchanged, or, for proteins, floor((val + 2) / 3).
 * @param {number} val start position
 * @param {boolean} isProtein
 * @returns {number}
 */
function getProteinStart(val, isProtein) {
  return isProtein ? Math.floor((val + 2) / 3) : val;
}
|
|
402
|
+
/**
 * Returns an end position unchanged, or, for proteins, floor(val / 3).
 * @param {number} val end position
 * @param {boolean} isProtein
 * @returns {number}
 */
function getProteinEnd(val, isProtein) {
  return isProtein ? Math.floor(val / 3) : val;
}
|
|
406
|
+
|
|
407
|
+
/**
 * Appends a value to the array stored under `key` in an annotation's notes,
 * creating the notes object and the array as needed.
 *
 * Notes supplied as a JSON string are parsed into an object first (falling
 * back to an empty object when they do not parse to one), because a property
 * cannot be assigned on a string primitive. An existing non-array value under
 * `key` is wrapped in an array so the new value can be appended to it.
 *
 * @param {object} ann annotation whose `notes` is updated in place
 * @param {string} key note name
 * @param {*} val value to append
 */
function addToNotes(ann, key, val) {
  if (typeof ann.notes === "string") {
    let parsed;
    try {
      parsed = JSON.parse(ann.notes);
    } catch (e) {
      console.warn("Warning: Note incorrectly sent as a string.");
    }
    const isPlainRecord =
      parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    ann.notes = isPlainRecord ? parsed : {};
  }
  if (!ann.notes) {
    ann.notes = {};
  }
  if (!ann.notes[key]) {
    ann.notes[key] = [];
  } else if (!Array.isArray(ann.notes[key])) {
    // keep the existing scalar rather than failing on .push
    ann.notes[key] = [ann.notes[key]];
  }
  ann.notes[key].push(val);
}
|
|
416
|
+
|
|
417
|
+
/**
 * Copies every field listed in DIGEST_PART_EXPORT_FIELD_MAP from the
 * annotation into its notes. Each value is stringified, so a missing field is
 * stored as the text "undefined".
 * @param {object} ann annotation to update in place
 */
function addDigestPartFieldsToNotes(ann) {
  const fieldPairs = Object.entries(DIGEST_PART_EXPORT_FIELD_MAP);
  for (const [sourcePath, noteKey] of fieldPairs) {
    const rawValue = get(ann, sourcePath);
    addToNotes(ann, noteKey, String(rawValue));
  }
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { omit } from "lodash";
|
|
2
|
+
|
|
3
|
+
import cleanUpTeselagenJsonForExport from "./utils/cleanUpTeselagenJsonForExport"
|
|
4
|
+
import { tidyUpSequenceData } from "@teselagen/sequence-utils";
|
|
5
|
+
|
|
6
|
+
/**
 * Converts sequence JSON to a JSON string: the input is tidied with
 * annotations as arrays, cleaned up for export, and stripped of extraneous
 * fields before being stringified.
 * @param {*} incomingJson
 * @returns {string} the cleaned-up sequence JSON with extraneous fields omitted
 */
export default function jsonToJsonString(incomingJson) {
  const fieldsToDrop = [
    "sequenceFragments",
    "sequenceFeatures",
    "cutsites",
    "orfs",
    "filteredParts",
    "filteredFeatures",
  ];
  const tidied = tidyUpSequenceData(incomingJson, {
    annotationsAsObjects: false,
  });
  const exportable = cleanUpTeselagenJsonForExport(tidied);
  return JSON.stringify(omit(exportable, fieldsToDrop));
}
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/* eslint-disable no-var*/
|
|
2
|
+
import validateSequenceArray from "./utils/validateSequenceArray";
|
|
3
|
+
import searchWholeObjByName, {
|
|
4
|
+
searchWholeObjByNameSimple,
|
|
5
|
+
searchWholeObjByNameSimpleArray,
|
|
6
|
+
} from "./utils/searchWholeObjByName";
|
|
7
|
+
|
|
8
|
+
import { XMLParser } from "fast-xml-parser";
|
|
9
|
+
import { flatMap, get } from "lodash";
|
|
10
|
+
|
|
11
|
+
//Here's what should be in the callback:
|
|
12
|
+
// {
|
|
13
|
+
// parsedSequence:
|
|
14
|
+
// messages:
|
|
15
|
+
// success:
|
|
16
|
+
// }
|
|
17
|
+
/**
 * Parses an SBOL XML string into one result per DnaComponent found.
 *
 * Each result has the shape { parsedSequence, messages, success }. A component
 * that fails to parse produces a { success: false, messages } entry and
 * parsing continues with the next component. All results are passed through
 * validateSequenceArray before being returned.
 *
 * @param {string} string the XML text
 * @param {object} [options] forwarded to parseSbolJson and validateSequenceArray
 * @returns {Promise<*>} whatever validateSequenceArray returns
 */
async function sbolXmlToJson(string, options) {
  options = options || {};
  // every exit path validates the results before handing them back
  const onFileParsed = function (sequences) {
    return validateSequenceArray(sequences, options);
  };
  try {
    const result = new XMLParser({
      // every tag is parsed as an array, hence the "[0]" lookups downstream
      isArray: () => true,
      ignoreAttributes: false,
    }).parse(string);
    const sbolJsonMatches = searchWholeObjByName("DnaComponent", result);
    if (!sbolJsonMatches[0]) {
      return onFileParsed({
        success: false,
        messages: ["Error: XML is not valid Jbei or Sbol format"],
      });
    }

    const components = sbolJsonMatches[0].value;
    const resultArray = [];
    for (let i = 0; i < components.length; i++) {
      let parsedSequence;
      try {
        parsedSequence = parseSbolJson(components[i], options);
      } catch (e) {
        console.error("error:", e);
        console.error("error.stack: ", e.stack);
        resultArray.push({
          success: false,
          messages: ["Error while parsing Sbol format"],
        });
        // Skip the success bookkeeping below: there is no parsed sequence to
        // inspect, and touching it would abort the whole file.
        continue;
      }
      const messages = [];
      if (parsedSequence.features.length > 0) {
        messages.push("SBOL feature types are stored in feature notes");
      }
      resultArray.push({
        parsedSequence,
        messages,
        success: true,
      });
    }
    return onFileParsed(resultArray);
  } catch (e) {
    return onFileParsed({
      success: false,
      messages: ["Error parsing XML to JSON"],
    });
  }
}
|
|
76
|
+
// Converts SBOL formats.
|
|
77
|
+
// * Specifications for SBOL can be found at http://www.sbolstandard.org/specification/core-data-model
|
|
78
|
+
// *
|
|
79
|
+
// * The hierarchy of the components in an SBOL object is:
|
|
80
|
+
// *
|
|
81
|
+
// * The hierarchy is Collection -> DnaComponent -> DnaSequence
|
|
82
|
+
// *
|
|
83
|
+
// * Check for each level and parse downward from there.
|
|
84
|
+
// tnrtodo: this should be tested with a wider variety of sbol file types!
|
|
85
|
+
/**
 * Converts one parsed DnaComponent node into sequence JSON.
 *
 * The sequence is always reported as linear. Every annotation that carries a
 * SequenceAnnotation becomes a "misc_feature"; annotations without one are
 * skipped. The feature's "@_ns2:about" / "@_ns2:resource" attribute values are
 * collected under notes.about.
 *
 * @param {object} sbolJson DnaComponent node (all children are arrays)
 * @param {object} [options]
 * @param {boolean} [options.inclusive1BasedStart] keep bioStart as-is instead of subtracting 1
 * @param {boolean} [options.inclusive1BasedEnd] keep bioEnd as-is instead of subtracting 1
 * @returns {{circular: boolean, sequence: *, name: *, features: object[]}}
 */
function parseSbolJson(sbolJson, options = {}) {
  // prefer the human-readable name, fall back to the displayId
  const name = get(sbolJson, "name[0]") || get(sbolJson, "displayId[0]");
  const startOffset = options.inclusive1BasedStart ? 0 : 1;
  const endOffset = options.inclusive1BasedEnd ? 0 : 1;

  return {
    // circular: get(sbolJson, "seq:circular[0]"), //tnrtodo this needs to be changed
    circular: false,
    sequence: get(sbolJson, "dnaSequence[0].DnaSequence[0].nucleotides"),
    name,
    features: flatMap(sbolJson.annotation, function (annotation) {
      const feature = get(annotation, "SequenceAnnotation[0]");
      // Return an empty array so nothing is added; returning undefined would
      // leave an undefined entry in the features list.
      if (!feature) return [];

      const about = [
        ...searchWholeObjByNameSimpleArray("@_ns2:about", feature),
        ...searchWholeObjByNameSimpleArray("@_ns2:resource", feature),
      ].filter(Boolean);
      // "about" is only present when at least one value was found
      const notes = about.length ? { about } : {};

      return {
        name:
          searchWholeObjByNameSimple("name", feature) ||
          searchWholeObjByNameSimple("displayId", feature),
        notes,
        type: "misc_feature", // sbol represents the feature type in what we are parsing as notes as the URL is difficult to follow
        start: parseInt(get(feature, "bioStart[0]") - startOffset, 10),
        end: parseInt(get(feature, "bioEnd[0]") - endOffset, 10),
        strand: get(feature, "strand[0]"), //+ or -
      };
    }),
  };
}
|
|
134
|
+
|
|
135
|
+
export default sbolXmlToJson;
|