@teselagen/bio-parsers 0.3.8 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +330 -0
- package/index.js +49 -47
- package/index.mjs +49 -47
- package/index.umd.js +49 -47
- package/package.json +1 -2
- package/src/ab1ToJson.js +13 -18
- package/src/anyToJson.js +6 -6
- package/src/genbankToJson.js +21 -20
- package/src/geneiousXmlToJson.js +3 -6
- package/src/gffToJson.js +5 -5
- package/src/jbeiXmlToJson.js +10 -13
- package/src/jsonToBed.js +4 -3
- package/src/jsonToFasta.js +4 -2
- package/src/jsonToGenbank.js +13 -12
- package/src/jsonToJsonString.js +1 -1
- package/src/sbolXmlToJson.js +9 -9
- package/src/snapgeneToJson.js +14 -12
- package/src/utils/NameUtils.js +1 -1
- package/src/utils/ParserUtil.js +81 -83
- package/src/utils/cleanUpTeselagenJsonForExport.js +8 -9
- package/src/utils/constants.js +22 -22
- package/src/utils/convertOldSequenceDataToNewDataType.js +5 -6
- package/src/utils/createInitialSequence.js +13 -11
- package/src/utils/extractFileExtension.js +11 -13
- package/src/utils/flattenSequenceArray.js +14 -14
- package/src/utils/getArrayBufferFromFile.js +5 -5
- package/src/utils/isBrowser.js +2 -1
- package/src/utils/parseUracilFeatures.js +2 -2
- package/src/utils/pragmasAndTypes.js +3 -2
- package/src/utils/searchWholeObjByName.js +3 -3
- package/src/utils/splitStringIntoLines.js +13 -12
- package/src/utils/validateSequence.js +9 -9
- package/src/utils/validateSequenceArray.js +17 -17
- package/utils/getArrayBufferFromFile.d.ts +1 -1
package/src/utils/constants.js
CHANGED
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
export const untitledSequenceName =
|
|
1
|
+
export const untitledSequenceName = "Untitled Sequence";
|
|
2
2
|
|
|
3
3
|
export const gbDivisions = {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
4
|
+
// https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html#GenBankDivisionB
|
|
5
|
+
PRI: true, //- primate sequences
|
|
6
|
+
ROD: true, //- rodent sequences
|
|
7
|
+
MAM: true, //- other mammalian sequences
|
|
8
|
+
VRT: true, //- other vertebrate sequences
|
|
9
|
+
INV: true, //- invertebrate sequences
|
|
10
|
+
PLN: true, //- plant, fungal, and algal sequences
|
|
11
|
+
BCT: true, //- bacterial sequences
|
|
12
|
+
VRL: true, //- viral sequences
|
|
13
|
+
PHG: true, //- bacteriophage sequences
|
|
14
|
+
SYN: true, //- synthetic sequences
|
|
15
|
+
UNA: true, //- unannotated sequences
|
|
16
|
+
EST: true, //- EST sequences (expressed sequence tags)
|
|
17
|
+
PAT: true, //- patent sequences
|
|
18
|
+
STS: true, //- STS sequences (sequence tagged sites)
|
|
19
|
+
GSS: true, //- GSS sequences (genome survey sequences)
|
|
20
|
+
HTG: true, //- HTG sequences (high-throughput genomic sequences)
|
|
21
|
+
HTC: true, //- unfinished high-throughput cDNA sequencing
|
|
22
|
+
ENV: true, //- environmental sampling sequences
|
|
23
|
+
CON: true //- sequence assembly instructions on how to construct contigs from multiple GenBank records.
|
|
24
|
+
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { isRangeWithinRange } from
|
|
1
|
+
import { isRangeWithinRange } from "@teselagen/range-utils";
|
|
2
2
|
|
|
3
3
|
export default function convertOldSequenceDataToNewDataType(
|
|
4
4
|
oldTeselagenJson,
|
|
@@ -7,7 +7,7 @@ export default function convertOldSequenceDataToNewDataType(
|
|
|
7
7
|
if (opts && opts.splitLocations) {
|
|
8
8
|
//after the file has been parsed, but before it's been saved, check for features with multiple locations and split them
|
|
9
9
|
oldTeselagenJson &&
|
|
10
|
-
oldTeselagenJson.features.forEach(function(feature) {
|
|
10
|
+
oldTeselagenJson.features.forEach(function (feature) {
|
|
11
11
|
if (feature.locations && feature.locations[0]) {
|
|
12
12
|
if (feature.locations.length > 1) {
|
|
13
13
|
for (let i = 1; i < feature.locations.length; i++) {
|
|
@@ -31,11 +31,10 @@ export default function convertOldSequenceDataToNewDataType(
|
|
|
31
31
|
}
|
|
32
32
|
delete feature.locations;
|
|
33
33
|
});
|
|
34
|
-
|
|
35
34
|
} else {
|
|
36
35
|
//mange locations
|
|
37
36
|
oldTeselagenJson &&
|
|
38
|
-
oldTeselagenJson.features.forEach(function(feature) {
|
|
37
|
+
oldTeselagenJson.features.forEach(function (feature) {
|
|
39
38
|
if (feature.locations && feature.locations[0]) {
|
|
40
39
|
//set the new starts and ends
|
|
41
40
|
feature.start = feature.locations[0].start;
|
|
@@ -53,7 +52,7 @@ export default function convertOldSequenceDataToNewDataType(
|
|
|
53
52
|
delete feature.locations;
|
|
54
53
|
}
|
|
55
54
|
} else {
|
|
56
|
-
delete feature.locations
|
|
55
|
+
delete feature.locations;
|
|
57
56
|
}
|
|
58
57
|
}
|
|
59
58
|
});
|
|
@@ -61,4 +60,4 @@ export default function convertOldSequenceDataToNewDataType(
|
|
|
61
60
|
if (Array.isArray(oldTeselagenJson.sequence)) {
|
|
62
61
|
oldTeselagenJson.sequence = oldTeselagenJson.sequence.join("");
|
|
63
62
|
}
|
|
64
|
-
}
|
|
63
|
+
}
|
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
import { untitledSequenceName } from "./constants";
|
|
2
2
|
|
|
3
3
|
export default function createInitialSequence(options) {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
}
|
|
4
|
+
options = options || {};
|
|
5
|
+
return {
|
|
6
|
+
messages: [],
|
|
7
|
+
success: true,
|
|
8
|
+
parsedSequence: {
|
|
9
|
+
features: [],
|
|
10
|
+
name:
|
|
11
|
+
(options.fileName && options.fileName.replace(/\.[^/.]+$/, "")) ||
|
|
12
|
+
untitledSequenceName,
|
|
13
|
+
sequence: ""
|
|
14
|
+
}
|
|
15
|
+
};
|
|
16
|
+
}
|
|
@@ -1,14 +1,12 @@
|
|
|
1
1
|
export default function extractFileExtension(name) {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
};
|
|
14
|
-
|
|
2
|
+
if (typeof name === "string") {
|
|
3
|
+
let ext = "";
|
|
4
|
+
const match = name.match(/\.(\w+)$/);
|
|
5
|
+
if (match && match[1]) {
|
|
6
|
+
ext = match[1];
|
|
7
|
+
}
|
|
8
|
+
return ext;
|
|
9
|
+
} else {
|
|
10
|
+
return "";
|
|
11
|
+
}
|
|
12
|
+
}
|
|
@@ -1,17 +1,17 @@
|
|
|
1
|
-
import convertOldSequenceDataToNewDataType from
|
|
1
|
+
import convertOldSequenceDataToNewDataType from "./convertOldSequenceDataToNewDataType.js";
|
|
2
2
|
|
|
3
3
|
export default function flattenSequenceArray(parsingResultArray, opts) {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
}
|
|
9
|
-
//should convert the old data type to the new data type (flattened sequence)
|
|
10
|
-
parsingResultArray.forEach(function(parsingResult) {
|
|
11
|
-
if (parsingResult.success) {
|
|
12
|
-
convertOldSequenceDataToNewDataType(parsingResult.parsedSequence, opts);
|
|
13
|
-
}
|
|
14
|
-
});
|
|
4
|
+
if (parsingResultArray) {
|
|
5
|
+
if (!Array.isArray(parsingResultArray)) {
|
|
6
|
+
//wrap the parsingResult into an array if it isn't one already
|
|
7
|
+
parsingResultArray = [parsingResultArray];
|
|
15
8
|
}
|
|
16
|
-
|
|
17
|
-
|
|
9
|
+
//should convert the old data type to the new data type (flattened sequence)
|
|
10
|
+
parsingResultArray.forEach(function (parsingResult) {
|
|
11
|
+
if (parsingResult.success) {
|
|
12
|
+
convertOldSequenceDataToNewDataType(parsingResult.parsedSequence, opts);
|
|
13
|
+
}
|
|
14
|
+
});
|
|
15
|
+
}
|
|
16
|
+
return parsingResultArray;
|
|
17
|
+
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Buffer } from
|
|
2
|
-
import isBrowser from
|
|
1
|
+
import { Buffer } from "buffer";
|
|
2
|
+
import isBrowser from "./isBrowser";
|
|
3
3
|
|
|
4
4
|
export default function getArrayBufferFromFile(file) {
|
|
5
5
|
if (!isBrowser) {
|
|
@@ -9,11 +9,11 @@ export default function getArrayBufferFromFile(file) {
|
|
|
9
9
|
|
|
10
10
|
const reader = new window.FileReader();
|
|
11
11
|
return new Promise((resolve, reject) => {
|
|
12
|
-
reader.onload =
|
|
12
|
+
reader.onload = e => {
|
|
13
13
|
resolve(e.target.result);
|
|
14
14
|
};
|
|
15
|
-
reader.onerror =
|
|
16
|
-
console.error(
|
|
15
|
+
reader.onerror = err => {
|
|
16
|
+
console.error("err:", err);
|
|
17
17
|
reject(err);
|
|
18
18
|
};
|
|
19
19
|
reader.readAsArrayBuffer(
|
package/src/utils/isBrowser.js
CHANGED
|
@@ -1 +1,2 @@
|
|
|
1
|
-
export default typeof window !==
|
|
1
|
+
export default typeof window !== "undefined" &&
|
|
2
|
+
typeof window.document !== "undefined";
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
export default [
|
|
2
|
-
{
|
|
2
|
+
{
|
|
3
|
+
//primers don't need a pragma because they already have a feature type of primer_bind
|
|
3
4
|
type: "primers"
|
|
4
5
|
},
|
|
5
6
|
{
|
|
@@ -18,4 +19,4 @@ export default [
|
|
|
18
19
|
pragma: "j5_lineage_annotation",
|
|
19
20
|
type: "lineageAnnotations"
|
|
20
21
|
}
|
|
21
|
-
]
|
|
22
|
+
];
|
|
@@ -16,7 +16,7 @@ class Match {
|
|
|
16
16
|
logValue() {
|
|
17
17
|
const val = this.value;
|
|
18
18
|
// if value is an object then just toString it
|
|
19
|
-
const isPrimitive =
|
|
19
|
+
const isPrimitive = x => Object(x) !== x;
|
|
20
20
|
return isPrimitive(val) || Array.isArray(val) ? val : {}.toString.call(val);
|
|
21
21
|
}
|
|
22
22
|
|
|
@@ -39,7 +39,7 @@ export default function searchWholeObjByName(what, where) {
|
|
|
39
39
|
|
|
40
40
|
const matches = [];
|
|
41
41
|
matches.log = function () {
|
|
42
|
-
this.forEach(
|
|
42
|
+
this.forEach(m => m.log());
|
|
43
43
|
};
|
|
44
44
|
|
|
45
45
|
// a non-recursive solution to avoid call stack limits
|
|
@@ -77,7 +77,7 @@ export default function searchWholeObjByName(what, where) {
|
|
|
77
77
|
path: `${path}.${prop}`,
|
|
78
78
|
obj: where,
|
|
79
79
|
prop,
|
|
80
|
-
type
|
|
80
|
+
type
|
|
81
81
|
});
|
|
82
82
|
matches.push(match);
|
|
83
83
|
}
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
export default function splitStringIntoLines(string) {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
}
|
|
2
|
+
let lines = [];
|
|
3
|
+
if (string === "") {
|
|
4
|
+
return lines;
|
|
5
|
+
} else {
|
|
6
|
+
lines = string.split(/\r?\n/);
|
|
7
|
+
// eslint-disable-next-line eqeqeq
|
|
8
|
+
if (lines.length == 1) {
|
|
9
|
+
//tnr: not sure why this check is being made... but keeping it in because it is probably doing something
|
|
10
|
+
lines = string.split("\\n");
|
|
11
|
+
}
|
|
12
|
+
return lines;
|
|
13
|
+
}
|
|
14
|
+
}
|
|
@@ -40,7 +40,7 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
40
40
|
"isSingleStrandedDNA",
|
|
41
41
|
"isDoubleStrandedRNA",
|
|
42
42
|
"isProtein"
|
|
43
|
-
].forEach(
|
|
43
|
+
].forEach(k => {
|
|
44
44
|
if (options[k] !== undefined && sequence[k] === undefined) {
|
|
45
45
|
sequence[k] = options[k];
|
|
46
46
|
}
|
|
@@ -110,7 +110,7 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
110
110
|
//todo: this logic won't catch every case of RNA, so we should probably handle RNA conversion at another level..
|
|
111
111
|
const temp = sequence.sequence;
|
|
112
112
|
if (!sequence.isOligo) {
|
|
113
|
-
sequence.sequence = sequence.sequence.replace(/u/gi,
|
|
113
|
+
sequence.sequence = sequence.sequence.replace(/u/gi, u =>
|
|
114
114
|
u === "U" ? "T" : "t"
|
|
115
115
|
);
|
|
116
116
|
}
|
|
@@ -299,16 +299,16 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
299
299
|
}
|
|
300
300
|
if (
|
|
301
301
|
feature.notes.pragma &&
|
|
302
|
-
some(feature.notes.pragma,
|
|
302
|
+
some(feature.notes.pragma, p => p === "overlapsSelf")
|
|
303
303
|
) {
|
|
304
304
|
feature.overlapsSelf = true;
|
|
305
305
|
feature.notes.pragma = filter(
|
|
306
306
|
feature.notes.pragma,
|
|
307
|
-
|
|
307
|
+
p => p !== "overlapsSelf"
|
|
308
308
|
);
|
|
309
309
|
}
|
|
310
310
|
feature.notes.note &&
|
|
311
|
-
some(feature.notes.note,
|
|
311
|
+
some(feature.notes.note, n => {
|
|
312
312
|
if (
|
|
313
313
|
n &&
|
|
314
314
|
typeof n === "string" &&
|
|
@@ -317,7 +317,7 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
317
317
|
//remove it after we're parsed it out
|
|
318
318
|
feature.notes.note = filter(
|
|
319
319
|
feature.notes.note,
|
|
320
|
-
|
|
320
|
+
p => p && !p.toLowerCase().includes("sequence:")
|
|
321
321
|
);
|
|
322
322
|
if (feature.notes.note.length === 0) {
|
|
323
323
|
delete feature.notes.note;
|
|
@@ -333,7 +333,7 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
333
333
|
});
|
|
334
334
|
|
|
335
335
|
feature.notes.primerBindsOn &&
|
|
336
|
-
some(feature.notes.primerBindsOn,
|
|
336
|
+
some(feature.notes.primerBindsOn, n => {
|
|
337
337
|
if (n) {
|
|
338
338
|
feature.primerBindsOn = n;
|
|
339
339
|
delete feature.notes.primerBindsOn;
|
|
@@ -344,7 +344,7 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
344
344
|
if (
|
|
345
345
|
options[`accept${upperFirst(type)}`] !== false && //acceptParts, acceptWarnings,
|
|
346
346
|
feature.notes.pragma &&
|
|
347
|
-
some(feature.notes.pragma,
|
|
347
|
+
some(feature.notes.pragma, p => p === pragma)
|
|
348
348
|
) {
|
|
349
349
|
if (!sequence[type]) {
|
|
350
350
|
sequence[type] = []; //initialize an empty array if necessary
|
|
@@ -356,7 +356,7 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
356
356
|
}
|
|
357
357
|
}
|
|
358
358
|
forEach(feature.notes, (noteArray, key) => {
|
|
359
|
-
feature.notes[key] = map(noteArray,
|
|
359
|
+
feature.notes[key] = map(noteArray, note => {
|
|
360
360
|
return unmangleUrls(note);
|
|
361
361
|
});
|
|
362
362
|
});
|
|
@@ -1,20 +1,20 @@
|
|
|
1
|
-
import validateSequence from
|
|
1
|
+
import validateSequence from "./validateSequence.js";
|
|
2
2
|
|
|
3
3
|
export default function validateSequenceArray(parsingResultArray, options) {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
}
|
|
9
|
-
//should convert the old data type to the new data type (flattened sequence)
|
|
10
|
-
parsingResultArray.forEach(function(parsingResult) {
|
|
11
|
-
if (parsingResult.success) {
|
|
12
|
-
const res = validateSequence(parsingResult.parsedSequence, options);
|
|
13
|
-
//add any validation error messages to the parsed sequence results messages
|
|
14
|
-
parsingResult.messages = parsingResult.messages.concat(res.messages);
|
|
15
|
-
parsingResult.parsedSequence = res.validatedAndCleanedSequence;
|
|
16
|
-
}
|
|
17
|
-
});
|
|
4
|
+
if (parsingResultArray) {
|
|
5
|
+
if (!Array.isArray(parsingResultArray)) {
|
|
6
|
+
//wrap the parsingResult into an array if it isn't one already
|
|
7
|
+
parsingResultArray = [parsingResultArray];
|
|
18
8
|
}
|
|
19
|
-
|
|
20
|
-
|
|
9
|
+
//should convert the old data type to the new data type (flattened sequence)
|
|
10
|
+
parsingResultArray.forEach(function (parsingResult) {
|
|
11
|
+
if (parsingResult.success) {
|
|
12
|
+
const res = validateSequence(parsingResult.parsedSequence, options);
|
|
13
|
+
//add any validation error messages to the parsed sequence results messages
|
|
14
|
+
parsingResult.messages = parsingResult.messages.concat(res.messages);
|
|
15
|
+
parsingResult.parsedSequence = res.validatedAndCleanedSequence;
|
|
16
|
+
}
|
|
17
|
+
});
|
|
18
|
+
}
|
|
19
|
+
return parsingResultArray;
|
|
20
|
+
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export default function getArrayBufferFromFile(file: any):
|
|
1
|
+
export default function getArrayBufferFromFile(file: any): Promise<any> | ArrayBuffer;
|