@teselagen/bio-parsers 0.1.27 → 0.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +24219 -39924
- package/index.mjs +24238 -39921
- package/index.umd.js +32684 -48391
- package/package.json +5 -8
- package/src/ab1ToJson.js +177 -0
- package/src/anyToJson.js +225 -0
- package/src/fastaToJson.js +101 -0
- package/src/genbankToJson.d.__ts +20 -0
- package/src/genbankToJson.js +688 -0
- package/src/geneiousXmlToJson.js +147 -0
- package/src/gffToJson.js +43 -0
- package/src/index.js +23 -0
- package/src/jbeiXmlToJson.js +109 -0
- package/src/jsonToBed.js +39 -0
- package/src/jsonToFasta.js +33 -0
- package/src/jsonToGenbank.js +423 -0
- package/src/jsonToJsonString.js +26 -0
- package/src/sbolXmlToJson.js +135 -0
- package/src/snapgeneToJson.js +245 -0
- package/src/utils/NameUtils.js +10 -0
- package/src/utils/ParserUtil.js +93 -0
- package/src/utils/cleanUpTeselagenJsonForExport.js +13 -0
- package/src/utils/constants.js +24 -0
- package/src/utils/convertOldSequenceDataToNewDataType.js +64 -0
- package/src/utils/createInitialSequence.js +14 -0
- package/src/utils/extractFileExtension.js +14 -0
- package/src/utils/flattenSequenceArray.js +17 -0
- package/src/utils/getArrayBufferFromFile.js +32 -0
- package/src/utils/isBrowser.js +1 -0
- package/src/utils/parseUracilFeatures.js +13 -0
- package/src/utils/pragmasAndTypes.js +21 -0
- package/src/utils/searchWholeObjByName.js +98 -0
- package/src/utils/splitStringIntoLines.js +13 -0
- package/src/utils/unmangleUrls.js +34 -0
- package/src/utils/validateSequence.js +349 -0
- package/src/utils/validateSequenceArray.js +20 -0
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
//note: Huge credit and thanks go to IsaacLuo from whose python repository this code was adapted
|
|
2
|
+
// https://github.com/IsaacLuo/SnapGeneFileReader
|
|
3
|
+
|
|
4
|
+
import bufferpack from "bufferpack";
|
|
5
|
+
import { StringDecoder } from "string_decoder";
|
|
6
|
+
import buffer from "buffer";
|
|
7
|
+
|
|
8
|
+
import getArrayBufferFromFile from "./utils/getArrayBufferFromFile";
|
|
9
|
+
import createInitialSequence from "./utils/createInitialSequence";
|
|
10
|
+
import validateSequenceArray from "./utils/validateSequenceArray";
|
|
11
|
+
import flattenSequenceArray from "./utils/flattenSequenceArray";
|
|
12
|
+
import { get } from "lodash";
|
|
13
|
+
import { XMLParser } from "fast-xml-parser";
|
|
14
|
+
import extractFileExtension from "./utils/extractFileExtension";
|
|
15
|
+
|
|
16
|
+
const Buffer = buffer.Buffer
|
|
17
|
+
|
|
18
|
+
/**
 * Parse a SnapGene binary file into the TeselaGen sequence JSON format.
 *
 * Layout consumed by this parser: one leading byte, a 4-byte length that must
 * equal 14, the 8-byte ASCII title "SnapGene", three 2-byte header fields,
 * and then a series of blocks. Each block is a 1-byte type, a 4-byte size and
 * `size` bytes of payload. Only sequence blocks (types 0 and 21), the
 * features block (type 10) and the notes block (type 6) are interpreted;
 * every other block is skipped.
 *
 * @param {*} fileObj - Source handed to `getArrayBufferFromFile`.
 * @param {Object} [options] - Parser options. `fileName` is used to detect a
 *   "prot" extension (which forces `isProtein` and also sets
 *   `options.isProtein = true`); the object is forwarded to the helpers.
 * @returns {Promise<Array>} The result of `validateSequenceArray`, or a
 *   single `{ success: false, messages }` entry when parsing fails.
 */
async function snapgeneToJson(fileObj, options = {}) {
  try {
    const returnVal = createInitialSequence(options);
    const arrayBuffer = await getArrayBufferFromFile(fileObj);
    const ext = extractFileExtension(options.fileName);
    let isProtein = options.isProtein;
    if (ext && /^(prot)$/.test(ext)) {
      isProtein = true;
      options.isProtein = true;
    }
    let offset = 0;

    // Consume `size` bytes from the current offset. Returns a decoded string
    // when `fmt` (a StringDecoder encoding) is given, otherwise a Buffer.
    const read = (size, fmt) => {
      const chunk = Buffer.from(arrayBuffer.slice(offset, size + offset));
      offset += size;
      return fmt ? new StringDecoder(fmt).write(chunk) : chunk;
    };

    // Consume `size` bytes and unpack them with the given bufferpack format
    // (prefixed with ">"). Returns undefined when bufferpack cannot unpack,
    // e.g. because fewer than `size` bytes were left.
    const unpack = (size, mode) => {
      const unpacked = bufferpack.unpack(">" + mode, read(size));
      return unpacked === undefined ? undefined : unpacked[0];
    };

    read(1); //read the first byte
    // READ THE DOCUMENT PROPERTIES
    const length = unpack(4, "I");
    const title = read(8, "ascii");
    if (length !== 14 || title !== "SnapGene") {
      throw new Error("Wrong format for a SnapGene file !");
    }
    const data = {
      ...returnVal.parsedSequence,
      isProtein,
      isDNA: !!unpack(2, "H") && !isProtein,
      exportVersion: unpack(2, "H"),
      importVersion: unpack(2, "H"),
      features: [],
    };

    // [strand, arrowheadType] keyed by the feature's `directionality` attribute
    const strand_dict = {
      0: [1, "NONE"], // non-directional feature (in that case, the attribute is generally absent altogether)
      1: [1, "TOP"], // forward strand
      2: [-1, "BOTTOM"], // reverse strand
      3: [1, "BOTH"], // bi-directional feature
    };

    // READ THE WHOLE FILE, BLOCK BY BLOCK, UNTIL THE END
    // Strictly "<": at offset === byteLength there is no block left to read.
    while (offset < arrayBuffer.byteLength) {
      const blockType = ord(read(1));
      // block type table
      //  0: dna sequence
      //  1: compressed DNA
      //  2: unknown
      //  3: unknown
      //  5: primers
      //  6: notes
      //  7: history tree
      //  8: additional sequence properties segment
      //  9: file Description
      //  10: features
      //  11: history node
      //  13: unknown
      //  16: alignable sequence
      //  17: alignable sequence
      //  18: sequence trace
      //  19: Uracil Positions
      //  20: custom DNA colors

      const block_size = unpack(4, "I");
      if (block_size === undefined) {
        // Truncated block header: stop instead of processing a block with an
        // undefined size (which could overwrite an already parsed sequence).
        break;
      }

      if (blockType === 21 || blockType === 0) {
        // READ THE SEQUENCE AND ITS PROPERTIES
        const props = unpack(1, "b");
        // the lowest bit of the properties byte marks a circular sequence
        data.circular = isFirstBitA1(dec2bin(props));
        const size = block_size - 1;
        if (size < 0) {
          // Throwing routes through the catch below so callers always get the
          // standard failure result rather than `undefined`.
          throw new Error("Invalid SnapGene sequence block size");
        }
        data.size = isProtein ? size * 3 : size;
        data.sequence = read(size, "utf8");
      } else if (blockType === 10) {
        // READ THE FEATURES
        const xml = read(block_size, "utf8");
        const parsedXml = new XMLParser({
          ignoreAttributes: false,
          attributeNamePrefix: "",
          isArray: (name) => name === "Feature" || name === "Segment",
        }).parse(xml);
        const { Features: { Feature = [] } = {} } = parsedXml;
        data.features = [];
        Feature.forEach((feat) => {
          const { directionality, Segment = [], name, type } = feat;
          let maxStart = 0;
          let maxEnd = 0;
          const locations =
            Segment &&
            Segment.map((seg) => {
              if (!seg) throw new Error("invalid feature definition");
              let { start, end } = getStartAndEndFromRangeString(seg.range);
              // protein coordinates are scaled by 3
              start = isProtein ? start * 3 : start;
              end = isProtein ? end * 3 + 2 : end;
              maxStart = Math.max(maxStart, start);
              maxEnd = Math.max(maxEnd, end);
              return {
                start,
                end,
              };
            });

          // Missing or unrecognised directionality falls back to the
          // non-directional entry instead of throwing on the lookup.
          const [strand, arrowheadType] =
            (directionality && strand_dict[directionality]) || strand_dict[0];

          data.features.push({
            name,
            type,
            ...(locations?.length > 1 && { locations }),
            strand,
            arrowheadType,
            start: maxStart,
            end: maxEnd,
          });
        });
      } else if (blockType === 6) {
        // READ THE NOTES
        const xml = read(block_size, "utf8");
        const parsedXml = new XMLParser({}).parse(xml);
        const name = get(parsedXml, "Notes.CustomMapLabel");
        if (name) {
          data.name = name;
        }

        const description = get(parsedXml, "Notes.Description");
        if (description && typeof description === "string") {
          data.description = description
            .replace("<html><body>", "")
            .replace("</body></html>", ""); //fixes https://github.com/TeselaGen/ve-sequence-parsers/issues/225
        }
      } else {
        // WE IGNORE THE WHOLE BLOCK
        read(block_size); //we don't do anything with this
      }
    }
    returnVal.parsedSequence = data;
    return validateSequenceArray(
      flattenSequenceArray([returnVal], options),
      options
    );
  } catch (e) {
    console.error("Error trying to parse file as snapgene:", e);
    return [
      {
        success: false,
        messages: ["Import Error: Invalid File"],
      },
    ];
  }
}
|
|
191
|
+
|
|
192
|
+
/**
 * Convert a 1-based "start-end" range string into 0-based numeric positions.
 *
 * @param {string} rangestring - Range text such as "5-10".
 * @returns {{start: number, end: number}} Both values decremented by one;
 *   a missing or non-numeric part yields NaN.
 */
function getStartAndEndFromRangeString(rangestring) {
  const parts = rangestring.split("-");
  const oneBasedStart = Number(parts[0]);
  const oneBasedEnd = Number(parts[1]);
  return { start: oneBasedStart - 1, end: oneBasedEnd - 1 };
}
|
|
199
|
+
|
|
200
|
+
/**
 * Return the code of the first character of a value's string form
 * (adapted from http://locutus.io/php/ord/).
 *
 * A leading high surrogate followed by another code unit is combined with
 * that unit into a single code point; a lone high or low surrogate is
 * returned as-is. An empty string yields NaN.
 *
 * @param {*} string - Value to inspect; it is coerced with `+ ""`.
 * @returns {number} Character code / code point of the first character.
 * @example ord('K') // 75
 * @example ord('\uD800\uDC00') // 65536
 */
function ord(string) {
  const text = string + "";
  const first = text.charCodeAt(0);
  const startsWithHighSurrogate = first >= 0xd800 && first <= 0xdbff;

  // Everything except a high surrogate that has a following code unit is
  // reported directly (this includes unpaired low surrogates).
  if (!startsWithHighSurrogate || text.length === 1) {
    return first;
  }

  const second = text.charCodeAt(1);
  return (first - 0xd800) * 0x400 + (second - 0xdc00) + 0x10000;
}
|
|
236
|
+
|
|
237
|
+
export default snapgeneToJson;
|
|
238
|
+
|
|
239
|
+
/**
 * Render a number as a binary string after converting it to an unsigned
 * 32-bit integer.
 *
 * @param {number} dec - Value to convert.
 * @returns {string} Binary digits without padding.
 */
function dec2bin(dec) {
  const unsigned = dec >>> 0;
  return unsigned.toString(2);
}
|
|
242
|
+
|
|
243
|
+
/**
 * Tell whether the last character of a value's string form is the digit 1.
 *
 * @param {*} num - Value whose `toString()` result is inspected (e.g. a
 *   binary string, where the last character is the lowest bit).
 * @returns {boolean} True when the final character converts to the number 1.
 */
function isFirstBitA1(num) {
  const digits = num.toString();
  const lastDigit = digits[digits.length - 1];
  return Number(lastDigit) === 1;
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// Basically a copy of 'Teselagen.utils.NameUtils' for use within workers.
|
|
2
|
+
|
|
3
|
+
/**
 * Reformat a name by replacing every space character with an underscore.
 * Only the space character is replaced; tabs and newlines are left alone.
 * @param {string} pName Name to reformat (converted with `toString()`).
 * @returns {string} New name.
 */
export const reformatName = function (pName) {
  const words = pName.toString().split(" ");
  return words.join("_");
};
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
const ParserUtil = {};

/**
 * Choose a display name for a GenBank feature from its notes and store it on
 * `feat.name` (empty string when no label-like note is present).
 *
 * Notes are visited in order. A "label" note always sets the name; a "gene"
 * note sets it unless a label was already used; any other label-like note
 * sets it only while neither a label nor a gene has been used.
 *
 * @param {Object} feat Feature whose `notes` entries expose `name` and `value`.
 * @return {Object} The same feature object, mutated.
 */
ParserUtil.postProcessGenbankFeature = function (feat) {
  let chosenName = null;
  let usingLabel = false;
  let usingGene = false;

  for (let idx = 0; idx < feat.notes.length; idx++) {
    const { name: key, value } = feat.notes[idx];
    if (!this.isAGenbankFeatureLabel(key)) {
      continue;
    }

    switch (key) {
      case "label":
        // Label has top priority.
        chosenName = value;
        usingLabel = true;
        break;
      case "gene":
        // Gene only wins when no label has been seen so far.
        if (!usingLabel) {
          chosenName = value;
          usingGene = true;
        }
        break;
      default:
        // Any other label-like note is a fallback.
        if (!usingLabel && !usingGene) {
          chosenName = value;
        }
    }
  }

  feat.name = chosenName || "";
  return feat;
};

/**
 * isAGenbankFeatureLabel
 * @param {string} name Name of a attribute or qualifier
 * @return {boolean} True when the name is one of the label-like qualifiers.
 */
ParserUtil.isAGenbankFeatureLabel = function (name) {
  const labelQualifiers = [
    "label",
    "name",
    "ApEinfo_label",
    "note",
    "gene",
    "organism",
    "locus_tag",
  ];
  return labelQualifiers.includes(name);
};
|
|
93
|
+
export default ParserUtil;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { cloneDeep, forEach } from "lodash";
|
|
2
|
+
|
|
3
|
+
/**
 * Produce a deep copy of a TeselaGen sequence JSON with the `cutsites` and
 * `orfs` properties removed and `aminoAcids` removed from every translation.
 * The input object is not modified.
 *
 * @param {Object} tgJson - Sequence JSON to clean.
 * @returns {Object} The cleaned clone (a falsy input is returned as its clone).
 */
export default function cleanUpTeselagenJsonForExport(tgJson) {
  const exportable = cloneDeep(tgJson);
  if (exportable) {
    delete exportable.cutsites;
    delete exportable.orfs;
    forEach(exportable.translations, (translation) => {
      delete translation.aminoAcids;
    });
  }
  return exportable;
}
|
|
13
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
// Fallback name used when a sequence has no usable name.
export const untitledSequenceName = "Untitled Sequence";

// Lookup of GenBank division codes, see
// https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html#GenBankDivisionB
export const gbDivisions = {
  PRI: true, // primate sequences
  ROD: true, // rodent sequences
  MAM: true, // other mammalian sequences
  VRT: true, // other vertebrate sequences
  INV: true, // invertebrate sequences
  PLN: true, // plant, fungal, and algal sequences
  BCT: true, // bacterial sequences
  VRL: true, // viral sequences
  PHG: true, // bacteriophage sequences
  SYN: true, // synthetic sequences
  UNA: true, // unannotated sequences
  EST: true, // EST sequences (expressed sequence tags)
  PAT: true, // patent sequences
  STS: true, // STS sequences (sequence tagged sites)
  GSS: true, // GSS sequences (genome survey sequences)
  HTG: true, // HTG sequences (high-throughput genomic sequences)
  HTC: true, // unfinished high-throughput cDNA sequencing
  ENV: true, // environmental sampling sequences
  CON: true, // sequence assembly instructions on how to construct contigs from multiple GenBank records.
};
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { isRangeWithinRange } from '@teselagen/range-utils';
|
|
2
|
+
|
|
3
|
+
/**
 * Normalise a parsed sequence object in place.
 *
 * With `opts.splitLocations`, every feature that has several locations is
 * split: the original feature keeps the first location's start/end and one
 * cloned feature is appended for each remaining location; `locations` is
 * removed from all of them.
 *
 * Otherwise each feature with locations gets `start` from its first location
 * and `end` from its last one. `locations` is kept only when there is more
 * than one location and all of them pass `isRangeWithinRange` against the
 * feature.
 *
 * Finally a `sequence` given as an array is joined into a string.
 *
 * @param {Object} oldTeselagenJson - Sequence object to mutate. A
 *   null/undefined value is ignored.
 * @param {Object} [opts] - Options; only `splitLocations` is read here.
 * @returns {undefined}
 */
export default function convertOldSequenceDataToNewDataType(
  oldTeselagenJson,
  opts
) {
  // Nothing to convert; previously this crashed on the trailing
  // `oldTeselagenJson.sequence` access even though the loops were guarded.
  if (!oldTeselagenJson) return;

  // Tolerate a missing features list instead of throwing.
  const features = oldTeselagenJson.features || [];

  if (opts && opts.splitLocations) {
    //after the file has been parsed, but before it's been saved, check for features with multiple locations and split them
    features.forEach((feature) => {
      const { locations } = feature;
      if (locations && locations[0]) {
        //start at index 1, not 0! every location except the first one
        //becomes its own feature (forEach does not visit the appended clones)
        for (let i = 1; i < locations.length; i++) {
          const location = locations[i];
          const clonedFeature = JSON.parse(JSON.stringify(feature));
          clonedFeature.start = location.start;
          clonedFeature.end = location.end;
          delete clonedFeature.locations; //This array is no longer used to get start and end bp and doesn't need to be in db
          features.push(clonedFeature);
        }

        feature.start = locations[0].start;
        feature.end = locations[0].end;
      }
      delete feature.locations;
    });
  } else {
    //manage locations
    features.forEach((feature) => {
      const { locations } = feature;
      if (locations && locations[0]) {
        //set the new starts and ends
        feature.start = locations[0].start;
        feature.end = locations[locations.length - 1].end;
        if (locations.length > 1) {
          // make sure the locations all fit within the range
          const locationError = locations.some(
            (location) =>
              !isRangeWithinRange(
                location,
                feature,
                oldTeselagenJson.sequence.length
              )
          );
          if (locationError) {
            delete feature.locations;
          }
        } else {
          // a single location carries no extra information
          delete feature.locations;
        }
      }
    });
  }

  if (Array.isArray(oldTeselagenJson.sequence)) {
    oldTeselagenJson.sequence = oldTeselagenJson.sequence.join("");
  }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { untitledSequenceName } from "./constants";
|
|
2
|
+
|
|
3
|
+
/**
 * Build the empty parsing-result skeleton used by the parsers.
 *
 * @param {Object} [options] - Optional settings; only `fileName` is read.
 * @returns {{messages: Array, success: boolean, parsedSequence: Object}}
 *   A successful result whose sequence name is the file name with its last
 *   extension removed, or `untitledSequenceName` when that is empty.
 */
export default function createInitialSequence(options) {
  const { fileName } = options || {};
  // strip the trailing ".ext" (if any) from the file name
  const nameFromFile = fileName && fileName.replace(/\.[^/.]+$/, "");

  return {
    messages: [],
    success: true,
    parsedSequence: {
      features: [],
      name: nameFromFile || untitledSequenceName,
      sequence: "",
    },
  };
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import convertOldSequenceDataToNewDataType from './convertOldSequenceDataToNewDataType.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Ensure parsing results are an array and convert every successful result's
 * `parsedSequence` with `convertOldSequenceDataToNewDataType`.
 *
 * @param {Object|Object[]} parsingResultArray - One result or a list of them.
 * @param {Object} [opts] - Options forwarded to the converter.
 * @returns {Object[]|*} The (possibly wrapped) array; a falsy input is
 *   returned unchanged.
 */
export default function flattenSequenceArray(parsingResultArray, opts) {
  if (!parsingResultArray) {
    return parsingResultArray;
  }

  //wrap the parsingResult into an array if it isn't one already
  const results = Array.isArray(parsingResultArray)
    ? parsingResultArray
    : [parsingResultArray];

  //convert the old data type to the new data type (flattened sequence)
  results.forEach((parsingResult) => {
    if (!parsingResult.success) return;
    convertOldSequenceDataToNewDataType(parsingResult.parsedSequence, opts);
  });

  return results;
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { Buffer } from 'buffer';
|
|
2
|
+
import isBrowser from './isBrowser';
|
|
3
|
+
|
|
4
|
+
/**
 * Obtain an ArrayBuffer for a file-like input.
 *
 * Outside the browser the input (a Buffer, or `file.buffer` when present,
 * otherwise the value itself) is copied into a new ArrayBuffer and returned
 * directly. In the browser the same value is read with a FileReader and a
 * Promise of the result is returned.
 *
 * @param {*} file - Buffer, object exposing `.buffer`, or browser file object.
 * @returns {ArrayBuffer|Promise<ArrayBuffer>} Synchronous result outside the
 *   browser, Promise in the browser.
 */
export default function getArrayBufferFromFile(file) {
  // Evaluated lazily so that, in the browser, a failure here rejects the
  // returned Promise rather than throwing synchronously.
  const pickSource = () => (Buffer.isBuffer(file) ? file : file.buffer || file);

  if (isBrowser) {
    const reader = new window.FileReader();
    return new Promise((resolve, reject) => {
      reader.onload = ({ target }) => {
        resolve(target.result);
      };
      reader.onerror = (err) => {
        console.error('err:', err);
        reject(err);
      };
      reader.readAsArrayBuffer(pickSource());
    });
  }

  //node environment
  return toArrayBuffer(pickSource());
}
|
|
24
|
+
|
|
25
|
+
/**
 * Copy byte data into a freshly allocated ArrayBuffer.
 *
 * @param {ArrayBuffer|ArrayLike<number>} buffer - An ArrayBuffer, or any
 *   array-like of byte values (Node Buffer, Uint8Array, plain array).
 * @returns {ArrayBuffer} A new ArrayBuffer holding a copy of the bytes.
 */
function toArrayBuffer(buffer) {
  // An ArrayBuffer has no `length`, so the element-wise copy below would
  // silently produce an empty buffer; copy it by byte range instead.
  if (buffer instanceof ArrayBuffer) {
    return buffer.slice(0);
  }
  const view = new Uint8Array(buffer.length);
  view.set(buffer);
  return view.buffer;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// True only when both `window` and `window.document` are defined, i.e. the code runs in a browser-like environment.
export default typeof window !== 'undefined' && typeof window.document !== 'undefined';
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
 * Replace every uracil (`u`/`U`) in a sequence with thymine (`t`/`T`,
 * keeping the case) and record each replaced position as a single-base
 * "tg_uracil" feature.
 *
 * @param {string} sequenceBps - Sequence text to clean.
 * @param {Object[]} [featureList=[]] - Array that receives one feature per
 *   uracil; it is mutated in place.
 * @returns {string} The sequence with uracils replaced.
 */
export default function parseUracilFeatures(sequenceBps, featureList = []) {
  const recordAndSwap = (base, position) => {
    featureList.push({
      type: "misc_feature",
      name: "tg_uracil",
      strand: 1,
      start: position,
      end: position,
    });
    if (base === "U") {
      return "T";
    }
    return "t";
  };

  return sequenceBps.replace(/u/gi, recordAndSwap);
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
// Annotation types paired with the pragma string associated with each one.
export default [
  //primers don't need a pragma because they already have a feature type of primer_bind
  { type: "primers" },
  { pragma: "Teselagen_Part", type: "parts" },
  { pragma: "j5_warning", type: "warnings" },
  { pragma: "j5_assembly_piece", type: "assemblyPieces" },
  { pragma: "j5_lineage_annotation", type: "lineageAnnotations" },
];
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
//tnr: taken from https://github.com/angus-c/waldojs (not using waldojs as it is not being maintained and pulled in nasty babel runtime transforms)
|
|
2
|
+
/* eslint-disable eqeqeq */
|
|
3
|
+
import { isEqual } from "lodash";
|
|
4
|
+
|
|
5
|
+
/**
 * One search hit: the owning object, the matching property name, its path,
 * a type label and the property's value.
 */
class Match {
  /**
   * @param {{path: string, obj: Object, prop: string, type: string}} props
   *   Copied onto the instance; `value` is then read from `obj[prop]`.
   */
  constructor(props) {
    Object.assign(this, props);
    this.value = this.obj[this.prop];
  }

  /** @returns {string} "<path> -> (<type>) <value>" */
  toString() {
    const { path, type } = this;
    return `${path} -> (${type}) ${this.logValue()}`;
  }

  /** @returns {*} The value itself for primitives and arrays, otherwise its `Object.prototype.toString` tag. */
  logValue() {
    const val = this.value;
    // if value is an object then just toString it
    const isPrimitive = (x) => Object(x) !== x;
    return isPrimitive(val) || Array.isArray(val) ? val : {}.toString.call(val);
  }

  /** Print this match with `console.info`. */
  log() {
    console.info(this.toString());
  }
}

// `globalThis` instead of a bare `global`, which throws a ReferenceError in
// environments that define neither `window` nor `global`.
const GLOBAL = typeof window === "object" ? window : globalThis;

/**
 * Walk an object graph (iteratively, to avoid call stack limits) and collect
 * every own property whose name loosely equals `what`.
 *
 * Nested values whose string form is "[object Object]" are queued for
 * traversal unless a lodash-`isEqual` value was queued before.
 *
 * @param {string} what - Property name to look for.
 * @param {Object} where - Root object to search.
 * @returns {Match[]} Matches in discovery order; the array also has a
 *   `log()` method that logs each match.
 */
export default function searchWholeObjByName(what, where) {
  // loose comparison on purpose: property names are always strings
  // eslint-disable-next-line eqeqeq
  const searchBy = (what, where, prop) => what == prop;

  let data;

  // eslint-disable-next-line eqeqeq
  const path = where == GLOBAL ? "GLOBAL" : "SRC";
  const queue = [{ where, path }];
  const seen = [];

  const matches = [];
  matches.log = function () {
    this.forEach((m) => m.log());
  };

  // a non-recursive solution to avoid call stack limits
  // http://www.jslab.dk/articles/non.recursive.preorder.traversal.part4
  while ((data = queue.pop())) {
    const { where, path } = data;

    for (const prop in where) {
      // IE may throw errors when accessing/coercing some properties
      try {
        // eslint-disable-next-line no-prototype-builtins
        if (where.hasOwnProperty(prop)) {
          // Scoped per property so a hit from an earlier property can never
          // leak into the `type` reported for this one.
          let alreadySeen;

          // inspect objects
          // eslint-disable-next-line eqeqeq
          if ([where[prop]] == "[object Object]") {
            // check if already searched (prevents circular references)
            alreadySeen = seen.find((entry) =>
              isEqual(entry.where, where[prop])
            );
            // add to stack
            if (!alreadySeen) {
              data = { where: where[prop], path: `${path}.${prop}` };
              queue.push(data);
              seen.push(data);
            }
          }
          // if match detected, push it.
          if (searchBy(what, where, prop)) {
            const type = alreadySeen
              ? `<${alreadySeen.path}>`
              : typeof where[prop];
            matches.push(
              new Match({
                path: `${path}.${prop}`,
                obj: where,
                prop,
                type,
              })
            );
          }
        }
      } catch (e) {
        // deliberately best-effort: a property that cannot be inspected is skipped
      }
    }
  }

  return matches;
}

/**
 * @param {string} what - Property name to look for.
 * @param {Object} where - Root object to search.
 * @returns {*} Element 0 of the first match's value, or undefined.
 */
export function searchWholeObjByNameSimple(what, where) {
  return searchWholeObjByName(what, where)?.[0]?.value?.[0];
}

/**
 * @param {string} what - Property name to look for.
 * @param {Object} where - Root object to search.
 * @returns {*} The first match's value, or undefined.
 */
export function searchWholeObjByNameSimpleArray(what, where) {
  return searchWholeObjByName(what, where)?.[0]?.value;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
 * Split text into lines on "\n" or "\r\n".
 *
 * When that yields a single line, the text is instead split on the literal
 * two-character sequence backslash + "n".
 *
 * @param {string} string - Text to split.
 * @returns {string[]} The lines; an empty string gives an empty array.
 */
export default function splitStringIntoLines(string) {
  if (string === "") {
    return [];
  }

  const byNewline = string.split(/\r?\n/);
  //tnr: not sure why this check is being made... but keeping it in because it is probably doing something
  return byNewline.length === 1 ? string.split('\\n') : byNewline;
}
|