@teselagen/bio-parsers 0.3.8 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +330 -0
- package/index.js +49 -47
- package/index.mjs +49 -47
- package/index.umd.js +49 -47
- package/package.json +1 -2
- package/src/ab1ToJson.js +13 -18
- package/src/anyToJson.js +6 -6
- package/src/genbankToJson.js +21 -20
- package/src/geneiousXmlToJson.js +3 -6
- package/src/gffToJson.js +5 -5
- package/src/jbeiXmlToJson.js +10 -13
- package/src/jsonToBed.js +4 -3
- package/src/jsonToFasta.js +4 -2
- package/src/jsonToGenbank.js +13 -12
- package/src/jsonToJsonString.js +1 -1
- package/src/sbolXmlToJson.js +9 -9
- package/src/snapgeneToJson.js +14 -12
- package/src/utils/NameUtils.js +1 -1
- package/src/utils/ParserUtil.js +81 -83
- package/src/utils/cleanUpTeselagenJsonForExport.js +8 -9
- package/src/utils/constants.js +22 -22
- package/src/utils/convertOldSequenceDataToNewDataType.js +5 -6
- package/src/utils/createInitialSequence.js +13 -11
- package/src/utils/extractFileExtension.js +11 -13
- package/src/utils/flattenSequenceArray.js +14 -14
- package/src/utils/getArrayBufferFromFile.js +5 -5
- package/src/utils/isBrowser.js +2 -1
- package/src/utils/parseUracilFeatures.js +2 -2
- package/src/utils/pragmasAndTypes.js +3 -2
- package/src/utils/searchWholeObjByName.js +3 -3
- package/src/utils/splitStringIntoLines.js +13 -12
- package/src/utils/validateSequence.js +9 -9
- package/src/utils/validateSequenceArray.js +17 -17
- package/utils/getArrayBufferFromFile.d.ts +1 -1
package/README.md
ADDED
@@ -0,0 +1,330 @@
# Bio Parsers

<!-- TOC -->

- [Bio Parsers](#bio-parsers)
  - [About this Repo](#about-this-repo)
  - [[CHANGELOG](CHANGELOG.md)](#changelogchangelogmd)
  - [Exported Functions](#exported-functions)
  - [Format Specification](#format-specification)
  - [Usage](#usage)
    - [install](#install)
    - [jsonToGenbank (same interface as jsonToFasta)](#jsontogenbank-same-interface-as-jsontofasta)
    - [anyToJson (same interface as genbankToJson, fastaToJson, xxxxToJson) (async required)](#anytojson-same-interface-as-genbanktojson-fastatojson-xxxxtojson-async-required)
    - [Options (for anyToJson or xxxxToJson)](#options-for-anytojson-or-xxxxtojson)
    - [ab1ToJson](#ab1tojson)
    - [snapgeneToJson (.dna files)](#snapgenetojson-dna-files)
    - [genbankToJson](#genbanktojson)
  - [Updating this repo](#updating-this-repo)
    - [Outside collaborators](#outside-collaborators)
  - [Thanks/Collaborators](#thankscollaborators)

<!-- /TOC -->

## About this Repo

This repo contains a set of parsers to convert between datatypes through a generalized JSON format.

## [CHANGELOG](CHANGELOG.md)

## Exported Functions

Use the following exports to convert to a generalized JSON format:

```
fastaToJson       //handles fasta files (.fa, .fasta)
genbankToJson     //handles genbank files (.gb, .gbk)
ab1ToJson         //handles .ab1 sequencing read files
sbolXmlToJson     //handles .sbol files
geneiousXmlToJson //handles .geneious files
jbeiXmlToJson     //handles jbei .seq or .xml files
snapgeneToJson    //handles snapgene (.dna) files
anyToJson         //handles any of the above file types based on file extension
```

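For example, converting a FASTA string might look like this (a minimal sketch: the FASTA record and file name are invented, and it assumes fastaToJson accepts an options object and returns the same array-of-results shape that anyToJson documents below):

```js
import { fastaToJson } from "@teselagen/bio-parsers";

async function example() {
  // an invented single-record FASTA string, just for illustration
  const fastaString = ">pExample an example sequence\natgagagcttagcaacagtga";
  // results come back as an array, one entry per sequence found in the input
  const results = await fastaToJson(fastaString, { fileName: "pExample.fasta" });
  console.log(results[0].success);
  console.log(results[0].parsedSequence.name);
}
example();
```
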
Use the following exports to convert from a generalized JSON format back to a specific format:

```
jsonToGenbank
jsonToFasta
jsonToBed
```

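jsonToGenbank and jsonToFasta are documented in the Usage section below. As a rough sketch for jsonToBed (assuming it accepts the same generalized JSON object and returns a BED-formatted string; the seqData object here is invented for illustration):

```js
import { jsonToFasta, jsonToBed } from "@teselagen/bio-parsers";

// seqData follows the generalized JSON format described in the next section
const seqData = {
  name: "example",
  circular: false,
  sequence: "atgagagcttagcaacagtga",
  features: [{ name: "feat1", type: "misc_feature", start: 0, end: 8, strand: 1, notes: {} }]
};

console.log(jsonToFasta(seqData)); // a FASTA-formatted string
console.log(jsonToBed(seqData)); // a BED-formatted string
```
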
## Format Specification

The generalized JSON format looks like:

```js
const generalizedJsonFormat = {
  size: 25,
  sequence: "asaasdgasdgasdgasdgasgdasgdasdgasdgasgdagasdgasdfasdfdfasdfa",
  circular: true,
  name: "pBbS8c-RFP",
  description: "",
  parts: [
    {
      name: "part 1",
      type: "CDS", //optional for parts
      id: "092j92", //Must be a unique id. If no id is provided, we'll autogenerate one for you
      start: 10, //0-based inclusive index
      end: 30, //0-based inclusive index
      strand: 1,
      notes: {}
    }
  ],
  primers: [
    {
      name: "primer 1",
      id: "092j92", //Must be a unique id. If no id is provided, we'll autogenerate one for you
      start: 10, //0-based inclusive index
      end: 30, //0-based inclusive index
      strand: 1,
      notes: {}
    }
  ],
  features: [
    {
      name: "anonymous feature",
      type: "misc_feature",
      id: "5590c1978979df000a4f02c7", //Must be a unique id. If no id is provided, we'll autogenerate one for you
      start: 1,
      end: 3,
      strand: 1,
      notes: {}
    },
    {
      name: "coding region 1",
      type: "CDS",
      id: "5590c1d88979df000a4f02f5",
      start: 12,
      end: 9,
      strand: -1,
      notes: {}
    }
  ],
  //only present when parsing an ab1 file
  chromatogramData: {
    aTrace: [], //same as cTrace but for a
    tTrace: [], //same as cTrace but for t
    gTrace: [], //same as cTrace but for g
    cTrace: [0, 0, 0, 1, 3, 5, 11, 24, 56, 68, 54, 30, 21, 3, 1, 4, 1, 0, 0, ...etc], //heights of the curve, spaced 1 per x position (i.e. if cTrace.length === 1000, then the max basePos value is 1000)
    basePos: [33, 46, 55, ...etc], //x position of the bases (can be unevenly spaced)
    baseCalls: ["A", "T", ...etc],
    qualNums: [] //or undefined if no qualNums are detected on the file
  }
};
```

## Usage

### install

`npm install -S @teselagen/bio-parsers`

or

`yarn add @teselagen/bio-parsers`

or

use it from a script tag:

```html
<script src="https://unpkg.com/bio-parsers/umd/bio-parsers.js"></script>
<script>
  async function main() {
    var jsonOutput = await window.bioParsers.genbankToJson(
      `LOCUS kc2 108 bp DNA linear 01-NOV-2016
COMMENT             teselagen_unique_id: 581929a7bc6d3e00ac7394e8
FEATURES             Location/Qualifiers
     CDS             1..108
                     /label="GFPuv"
     misc_feature    61..108
                     /label="gly_ser_linker"
     bogus_dude      4..60
                     /label="ccmN_sig_pep"
     misc_feature    4..60
                     /label="ccmN_nterm_sig_pep"
                     /pragma="Teselagen_Part"
                     /preferred5PrimeOverhangs=""
                     /preferred3PrimeOverhangs=""
ORIGIN
        1 atgaaggtct acggcaagga acagtttttg cggatgcgcc agagcatgtt ccccgatcgc
       61 ggtggcagtg gtagcgggag ctcgggtggc tcaggctctg ggg
//`
    );
    console.log("jsonOutput:", jsonOutput);
    var genbankString = window.bioParsers.jsonToGenbank(jsonOutput[0].parsedSequence);
    console.log(genbankString);
  }
  main();
</script>
```

see the `./umd_demo.html` file for a full working example

### jsonToGenbank (same interface as jsonToFasta)

```js
//To go from json to genbank:
import { jsonToGenbank } from "@teselagen/bio-parsers";

//You can pass an optional options object as the second argument. Here are the defaults:
const options = {
  isProtein: false, //by default the sequence will be parsed and validated as type DNA (unless U's instead of T's are found). If isProtein=true the sequence will be parsed and validated as a PROTEIN type (seqData.isProtein === true)
  guessIfProtein: false, //if true the parser will attempt to guess if the sequence is of type DNA or type PROTEIN (this will override the isProtein flag)
  guessIfProteinOptions: {
    threshold: 0.90, //percent of characters that must be DNA letters for the sequence to be considered type DNA
    dnaLetters: ["G", "A", "T", "C"] //customizable set of letters to count as DNA
  },
  inclusive1BasedStart: false, //by default feature starts are treated as 0-based and inclusive
  inclusive1BasedEnd: false //by default feature ends are treated as 0-based and inclusive
  // Example:
  // 0123456
  // ATGAGAG
  // --fff-- (the feature covers GAG)
  // 0-based inclusive start:
  // feature.start = 2
  // 1-based inclusive start:
  // feature.start = 3
  // 0-based inclusive end:
  // feature.end = 4
  // 1-based inclusive end:
  // feature.end = 5
};
const genbankString = jsonToGenbank(generalizedJsonFormat, options);
```

### anyToJson (same interface as genbankToJson, fastaToJson, xxxxToJson) (async required)

```js
import { anyToJson } from "@teselagen/bio-parsers";

//note: anyToJson should be called with await to allow file parsing to occur (if a file is being passed)
const results = await anyToJson(
  stringOrFile, //if ab1 files are being passed in, you should pass files only; otherwise strings or files are fine as inputs
  options //options.fileName (eg "pBad.ab1" or "pCherry.fasta") is important to pass here so the parser knows which file type it is dealing with!
);

//we always return an array of results because some files may contain multiple sequences
results[0].success; //either true or false
results[0].messages; //an array of strings giving any warnings or errors generated during the parsing process
results[0].parsedSequence; //this will be the generalized json format as specified above :)
//chromatogram data will be here (ab1 only):
results[0].parsedSequence.chromatogramData;
```

### Options (for anyToJson or xxxxToJson)

```js
//You can pass an optional options object as the second argument. Here are the defaults:
const options = {
  fileName: "example.gb", //the filename is used if none is found in the genbank
  isProtein: false, //if you know that it is a protein string being parsed you can pass true here
  parseFastaAsCircular: false, //by default fasta files are parsed as linear sequences. You can change this by setting parseFastaAsCircular=true

  //genbankToJson options only
  inclusive1BasedStart: false, //by default feature starts are parsed out as 0-based and inclusive
  inclusive1BasedEnd: false, //by default feature ends are parsed out as 0-based and inclusive
  acceptParts: true, //by default features with a feature.notes.pragma[0] === "Teselagen_Part" are added to the sequenceData.parts array. Setting this to false will keep them as features instead

  //fastaToJson options only
  parseName: true //by default the parser attempts to parse the name and description of the sequence from the comment line. Setting this to false will keep the name unchanged, with no description
};
```

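For example, the inclusive1BasedStart/inclusive1BasedEnd options change the reported feature coordinates like this (a minimal sketch; the tiny GenBank record is invented for illustration, and the logged values follow the 0-based vs 1-based convention described above):

```js
import { genbankToJson } from "@teselagen/bio-parsers";

// an invented 7 bp record with one feature spanning bases 3..5 (1-based, as GenBank writes it)
const gb = `LOCUS tiny 7 bp DNA linear 01-JAN-2000
FEATURES             Location/Qualifiers
     misc_feature    3..5
                     /label="demo"
ORIGIN
        1 atgagag
//`;

const [zeroBased] = genbankToJson(gb);
// 0-based inclusive (the default): start 2, end 4
console.log(zeroBased.parsedSequence.features[0].start, zeroBased.parsedSequence.features[0].end);

const [oneBased] = genbankToJson(gb, { inclusive1BasedStart: true, inclusive1BasedEnd: true });
// 1-based inclusive: start 3, end 5
console.log(oneBased.parsedSequence.features[0].start, oneBased.parsedSequence.features[0].end);
```
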
### ab1ToJson

```js
import { ab1ToJson } from "@teselagen/bio-parsers";

const results = await ab1ToJson(
  //this can be either a browser file <input type="file" id="input" multiple onchange="ab1ToJson(this.files[0])">
  //or a node file: ab1ToJson(fs.readFileSync(path.join(__dirname, './testData/ab1/example1.ab1')));
  file,
  options //options.fileName (eg "pBad.ab1" or "pCherry.fasta") is important to pass here so the parser knows which file type it is dealing with!
);

//we always return an array of results because some files may contain multiple sequences
results[0].success; //either true or false
results[0].messages; //an array of strings giving any warnings or errors generated during the parsing process
results[0].parsedSequence; //this will be the generalized json format as specified above :)
//chromatogram data will be here (ab1 only):
results[0].parsedSequence.chromatogramData;
```

### snapgeneToJson (.dna files)

```js
import { snapgeneToJson } from "@teselagen/bio-parsers";

//file can be either a browser file <input type="file" id="input" multiple onchange="snapgeneToJson(this.files[0])">
//or a node file: snapgeneToJson(fs.readFileSync(path.join(__dirname, './testData/ab1/example1.ab1')));
const results = await snapgeneToJson(file, options);
```

### genbankToJson

```js
import { genbankToJson } from "@teselagen/bio-parsers";

const result = genbankToJson(string, options);

console.info(result);
// [
//   {
//     "messages": [
//       "Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn",
//       "Invalid feature end: 1384 detected for Homo sapiens and set to 1",
//     ],
//     "success": true,
//     "parsedSequence": {
//       "features": [
//         {
//           "notes": {
//             "organism": [
//               "Homo sapiens"
//             ],
//             "db_xref": [
//               "taxon:9606"
//             ],
//             "chromosome": [
//               "17"
//             ],
//             "map": [
//               "17q21"
//             ]
//           },
//           "type": "source",
//           "strand": 1,
//           "name": "Homo sapiens",
//           "start": 0,
//           "end": 1
//         }
//       ],
//       "name": "NP_003623",
//       "sequence": "gagaggggggttatccccccttcgtcagtcgatcgtaacgtatcagcagcgcgcgagattttctggcgcagtcag",
//       "circular": true,
//       "extraLines": [
//         "DEFINITION contactin-associated protein 1 precursor [Homo sapiens].",
//         "ACCESSION NP_003623",
//         "VERSION NP_003623.1 GI:4505463",
//         "DBSOURCE REFSEQ: accession NM_003632.2",
//         "KEYWORDS RefSeq."
//       ],
//       "type": "DNA",
//       "size": 925
//     }
//   }
// ]
```

You can see more examples by looking at the tests.

## Updating this repo

### Outside collaborators

fork and pull request please :)

## Thanks/Collaborators

- IsaacLuo - https://github.com/IsaacLuo/SnapGeneFileReader (from which the snapgene parser was adapted)
- Joshua Nixon (original collaborator)
- Thomas Rich (original collaborator)

package/index.js
CHANGED
@@ -6283,8 +6283,8 @@ function getOverlapsOfPotentiallyCircularRanges(rangeA, rangeB, maxRangeLength,
  maxRangeLength
  );
  let overlaps = [];
- normalizedRangeA.forEach(function(nonCircularRangeA
- normalizedRangeB.forEach(function(nonCircularRangeB
+ normalizedRangeA.forEach(function(nonCircularRangeA) {
+ normalizedRangeB.forEach(function(nonCircularRangeB) {
  const overlap = getOverlapOfNonCircularRanges(
  nonCircularRangeA,
  nonCircularRangeB

@@ -6296,7 +6296,7 @@ function getOverlapsOfPotentiallyCircularRanges(rangeA, rangeB, maxRangeLength,
  });
  if (joinIfPossible && normalizedRangeA.length === 2 && normalizedRangeB.length === 2 && maxRangeLength) {
  const joinedOverlap = {};
- overlaps = lodashExports.flatMap(overlaps, (o
+ overlaps = lodashExports.flatMap(overlaps, (o) => {
  if (o.start === 0) {
  joinedOverlap.end = o.end;
  return [];

@@ -6394,12 +6394,14 @@ function trimRangeByAnotherRange(rangeToBeTrimmed, trimmingRange, sequenceLength
  });
  splitRangesToBeTrimmed[index] = nonCircularRangeToBeTrimmed;
  });
- const outputSplitRanges = splitRangesToBeTrimmed.filter(
-
-
+ const outputSplitRanges = splitRangesToBeTrimmed.filter(
+ function(trimmedRange) {
+ if (trimmedRange) {
+ return true;
+ }
+ return false;
  }
-
- });
+ );
  let outputTrimmedRange;
  if (outputSplitRanges.length < 0)
  ;

@@ -6449,8 +6451,14 @@ function normalizePositionByRangeLength(pPosition, sequenceLength, isInBetweenPo
  __name(normalizePositionByRangeLength, "normalizePositionByRangeLength");
  function translateRange(rangeToBeAdjusted, translateBy, rangeLength) {
  return lodashExports.assign({}, rangeToBeAdjusted, {
- start: normalizePositionByRangeLength(
-
+ start: normalizePositionByRangeLength(
+ rangeToBeAdjusted.start + translateBy,
+ rangeLength
+ ),
+ end: normalizePositionByRangeLength(
+ rangeToBeAdjusted.end + translateBy,
+ rangeLength
+ )
  });
  }
  __name(translateRange, "translateRange");

@@ -11391,13 +11399,13 @@ function coerceLocation({
  messages.push(
  "Invalid annotation start: " + location.start + " detected for " + location.name + " and set to size: " + size
  );
- location.start = size - (isProtein ? 3 : 1);
+ location.start = Math.max(0, size - (isProtein ? 3 : 1));
  }
  if (location.end < 0 || !(location.end <= size - 1) || location.end > size - 1) {
  messages.push(
  "Invalid annotation end: " + location.end + " detected for " + location.name + " and set to seq size: " + size
  );
- location.end = size - 1;
+ location.end = Math.max(0, size - 1);
  }
  if (location.start > location.end && circular === false) {
  messages.push(

@@ -11410,9 +11418,9 @@ __name(coerceLocation, "coerceLocation");
  function filterAminoAcidSequenceString(sequenceString, options) {
  options = options || {};
  if (options.includeStopCodon) {
- return sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
+ return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
  }
- return sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
+ return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
  }
  __name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
  function getDegenerateDnaStringFromAAString(aaString) {

@@ -11610,7 +11618,7 @@ const calcTmMethods = {
  calculateTemperature: function(sequence, type, A, R, C, Na) {
  if (typeof type === "undefined") {
  type = this.TABLE_BRESLAUER;
- } else if (type != this.TABLE_BRESLAUER &&
+ } else if (type != this.TABLE_BRESLAUER && type != this.TABLE_UNIFIED && type != this.TABLE_SUGIMOTO) {
  throw new Error("Invalid table type!");
  }
  if (!A) {

@@ -19761,18 +19769,20 @@ function genbankToJson(string, options = {}) {
  const isKeyRunon = isKeywordRunon(line);
  const isSubKey = isSubKeyword(line);
  const isKey = isKeyword(line);
- if (
-
-
-
-
-
-
-
-
-
-
-
+ if (!isKeyRunon) {
+ if (key === "LOCUS") {
+ LINETYPE = key;
+ } else if (key === "REFERENCE") {
+ LINETYPE = key;
+ } else if (key === "FEATURES") {
+ LINETYPE = key;
+ } else if (key === "ORIGIN") {
+ LINETYPE = key;
+ } else if (key === "//") {
+ LINETYPE = key;
+ } else if (isKey === true) {
+ LINETYPE = key;
+ }
  }
  if (line.trim() === "" || key === ";") {
  return false;

@@ -19941,7 +19951,6 @@ function genbankToJson(string, options = {}) {
  __name(parseOrigin, "parseOrigin");
  function parseLocus(line) {
  result = createInitialSequence(options);
- let locusName;
  let circular;
  let gbDivision;
  let date;

@@ -19952,7 +19961,7 @@ function genbankToJson(string, options = {}) {
  );
  addMessage("Import Warning: Locus line contains no values: " + line);
  }
- locusName = lineArr[1];
+ const locusName = lineArr[1];
  for (let i = 1; i < lineArr.length; i++) {
  if (lineArr[i].match(/circular/gi)) {
  circular = true;

@@ -20105,10 +20114,10 @@ function genbankToJson(string, options = {}) {
  }
  __name(parseFeatureLocation, "parseFeatureLocation");
  function parseFeatureNote(line) {
- let newLine
+ let newLine;
  newLine = line.trimLeft();
  newLine = newLine.replace(/^\/|"$/g, "");
- lineArr = newLine.split(/="|=/);
+ const lineArr = newLine.split(/="|=/);
  let val2 = lineArr.slice(1).join("=");
  if (val2) {
  val2 = val2.replace(/\\/g, " ");

@@ -29914,13 +29923,10 @@ function geneiousXmlToJson(string, options) {
  });
  }
  });
- const toRet = lodashExports.filter(
-
- (r)
-
- return (_b3 = (_a3 = r == null ? void 0 : r.parsedSequence) == null ? void 0 : _a3.sequence) == null ? void 0 : _b3.length;
- }
- );
+ const toRet = lodashExports.filter(resultArray, (r) => {
+ var _a3, _b3;
+ return (_b3 = (_a3 = r == null ? void 0 : r.parsedSequence) == null ? void 0 : _a3.sequence) == null ? void 0 : _b3.length;
+ });
  if (toRet.length)
  return toRet;
  return onFileParsed(resultArray);

@@ -30025,13 +30031,10 @@ function jbeiXmlToJson(string, options) {
  messages: ["Error while parsing JBEI format"]
  });
  }
- const toRet = lodashExports.filter(
-
- (r)
-
- return (_b3 = (_a3 = r == null ? void 0 : r.parsedSequence) == null ? void 0 : _a3.sequence) == null ? void 0 : _b3.length;
- }
- );
+ const toRet = lodashExports.filter(resultArray, (r) => {
+ var _a3, _b3;
+ return (_b3 = (_a3 = r == null ? void 0 : r.parsedSequence) == null ? void 0 : _a3.sequence) == null ? void 0 : _b3.length;
+ });
  if (toRet.length)
  return toRet;
  return onFileParsed(resultArray);

@@ -32415,7 +32418,6 @@ function createGenbankLocus(serSeq, options) {
  if (serSeq.sequence.symbols) {
  serSeq.sequence = serSeq.sequence.symbols.split("");
  }
- let tmp;
  let dnaType;
  if (serSeq.isProtein) {
  dnaType = "";

@@ -32432,7 +32434,7 @@ function createGenbankLocus(serSeq, options) {
  line += " ";
  line += StringUtil.lpad(String(serSeq.sequence.length), " ", 11);
  line += serSeq.isProtein ? " aa " : " bp ";
- tmp = "";
+ const tmp = "";
  line += StringUtil.lpad(tmp, " ", 3);
  line += StringUtil.rpad(dnaType, " ", 6);
  line += " ";