@teselagen/bio-parsers 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +30 -9
- package/index.mjs +30 -9
- package/index.umd.js +30 -9
- package/package.json +3 -3
- package/src/genbankToJson.js +20 -7
- package/src/utils/validateSequence.js +25 -12
package/index.js
CHANGED
|
@@ -19285,9 +19285,7 @@ const reformatName = /* @__PURE__ */ __name(function(pName) {
|
|
|
19285
19285
|
return pName.toString().replace(/ /g, "_");
|
|
19286
19286
|
}, "reformatName");
|
|
19287
19287
|
function validateSequence(sequence, options = {}) {
|
|
19288
|
-
|
|
19289
|
-
isProtein,
|
|
19290
|
-
isOligo,
|
|
19288
|
+
const {
|
|
19291
19289
|
guessIfProtein,
|
|
19292
19290
|
guessIfProteinOptions,
|
|
19293
19291
|
reformatSeqName,
|
|
@@ -19297,6 +19295,19 @@ function validateSequence(sequence, options = {}) {
|
|
|
19297
19295
|
allowOverflowAnnotations,
|
|
19298
19296
|
coerceFeatureTypes
|
|
19299
19297
|
} = options;
|
|
19298
|
+
[
|
|
19299
|
+
"isDNA",
|
|
19300
|
+
"isOligo",
|
|
19301
|
+
"isRNA",
|
|
19302
|
+
"isDoubleStrandedDNA",
|
|
19303
|
+
"isSingleStrandedDNA",
|
|
19304
|
+
"isDoubleStrandedRNA",
|
|
19305
|
+
"isProtein"
|
|
19306
|
+
].forEach((k) => {
|
|
19307
|
+
if (options[k] !== void 0 && sequence[k] === void 0) {
|
|
19308
|
+
sequence[k] = options[k];
|
|
19309
|
+
}
|
|
19310
|
+
});
|
|
19300
19311
|
const response = {
|
|
19301
19312
|
validatedAndCleanedSequence: {},
|
|
19302
19313
|
messages: []
|
|
@@ -19333,13 +19344,13 @@ function validateSequence(sequence, options = {}) {
|
|
|
19333
19344
|
sequence.sequence = "";
|
|
19334
19345
|
}
|
|
19335
19346
|
let validChars;
|
|
19336
|
-
if (isProtein === void 0 && guessIfProtein) {
|
|
19337
|
-
isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19347
|
+
if (sequence.isProtein === void 0 && guessIfProtein) {
|
|
19348
|
+
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19338
19349
|
sequence.sequence,
|
|
19339
19350
|
guessIfProteinOptions
|
|
19340
19351
|
);
|
|
19341
19352
|
}
|
|
19342
|
-
if (isProtein) {
|
|
19353
|
+
if (sequence.isProtein) {
|
|
19343
19354
|
validChars = filterAminoAcidSequenceString(sequence.sequence);
|
|
19344
19355
|
if (validChars !== sequence.sequence) {
|
|
19345
19356
|
sequence.sequence = validChars;
|
|
@@ -19355,14 +19366,15 @@ function validateSequence(sequence, options = {}) {
|
|
|
19355
19366
|
sequence.proteinSize = sequence.proteinSequence.length;
|
|
19356
19367
|
} else {
|
|
19357
19368
|
const temp = sequence.sequence;
|
|
19358
|
-
if (!isOligo) {
|
|
19369
|
+
if (!sequence.isOligo) {
|
|
19359
19370
|
sequence.sequence = sequence.sequence.replace(
|
|
19360
19371
|
/u/gi,
|
|
19361
19372
|
(u) => u === "U" ? "T" : "t"
|
|
19362
19373
|
);
|
|
19363
19374
|
}
|
|
19364
|
-
if (temp !== sequence.sequence) {
|
|
19375
|
+
if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
|
|
19365
19376
|
sequence.type = "RNA";
|
|
19377
|
+
sequence.sequence = temp;
|
|
19366
19378
|
} else {
|
|
19367
19379
|
sequence.type = "DNA";
|
|
19368
19380
|
}
|
|
@@ -19375,7 +19387,7 @@ function validateSequence(sequence, options = {}) {
|
|
|
19375
19387
|
}
|
|
19376
19388
|
}
|
|
19377
19389
|
if (!sequence.size) {
|
|
19378
|
-
sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19390
|
+
sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19379
19391
|
}
|
|
19380
19392
|
let circularityExplicitlyDefined;
|
|
19381
19393
|
if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
|
|
@@ -19948,7 +19960,13 @@ function genbankToJson(string, options = {}) {
|
|
|
19948
19960
|
}
|
|
19949
19961
|
options.sequenceTypeFromLocus = item;
|
|
19950
19962
|
if (item.match(/ss-dna/i)) {
|
|
19963
|
+
options.isDNA = true;
|
|
19951
19964
|
options.isSingleStrandedDNA = true;
|
|
19965
|
+
} else if (item.match(/rna/i)) {
|
|
19966
|
+
options.isRna = true;
|
|
19967
|
+
} else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
|
|
19968
|
+
options.isDNA = true;
|
|
19969
|
+
options.isDoubleStrandedDNA = true;
|
|
19952
19970
|
}
|
|
19953
19971
|
if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
|
|
19954
19972
|
options.isDoubleStrandedRNA = true;
|
|
@@ -19963,7 +19981,10 @@ function genbankToJson(string, options = {}) {
|
|
|
19963
19981
|
}
|
|
19964
19982
|
result.parsedSequence.gbDivision = gbDivision;
|
|
19965
19983
|
result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
|
|
19984
|
+
result.parsedSequence.isDNA = options.isDNA;
|
|
19985
|
+
result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
|
|
19966
19986
|
result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
|
|
19987
|
+
result.parsedSequence.isRna = options.isRna;
|
|
19967
19988
|
result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
|
|
19968
19989
|
result.parsedSequence.date = date;
|
|
19969
19990
|
result.parsedSequence.circular = circular;
|
package/index.mjs
CHANGED
|
@@ -19283,9 +19283,7 @@ const reformatName = /* @__PURE__ */ __name(function(pName) {
|
|
|
19283
19283
|
return pName.toString().replace(/ /g, "_");
|
|
19284
19284
|
}, "reformatName");
|
|
19285
19285
|
function validateSequence(sequence, options = {}) {
|
|
19286
|
-
|
|
19287
|
-
isProtein,
|
|
19288
|
-
isOligo,
|
|
19286
|
+
const {
|
|
19289
19287
|
guessIfProtein,
|
|
19290
19288
|
guessIfProteinOptions,
|
|
19291
19289
|
reformatSeqName,
|
|
@@ -19295,6 +19293,19 @@ function validateSequence(sequence, options = {}) {
|
|
|
19295
19293
|
allowOverflowAnnotations,
|
|
19296
19294
|
coerceFeatureTypes
|
|
19297
19295
|
} = options;
|
|
19296
|
+
[
|
|
19297
|
+
"isDNA",
|
|
19298
|
+
"isOligo",
|
|
19299
|
+
"isRNA",
|
|
19300
|
+
"isDoubleStrandedDNA",
|
|
19301
|
+
"isSingleStrandedDNA",
|
|
19302
|
+
"isDoubleStrandedRNA",
|
|
19303
|
+
"isProtein"
|
|
19304
|
+
].forEach((k) => {
|
|
19305
|
+
if (options[k] !== void 0 && sequence[k] === void 0) {
|
|
19306
|
+
sequence[k] = options[k];
|
|
19307
|
+
}
|
|
19308
|
+
});
|
|
19298
19309
|
const response = {
|
|
19299
19310
|
validatedAndCleanedSequence: {},
|
|
19300
19311
|
messages: []
|
|
@@ -19331,13 +19342,13 @@ function validateSequence(sequence, options = {}) {
|
|
|
19331
19342
|
sequence.sequence = "";
|
|
19332
19343
|
}
|
|
19333
19344
|
let validChars;
|
|
19334
|
-
if (isProtein === void 0 && guessIfProtein) {
|
|
19335
|
-
isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19345
|
+
if (sequence.isProtein === void 0 && guessIfProtein) {
|
|
19346
|
+
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19336
19347
|
sequence.sequence,
|
|
19337
19348
|
guessIfProteinOptions
|
|
19338
19349
|
);
|
|
19339
19350
|
}
|
|
19340
|
-
if (isProtein) {
|
|
19351
|
+
if (sequence.isProtein) {
|
|
19341
19352
|
validChars = filterAminoAcidSequenceString(sequence.sequence);
|
|
19342
19353
|
if (validChars !== sequence.sequence) {
|
|
19343
19354
|
sequence.sequence = validChars;
|
|
@@ -19353,14 +19364,15 @@ function validateSequence(sequence, options = {}) {
|
|
|
19353
19364
|
sequence.proteinSize = sequence.proteinSequence.length;
|
|
19354
19365
|
} else {
|
|
19355
19366
|
const temp = sequence.sequence;
|
|
19356
|
-
if (!isOligo) {
|
|
19367
|
+
if (!sequence.isOligo) {
|
|
19357
19368
|
sequence.sequence = sequence.sequence.replace(
|
|
19358
19369
|
/u/gi,
|
|
19359
19370
|
(u) => u === "U" ? "T" : "t"
|
|
19360
19371
|
);
|
|
19361
19372
|
}
|
|
19362
|
-
if (temp !== sequence.sequence) {
|
|
19373
|
+
if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
|
|
19363
19374
|
sequence.type = "RNA";
|
|
19375
|
+
sequence.sequence = temp;
|
|
19364
19376
|
} else {
|
|
19365
19377
|
sequence.type = "DNA";
|
|
19366
19378
|
}
|
|
@@ -19373,7 +19385,7 @@ function validateSequence(sequence, options = {}) {
|
|
|
19373
19385
|
}
|
|
19374
19386
|
}
|
|
19375
19387
|
if (!sequence.size) {
|
|
19376
|
-
sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19388
|
+
sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19377
19389
|
}
|
|
19378
19390
|
let circularityExplicitlyDefined;
|
|
19379
19391
|
if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
|
|
@@ -19946,7 +19958,13 @@ function genbankToJson(string, options = {}) {
|
|
|
19946
19958
|
}
|
|
19947
19959
|
options.sequenceTypeFromLocus = item;
|
|
19948
19960
|
if (item.match(/ss-dna/i)) {
|
|
19961
|
+
options.isDNA = true;
|
|
19949
19962
|
options.isSingleStrandedDNA = true;
|
|
19963
|
+
} else if (item.match(/rna/i)) {
|
|
19964
|
+
options.isRna = true;
|
|
19965
|
+
} else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
|
|
19966
|
+
options.isDNA = true;
|
|
19967
|
+
options.isDoubleStrandedDNA = true;
|
|
19950
19968
|
}
|
|
19951
19969
|
if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
|
|
19952
19970
|
options.isDoubleStrandedRNA = true;
|
|
@@ -19961,7 +19979,10 @@ function genbankToJson(string, options = {}) {
|
|
|
19961
19979
|
}
|
|
19962
19980
|
result.parsedSequence.gbDivision = gbDivision;
|
|
19963
19981
|
result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
|
|
19982
|
+
result.parsedSequence.isDNA = options.isDNA;
|
|
19983
|
+
result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
|
|
19964
19984
|
result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
|
|
19985
|
+
result.parsedSequence.isRna = options.isRna;
|
|
19965
19986
|
result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
|
|
19966
19987
|
result.parsedSequence.date = date;
|
|
19967
19988
|
result.parsedSequence.circular = circular;
|
package/index.umd.js
CHANGED
|
@@ -19287,9 +19287,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19287
19287
|
return pName.toString().replace(/ /g, "_");
|
|
19288
19288
|
}, "reformatName");
|
|
19289
19289
|
function validateSequence(sequence, options = {}) {
|
|
19290
|
-
|
|
19291
|
-
isProtein,
|
|
19292
|
-
isOligo,
|
|
19290
|
+
const {
|
|
19293
19291
|
guessIfProtein,
|
|
19294
19292
|
guessIfProteinOptions,
|
|
19295
19293
|
reformatSeqName,
|
|
@@ -19299,6 +19297,19 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19299
19297
|
allowOverflowAnnotations,
|
|
19300
19298
|
coerceFeatureTypes
|
|
19301
19299
|
} = options;
|
|
19300
|
+
[
|
|
19301
|
+
"isDNA",
|
|
19302
|
+
"isOligo",
|
|
19303
|
+
"isRNA",
|
|
19304
|
+
"isDoubleStrandedDNA",
|
|
19305
|
+
"isSingleStrandedDNA",
|
|
19306
|
+
"isDoubleStrandedRNA",
|
|
19307
|
+
"isProtein"
|
|
19308
|
+
].forEach((k) => {
|
|
19309
|
+
if (options[k] !== void 0 && sequence[k] === void 0) {
|
|
19310
|
+
sequence[k] = options[k];
|
|
19311
|
+
}
|
|
19312
|
+
});
|
|
19302
19313
|
const response = {
|
|
19303
19314
|
validatedAndCleanedSequence: {},
|
|
19304
19315
|
messages: []
|
|
@@ -19335,13 +19346,13 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19335
19346
|
sequence.sequence = "";
|
|
19336
19347
|
}
|
|
19337
19348
|
let validChars;
|
|
19338
|
-
if (isProtein === void 0 && guessIfProtein) {
|
|
19339
|
-
isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19349
|
+
if (sequence.isProtein === void 0 && guessIfProtein) {
|
|
19350
|
+
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19340
19351
|
sequence.sequence,
|
|
19341
19352
|
guessIfProteinOptions
|
|
19342
19353
|
);
|
|
19343
19354
|
}
|
|
19344
|
-
if (isProtein) {
|
|
19355
|
+
if (sequence.isProtein) {
|
|
19345
19356
|
validChars = filterAminoAcidSequenceString(sequence.sequence);
|
|
19346
19357
|
if (validChars !== sequence.sequence) {
|
|
19347
19358
|
sequence.sequence = validChars;
|
|
@@ -19357,14 +19368,15 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19357
19368
|
sequence.proteinSize = sequence.proteinSequence.length;
|
|
19358
19369
|
} else {
|
|
19359
19370
|
const temp = sequence.sequence;
|
|
19360
|
-
if (!isOligo) {
|
|
19371
|
+
if (!sequence.isOligo) {
|
|
19361
19372
|
sequence.sequence = sequence.sequence.replace(
|
|
19362
19373
|
/u/gi,
|
|
19363
19374
|
(u) => u === "U" ? "T" : "t"
|
|
19364
19375
|
);
|
|
19365
19376
|
}
|
|
19366
|
-
if (temp !== sequence.sequence) {
|
|
19377
|
+
if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
|
|
19367
19378
|
sequence.type = "RNA";
|
|
19379
|
+
sequence.sequence = temp;
|
|
19368
19380
|
} else {
|
|
19369
19381
|
sequence.type = "DNA";
|
|
19370
19382
|
}
|
|
@@ -19377,7 +19389,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19377
19389
|
}
|
|
19378
19390
|
}
|
|
19379
19391
|
if (!sequence.size) {
|
|
19380
|
-
sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19392
|
+
sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19381
19393
|
}
|
|
19382
19394
|
let circularityExplicitlyDefined;
|
|
19383
19395
|
if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
|
|
@@ -19950,7 +19962,13 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19950
19962
|
}
|
|
19951
19963
|
options.sequenceTypeFromLocus = item;
|
|
19952
19964
|
if (item.match(/ss-dna/i)) {
|
|
19965
|
+
options.isDNA = true;
|
|
19953
19966
|
options.isSingleStrandedDNA = true;
|
|
19967
|
+
} else if (item.match(/rna/i)) {
|
|
19968
|
+
options.isRna = true;
|
|
19969
|
+
} else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
|
|
19970
|
+
options.isDNA = true;
|
|
19971
|
+
options.isDoubleStrandedDNA = true;
|
|
19954
19972
|
}
|
|
19955
19973
|
if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
|
|
19956
19974
|
options.isDoubleStrandedRNA = true;
|
|
@@ -19965,7 +19983,10 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19965
19983
|
}
|
|
19966
19984
|
result.parsedSequence.gbDivision = gbDivision;
|
|
19967
19985
|
result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
|
|
19986
|
+
result.parsedSequence.isDNA = options.isDNA;
|
|
19987
|
+
result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
|
|
19968
19988
|
result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
|
|
19989
|
+
result.parsedSequence.isRna = options.isRna;
|
|
19969
19990
|
result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
|
|
19970
19991
|
result.parsedSequence.date = date;
|
|
19971
19992
|
result.parsedSequence.circular = circular;
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@teselagen/bio-parsers",
|
|
3
|
-
"version": "0.3.3",
|
|
3
|
+
"version": "0.3.5",
|
|
4
4
|
"type": "commonjs",
|
|
5
5
|
"dependencies": {
|
|
6
|
-
"@teselagen/sequence-utils": "0.3.
|
|
7
|
-
"@teselagen/range-utils": "0.3.
|
|
6
|
+
"@teselagen/sequence-utils": "0.3.5",
|
|
7
|
+
"@teselagen/range-utils": "0.3.5",
|
|
8
8
|
"@gmod/gff": "^1.2.1",
|
|
9
9
|
"buffer": "^6.0.3",
|
|
10
10
|
"bufferpack": "^0.0.6",
|
package/src/genbankToJson.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/* eslint-disable no-var*/
|
|
2
2
|
import { convertAACaretPositionOrRangeToDna } from "@teselagen/sequence-utils";
|
|
3
3
|
|
|
4
|
-
import
|
|
4
|
+
import { gbDivisions, untitledSequenceName } from "./utils/constants";
|
|
5
5
|
import flattenSequenceArray from "./utils/flattenSequenceArray";
|
|
6
6
|
import validateSequenceArray from "./utils/validateSequenceArray";
|
|
7
7
|
import splitStringIntoLines from "./utils/splitStringIntoLines.js";
|
|
@@ -11,7 +11,7 @@ import createInitialSequence from "./utils/createInitialSequence";
|
|
|
11
11
|
function genbankToJson(string, options = {}) {
|
|
12
12
|
const {
|
|
13
13
|
inclusive1BasedStart,
|
|
14
|
-
inclusive1BasedEnd
|
|
14
|
+
inclusive1BasedEnd
|
|
15
15
|
//these are also valid options:
|
|
16
16
|
// primersAsFeatures,
|
|
17
17
|
// sequenceTypeFromLocus,
|
|
@@ -43,7 +43,7 @@ function genbankToJson(string, options = {}) {
|
|
|
43
43
|
BASE_COUNT_TAG: "BASE COUNT",
|
|
44
44
|
//CONTIG_TAG: "CONTIG"
|
|
45
45
|
ORIGIN_TAG: "ORIGIN",
|
|
46
|
-
END_SEQUENCE_TAG: "//"
|
|
46
|
+
END_SEQUENCE_TAG: "//"
|
|
47
47
|
};
|
|
48
48
|
let hasFoundLocus = false;
|
|
49
49
|
let featureLocationIndentation;
|
|
@@ -214,7 +214,7 @@ function genbankToJson(string, options = {}) {
|
|
|
214
214
|
console.error("Error trying to parse file as .gb:", e);
|
|
215
215
|
result = {
|
|
216
216
|
success: false,
|
|
217
|
-
messages: ["Import Error: Invalid File"]
|
|
217
|
+
messages: ["Import Error: Invalid File"]
|
|
218
218
|
};
|
|
219
219
|
}
|
|
220
220
|
|
|
@@ -338,15 +338,25 @@ function genbankToJson(string, options = {}) {
|
|
|
338
338
|
|
|
339
339
|
if (
|
|
340
340
|
j === 4 &&
|
|
341
|
-
(item.match(/ds-dna/i) ||
|
|
341
|
+
(item.match(/ds-dna/i) ||
|
|
342
|
+
item.match(/ss-dna/i) ||
|
|
343
|
+
item.match(/dna/i) ||
|
|
344
|
+
item.match(/rna/i))
|
|
342
345
|
) {
|
|
343
346
|
if (options.isProtein === undefined) {
|
|
344
347
|
options.isProtein = false;
|
|
345
348
|
}
|
|
346
349
|
options.sequenceTypeFromLocus = item;
|
|
347
350
|
if (item.match(/ss-dna/i)) {
|
|
351
|
+
options.isDNA = true;
|
|
348
352
|
options.isSingleStrandedDNA = true;
|
|
353
|
+
} else if (item.match(/rna/i)) {
|
|
354
|
+
options.isRna = true;
|
|
355
|
+
} else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
|
|
356
|
+
options.isDNA = true;
|
|
357
|
+
options.isDoubleStrandedDNA = true;
|
|
349
358
|
}
|
|
359
|
+
|
|
350
360
|
if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
|
|
351
361
|
options.isDoubleStrandedRNA = true;
|
|
352
362
|
}
|
|
@@ -370,7 +380,10 @@ function genbankToJson(string, options = {}) {
|
|
|
370
380
|
}
|
|
371
381
|
result.parsedSequence.gbDivision = gbDivision;
|
|
372
382
|
result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
|
|
383
|
+
result.parsedSequence.isDNA = options.isDNA;
|
|
384
|
+
result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
|
|
373
385
|
result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
|
|
386
|
+
result.parsedSequence.isRna = options.isRna;
|
|
374
387
|
result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
|
|
375
388
|
result.parsedSequence.date = date;
|
|
376
389
|
result.parsedSequence.circular = circular;
|
|
@@ -462,7 +475,7 @@ function genbankToJson(string, options = {}) {
|
|
|
462
475
|
function newFeature() {
|
|
463
476
|
result.parsedSequence.features.push({
|
|
464
477
|
locations: [],
|
|
465
|
-
notes: {}
|
|
478
|
+
notes: {}
|
|
466
479
|
});
|
|
467
480
|
}
|
|
468
481
|
|
|
@@ -500,7 +513,7 @@ function genbankToJson(string, options = {}) {
|
|
|
500
513
|
}
|
|
501
514
|
const location = {
|
|
502
515
|
start: start,
|
|
503
|
-
end: end
|
|
516
|
+
end: end
|
|
504
517
|
};
|
|
505
518
|
const feat = getCurrentFeature();
|
|
506
519
|
feat.locations.push(
|
package/src/utils/validateSequence.js
CHANGED
|
@@ -3,7 +3,7 @@ import { getFeatureTypes } from "@teselagen/sequence-utils";
|
|
|
3
3
|
import {
|
|
4
4
|
filterAminoAcidSequenceString,
|
|
5
5
|
filterSequenceString,
|
|
6
|
-
guessIfSequenceIsDnaAndNotProtein
|
|
6
|
+
guessIfSequenceIsDnaAndNotProtein
|
|
7
7
|
} from "@teselagen/sequence-utils";
|
|
8
8
|
import { filter, some, upperFirst } from "lodash";
|
|
9
9
|
import pragmasAndTypes from "./pragmasAndTypes.js";
|
|
@@ -22,9 +22,7 @@ import { reformatName } from "./NameUtils.js";
|
|
|
22
22
|
};
|
|
23
23
|
*/
|
|
24
24
|
export default function validateSequence(sequence, options = {}) {
|
|
25
|
-
|
|
26
|
-
isProtein,
|
|
27
|
-
isOligo,
|
|
25
|
+
const {
|
|
28
26
|
guessIfProtein,
|
|
29
27
|
guessIfProteinOptions,
|
|
30
28
|
reformatSeqName,
|
|
@@ -32,11 +30,25 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
32
30
|
inclusive1BasedEnd,
|
|
33
31
|
additionalValidChars,
|
|
34
32
|
allowOverflowAnnotations,
|
|
35
|
-
coerceFeatureTypes
|
|
33
|
+
coerceFeatureTypes
|
|
36
34
|
} = options;
|
|
35
|
+
[
|
|
36
|
+
"isDNA",
|
|
37
|
+
"isOligo",
|
|
38
|
+
"isRNA",
|
|
39
|
+
"isDoubleStrandedDNA",
|
|
40
|
+
"isSingleStrandedDNA",
|
|
41
|
+
"isDoubleStrandedRNA",
|
|
42
|
+
"isProtein"
|
|
43
|
+
].forEach((k) => {
|
|
44
|
+
if (options[k] !== undefined && sequence[k] === undefined) {
|
|
45
|
+
sequence[k] = options[k];
|
|
46
|
+
}
|
|
47
|
+
});
|
|
48
|
+
|
|
37
49
|
const response = {
|
|
38
50
|
validatedAndCleanedSequence: {},
|
|
39
|
-
messages: []
|
|
51
|
+
messages: []
|
|
40
52
|
};
|
|
41
53
|
if (!sequence || typeof sequence !== "object") {
|
|
42
54
|
throw new Error("Invalid sequence");
|
|
@@ -73,13 +85,13 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
73
85
|
sequence.sequence = "";
|
|
74
86
|
}
|
|
75
87
|
let validChars;
|
|
76
|
-
if (isProtein === undefined && guessIfProtein) {
|
|
77
|
-
isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
88
|
+
if (sequence.isProtein === undefined && guessIfProtein) {
|
|
89
|
+
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
78
90
|
sequence.sequence,
|
|
79
91
|
guessIfProteinOptions
|
|
80
92
|
);
|
|
81
93
|
}
|
|
82
|
-
if (isProtein) {
|
|
94
|
+
if (sequence.isProtein) {
|
|
83
95
|
//tnr: add code to strip invalid protein data..
|
|
84
96
|
validChars = filterAminoAcidSequenceString(sequence.sequence);
|
|
85
97
|
if (validChars !== sequence.sequence) {
|
|
@@ -97,13 +109,14 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
97
109
|
} else {
|
|
98
110
|
//todo: this logic won't catch every case of RNA, so we should probably handle RNA conversion at another level..
|
|
99
111
|
const temp = sequence.sequence;
|
|
100
|
-
if (!isOligo) {
|
|
112
|
+
if (!sequence.isOligo) {
|
|
101
113
|
sequence.sequence = sequence.sequence.replace(/u/gi, (u) =>
|
|
102
114
|
u === "U" ? "T" : "t"
|
|
103
115
|
);
|
|
104
116
|
}
|
|
105
|
-
if (temp !== sequence.sequence) {
|
|
117
|
+
if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
|
|
106
118
|
sequence.type = "RNA";
|
|
119
|
+
sequence.sequence = temp;
|
|
107
120
|
} else {
|
|
108
121
|
sequence.type = "DNA";
|
|
109
122
|
}
|
|
@@ -118,7 +131,7 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
118
131
|
}
|
|
119
132
|
|
|
120
133
|
if (!sequence.size) {
|
|
121
|
-
sequence.size = isProtein
|
|
134
|
+
sequence.size = sequence.isProtein
|
|
122
135
|
? sequence.proteinSequence.length * 3
|
|
123
136
|
: sequence.sequence.length;
|
|
124
137
|
}
|