@teselagen/bio-parsers 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +27 -11
- package/index.mjs +27 -11
- package/index.umd.js +27 -11
- package/package.json +3 -3
- package/src/genbankToJson.js +18 -9
- package/src/utils/validateSequence.js +24 -12
package/index.js
CHANGED
|
@@ -19285,9 +19285,7 @@ const reformatName = /* @__PURE__ */ __name(function(pName) {
|
|
|
19285
19285
|
return pName.toString().replace(/ /g, "_");
|
|
19286
19286
|
}, "reformatName");
|
|
19287
19287
|
function validateSequence(sequence, options = {}) {
|
|
19288
|
-
let {
|
|
19289
|
-
isProtein,
|
|
19290
|
-
isOligo,
|
|
19288
|
+
const {
|
|
19291
19289
|
guessIfProtein,
|
|
19292
19290
|
guessIfProteinOptions,
|
|
19293
19291
|
reformatSeqName,
|
|
@@ -19297,6 +19295,19 @@ function validateSequence(sequence, options = {}) {
|
|
|
19297
19295
|
allowOverflowAnnotations,
|
|
19298
19296
|
coerceFeatureTypes
|
|
19299
19297
|
} = options;
|
|
19298
|
+
[
|
|
19299
|
+
"isDNA",
|
|
19300
|
+
"isOligo",
|
|
19301
|
+
"isRNA",
|
|
19302
|
+
"isDoubleStrandedDNA",
|
|
19303
|
+
"isSingleStrandedDNA",
|
|
19304
|
+
"isDoubleStrandedRNA",
|
|
19305
|
+
"isProtein"
|
|
19306
|
+
].forEach((k) => {
|
|
19307
|
+
if (options[k] !== void 0 && sequence[k] === void 0) {
|
|
19308
|
+
sequence[k] = options[k];
|
|
19309
|
+
}
|
|
19310
|
+
});
|
|
19300
19311
|
const response = {
|
|
19301
19312
|
validatedAndCleanedSequence: {},
|
|
19302
19313
|
messages: []
|
|
@@ -19333,13 +19344,13 @@ function validateSequence(sequence, options = {}) {
|
|
|
19333
19344
|
sequence.sequence = "";
|
|
19334
19345
|
}
|
|
19335
19346
|
let validChars;
|
|
19336
|
-
if (isProtein === void 0 && guessIfProtein) {
|
|
19337
|
-
isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19347
|
+
if (sequence.isProtein === void 0 && guessIfProtein) {
|
|
19348
|
+
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19338
19349
|
sequence.sequence,
|
|
19339
19350
|
guessIfProteinOptions
|
|
19340
19351
|
);
|
|
19341
19352
|
}
|
|
19342
|
-
if (isProtein) {
|
|
19353
|
+
if (sequence.isProtein) {
|
|
19343
19354
|
validChars = filterAminoAcidSequenceString(sequence.sequence);
|
|
19344
19355
|
if (validChars !== sequence.sequence) {
|
|
19345
19356
|
sequence.sequence = validChars;
|
|
@@ -19355,13 +19366,13 @@ function validateSequence(sequence, options = {}) {
|
|
|
19355
19366
|
sequence.proteinSize = sequence.proteinSequence.length;
|
|
19356
19367
|
} else {
|
|
19357
19368
|
const temp = sequence.sequence;
|
|
19358
|
-
if (!isOligo) {
|
|
19369
|
+
if (!sequence.isOligo) {
|
|
19359
19370
|
sequence.sequence = sequence.sequence.replace(
|
|
19360
19371
|
/u/gi,
|
|
19361
19372
|
(u) => u === "U" ? "T" : "t"
|
|
19362
19373
|
);
|
|
19363
19374
|
}
|
|
19364
|
-
if (temp !== sequence.sequence) {
|
|
19375
|
+
if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
|
|
19365
19376
|
sequence.type = "RNA";
|
|
19366
19377
|
sequence.sequence = temp;
|
|
19367
19378
|
} else {
|
|
@@ -19376,7 +19387,7 @@ function validateSequence(sequence, options = {}) {
|
|
|
19376
19387
|
}
|
|
19377
19388
|
}
|
|
19378
19389
|
if (!sequence.size) {
|
|
19379
|
-
sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19390
|
+
sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19380
19391
|
}
|
|
19381
19392
|
let circularityExplicitlyDefined;
|
|
19382
19393
|
if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
|
|
@@ -19949,10 +19960,13 @@ function genbankToJson(string, options = {}) {
|
|
|
19949
19960
|
}
|
|
19950
19961
|
options.sequenceTypeFromLocus = item;
|
|
19951
19962
|
if (item.match(/ss-dna/i)) {
|
|
19963
|
+
options.isDNA = true;
|
|
19952
19964
|
options.isSingleStrandedDNA = true;
|
|
19953
|
-
}
|
|
19954
|
-
if (item.match(/rna/i)) {
|
|
19965
|
+
} else if (item.match(/rna/i)) {
|
|
19955
19966
|
options.isRna = true;
|
|
19967
|
+
} else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
|
|
19968
|
+
options.isDNA = true;
|
|
19969
|
+
options.isDoubleStrandedDNA = true;
|
|
19956
19970
|
}
|
|
19957
19971
|
if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
|
|
19958
19972
|
options.isDoubleStrandedRNA = true;
|
|
@@ -19967,6 +19981,8 @@ function genbankToJson(string, options = {}) {
|
|
|
19967
19981
|
}
|
|
19968
19982
|
result.parsedSequence.gbDivision = gbDivision;
|
|
19969
19983
|
result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
|
|
19984
|
+
result.parsedSequence.isDNA = options.isDNA;
|
|
19985
|
+
result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
|
|
19970
19986
|
result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
|
|
19971
19987
|
result.parsedSequence.isRna = options.isRna;
|
|
19972
19988
|
result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
|
package/index.mjs
CHANGED
|
@@ -19283,9 +19283,7 @@ const reformatName = /* @__PURE__ */ __name(function(pName) {
|
|
|
19283
19283
|
return pName.toString().replace(/ /g, "_");
|
|
19284
19284
|
}, "reformatName");
|
|
19285
19285
|
function validateSequence(sequence, options = {}) {
|
|
19286
|
-
let {
|
|
19287
|
-
isProtein,
|
|
19288
|
-
isOligo,
|
|
19286
|
+
const {
|
|
19289
19287
|
guessIfProtein,
|
|
19290
19288
|
guessIfProteinOptions,
|
|
19291
19289
|
reformatSeqName,
|
|
@@ -19295,6 +19293,19 @@ function validateSequence(sequence, options = {}) {
|
|
|
19295
19293
|
allowOverflowAnnotations,
|
|
19296
19294
|
coerceFeatureTypes
|
|
19297
19295
|
} = options;
|
|
19296
|
+
[
|
|
19297
|
+
"isDNA",
|
|
19298
|
+
"isOligo",
|
|
19299
|
+
"isRNA",
|
|
19300
|
+
"isDoubleStrandedDNA",
|
|
19301
|
+
"isSingleStrandedDNA",
|
|
19302
|
+
"isDoubleStrandedRNA",
|
|
19303
|
+
"isProtein"
|
|
19304
|
+
].forEach((k) => {
|
|
19305
|
+
if (options[k] !== void 0 && sequence[k] === void 0) {
|
|
19306
|
+
sequence[k] = options[k];
|
|
19307
|
+
}
|
|
19308
|
+
});
|
|
19298
19309
|
const response = {
|
|
19299
19310
|
validatedAndCleanedSequence: {},
|
|
19300
19311
|
messages: []
|
|
@@ -19331,13 +19342,13 @@ function validateSequence(sequence, options = {}) {
|
|
|
19331
19342
|
sequence.sequence = "";
|
|
19332
19343
|
}
|
|
19333
19344
|
let validChars;
|
|
19334
|
-
if (isProtein === void 0 && guessIfProtein) {
|
|
19335
|
-
isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19345
|
+
if (sequence.isProtein === void 0 && guessIfProtein) {
|
|
19346
|
+
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19336
19347
|
sequence.sequence,
|
|
19337
19348
|
guessIfProteinOptions
|
|
19338
19349
|
);
|
|
19339
19350
|
}
|
|
19340
|
-
if (isProtein) {
|
|
19351
|
+
if (sequence.isProtein) {
|
|
19341
19352
|
validChars = filterAminoAcidSequenceString(sequence.sequence);
|
|
19342
19353
|
if (validChars !== sequence.sequence) {
|
|
19343
19354
|
sequence.sequence = validChars;
|
|
@@ -19353,13 +19364,13 @@ function validateSequence(sequence, options = {}) {
|
|
|
19353
19364
|
sequence.proteinSize = sequence.proteinSequence.length;
|
|
19354
19365
|
} else {
|
|
19355
19366
|
const temp = sequence.sequence;
|
|
19356
|
-
if (!isOligo) {
|
|
19367
|
+
if (!sequence.isOligo) {
|
|
19357
19368
|
sequence.sequence = sequence.sequence.replace(
|
|
19358
19369
|
/u/gi,
|
|
19359
19370
|
(u) => u === "U" ? "T" : "t"
|
|
19360
19371
|
);
|
|
19361
19372
|
}
|
|
19362
|
-
if (temp !== sequence.sequence) {
|
|
19373
|
+
if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
|
|
19363
19374
|
sequence.type = "RNA";
|
|
19364
19375
|
sequence.sequence = temp;
|
|
19365
19376
|
} else {
|
|
@@ -19374,7 +19385,7 @@ function validateSequence(sequence, options = {}) {
|
|
|
19374
19385
|
}
|
|
19375
19386
|
}
|
|
19376
19387
|
if (!sequence.size) {
|
|
19377
|
-
sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19388
|
+
sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19378
19389
|
}
|
|
19379
19390
|
let circularityExplicitlyDefined;
|
|
19380
19391
|
if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
|
|
@@ -19947,10 +19958,13 @@ function genbankToJson(string, options = {}) {
|
|
|
19947
19958
|
}
|
|
19948
19959
|
options.sequenceTypeFromLocus = item;
|
|
19949
19960
|
if (item.match(/ss-dna/i)) {
|
|
19961
|
+
options.isDNA = true;
|
|
19950
19962
|
options.isSingleStrandedDNA = true;
|
|
19951
|
-
}
|
|
19952
|
-
if (item.match(/rna/i)) {
|
|
19963
|
+
} else if (item.match(/rna/i)) {
|
|
19953
19964
|
options.isRna = true;
|
|
19965
|
+
} else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
|
|
19966
|
+
options.isDNA = true;
|
|
19967
|
+
options.isDoubleStrandedDNA = true;
|
|
19954
19968
|
}
|
|
19955
19969
|
if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
|
|
19956
19970
|
options.isDoubleStrandedRNA = true;
|
|
@@ -19965,6 +19979,8 @@ function genbankToJson(string, options = {}) {
|
|
|
19965
19979
|
}
|
|
19966
19980
|
result.parsedSequence.gbDivision = gbDivision;
|
|
19967
19981
|
result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
|
|
19982
|
+
result.parsedSequence.isDNA = options.isDNA;
|
|
19983
|
+
result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
|
|
19968
19984
|
result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
|
|
19969
19985
|
result.parsedSequence.isRna = options.isRna;
|
|
19970
19986
|
result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
|
package/index.umd.js
CHANGED
|
@@ -19287,9 +19287,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19287
19287
|
return pName.toString().replace(/ /g, "_");
|
|
19288
19288
|
}, "reformatName");
|
|
19289
19289
|
function validateSequence(sequence, options = {}) {
|
|
19290
|
-
let {
|
|
19291
|
-
isProtein,
|
|
19292
|
-
isOligo,
|
|
19290
|
+
const {
|
|
19293
19291
|
guessIfProtein,
|
|
19294
19292
|
guessIfProteinOptions,
|
|
19295
19293
|
reformatSeqName,
|
|
@@ -19299,6 +19297,19 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19299
19297
|
allowOverflowAnnotations,
|
|
19300
19298
|
coerceFeatureTypes
|
|
19301
19299
|
} = options;
|
|
19300
|
+
[
|
|
19301
|
+
"isDNA",
|
|
19302
|
+
"isOligo",
|
|
19303
|
+
"isRNA",
|
|
19304
|
+
"isDoubleStrandedDNA",
|
|
19305
|
+
"isSingleStrandedDNA",
|
|
19306
|
+
"isDoubleStrandedRNA",
|
|
19307
|
+
"isProtein"
|
|
19308
|
+
].forEach((k) => {
|
|
19309
|
+
if (options[k] !== void 0 && sequence[k] === void 0) {
|
|
19310
|
+
sequence[k] = options[k];
|
|
19311
|
+
}
|
|
19312
|
+
});
|
|
19302
19313
|
const response = {
|
|
19303
19314
|
validatedAndCleanedSequence: {},
|
|
19304
19315
|
messages: []
|
|
@@ -19335,13 +19346,13 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19335
19346
|
sequence.sequence = "";
|
|
19336
19347
|
}
|
|
19337
19348
|
let validChars;
|
|
19338
|
-
if (isProtein === void 0 && guessIfProtein) {
|
|
19339
|
-
isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19349
|
+
if (sequence.isProtein === void 0 && guessIfProtein) {
|
|
19350
|
+
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19340
19351
|
sequence.sequence,
|
|
19341
19352
|
guessIfProteinOptions
|
|
19342
19353
|
);
|
|
19343
19354
|
}
|
|
19344
|
-
if (isProtein) {
|
|
19355
|
+
if (sequence.isProtein) {
|
|
19345
19356
|
validChars = filterAminoAcidSequenceString(sequence.sequence);
|
|
19346
19357
|
if (validChars !== sequence.sequence) {
|
|
19347
19358
|
sequence.sequence = validChars;
|
|
@@ -19357,13 +19368,13 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19357
19368
|
sequence.proteinSize = sequence.proteinSequence.length;
|
|
19358
19369
|
} else {
|
|
19359
19370
|
const temp = sequence.sequence;
|
|
19360
|
-
if (!isOligo) {
|
|
19371
|
+
if (!sequence.isOligo) {
|
|
19361
19372
|
sequence.sequence = sequence.sequence.replace(
|
|
19362
19373
|
/u/gi,
|
|
19363
19374
|
(u) => u === "U" ? "T" : "t"
|
|
19364
19375
|
);
|
|
19365
19376
|
}
|
|
19366
|
-
if (temp !== sequence.sequence) {
|
|
19377
|
+
if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
|
|
19367
19378
|
sequence.type = "RNA";
|
|
19368
19379
|
sequence.sequence = temp;
|
|
19369
19380
|
} else {
|
|
@@ -19378,7 +19389,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19378
19389
|
}
|
|
19379
19390
|
}
|
|
19380
19391
|
if (!sequence.size) {
|
|
19381
|
-
sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19392
|
+
sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
|
|
19382
19393
|
}
|
|
19383
19394
|
let circularityExplicitlyDefined;
|
|
19384
19395
|
if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
|
|
@@ -19951,10 +19962,13 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19951
19962
|
}
|
|
19952
19963
|
options.sequenceTypeFromLocus = item;
|
|
19953
19964
|
if (item.match(/ss-dna/i)) {
|
|
19965
|
+
options.isDNA = true;
|
|
19954
19966
|
options.isSingleStrandedDNA = true;
|
|
19955
|
-
}
|
|
19956
|
-
if (item.match(/rna/i)) {
|
|
19967
|
+
} else if (item.match(/rna/i)) {
|
|
19957
19968
|
options.isRna = true;
|
|
19969
|
+
} else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
|
|
19970
|
+
options.isDNA = true;
|
|
19971
|
+
options.isDoubleStrandedDNA = true;
|
|
19958
19972
|
}
|
|
19959
19973
|
if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
|
|
19960
19974
|
options.isDoubleStrandedRNA = true;
|
|
@@ -19969,6 +19983,8 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19969
19983
|
}
|
|
19970
19984
|
result.parsedSequence.gbDivision = gbDivision;
|
|
19971
19985
|
result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
|
|
19986
|
+
result.parsedSequence.isDNA = options.isDNA;
|
|
19987
|
+
result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
|
|
19972
19988
|
result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
|
|
19973
19989
|
result.parsedSequence.isRna = options.isRna;
|
|
19974
19990
|
result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@teselagen/bio-parsers",
|
|
3
|
-
"version": "0.3.4",
|
|
3
|
+
"version": "0.3.5",
|
|
4
4
|
"type": "commonjs",
|
|
5
5
|
"dependencies": {
|
|
6
|
-
"@teselagen/sequence-utils": "0.3.
|
|
7
|
-
"@teselagen/range-utils": "0.3.
|
|
6
|
+
"@teselagen/sequence-utils": "0.3.5",
|
|
7
|
+
"@teselagen/range-utils": "0.3.5",
|
|
8
8
|
"@gmod/gff": "^1.2.1",
|
|
9
9
|
"buffer": "^6.0.3",
|
|
10
10
|
"bufferpack": "^0.0.6",
|
package/src/genbankToJson.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/* eslint-disable no-var*/
|
|
2
2
|
import { convertAACaretPositionOrRangeToDna } from "@teselagen/sequence-utils";
|
|
3
3
|
|
|
4
|
-
import
|
|
4
|
+
import { gbDivisions, untitledSequenceName } from "./utils/constants";
|
|
5
5
|
import flattenSequenceArray from "./utils/flattenSequenceArray";
|
|
6
6
|
import validateSequenceArray from "./utils/validateSequenceArray";
|
|
7
7
|
import splitStringIntoLines from "./utils/splitStringIntoLines.js";
|
|
@@ -11,7 +11,7 @@ import createInitialSequence from "./utils/createInitialSequence";
|
|
|
11
11
|
function genbankToJson(string, options = {}) {
|
|
12
12
|
const {
|
|
13
13
|
inclusive1BasedStart,
|
|
14
|
-
inclusive1BasedEnd
|
|
14
|
+
inclusive1BasedEnd
|
|
15
15
|
//these are also valid options:
|
|
16
16
|
// primersAsFeatures,
|
|
17
17
|
// sequenceTypeFromLocus,
|
|
@@ -43,7 +43,7 @@ function genbankToJson(string, options = {}) {
|
|
|
43
43
|
BASE_COUNT_TAG: "BASE COUNT",
|
|
44
44
|
//CONTIG_TAG: "CONTIG"
|
|
45
45
|
ORIGIN_TAG: "ORIGIN",
|
|
46
|
-
END_SEQUENCE_TAG: "//"
|
|
46
|
+
END_SEQUENCE_TAG: "//"
|
|
47
47
|
};
|
|
48
48
|
let hasFoundLocus = false;
|
|
49
49
|
let featureLocationIndentation;
|
|
@@ -214,7 +214,7 @@ function genbankToJson(string, options = {}) {
|
|
|
214
214
|
console.error("Error trying to parse file as .gb:", e);
|
|
215
215
|
result = {
|
|
216
216
|
success: false,
|
|
217
|
-
messages: ["Import Error: Invalid File"]
|
|
217
|
+
messages: ["Import Error: Invalid File"]
|
|
218
218
|
};
|
|
219
219
|
}
|
|
220
220
|
|
|
@@ -338,18 +338,25 @@ function genbankToJson(string, options = {}) {
|
|
|
338
338
|
|
|
339
339
|
if (
|
|
340
340
|
j === 4 &&
|
|
341
|
-
(item.match(/ds-dna/i) ||
|
|
341
|
+
(item.match(/ds-dna/i) ||
|
|
342
|
+
item.match(/ss-dna/i) ||
|
|
343
|
+
item.match(/dna/i) ||
|
|
344
|
+
item.match(/rna/i))
|
|
342
345
|
) {
|
|
343
346
|
if (options.isProtein === undefined) {
|
|
344
347
|
options.isProtein = false;
|
|
345
348
|
}
|
|
346
349
|
options.sequenceTypeFromLocus = item;
|
|
347
350
|
if (item.match(/ss-dna/i)) {
|
|
351
|
+
options.isDNA = true;
|
|
348
352
|
options.isSingleStrandedDNA = true;
|
|
349
|
-
}
|
|
350
|
-
if (item.match(/rna/i)) {
|
|
353
|
+
} else if (item.match(/rna/i)) {
|
|
351
354
|
options.isRna = true;
|
|
355
|
+
} else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
|
|
356
|
+
options.isDNA = true;
|
|
357
|
+
options.isDoubleStrandedDNA = true;
|
|
352
358
|
}
|
|
359
|
+
|
|
353
360
|
if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
|
|
354
361
|
options.isDoubleStrandedRNA = true;
|
|
355
362
|
}
|
|
@@ -373,6 +380,8 @@ function genbankToJson(string, options = {}) {
|
|
|
373
380
|
}
|
|
374
381
|
result.parsedSequence.gbDivision = gbDivision;
|
|
375
382
|
result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
|
|
383
|
+
result.parsedSequence.isDNA = options.isDNA;
|
|
384
|
+
result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
|
|
376
385
|
result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
|
|
377
386
|
result.parsedSequence.isRna = options.isRna;
|
|
378
387
|
result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
|
|
@@ -466,7 +475,7 @@ function genbankToJson(string, options = {}) {
|
|
|
466
475
|
function newFeature() {
|
|
467
476
|
result.parsedSequence.features.push({
|
|
468
477
|
locations: [],
|
|
469
|
-
notes: {}
|
|
478
|
+
notes: {}
|
|
470
479
|
});
|
|
471
480
|
}
|
|
472
481
|
|
|
@@ -504,7 +513,7 @@ function genbankToJson(string, options = {}) {
|
|
|
504
513
|
}
|
|
505
514
|
const location = {
|
|
506
515
|
start: start,
|
|
507
|
-
end: end
|
|
516
|
+
end: end
|
|
508
517
|
};
|
|
509
518
|
const feat = getCurrentFeature();
|
|
510
519
|
feat.locations.push(
|
package/src/utils/validateSequence.js
CHANGED
|
@@ -3,7 +3,7 @@ import { getFeatureTypes } from "@teselagen/sequence-utils";
|
|
|
3
3
|
import {
|
|
4
4
|
filterAminoAcidSequenceString,
|
|
5
5
|
filterSequenceString,
|
|
6
|
-
guessIfSequenceIsDnaAndNotProtein
|
|
6
|
+
guessIfSequenceIsDnaAndNotProtein
|
|
7
7
|
} from "@teselagen/sequence-utils";
|
|
8
8
|
import { filter, some, upperFirst } from "lodash";
|
|
9
9
|
import pragmasAndTypes from "./pragmasAndTypes.js";
|
|
@@ -22,9 +22,7 @@ import { reformatName } from "./NameUtils.js";
|
|
|
22
22
|
};
|
|
23
23
|
*/
|
|
24
24
|
export default function validateSequence(sequence, options = {}) {
|
|
25
|
-
let {
|
|
26
|
-
isProtein,
|
|
27
|
-
isOligo,
|
|
25
|
+
const {
|
|
28
26
|
guessIfProtein,
|
|
29
27
|
guessIfProteinOptions,
|
|
30
28
|
reformatSeqName,
|
|
@@ -32,11 +30,25 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
32
30
|
inclusive1BasedEnd,
|
|
33
31
|
additionalValidChars,
|
|
34
32
|
allowOverflowAnnotations,
|
|
35
|
-
coerceFeatureTypes
|
|
33
|
+
coerceFeatureTypes
|
|
36
34
|
} = options;
|
|
35
|
+
[
|
|
36
|
+
"isDNA",
|
|
37
|
+
"isOligo",
|
|
38
|
+
"isRNA",
|
|
39
|
+
"isDoubleStrandedDNA",
|
|
40
|
+
"isSingleStrandedDNA",
|
|
41
|
+
"isDoubleStrandedRNA",
|
|
42
|
+
"isProtein"
|
|
43
|
+
].forEach((k) => {
|
|
44
|
+
if (options[k] !== undefined && sequence[k] === undefined) {
|
|
45
|
+
sequence[k] = options[k];
|
|
46
|
+
}
|
|
47
|
+
});
|
|
48
|
+
|
|
37
49
|
const response = {
|
|
38
50
|
validatedAndCleanedSequence: {},
|
|
39
|
-
messages: []
|
|
51
|
+
messages: []
|
|
40
52
|
};
|
|
41
53
|
if (!sequence || typeof sequence !== "object") {
|
|
42
54
|
throw new Error("Invalid sequence");
|
|
@@ -73,13 +85,13 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
73
85
|
sequence.sequence = "";
|
|
74
86
|
}
|
|
75
87
|
let validChars;
|
|
76
|
-
if (isProtein === undefined && guessIfProtein) {
|
|
77
|
-
isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
88
|
+
if (sequence.isProtein === undefined && guessIfProtein) {
|
|
89
|
+
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
78
90
|
sequence.sequence,
|
|
79
91
|
guessIfProteinOptions
|
|
80
92
|
);
|
|
81
93
|
}
|
|
82
|
-
if (isProtein) {
|
|
94
|
+
if (sequence.isProtein) {
|
|
83
95
|
//tnr: add code to strip invalid protein data..
|
|
84
96
|
validChars = filterAminoAcidSequenceString(sequence.sequence);
|
|
85
97
|
if (validChars !== sequence.sequence) {
|
|
@@ -97,12 +109,12 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
97
109
|
} else {
|
|
98
110
|
//todo: this logic won't catch every case of RNA, so we should probably handle RNA conversion at another level..
|
|
99
111
|
const temp = sequence.sequence;
|
|
100
|
-
if (!isOligo) {
|
|
112
|
+
if (!sequence.isOligo) {
|
|
101
113
|
sequence.sequence = sequence.sequence.replace(/u/gi, (u) =>
|
|
102
114
|
u === "U" ? "T" : "t"
|
|
103
115
|
);
|
|
104
116
|
}
|
|
105
|
-
if (temp !== sequence.sequence) {
|
|
117
|
+
if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
|
|
106
118
|
sequence.type = "RNA";
|
|
107
119
|
sequence.sequence = temp;
|
|
108
120
|
} else {
|
|
@@ -119,7 +131,7 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
119
131
|
}
|
|
120
132
|
|
|
121
133
|
if (!sequence.size) {
|
|
122
|
-
sequence.size = isProtein
|
|
134
|
+
sequence.size = sequence.isProtein
|
|
123
135
|
? sequence.proteinSequence.length * 3
|
|
124
136
|
: sequence.sequence.length;
|
|
125
137
|
}
|