@teselagen/bio-parsers 0.3.9 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +127 -61
- package/index.mjs +127 -61
- package/index.umd.js +127 -61
- package/package.json +2 -2
- package/src/utils/validateSequence.js +15 -11
package/index.js
CHANGED
|
@@ -6170,7 +6170,9 @@ lodash.exports;
|
|
|
6170
6170
|
})(lodash, lodash.exports);
|
|
6171
6171
|
var lodashExports = lodash.exports;
|
|
6172
6172
|
const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
|
|
6173
|
+
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6173
6174
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6175
|
+
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
6174
6176
|
const aminoAcidToDegenerateDnaMap = {
|
|
6175
6177
|
"-": "---",
|
|
6176
6178
|
".": "...",
|
|
@@ -10775,7 +10777,7 @@ const proteinAlphabet = {
|
|
|
10775
10777
|
hydrophobicity: 1.8,
|
|
10776
10778
|
colorByFamily: "#00FFFF",
|
|
10777
10779
|
color: "hsl(327.3, 100%, 69%)",
|
|
10778
|
-
mass:
|
|
10780
|
+
mass: 71.0779
|
|
10779
10781
|
},
|
|
10780
10782
|
R: {
|
|
10781
10783
|
value: "R",
|
|
@@ -10784,7 +10786,7 @@ const proteinAlphabet = {
|
|
|
10784
10786
|
hydrophobicity: -4.5,
|
|
10785
10787
|
colorByFamily: "#FFC0CB",
|
|
10786
10788
|
color: "hsl(258.1, 100%, 69%)",
|
|
10787
|
-
mass:
|
|
10789
|
+
mass: 156.18568
|
|
10788
10790
|
},
|
|
10789
10791
|
N: {
|
|
10790
10792
|
value: "N",
|
|
@@ -10793,7 +10795,7 @@ const proteinAlphabet = {
|
|
|
10793
10795
|
hydrophobicity: -3.5,
|
|
10794
10796
|
colorByFamily: "#D3D3D3",
|
|
10795
10797
|
color: "hsl(268.9, 100%, 69%)",
|
|
10796
|
-
mass:
|
|
10798
|
+
mass: 114.10264
|
|
10797
10799
|
},
|
|
10798
10800
|
D: {
|
|
10799
10801
|
value: "D",
|
|
@@ -10802,7 +10804,7 @@ const proteinAlphabet = {
|
|
|
10802
10804
|
hydrophobicity: -3.5,
|
|
10803
10805
|
colorByFamily: "#EE82EE",
|
|
10804
10806
|
color: "hsl(268.9, 100%, 69%)",
|
|
10805
|
-
mass:
|
|
10807
|
+
mass: 115.0874
|
|
10806
10808
|
},
|
|
10807
10809
|
C: {
|
|
10808
10810
|
value: "C",
|
|
@@ -10811,7 +10813,7 @@ const proteinAlphabet = {
|
|
|
10811
10813
|
hydrophobicity: 2.5,
|
|
10812
10814
|
colorByFamily: "#FFFF00",
|
|
10813
10815
|
color: "hsl(335.1, 100%, 69%)",
|
|
10814
|
-
mass:
|
|
10816
|
+
mass: 103.1429
|
|
10815
10817
|
},
|
|
10816
10818
|
E: {
|
|
10817
10819
|
value: "E",
|
|
@@ -10820,7 +10822,7 @@ const proteinAlphabet = {
|
|
|
10820
10822
|
hydrophobicity: -3.5,
|
|
10821
10823
|
colorByFamily: "#EE82EE",
|
|
10822
10824
|
color: "hsl(268.9, 100%, 69%)",
|
|
10823
|
-
mass:
|
|
10825
|
+
mass: 129.11398
|
|
10824
10826
|
},
|
|
10825
10827
|
Q: {
|
|
10826
10828
|
value: "Q",
|
|
@@ -10829,7 +10831,7 @@ const proteinAlphabet = {
|
|
|
10829
10831
|
hydrophobicity: -3.5,
|
|
10830
10832
|
colorByFamily: "#D3D3D3",
|
|
10831
10833
|
color: "hsl(268.9, 100%, 69%)",
|
|
10832
|
-
mass:
|
|
10834
|
+
mass: 128.12922
|
|
10833
10835
|
},
|
|
10834
10836
|
G: {
|
|
10835
10837
|
value: "G",
|
|
@@ -10838,7 +10840,7 @@ const proteinAlphabet = {
|
|
|
10838
10840
|
hydrophobicity: -0.4,
|
|
10839
10841
|
colorByFamily: "#00FFFF",
|
|
10840
10842
|
color: "hsl(303.1, 100%, 69%)",
|
|
10841
|
-
mass:
|
|
10843
|
+
mass: 57.05132
|
|
10842
10844
|
},
|
|
10843
10845
|
H: {
|
|
10844
10846
|
value: "H",
|
|
@@ -10847,7 +10849,7 @@ const proteinAlphabet = {
|
|
|
10847
10849
|
hydrophobicity: -3.2,
|
|
10848
10850
|
colorByFamily: "#FFC0CB",
|
|
10849
10851
|
color: "hsl(272.2, 100%, 69%)",
|
|
10850
|
-
mass:
|
|
10852
|
+
mass: 137.13928
|
|
10851
10853
|
},
|
|
10852
10854
|
I: {
|
|
10853
10855
|
value: "I",
|
|
@@ -10856,7 +10858,7 @@ const proteinAlphabet = {
|
|
|
10856
10858
|
hydrophobicity: 4.5,
|
|
10857
10859
|
colorByFamily: "#00FFFF",
|
|
10858
10860
|
color: "hsl(356.9, 100%, 69%)",
|
|
10859
|
-
mass:
|
|
10861
|
+
mass: 113.15764
|
|
10860
10862
|
},
|
|
10861
10863
|
L: {
|
|
10862
10864
|
value: "L",
|
|
@@ -10865,7 +10867,7 @@ const proteinAlphabet = {
|
|
|
10865
10867
|
hydrophobicity: 3.8,
|
|
10866
10868
|
colorByFamily: "#00FFFF",
|
|
10867
10869
|
color: "hsl(349.4, 100%, 69%)",
|
|
10868
|
-
mass:
|
|
10870
|
+
mass: 113.15764
|
|
10869
10871
|
},
|
|
10870
10872
|
K: {
|
|
10871
10873
|
value: "K",
|
|
@@ -10874,7 +10876,7 @@ const proteinAlphabet = {
|
|
|
10874
10876
|
hydrophobicity: -3.9,
|
|
10875
10877
|
colorByFamily: "#FFC0CB",
|
|
10876
10878
|
color: "hsl(264.7, 100%, 69%)",
|
|
10877
|
-
mass:
|
|
10879
|
+
mass: 128.17228
|
|
10878
10880
|
},
|
|
10879
10881
|
M: {
|
|
10880
10882
|
value: "M",
|
|
@@ -10883,7 +10885,7 @@ const proteinAlphabet = {
|
|
|
10883
10885
|
hydrophobicity: 1.9,
|
|
10884
10886
|
colorByFamily: "#FFFF00",
|
|
10885
10887
|
color: "hsl(328.5, 100%, 69%)",
|
|
10886
|
-
mass:
|
|
10888
|
+
mass: 131.19606
|
|
10887
10889
|
},
|
|
10888
10890
|
F: {
|
|
10889
10891
|
value: "F",
|
|
@@ -10892,7 +10894,7 @@ const proteinAlphabet = {
|
|
|
10892
10894
|
hydrophobicity: 2.8,
|
|
10893
10895
|
colorByFamily: "#FFA500",
|
|
10894
10896
|
color: "hsl(338.4, 100%, 69%)",
|
|
10895
|
-
mass:
|
|
10897
|
+
mass: 147.17386
|
|
10896
10898
|
},
|
|
10897
10899
|
P: {
|
|
10898
10900
|
value: "P",
|
|
@@ -10901,7 +10903,7 @@ const proteinAlphabet = {
|
|
|
10901
10903
|
hydrophobicity: -1.6,
|
|
10902
10904
|
colorByFamily: "#00FFFF",
|
|
10903
10905
|
color: "hsl(289.9, 100%, 69%)",
|
|
10904
|
-
mass:
|
|
10906
|
+
mass: 97.11518
|
|
10905
10907
|
},
|
|
10906
10908
|
S: {
|
|
10907
10909
|
value: "S",
|
|
@@ -10910,7 +10912,7 @@ const proteinAlphabet = {
|
|
|
10910
10912
|
hydrophobicity: -0.8,
|
|
10911
10913
|
colorByFamily: "#90EE90",
|
|
10912
10914
|
color: "hsl(298.6, 100%, 69%)",
|
|
10913
|
-
mass:
|
|
10915
|
+
mass: 87.0773
|
|
10914
10916
|
},
|
|
10915
10917
|
T: {
|
|
10916
10918
|
value: "T",
|
|
@@ -10919,7 +10921,7 @@ const proteinAlphabet = {
|
|
|
10919
10921
|
hydrophobicity: -0.7,
|
|
10920
10922
|
colorByFamily: "#90EE90",
|
|
10921
10923
|
color: "hsl(299.8, 100%, 69%)",
|
|
10922
|
-
mass:
|
|
10924
|
+
mass: 101.10388
|
|
10923
10925
|
},
|
|
10924
10926
|
U: {
|
|
10925
10927
|
value: "U",
|
|
@@ -10927,7 +10929,7 @@ const proteinAlphabet = {
|
|
|
10927
10929
|
threeLettersName: "Sec",
|
|
10928
10930
|
colorByFamily: "#FF0000",
|
|
10929
10931
|
color: "hsl(0, 100%, 69%)",
|
|
10930
|
-
mass:
|
|
10932
|
+
mass: 150.3079
|
|
10931
10933
|
},
|
|
10932
10934
|
W: {
|
|
10933
10935
|
value: "W",
|
|
@@ -10936,7 +10938,7 @@ const proteinAlphabet = {
|
|
|
10936
10938
|
hydrophobicity: -0.9,
|
|
10937
10939
|
colorByFamily: "#FFA500",
|
|
10938
10940
|
color: "hsl(297.6, 100%, 69%)",
|
|
10939
|
-
mass:
|
|
10941
|
+
mass: 186.2099
|
|
10940
10942
|
},
|
|
10941
10943
|
Y: {
|
|
10942
10944
|
value: "Y",
|
|
@@ -10945,7 +10947,7 @@ const proteinAlphabet = {
|
|
|
10945
10947
|
hydrophobicity: -1.3,
|
|
10946
10948
|
colorByFamily: "#FFA500",
|
|
10947
10949
|
color: "hsl(293.2, 100%, 69%)",
|
|
10948
|
-
mass:
|
|
10950
|
+
mass: 163.17326
|
|
10949
10951
|
},
|
|
10950
10952
|
V: {
|
|
10951
10953
|
value: "V",
|
|
@@ -10954,7 +10956,7 @@ const proteinAlphabet = {
|
|
|
10954
10956
|
hydrophobicity: 4.2,
|
|
10955
10957
|
colorByFamily: "#00FFFF",
|
|
10956
10958
|
color: "hsl(353.6, 100%, 69%)",
|
|
10957
|
-
mass:
|
|
10959
|
+
mass: 99.13106
|
|
10958
10960
|
},
|
|
10959
10961
|
"*": {
|
|
10960
10962
|
value: "*",
|
|
@@ -11273,20 +11275,91 @@ const annotationTypes = [
|
|
|
11273
11275
|
"primers",
|
|
11274
11276
|
"guides"
|
|
11275
11277
|
];
|
|
11276
|
-
function filterSequenceString(sequenceString,
|
|
11277
|
-
|
|
11278
|
-
|
|
11279
|
-
|
|
11280
|
-
|
|
11281
|
-
|
|
11282
|
-
|
|
11283
|
-
|
|
11278
|
+
function filterSequenceString(sequenceString, {
|
|
11279
|
+
additionalValidChars = "",
|
|
11280
|
+
isOligo,
|
|
11281
|
+
name,
|
|
11282
|
+
isProtein,
|
|
11283
|
+
isRna,
|
|
11284
|
+
isMixedRnaAndDna,
|
|
11285
|
+
includeStopCodon
|
|
11286
|
+
} = {}) {
|
|
11287
|
+
const acceptedChars = getAcceptedChars({
|
|
11288
|
+
isOligo,
|
|
11289
|
+
isProtein,
|
|
11290
|
+
isRna,
|
|
11291
|
+
isMixedRnaAndDna,
|
|
11292
|
+
includeStopCodon
|
|
11293
|
+
});
|
|
11294
|
+
const replaceChars = getReplaceChars({
|
|
11295
|
+
isOligo,
|
|
11296
|
+
isProtein,
|
|
11297
|
+
isRna,
|
|
11298
|
+
isMixedRnaAndDna
|
|
11299
|
+
});
|
|
11300
|
+
let sanitizedVal = "";
|
|
11301
|
+
const invalidChars = [];
|
|
11302
|
+
const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
|
|
11303
|
+
const warnings = [];
|
|
11304
|
+
const replaceCount = {};
|
|
11305
|
+
sequenceString.split("").forEach((letter) => {
|
|
11306
|
+
const lowerLetter = letter.toLowerCase();
|
|
11307
|
+
if (replaceChars && replaceChars[lowerLetter]) {
|
|
11308
|
+
if (!replaceCount[lowerLetter]) {
|
|
11309
|
+
replaceCount[lowerLetter] = 0;
|
|
11310
|
+
}
|
|
11311
|
+
replaceCount[lowerLetter]++;
|
|
11312
|
+
const isUpper = lowerLetter !== letter;
|
|
11313
|
+
sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
|
|
11314
|
+
} else if (chars.includes(lowerLetter)) {
|
|
11315
|
+
sanitizedVal += letter;
|
|
11316
|
+
} else {
|
|
11317
|
+
invalidChars.push(letter);
|
|
11318
|
+
}
|
|
11319
|
+
});
|
|
11320
|
+
Object.keys(replaceCount).forEach((letter) => {
|
|
11321
|
+
warnings.push(
|
|
11322
|
+
`Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
|
|
11323
|
+
);
|
|
11324
|
+
});
|
|
11325
|
+
if (sequenceString.length !== sanitizedVal.length) {
|
|
11326
|
+
warnings.push(
|
|
11327
|
+
`${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
|
|
11284
11328
|
);
|
|
11285
|
-
} else {
|
|
11286
|
-
return sequenceString;
|
|
11287
11329
|
}
|
|
11330
|
+
if (typeof window !== "undefined" && window.toastr && warnings.length) {
|
|
11331
|
+
warnings.forEach((warning) => {
|
|
11332
|
+
window.toastr.warning(warning);
|
|
11333
|
+
});
|
|
11334
|
+
}
|
|
11335
|
+
return [sanitizedVal, warnings];
|
|
11288
11336
|
}
|
|
11289
11337
|
__name(filterSequenceString, "filterSequenceString");
|
|
11338
|
+
function getAcceptedChars({
|
|
11339
|
+
isOligo,
|
|
11340
|
+
isProtein,
|
|
11341
|
+
isRna,
|
|
11342
|
+
isMixedRnaAndDna,
|
|
11343
|
+
includeStopCodon
|
|
11344
|
+
} = {}) {
|
|
11345
|
+
return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
11346
|
+
//just plain old dna
|
|
11347
|
+
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
11348
|
+
);
|
|
11349
|
+
}
|
|
11350
|
+
__name(getAcceptedChars, "getAcceptedChars");
|
|
11351
|
+
function getReplaceChars({
|
|
11352
|
+
isOligo,
|
|
11353
|
+
isProtein,
|
|
11354
|
+
isRna,
|
|
11355
|
+
isMixedRnaAndDna
|
|
11356
|
+
} = {}) {
|
|
11357
|
+
return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
11358
|
+
//just plain old dna
|
|
11359
|
+
{}
|
|
11360
|
+
);
|
|
11361
|
+
}
|
|
11362
|
+
__name(getReplaceChars, "getReplaceChars");
|
|
11290
11363
|
function tidyUpAnnotation(_annotation, {
|
|
11291
11364
|
sequenceData = {},
|
|
11292
11365
|
convertAnnotationsFromAAIndices,
|
|
@@ -11415,14 +11488,6 @@ function coerceLocation({
|
|
|
11415
11488
|
}
|
|
11416
11489
|
}
|
|
11417
11490
|
__name(coerceLocation, "coerceLocation");
|
|
11418
|
-
function filterAminoAcidSequenceString(sequenceString, options) {
|
|
11419
|
-
options = options || {};
|
|
11420
|
-
if (options.includeStopCodon) {
|
|
11421
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
|
|
11422
|
-
}
|
|
11423
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
|
|
11424
|
-
}
|
|
11425
|
-
__name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
|
|
11426
11491
|
function getDegenerateDnaStringFromAAString(aaString) {
|
|
11427
11492
|
return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
|
|
11428
11493
|
}
|
|
@@ -11434,11 +11499,10 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
11434
11499
|
removeUnwantedChars,
|
|
11435
11500
|
additionalValidChars,
|
|
11436
11501
|
noTranslationData,
|
|
11437
|
-
charOverrides,
|
|
11438
11502
|
doNotProvideIdsForAnnotations,
|
|
11439
|
-
proteinFilterOptions,
|
|
11440
11503
|
noCdsTranslations,
|
|
11441
|
-
convertAnnotationsFromAAIndices
|
|
11504
|
+
convertAnnotationsFromAAIndices,
|
|
11505
|
+
topLevelSeqData
|
|
11442
11506
|
} = options;
|
|
11443
11507
|
let seqData = lodashExports.cloneDeep(pSeqData);
|
|
11444
11508
|
const response = {
|
|
@@ -11468,16 +11532,15 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
11468
11532
|
}
|
|
11469
11533
|
if (removeUnwantedChars) {
|
|
11470
11534
|
if (seqData.isProtein) {
|
|
11471
|
-
seqData.proteinSequence
|
|
11472
|
-
|
|
11473
|
-
|
|
11474
|
-
|
|
11535
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
11536
|
+
includeStopCodon: true
|
|
11537
|
+
}, topLevelSeqData || seqData));
|
|
11538
|
+
seqData.proteinSequence = newSeq;
|
|
11475
11539
|
} else {
|
|
11476
|
-
|
|
11477
|
-
|
|
11478
|
-
|
|
11479
|
-
|
|
11480
|
-
);
|
|
11540
|
+
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
|
11541
|
+
additionalValidChars
|
|
11542
|
+
}, topLevelSeqData || seqData));
|
|
11543
|
+
seqData.sequence = newSeq;
|
|
11481
11544
|
}
|
|
11482
11545
|
}
|
|
11483
11546
|
if (seqData.isProtein) {
|
|
@@ -19312,7 +19375,8 @@ function validateSequence(sequence, options = {}) {
|
|
|
19312
19375
|
inclusive1BasedEnd,
|
|
19313
19376
|
additionalValidChars,
|
|
19314
19377
|
allowOverflowAnnotations,
|
|
19315
|
-
coerceFeatureTypes
|
|
19378
|
+
coerceFeatureTypes,
|
|
19379
|
+
includeStopCodon
|
|
19316
19380
|
} = options;
|
|
19317
19381
|
[
|
|
19318
19382
|
"isDNA",
|
|
@@ -19362,7 +19426,6 @@ function validateSequence(sequence, options = {}) {
|
|
|
19362
19426
|
response.messages.push("No sequence detected");
|
|
19363
19427
|
sequence.sequence = "";
|
|
19364
19428
|
}
|
|
19365
|
-
let validChars;
|
|
19366
19429
|
if (sequence.isProtein === void 0 && guessIfProtein) {
|
|
19367
19430
|
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19368
19431
|
sequence.sequence,
|
|
@@ -19370,12 +19433,15 @@ function validateSequence(sequence, options = {}) {
|
|
|
19370
19433
|
);
|
|
19371
19434
|
}
|
|
19372
19435
|
if (sequence.isProtein) {
|
|
19373
|
-
validChars =
|
|
19436
|
+
const [validChars, warnings] = filterSequenceString(sequence.sequence, {
|
|
19437
|
+
name: sequence.name,
|
|
19438
|
+
isProtein: true,
|
|
19439
|
+
additionalValidChars,
|
|
19440
|
+
includeStopCodon
|
|
19441
|
+
});
|
|
19374
19442
|
if (validChars !== sequence.sequence) {
|
|
19375
19443
|
sequence.sequence = validChars;
|
|
19376
|
-
response.messages.push(
|
|
19377
|
-
"Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
|
|
19378
|
-
);
|
|
19444
|
+
response.messages.push(...warnings);
|
|
19379
19445
|
}
|
|
19380
19446
|
sequence.type = "PROTEIN";
|
|
19381
19447
|
sequence.isProtein = true;
|
|
@@ -19397,12 +19463,12 @@ function validateSequence(sequence, options = {}) {
|
|
|
19397
19463
|
} else {
|
|
19398
19464
|
sequence.type = "DNA";
|
|
19399
19465
|
}
|
|
19400
|
-
validChars = filterSequenceString(sequence.sequence,
|
|
19466
|
+
const [validChars, warnings] = filterSequenceString(sequence.sequence, __spreadValues({
|
|
19467
|
+
additionalValidChars
|
|
19468
|
+
}, sequence));
|
|
19401
19469
|
if (validChars !== sequence.sequence) {
|
|
19402
19470
|
sequence.sequence = validChars;
|
|
19403
|
-
response.messages.push(
|
|
19404
|
-
"Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
|
|
19405
|
-
);
|
|
19471
|
+
response.messages.push(...warnings);
|
|
19406
19472
|
}
|
|
19407
19473
|
}
|
|
19408
19474
|
if (!sequence.size) {
|
package/index.mjs
CHANGED
|
@@ -6168,7 +6168,9 @@ lodash.exports;
|
|
|
6168
6168
|
})(lodash, lodash.exports);
|
|
6169
6169
|
var lodashExports = lodash.exports;
|
|
6170
6170
|
const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
|
|
6171
|
+
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6171
6172
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6173
|
+
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
6172
6174
|
const aminoAcidToDegenerateDnaMap = {
|
|
6173
6175
|
"-": "---",
|
|
6174
6176
|
".": "...",
|
|
@@ -10773,7 +10775,7 @@ const proteinAlphabet = {
|
|
|
10773
10775
|
hydrophobicity: 1.8,
|
|
10774
10776
|
colorByFamily: "#00FFFF",
|
|
10775
10777
|
color: "hsl(327.3, 100%, 69%)",
|
|
10776
|
-
mass:
|
|
10778
|
+
mass: 71.0779
|
|
10777
10779
|
},
|
|
10778
10780
|
R: {
|
|
10779
10781
|
value: "R",
|
|
@@ -10782,7 +10784,7 @@ const proteinAlphabet = {
|
|
|
10782
10784
|
hydrophobicity: -4.5,
|
|
10783
10785
|
colorByFamily: "#FFC0CB",
|
|
10784
10786
|
color: "hsl(258.1, 100%, 69%)",
|
|
10785
|
-
mass:
|
|
10787
|
+
mass: 156.18568
|
|
10786
10788
|
},
|
|
10787
10789
|
N: {
|
|
10788
10790
|
value: "N",
|
|
@@ -10791,7 +10793,7 @@ const proteinAlphabet = {
|
|
|
10791
10793
|
hydrophobicity: -3.5,
|
|
10792
10794
|
colorByFamily: "#D3D3D3",
|
|
10793
10795
|
color: "hsl(268.9, 100%, 69%)",
|
|
10794
|
-
mass:
|
|
10796
|
+
mass: 114.10264
|
|
10795
10797
|
},
|
|
10796
10798
|
D: {
|
|
10797
10799
|
value: "D",
|
|
@@ -10800,7 +10802,7 @@ const proteinAlphabet = {
|
|
|
10800
10802
|
hydrophobicity: -3.5,
|
|
10801
10803
|
colorByFamily: "#EE82EE",
|
|
10802
10804
|
color: "hsl(268.9, 100%, 69%)",
|
|
10803
|
-
mass:
|
|
10805
|
+
mass: 115.0874
|
|
10804
10806
|
},
|
|
10805
10807
|
C: {
|
|
10806
10808
|
value: "C",
|
|
@@ -10809,7 +10811,7 @@ const proteinAlphabet = {
|
|
|
10809
10811
|
hydrophobicity: 2.5,
|
|
10810
10812
|
colorByFamily: "#FFFF00",
|
|
10811
10813
|
color: "hsl(335.1, 100%, 69%)",
|
|
10812
|
-
mass:
|
|
10814
|
+
mass: 103.1429
|
|
10813
10815
|
},
|
|
10814
10816
|
E: {
|
|
10815
10817
|
value: "E",
|
|
@@ -10818,7 +10820,7 @@ const proteinAlphabet = {
|
|
|
10818
10820
|
hydrophobicity: -3.5,
|
|
10819
10821
|
colorByFamily: "#EE82EE",
|
|
10820
10822
|
color: "hsl(268.9, 100%, 69%)",
|
|
10821
|
-
mass:
|
|
10823
|
+
mass: 129.11398
|
|
10822
10824
|
},
|
|
10823
10825
|
Q: {
|
|
10824
10826
|
value: "Q",
|
|
@@ -10827,7 +10829,7 @@ const proteinAlphabet = {
|
|
|
10827
10829
|
hydrophobicity: -3.5,
|
|
10828
10830
|
colorByFamily: "#D3D3D3",
|
|
10829
10831
|
color: "hsl(268.9, 100%, 69%)",
|
|
10830
|
-
mass:
|
|
10832
|
+
mass: 128.12922
|
|
10831
10833
|
},
|
|
10832
10834
|
G: {
|
|
10833
10835
|
value: "G",
|
|
@@ -10836,7 +10838,7 @@ const proteinAlphabet = {
|
|
|
10836
10838
|
hydrophobicity: -0.4,
|
|
10837
10839
|
colorByFamily: "#00FFFF",
|
|
10838
10840
|
color: "hsl(303.1, 100%, 69%)",
|
|
10839
|
-
mass:
|
|
10841
|
+
mass: 57.05132
|
|
10840
10842
|
},
|
|
10841
10843
|
H: {
|
|
10842
10844
|
value: "H",
|
|
@@ -10845,7 +10847,7 @@ const proteinAlphabet = {
|
|
|
10845
10847
|
hydrophobicity: -3.2,
|
|
10846
10848
|
colorByFamily: "#FFC0CB",
|
|
10847
10849
|
color: "hsl(272.2, 100%, 69%)",
|
|
10848
|
-
mass:
|
|
10850
|
+
mass: 137.13928
|
|
10849
10851
|
},
|
|
10850
10852
|
I: {
|
|
10851
10853
|
value: "I",
|
|
@@ -10854,7 +10856,7 @@ const proteinAlphabet = {
|
|
|
10854
10856
|
hydrophobicity: 4.5,
|
|
10855
10857
|
colorByFamily: "#00FFFF",
|
|
10856
10858
|
color: "hsl(356.9, 100%, 69%)",
|
|
10857
|
-
mass:
|
|
10859
|
+
mass: 113.15764
|
|
10858
10860
|
},
|
|
10859
10861
|
L: {
|
|
10860
10862
|
value: "L",
|
|
@@ -10863,7 +10865,7 @@ const proteinAlphabet = {
|
|
|
10863
10865
|
hydrophobicity: 3.8,
|
|
10864
10866
|
colorByFamily: "#00FFFF",
|
|
10865
10867
|
color: "hsl(349.4, 100%, 69%)",
|
|
10866
|
-
mass:
|
|
10868
|
+
mass: 113.15764
|
|
10867
10869
|
},
|
|
10868
10870
|
K: {
|
|
10869
10871
|
value: "K",
|
|
@@ -10872,7 +10874,7 @@ const proteinAlphabet = {
|
|
|
10872
10874
|
hydrophobicity: -3.9,
|
|
10873
10875
|
colorByFamily: "#FFC0CB",
|
|
10874
10876
|
color: "hsl(264.7, 100%, 69%)",
|
|
10875
|
-
mass:
|
|
10877
|
+
mass: 128.17228
|
|
10876
10878
|
},
|
|
10877
10879
|
M: {
|
|
10878
10880
|
value: "M",
|
|
@@ -10881,7 +10883,7 @@ const proteinAlphabet = {
|
|
|
10881
10883
|
hydrophobicity: 1.9,
|
|
10882
10884
|
colorByFamily: "#FFFF00",
|
|
10883
10885
|
color: "hsl(328.5, 100%, 69%)",
|
|
10884
|
-
mass:
|
|
10886
|
+
mass: 131.19606
|
|
10885
10887
|
},
|
|
10886
10888
|
F: {
|
|
10887
10889
|
value: "F",
|
|
@@ -10890,7 +10892,7 @@ const proteinAlphabet = {
|
|
|
10890
10892
|
hydrophobicity: 2.8,
|
|
10891
10893
|
colorByFamily: "#FFA500",
|
|
10892
10894
|
color: "hsl(338.4, 100%, 69%)",
|
|
10893
|
-
mass:
|
|
10895
|
+
mass: 147.17386
|
|
10894
10896
|
},
|
|
10895
10897
|
P: {
|
|
10896
10898
|
value: "P",
|
|
@@ -10899,7 +10901,7 @@ const proteinAlphabet = {
|
|
|
10899
10901
|
hydrophobicity: -1.6,
|
|
10900
10902
|
colorByFamily: "#00FFFF",
|
|
10901
10903
|
color: "hsl(289.9, 100%, 69%)",
|
|
10902
|
-
mass:
|
|
10904
|
+
mass: 97.11518
|
|
10903
10905
|
},
|
|
10904
10906
|
S: {
|
|
10905
10907
|
value: "S",
|
|
@@ -10908,7 +10910,7 @@ const proteinAlphabet = {
|
|
|
10908
10910
|
hydrophobicity: -0.8,
|
|
10909
10911
|
colorByFamily: "#90EE90",
|
|
10910
10912
|
color: "hsl(298.6, 100%, 69%)",
|
|
10911
|
-
mass:
|
|
10913
|
+
mass: 87.0773
|
|
10912
10914
|
},
|
|
10913
10915
|
T: {
|
|
10914
10916
|
value: "T",
|
|
@@ -10917,7 +10919,7 @@ const proteinAlphabet = {
|
|
|
10917
10919
|
hydrophobicity: -0.7,
|
|
10918
10920
|
colorByFamily: "#90EE90",
|
|
10919
10921
|
color: "hsl(299.8, 100%, 69%)",
|
|
10920
|
-
mass:
|
|
10922
|
+
mass: 101.10388
|
|
10921
10923
|
},
|
|
10922
10924
|
U: {
|
|
10923
10925
|
value: "U",
|
|
@@ -10925,7 +10927,7 @@ const proteinAlphabet = {
|
|
|
10925
10927
|
threeLettersName: "Sec",
|
|
10926
10928
|
colorByFamily: "#FF0000",
|
|
10927
10929
|
color: "hsl(0, 100%, 69%)",
|
|
10928
|
-
mass:
|
|
10930
|
+
mass: 150.3079
|
|
10929
10931
|
},
|
|
10930
10932
|
W: {
|
|
10931
10933
|
value: "W",
|
|
@@ -10934,7 +10936,7 @@ const proteinAlphabet = {
|
|
|
10934
10936
|
hydrophobicity: -0.9,
|
|
10935
10937
|
colorByFamily: "#FFA500",
|
|
10936
10938
|
color: "hsl(297.6, 100%, 69%)",
|
|
10937
|
-
mass:
|
|
10939
|
+
mass: 186.2099
|
|
10938
10940
|
},
|
|
10939
10941
|
Y: {
|
|
10940
10942
|
value: "Y",
|
|
@@ -10943,7 +10945,7 @@ const proteinAlphabet = {
|
|
|
10943
10945
|
hydrophobicity: -1.3,
|
|
10944
10946
|
colorByFamily: "#FFA500",
|
|
10945
10947
|
color: "hsl(293.2, 100%, 69%)",
|
|
10946
|
-
mass:
|
|
10948
|
+
mass: 163.17326
|
|
10947
10949
|
},
|
|
10948
10950
|
V: {
|
|
10949
10951
|
value: "V",
|
|
@@ -10952,7 +10954,7 @@ const proteinAlphabet = {
|
|
|
10952
10954
|
hydrophobicity: 4.2,
|
|
10953
10955
|
colorByFamily: "#00FFFF",
|
|
10954
10956
|
color: "hsl(353.6, 100%, 69%)",
|
|
10955
|
-
mass:
|
|
10957
|
+
mass: 99.13106
|
|
10956
10958
|
},
|
|
10957
10959
|
"*": {
|
|
10958
10960
|
value: "*",
|
|
@@ -11271,20 +11273,91 @@ const annotationTypes = [
|
|
|
11271
11273
|
"primers",
|
|
11272
11274
|
"guides"
|
|
11273
11275
|
];
|
|
11274
|
-
function filterSequenceString(sequenceString,
|
|
11275
|
-
|
|
11276
|
-
|
|
11277
|
-
|
|
11278
|
-
|
|
11279
|
-
|
|
11280
|
-
|
|
11281
|
-
|
|
11276
|
+
function filterSequenceString(sequenceString, {
|
|
11277
|
+
additionalValidChars = "",
|
|
11278
|
+
isOligo,
|
|
11279
|
+
name,
|
|
11280
|
+
isProtein,
|
|
11281
|
+
isRna,
|
|
11282
|
+
isMixedRnaAndDna,
|
|
11283
|
+
includeStopCodon
|
|
11284
|
+
} = {}) {
|
|
11285
|
+
const acceptedChars = getAcceptedChars({
|
|
11286
|
+
isOligo,
|
|
11287
|
+
isProtein,
|
|
11288
|
+
isRna,
|
|
11289
|
+
isMixedRnaAndDna,
|
|
11290
|
+
includeStopCodon
|
|
11291
|
+
});
|
|
11292
|
+
const replaceChars = getReplaceChars({
|
|
11293
|
+
isOligo,
|
|
11294
|
+
isProtein,
|
|
11295
|
+
isRna,
|
|
11296
|
+
isMixedRnaAndDna
|
|
11297
|
+
});
|
|
11298
|
+
let sanitizedVal = "";
|
|
11299
|
+
const invalidChars = [];
|
|
11300
|
+
const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
|
|
11301
|
+
const warnings = [];
|
|
11302
|
+
const replaceCount = {};
|
|
11303
|
+
sequenceString.split("").forEach((letter) => {
|
|
11304
|
+
const lowerLetter = letter.toLowerCase();
|
|
11305
|
+
if (replaceChars && replaceChars[lowerLetter]) {
|
|
11306
|
+
if (!replaceCount[lowerLetter]) {
|
|
11307
|
+
replaceCount[lowerLetter] = 0;
|
|
11308
|
+
}
|
|
11309
|
+
replaceCount[lowerLetter]++;
|
|
11310
|
+
const isUpper = lowerLetter !== letter;
|
|
11311
|
+
sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
|
|
11312
|
+
} else if (chars.includes(lowerLetter)) {
|
|
11313
|
+
sanitizedVal += letter;
|
|
11314
|
+
} else {
|
|
11315
|
+
invalidChars.push(letter);
|
|
11316
|
+
}
|
|
11317
|
+
});
|
|
11318
|
+
Object.keys(replaceCount).forEach((letter) => {
|
|
11319
|
+
warnings.push(
|
|
11320
|
+
`Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
|
|
11321
|
+
);
|
|
11322
|
+
});
|
|
11323
|
+
if (sequenceString.length !== sanitizedVal.length) {
|
|
11324
|
+
warnings.push(
|
|
11325
|
+
`${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
|
|
11282
11326
|
);
|
|
11283
|
-
} else {
|
|
11284
|
-
return sequenceString;
|
|
11285
11327
|
}
|
|
11328
|
+
if (typeof window !== "undefined" && window.toastr && warnings.length) {
|
|
11329
|
+
warnings.forEach((warning) => {
|
|
11330
|
+
window.toastr.warning(warning);
|
|
11331
|
+
});
|
|
11332
|
+
}
|
|
11333
|
+
return [sanitizedVal, warnings];
|
|
11286
11334
|
}
|
|
11287
11335
|
__name(filterSequenceString, "filterSequenceString");
|
|
11336
|
+
function getAcceptedChars({
|
|
11337
|
+
isOligo,
|
|
11338
|
+
isProtein,
|
|
11339
|
+
isRna,
|
|
11340
|
+
isMixedRnaAndDna,
|
|
11341
|
+
includeStopCodon
|
|
11342
|
+
} = {}) {
|
|
11343
|
+
return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
11344
|
+
//just plain old dna
|
|
11345
|
+
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
11346
|
+
);
|
|
11347
|
+
}
|
|
11348
|
+
__name(getAcceptedChars, "getAcceptedChars");
|
|
11349
|
+
function getReplaceChars({
|
|
11350
|
+
isOligo,
|
|
11351
|
+
isProtein,
|
|
11352
|
+
isRna,
|
|
11353
|
+
isMixedRnaAndDna
|
|
11354
|
+
} = {}) {
|
|
11355
|
+
return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
11356
|
+
//just plain old dna
|
|
11357
|
+
{}
|
|
11358
|
+
);
|
|
11359
|
+
}
|
|
11360
|
+
__name(getReplaceChars, "getReplaceChars");
|
|
11288
11361
|
function tidyUpAnnotation(_annotation, {
|
|
11289
11362
|
sequenceData = {},
|
|
11290
11363
|
convertAnnotationsFromAAIndices,
|
|
@@ -11413,14 +11486,6 @@ function coerceLocation({
|
|
|
11413
11486
|
}
|
|
11414
11487
|
}
|
|
11415
11488
|
__name(coerceLocation, "coerceLocation");
|
|
11416
|
-
function filterAminoAcidSequenceString(sequenceString, options) {
|
|
11417
|
-
options = options || {};
|
|
11418
|
-
if (options.includeStopCodon) {
|
|
11419
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
|
|
11420
|
-
}
|
|
11421
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
|
|
11422
|
-
}
|
|
11423
|
-
__name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
|
|
11424
11489
|
function getDegenerateDnaStringFromAAString(aaString) {
|
|
11425
11490
|
return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
|
|
11426
11491
|
}
|
|
@@ -11432,11 +11497,10 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
11432
11497
|
removeUnwantedChars,
|
|
11433
11498
|
additionalValidChars,
|
|
11434
11499
|
noTranslationData,
|
|
11435
|
-
charOverrides,
|
|
11436
11500
|
doNotProvideIdsForAnnotations,
|
|
11437
|
-
proteinFilterOptions,
|
|
11438
11501
|
noCdsTranslations,
|
|
11439
|
-
convertAnnotationsFromAAIndices
|
|
11502
|
+
convertAnnotationsFromAAIndices,
|
|
11503
|
+
topLevelSeqData
|
|
11440
11504
|
} = options;
|
|
11441
11505
|
let seqData = lodashExports.cloneDeep(pSeqData);
|
|
11442
11506
|
const response = {
|
|
@@ -11466,16 +11530,15 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
11466
11530
|
}
|
|
11467
11531
|
if (removeUnwantedChars) {
|
|
11468
11532
|
if (seqData.isProtein) {
|
|
11469
|
-
seqData.proteinSequence
|
|
11470
|
-
|
|
11471
|
-
|
|
11472
|
-
|
|
11533
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
11534
|
+
includeStopCodon: true
|
|
11535
|
+
}, topLevelSeqData || seqData));
|
|
11536
|
+
seqData.proteinSequence = newSeq;
|
|
11473
11537
|
} else {
|
|
11474
|
-
|
|
11475
|
-
|
|
11476
|
-
|
|
11477
|
-
|
|
11478
|
-
);
|
|
11538
|
+
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
|
11539
|
+
additionalValidChars
|
|
11540
|
+
}, topLevelSeqData || seqData));
|
|
11541
|
+
seqData.sequence = newSeq;
|
|
11479
11542
|
}
|
|
11480
11543
|
}
|
|
11481
11544
|
if (seqData.isProtein) {
|
|
@@ -19310,7 +19373,8 @@ function validateSequence(sequence, options = {}) {
|
|
|
19310
19373
|
inclusive1BasedEnd,
|
|
19311
19374
|
additionalValidChars,
|
|
19312
19375
|
allowOverflowAnnotations,
|
|
19313
|
-
coerceFeatureTypes
|
|
19376
|
+
coerceFeatureTypes,
|
|
19377
|
+
includeStopCodon
|
|
19314
19378
|
} = options;
|
|
19315
19379
|
[
|
|
19316
19380
|
"isDNA",
|
|
@@ -19360,7 +19424,6 @@ function validateSequence(sequence, options = {}) {
|
|
|
19360
19424
|
response.messages.push("No sequence detected");
|
|
19361
19425
|
sequence.sequence = "";
|
|
19362
19426
|
}
|
|
19363
|
-
let validChars;
|
|
19364
19427
|
if (sequence.isProtein === void 0 && guessIfProtein) {
|
|
19365
19428
|
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19366
19429
|
sequence.sequence,
|
|
@@ -19368,12 +19431,15 @@ function validateSequence(sequence, options = {}) {
|
|
|
19368
19431
|
);
|
|
19369
19432
|
}
|
|
19370
19433
|
if (sequence.isProtein) {
|
|
19371
|
-
validChars =
|
|
19434
|
+
const [validChars, warnings] = filterSequenceString(sequence.sequence, {
|
|
19435
|
+
name: sequence.name,
|
|
19436
|
+
isProtein: true,
|
|
19437
|
+
additionalValidChars,
|
|
19438
|
+
includeStopCodon
|
|
19439
|
+
});
|
|
19372
19440
|
if (validChars !== sequence.sequence) {
|
|
19373
19441
|
sequence.sequence = validChars;
|
|
19374
|
-
response.messages.push(
|
|
19375
|
-
"Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
|
|
19376
|
-
);
|
|
19442
|
+
response.messages.push(...warnings);
|
|
19377
19443
|
}
|
|
19378
19444
|
sequence.type = "PROTEIN";
|
|
19379
19445
|
sequence.isProtein = true;
|
|
@@ -19395,12 +19461,12 @@ function validateSequence(sequence, options = {}) {
|
|
|
19395
19461
|
} else {
|
|
19396
19462
|
sequence.type = "DNA";
|
|
19397
19463
|
}
|
|
19398
|
-
validChars = filterSequenceString(sequence.sequence,
|
|
19464
|
+
const [validChars, warnings] = filterSequenceString(sequence.sequence, __spreadValues({
|
|
19465
|
+
additionalValidChars
|
|
19466
|
+
}, sequence));
|
|
19399
19467
|
if (validChars !== sequence.sequence) {
|
|
19400
19468
|
sequence.sequence = validChars;
|
|
19401
|
-
response.messages.push(
|
|
19402
|
-
"Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
|
|
19403
|
-
);
|
|
19469
|
+
response.messages.push(...warnings);
|
|
19404
19470
|
}
|
|
19405
19471
|
}
|
|
19406
19472
|
if (!sequence.size) {
|
package/index.umd.js
CHANGED
|
@@ -6172,7 +6172,9 @@ var __async = (__this, __arguments, generator) => {
|
|
|
6172
6172
|
})(lodash, lodash.exports);
|
|
6173
6173
|
var lodashExports = lodash.exports;
|
|
6174
6174
|
const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
|
|
6175
|
+
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6175
6176
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6177
|
+
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
6176
6178
|
const aminoAcidToDegenerateDnaMap = {
|
|
6177
6179
|
"-": "---",
|
|
6178
6180
|
".": "...",
|
|
@@ -10777,7 +10779,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10777
10779
|
hydrophobicity: 1.8,
|
|
10778
10780
|
colorByFamily: "#00FFFF",
|
|
10779
10781
|
color: "hsl(327.3, 100%, 69%)",
|
|
10780
|
-
mass:
|
|
10782
|
+
mass: 71.0779
|
|
10781
10783
|
},
|
|
10782
10784
|
R: {
|
|
10783
10785
|
value: "R",
|
|
@@ -10786,7 +10788,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10786
10788
|
hydrophobicity: -4.5,
|
|
10787
10789
|
colorByFamily: "#FFC0CB",
|
|
10788
10790
|
color: "hsl(258.1, 100%, 69%)",
|
|
10789
|
-
mass:
|
|
10791
|
+
mass: 156.18568
|
|
10790
10792
|
},
|
|
10791
10793
|
N: {
|
|
10792
10794
|
value: "N",
|
|
@@ -10795,7 +10797,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10795
10797
|
hydrophobicity: -3.5,
|
|
10796
10798
|
colorByFamily: "#D3D3D3",
|
|
10797
10799
|
color: "hsl(268.9, 100%, 69%)",
|
|
10798
|
-
mass:
|
|
10800
|
+
mass: 114.10264
|
|
10799
10801
|
},
|
|
10800
10802
|
D: {
|
|
10801
10803
|
value: "D",
|
|
@@ -10804,7 +10806,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10804
10806
|
hydrophobicity: -3.5,
|
|
10805
10807
|
colorByFamily: "#EE82EE",
|
|
10806
10808
|
color: "hsl(268.9, 100%, 69%)",
|
|
10807
|
-
mass:
|
|
10809
|
+
mass: 115.0874
|
|
10808
10810
|
},
|
|
10809
10811
|
C: {
|
|
10810
10812
|
value: "C",
|
|
@@ -10813,7 +10815,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10813
10815
|
hydrophobicity: 2.5,
|
|
10814
10816
|
colorByFamily: "#FFFF00",
|
|
10815
10817
|
color: "hsl(335.1, 100%, 69%)",
|
|
10816
|
-
mass:
|
|
10818
|
+
mass: 103.1429
|
|
10817
10819
|
},
|
|
10818
10820
|
E: {
|
|
10819
10821
|
value: "E",
|
|
@@ -10822,7 +10824,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10822
10824
|
hydrophobicity: -3.5,
|
|
10823
10825
|
colorByFamily: "#EE82EE",
|
|
10824
10826
|
color: "hsl(268.9, 100%, 69%)",
|
|
10825
|
-
mass:
|
|
10827
|
+
mass: 129.11398
|
|
10826
10828
|
},
|
|
10827
10829
|
Q: {
|
|
10828
10830
|
value: "Q",
|
|
@@ -10831,7 +10833,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10831
10833
|
hydrophobicity: -3.5,
|
|
10832
10834
|
colorByFamily: "#D3D3D3",
|
|
10833
10835
|
color: "hsl(268.9, 100%, 69%)",
|
|
10834
|
-
mass:
|
|
10836
|
+
mass: 128.12922
|
|
10835
10837
|
},
|
|
10836
10838
|
G: {
|
|
10837
10839
|
value: "G",
|
|
@@ -10840,7 +10842,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10840
10842
|
hydrophobicity: -0.4,
|
|
10841
10843
|
colorByFamily: "#00FFFF",
|
|
10842
10844
|
color: "hsl(303.1, 100%, 69%)",
|
|
10843
|
-
mass:
|
|
10845
|
+
mass: 57.05132
|
|
10844
10846
|
},
|
|
10845
10847
|
H: {
|
|
10846
10848
|
value: "H",
|
|
@@ -10849,7 +10851,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10849
10851
|
hydrophobicity: -3.2,
|
|
10850
10852
|
colorByFamily: "#FFC0CB",
|
|
10851
10853
|
color: "hsl(272.2, 100%, 69%)",
|
|
10852
|
-
mass:
|
|
10854
|
+
mass: 137.13928
|
|
10853
10855
|
},
|
|
10854
10856
|
I: {
|
|
10855
10857
|
value: "I",
|
|
@@ -10858,7 +10860,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10858
10860
|
hydrophobicity: 4.5,
|
|
10859
10861
|
colorByFamily: "#00FFFF",
|
|
10860
10862
|
color: "hsl(356.9, 100%, 69%)",
|
|
10861
|
-
mass:
|
|
10863
|
+
mass: 113.15764
|
|
10862
10864
|
},
|
|
10863
10865
|
L: {
|
|
10864
10866
|
value: "L",
|
|
@@ -10867,7 +10869,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10867
10869
|
hydrophobicity: 3.8,
|
|
10868
10870
|
colorByFamily: "#00FFFF",
|
|
10869
10871
|
color: "hsl(349.4, 100%, 69%)",
|
|
10870
|
-
mass:
|
|
10872
|
+
mass: 113.15764
|
|
10871
10873
|
},
|
|
10872
10874
|
K: {
|
|
10873
10875
|
value: "K",
|
|
@@ -10876,7 +10878,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10876
10878
|
hydrophobicity: -3.9,
|
|
10877
10879
|
colorByFamily: "#FFC0CB",
|
|
10878
10880
|
color: "hsl(264.7, 100%, 69%)",
|
|
10879
|
-
mass:
|
|
10881
|
+
mass: 128.17228
|
|
10880
10882
|
},
|
|
10881
10883
|
M: {
|
|
10882
10884
|
value: "M",
|
|
@@ -10885,7 +10887,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10885
10887
|
hydrophobicity: 1.9,
|
|
10886
10888
|
colorByFamily: "#FFFF00",
|
|
10887
10889
|
color: "hsl(328.5, 100%, 69%)",
|
|
10888
|
-
mass:
|
|
10890
|
+
mass: 131.19606
|
|
10889
10891
|
},
|
|
10890
10892
|
F: {
|
|
10891
10893
|
value: "F",
|
|
@@ -10894,7 +10896,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10894
10896
|
hydrophobicity: 2.8,
|
|
10895
10897
|
colorByFamily: "#FFA500",
|
|
10896
10898
|
color: "hsl(338.4, 100%, 69%)",
|
|
10897
|
-
mass:
|
|
10899
|
+
mass: 147.17386
|
|
10898
10900
|
},
|
|
10899
10901
|
P: {
|
|
10900
10902
|
value: "P",
|
|
@@ -10903,7 +10905,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10903
10905
|
hydrophobicity: -1.6,
|
|
10904
10906
|
colorByFamily: "#00FFFF",
|
|
10905
10907
|
color: "hsl(289.9, 100%, 69%)",
|
|
10906
|
-
mass:
|
|
10908
|
+
mass: 97.11518
|
|
10907
10909
|
},
|
|
10908
10910
|
S: {
|
|
10909
10911
|
value: "S",
|
|
@@ -10912,7 +10914,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10912
10914
|
hydrophobicity: -0.8,
|
|
10913
10915
|
colorByFamily: "#90EE90",
|
|
10914
10916
|
color: "hsl(298.6, 100%, 69%)",
|
|
10915
|
-
mass:
|
|
10917
|
+
mass: 87.0773
|
|
10916
10918
|
},
|
|
10917
10919
|
T: {
|
|
10918
10920
|
value: "T",
|
|
@@ -10921,7 +10923,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10921
10923
|
hydrophobicity: -0.7,
|
|
10922
10924
|
colorByFamily: "#90EE90",
|
|
10923
10925
|
color: "hsl(299.8, 100%, 69%)",
|
|
10924
|
-
mass:
|
|
10926
|
+
mass: 101.10388
|
|
10925
10927
|
},
|
|
10926
10928
|
U: {
|
|
10927
10929
|
value: "U",
|
|
@@ -10929,7 +10931,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10929
10931
|
threeLettersName: "Sec",
|
|
10930
10932
|
colorByFamily: "#FF0000",
|
|
10931
10933
|
color: "hsl(0, 100%, 69%)",
|
|
10932
|
-
mass:
|
|
10934
|
+
mass: 150.3079
|
|
10933
10935
|
},
|
|
10934
10936
|
W: {
|
|
10935
10937
|
value: "W",
|
|
@@ -10938,7 +10940,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10938
10940
|
hydrophobicity: -0.9,
|
|
10939
10941
|
colorByFamily: "#FFA500",
|
|
10940
10942
|
color: "hsl(297.6, 100%, 69%)",
|
|
10941
|
-
mass:
|
|
10943
|
+
mass: 186.2099
|
|
10942
10944
|
},
|
|
10943
10945
|
Y: {
|
|
10944
10946
|
value: "Y",
|
|
@@ -10947,7 +10949,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10947
10949
|
hydrophobicity: -1.3,
|
|
10948
10950
|
colorByFamily: "#FFA500",
|
|
10949
10951
|
color: "hsl(293.2, 100%, 69%)",
|
|
10950
|
-
mass:
|
|
10952
|
+
mass: 163.17326
|
|
10951
10953
|
},
|
|
10952
10954
|
V: {
|
|
10953
10955
|
value: "V",
|
|
@@ -10956,7 +10958,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
10956
10958
|
hydrophobicity: 4.2,
|
|
10957
10959
|
colorByFamily: "#00FFFF",
|
|
10958
10960
|
color: "hsl(353.6, 100%, 69%)",
|
|
10959
|
-
mass:
|
|
10961
|
+
mass: 99.13106
|
|
10960
10962
|
},
|
|
10961
10963
|
"*": {
|
|
10962
10964
|
value: "*",
|
|
@@ -11275,20 +11277,91 @@ var __async = (__this, __arguments, generator) => {
|
|
|
11275
11277
|
"primers",
|
|
11276
11278
|
"guides"
|
|
11277
11279
|
];
|
|
11278
|
-
function filterSequenceString(sequenceString,
|
|
11279
|
-
|
|
11280
|
-
|
|
11281
|
-
|
|
11282
|
-
|
|
11283
|
-
|
|
11284
|
-
|
|
11285
|
-
|
|
11280
|
+
function filterSequenceString(sequenceString, {
|
|
11281
|
+
additionalValidChars = "",
|
|
11282
|
+
isOligo,
|
|
11283
|
+
name: name2,
|
|
11284
|
+
isProtein,
|
|
11285
|
+
isRna,
|
|
11286
|
+
isMixedRnaAndDna,
|
|
11287
|
+
includeStopCodon
|
|
11288
|
+
} = {}) {
|
|
11289
|
+
const acceptedChars = getAcceptedChars({
|
|
11290
|
+
isOligo,
|
|
11291
|
+
isProtein,
|
|
11292
|
+
isRna,
|
|
11293
|
+
isMixedRnaAndDna,
|
|
11294
|
+
includeStopCodon
|
|
11295
|
+
});
|
|
11296
|
+
const replaceChars = getReplaceChars({
|
|
11297
|
+
isOligo,
|
|
11298
|
+
isProtein,
|
|
11299
|
+
isRna,
|
|
11300
|
+
isMixedRnaAndDna
|
|
11301
|
+
});
|
|
11302
|
+
let sanitizedVal = "";
|
|
11303
|
+
const invalidChars = [];
|
|
11304
|
+
const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
|
|
11305
|
+
const warnings = [];
|
|
11306
|
+
const replaceCount = {};
|
|
11307
|
+
sequenceString.split("").forEach((letter) => {
|
|
11308
|
+
const lowerLetter = letter.toLowerCase();
|
|
11309
|
+
if (replaceChars && replaceChars[lowerLetter]) {
|
|
11310
|
+
if (!replaceCount[lowerLetter]) {
|
|
11311
|
+
replaceCount[lowerLetter] = 0;
|
|
11312
|
+
}
|
|
11313
|
+
replaceCount[lowerLetter]++;
|
|
11314
|
+
const isUpper = lowerLetter !== letter;
|
|
11315
|
+
sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
|
|
11316
|
+
} else if (chars.includes(lowerLetter)) {
|
|
11317
|
+
sanitizedVal += letter;
|
|
11318
|
+
} else {
|
|
11319
|
+
invalidChars.push(letter);
|
|
11320
|
+
}
|
|
11321
|
+
});
|
|
11322
|
+
Object.keys(replaceCount).forEach((letter) => {
|
|
11323
|
+
warnings.push(
|
|
11324
|
+
`Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
|
|
11325
|
+
);
|
|
11326
|
+
});
|
|
11327
|
+
if (sequenceString.length !== sanitizedVal.length) {
|
|
11328
|
+
warnings.push(
|
|
11329
|
+
`${name2 ? `Sequence ${name2}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
|
|
11286
11330
|
);
|
|
11287
|
-
} else {
|
|
11288
|
-
return sequenceString;
|
|
11289
11331
|
}
|
|
11332
|
+
if (typeof window !== "undefined" && window.toastr && warnings.length) {
|
|
11333
|
+
warnings.forEach((warning) => {
|
|
11334
|
+
window.toastr.warning(warning);
|
|
11335
|
+
});
|
|
11336
|
+
}
|
|
11337
|
+
return [sanitizedVal, warnings];
|
|
11290
11338
|
}
|
|
11291
11339
|
__name(filterSequenceString, "filterSequenceString");
|
|
11340
|
+
function getAcceptedChars({
|
|
11341
|
+
isOligo,
|
|
11342
|
+
isProtein,
|
|
11343
|
+
isRna,
|
|
11344
|
+
isMixedRnaAndDna,
|
|
11345
|
+
includeStopCodon
|
|
11346
|
+
} = {}) {
|
|
11347
|
+
return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
11348
|
+
//just plain old dna
|
|
11349
|
+
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
11350
|
+
);
|
|
11351
|
+
}
|
|
11352
|
+
__name(getAcceptedChars, "getAcceptedChars");
|
|
11353
|
+
function getReplaceChars({
|
|
11354
|
+
isOligo,
|
|
11355
|
+
isProtein,
|
|
11356
|
+
isRna,
|
|
11357
|
+
isMixedRnaAndDna
|
|
11358
|
+
} = {}) {
|
|
11359
|
+
return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
11360
|
+
//just plain old dna
|
|
11361
|
+
{}
|
|
11362
|
+
);
|
|
11363
|
+
}
|
|
11364
|
+
__name(getReplaceChars, "getReplaceChars");
|
|
11292
11365
|
function tidyUpAnnotation(_annotation, {
|
|
11293
11366
|
sequenceData = {},
|
|
11294
11367
|
convertAnnotationsFromAAIndices,
|
|
@@ -11417,14 +11490,6 @@ var __async = (__this, __arguments, generator) => {
|
|
|
11417
11490
|
}
|
|
11418
11491
|
}
|
|
11419
11492
|
__name(coerceLocation, "coerceLocation");
|
|
11420
|
-
function filterAminoAcidSequenceString(sequenceString, options) {
|
|
11421
|
-
options = options || {};
|
|
11422
|
-
if (options.includeStopCodon) {
|
|
11423
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
|
|
11424
|
-
}
|
|
11425
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
|
|
11426
|
-
}
|
|
11427
|
-
__name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
|
|
11428
11493
|
function getDegenerateDnaStringFromAAString(aaString) {
|
|
11429
11494
|
return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
|
|
11430
11495
|
}
|
|
@@ -11436,11 +11501,10 @@ var __async = (__this, __arguments, generator) => {
|
|
|
11436
11501
|
removeUnwantedChars,
|
|
11437
11502
|
additionalValidChars,
|
|
11438
11503
|
noTranslationData,
|
|
11439
|
-
charOverrides,
|
|
11440
11504
|
doNotProvideIdsForAnnotations,
|
|
11441
|
-
proteinFilterOptions,
|
|
11442
11505
|
noCdsTranslations,
|
|
11443
|
-
convertAnnotationsFromAAIndices
|
|
11506
|
+
convertAnnotationsFromAAIndices,
|
|
11507
|
+
topLevelSeqData
|
|
11444
11508
|
} = options;
|
|
11445
11509
|
let seqData = lodashExports.cloneDeep(pSeqData);
|
|
11446
11510
|
const response = {
|
|
@@ -11470,16 +11534,15 @@ var __async = (__this, __arguments, generator) => {
|
|
|
11470
11534
|
}
|
|
11471
11535
|
if (removeUnwantedChars) {
|
|
11472
11536
|
if (seqData.isProtein) {
|
|
11473
|
-
seqData.proteinSequence
|
|
11474
|
-
|
|
11475
|
-
|
|
11476
|
-
|
|
11537
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
11538
|
+
includeStopCodon: true
|
|
11539
|
+
}, topLevelSeqData || seqData));
|
|
11540
|
+
seqData.proteinSequence = newSeq;
|
|
11477
11541
|
} else {
|
|
11478
|
-
|
|
11479
|
-
|
|
11480
|
-
|
|
11481
|
-
|
|
11482
|
-
);
|
|
11542
|
+
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
|
11543
|
+
additionalValidChars
|
|
11544
|
+
}, topLevelSeqData || seqData));
|
|
11545
|
+
seqData.sequence = newSeq;
|
|
11483
11546
|
}
|
|
11484
11547
|
}
|
|
11485
11548
|
if (seqData.isProtein) {
|
|
@@ -19314,7 +19377,8 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19314
19377
|
inclusive1BasedEnd,
|
|
19315
19378
|
additionalValidChars,
|
|
19316
19379
|
allowOverflowAnnotations,
|
|
19317
|
-
coerceFeatureTypes
|
|
19380
|
+
coerceFeatureTypes,
|
|
19381
|
+
includeStopCodon
|
|
19318
19382
|
} = options;
|
|
19319
19383
|
[
|
|
19320
19384
|
"isDNA",
|
|
@@ -19364,7 +19428,6 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19364
19428
|
response.messages.push("No sequence detected");
|
|
19365
19429
|
sequence.sequence = "";
|
|
19366
19430
|
}
|
|
19367
|
-
let validChars;
|
|
19368
19431
|
if (sequence.isProtein === void 0 && guessIfProtein) {
|
|
19369
19432
|
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
19370
19433
|
sequence.sequence,
|
|
@@ -19372,12 +19435,15 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19372
19435
|
);
|
|
19373
19436
|
}
|
|
19374
19437
|
if (sequence.isProtein) {
|
|
19375
|
-
validChars =
|
|
19438
|
+
const [validChars, warnings] = filterSequenceString(sequence.sequence, {
|
|
19439
|
+
name: sequence.name,
|
|
19440
|
+
isProtein: true,
|
|
19441
|
+
additionalValidChars,
|
|
19442
|
+
includeStopCodon
|
|
19443
|
+
});
|
|
19376
19444
|
if (validChars !== sequence.sequence) {
|
|
19377
19445
|
sequence.sequence = validChars;
|
|
19378
|
-
response.messages.push(
|
|
19379
|
-
"Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
|
|
19380
|
-
);
|
|
19446
|
+
response.messages.push(...warnings);
|
|
19381
19447
|
}
|
|
19382
19448
|
sequence.type = "PROTEIN";
|
|
19383
19449
|
sequence.isProtein = true;
|
|
@@ -19399,12 +19465,12 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19399
19465
|
} else {
|
|
19400
19466
|
sequence.type = "DNA";
|
|
19401
19467
|
}
|
|
19402
|
-
validChars = filterSequenceString(sequence.sequence,
|
|
19468
|
+
const [validChars, warnings] = filterSequenceString(sequence.sequence, __spreadValues({
|
|
19469
|
+
additionalValidChars
|
|
19470
|
+
}, sequence));
|
|
19403
19471
|
if (validChars !== sequence.sequence) {
|
|
19404
19472
|
sequence.sequence = validChars;
|
|
19405
|
-
response.messages.push(
|
|
19406
|
-
"Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
|
|
19407
|
-
);
|
|
19473
|
+
response.messages.push(...warnings);
|
|
19408
19474
|
}
|
|
19409
19475
|
}
|
|
19410
19476
|
if (!sequence.size) {
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@teselagen/bio-parsers",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.1",
|
|
4
4
|
"dependencies": {
|
|
5
|
-
"@teselagen/sequence-utils": "0.3.
|
|
5
|
+
"@teselagen/sequence-utils": "0.3.9",
|
|
6
6
|
"@teselagen/range-utils": "0.3.7",
|
|
7
7
|
"@gmod/gff": "^1.2.1",
|
|
8
8
|
"buffer": "^6.0.3",
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import areNonNegativeIntegers from "validate.io-nonnegative-integer-array";
|
|
2
2
|
import { getFeatureTypes } from "@teselagen/sequence-utils";
|
|
3
3
|
import {
|
|
4
|
-
filterAminoAcidSequenceString,
|
|
5
4
|
filterSequenceString,
|
|
6
5
|
guessIfSequenceIsDnaAndNotProtein
|
|
7
6
|
} from "@teselagen/sequence-utils";
|
|
@@ -30,7 +29,8 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
30
29
|
inclusive1BasedEnd,
|
|
31
30
|
additionalValidChars,
|
|
32
31
|
allowOverflowAnnotations,
|
|
33
|
-
coerceFeatureTypes
|
|
32
|
+
coerceFeatureTypes,
|
|
33
|
+
includeStopCodon
|
|
34
34
|
} = options;
|
|
35
35
|
[
|
|
36
36
|
"isDNA",
|
|
@@ -84,7 +84,7 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
84
84
|
response.messages.push("No sequence detected");
|
|
85
85
|
sequence.sequence = "";
|
|
86
86
|
}
|
|
87
|
-
|
|
87
|
+
|
|
88
88
|
if (sequence.isProtein === undefined && guessIfProtein) {
|
|
89
89
|
sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
|
|
90
90
|
sequence.sequence,
|
|
@@ -93,12 +93,15 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
93
93
|
}
|
|
94
94
|
if (sequence.isProtein) {
|
|
95
95
|
//tnr: add code to strip invalid protein data..
|
|
96
|
-
validChars =
|
|
96
|
+
const [validChars, warnings] = filterSequenceString(sequence.sequence, {
|
|
97
|
+
name: sequence.name,
|
|
98
|
+
isProtein: true,
|
|
99
|
+
additionalValidChars,
|
|
100
|
+
includeStopCodon
|
|
101
|
+
});
|
|
97
102
|
if (validChars !== sequence.sequence) {
|
|
98
103
|
sequence.sequence = validChars;
|
|
99
|
-
response.messages.push(
|
|
100
|
-
"Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
|
|
101
|
-
);
|
|
104
|
+
response.messages.push(...warnings);
|
|
102
105
|
}
|
|
103
106
|
sequence.type = "PROTEIN";
|
|
104
107
|
sequence.isProtein = true;
|
|
@@ -126,12 +129,13 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
126
129
|
sequence.type = "DNA";
|
|
127
130
|
}
|
|
128
131
|
|
|
129
|
-
validChars = filterSequenceString(sequence.sequence,
|
|
132
|
+
const [validChars, warnings] = filterSequenceString(sequence.sequence, {
|
|
133
|
+
additionalValidChars,
|
|
134
|
+
...sequence
|
|
135
|
+
});
|
|
130
136
|
if (validChars !== sequence.sequence) {
|
|
131
137
|
sequence.sequence = validChars;
|
|
132
|
-
response.messages.push(
|
|
133
|
-
"Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
|
|
134
|
-
);
|
|
138
|
+
response.messages.push(...warnings);
|
|
135
139
|
}
|
|
136
140
|
}
|
|
137
141
|
|