@teselagen/bio-parsers 0.3.9 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -6170,7 +6170,9 @@ lodash.exports;
6170
6170
  })(lodash, lodash.exports);
6171
6171
  var lodashExports = lodash.exports;
6172
6172
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6173
+ const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6173
6174
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6175
+ const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6174
6176
  const aminoAcidToDegenerateDnaMap = {
6175
6177
  "-": "---",
6176
6178
  ".": "...",
@@ -10775,7 +10777,7 @@ const proteinAlphabet = {
10775
10777
  hydrophobicity: 1.8,
10776
10778
  colorByFamily: "#00FFFF",
10777
10779
  color: "hsl(327.3, 100%, 69%)",
10778
- mass: 89.1
10780
+ mass: 71.0779
10779
10781
  },
10780
10782
  R: {
10781
10783
  value: "R",
@@ -10784,7 +10786,7 @@ const proteinAlphabet = {
10784
10786
  hydrophobicity: -4.5,
10785
10787
  colorByFamily: "#FFC0CB",
10786
10788
  color: "hsl(258.1, 100%, 69%)",
10787
- mass: 174.2
10789
+ mass: 156.18568
10788
10790
  },
10789
10791
  N: {
10790
10792
  value: "N",
@@ -10793,7 +10795,7 @@ const proteinAlphabet = {
10793
10795
  hydrophobicity: -3.5,
10794
10796
  colorByFamily: "#D3D3D3",
10795
10797
  color: "hsl(268.9, 100%, 69%)",
10796
- mass: 132.1
10798
+ mass: 114.10264
10797
10799
  },
10798
10800
  D: {
10799
10801
  value: "D",
@@ -10802,7 +10804,7 @@ const proteinAlphabet = {
10802
10804
  hydrophobicity: -3.5,
10803
10805
  colorByFamily: "#EE82EE",
10804
10806
  color: "hsl(268.9, 100%, 69%)",
10805
- mass: 133.1
10807
+ mass: 115.0874
10806
10808
  },
10807
10809
  C: {
10808
10810
  value: "C",
@@ -10811,7 +10813,7 @@ const proteinAlphabet = {
10811
10813
  hydrophobicity: 2.5,
10812
10814
  colorByFamily: "#FFFF00",
10813
10815
  color: "hsl(335.1, 100%, 69%)",
10814
- mass: 121.2
10816
+ mass: 103.1429
10815
10817
  },
10816
10818
  E: {
10817
10819
  value: "E",
@@ -10820,7 +10822,7 @@ const proteinAlphabet = {
10820
10822
  hydrophobicity: -3.5,
10821
10823
  colorByFamily: "#EE82EE",
10822
10824
  color: "hsl(268.9, 100%, 69%)",
10823
- mass: 147.1
10825
+ mass: 129.11398
10824
10826
  },
10825
10827
  Q: {
10826
10828
  value: "Q",
@@ -10829,7 +10831,7 @@ const proteinAlphabet = {
10829
10831
  hydrophobicity: -3.5,
10830
10832
  colorByFamily: "#D3D3D3",
10831
10833
  color: "hsl(268.9, 100%, 69%)",
10832
- mass: 146.2
10834
+ mass: 128.12922
10833
10835
  },
10834
10836
  G: {
10835
10837
  value: "G",
@@ -10838,7 +10840,7 @@ const proteinAlphabet = {
10838
10840
  hydrophobicity: -0.4,
10839
10841
  colorByFamily: "#00FFFF",
10840
10842
  color: "hsl(303.1, 100%, 69%)",
10841
- mass: 75.1
10843
+ mass: 57.05132
10842
10844
  },
10843
10845
  H: {
10844
10846
  value: "H",
@@ -10847,7 +10849,7 @@ const proteinAlphabet = {
10847
10849
  hydrophobicity: -3.2,
10848
10850
  colorByFamily: "#FFC0CB",
10849
10851
  color: "hsl(272.2, 100%, 69%)",
10850
- mass: 155.2
10852
+ mass: 137.13928
10851
10853
  },
10852
10854
  I: {
10853
10855
  value: "I",
@@ -10856,7 +10858,7 @@ const proteinAlphabet = {
10856
10858
  hydrophobicity: 4.5,
10857
10859
  colorByFamily: "#00FFFF",
10858
10860
  color: "hsl(356.9, 100%, 69%)",
10859
- mass: 131.2
10861
+ mass: 113.15764
10860
10862
  },
10861
10863
  L: {
10862
10864
  value: "L",
@@ -10865,7 +10867,7 @@ const proteinAlphabet = {
10865
10867
  hydrophobicity: 3.8,
10866
10868
  colorByFamily: "#00FFFF",
10867
10869
  color: "hsl(349.4, 100%, 69%)",
10868
- mass: 131.2
10870
+ mass: 113.15764
10869
10871
  },
10870
10872
  K: {
10871
10873
  value: "K",
@@ -10874,7 +10876,7 @@ const proteinAlphabet = {
10874
10876
  hydrophobicity: -3.9,
10875
10877
  colorByFamily: "#FFC0CB",
10876
10878
  color: "hsl(264.7, 100%, 69%)",
10877
- mass: 146.2
10879
+ mass: 128.17228
10878
10880
  },
10879
10881
  M: {
10880
10882
  value: "M",
@@ -10883,7 +10885,7 @@ const proteinAlphabet = {
10883
10885
  hydrophobicity: 1.9,
10884
10886
  colorByFamily: "#FFFF00",
10885
10887
  color: "hsl(328.5, 100%, 69%)",
10886
- mass: 149.2
10888
+ mass: 131.19606
10887
10889
  },
10888
10890
  F: {
10889
10891
  value: "F",
@@ -10892,7 +10894,7 @@ const proteinAlphabet = {
10892
10894
  hydrophobicity: 2.8,
10893
10895
  colorByFamily: "#FFA500",
10894
10896
  color: "hsl(338.4, 100%, 69%)",
10895
- mass: 165.2
10897
+ mass: 147.17386
10896
10898
  },
10897
10899
  P: {
10898
10900
  value: "P",
@@ -10901,7 +10903,7 @@ const proteinAlphabet = {
10901
10903
  hydrophobicity: -1.6,
10902
10904
  colorByFamily: "#00FFFF",
10903
10905
  color: "hsl(289.9, 100%, 69%)",
10904
- mass: 115.1
10906
+ mass: 97.11518
10905
10907
  },
10906
10908
  S: {
10907
10909
  value: "S",
@@ -10910,7 +10912,7 @@ const proteinAlphabet = {
10910
10912
  hydrophobicity: -0.8,
10911
10913
  colorByFamily: "#90EE90",
10912
10914
  color: "hsl(298.6, 100%, 69%)",
10913
- mass: 105.1
10915
+ mass: 87.0773
10914
10916
  },
10915
10917
  T: {
10916
10918
  value: "T",
@@ -10919,7 +10921,7 @@ const proteinAlphabet = {
10919
10921
  hydrophobicity: -0.7,
10920
10922
  colorByFamily: "#90EE90",
10921
10923
  color: "hsl(299.8, 100%, 69%)",
10922
- mass: 119.1
10924
+ mass: 101.10388
10923
10925
  },
10924
10926
  U: {
10925
10927
  value: "U",
@@ -10927,7 +10929,7 @@ const proteinAlphabet = {
10927
10929
  threeLettersName: "Sec",
10928
10930
  colorByFamily: "#FF0000",
10929
10931
  color: "hsl(0, 100%, 69%)",
10930
- mass: 168.1
10932
+ mass: 150.3079
10931
10933
  },
10932
10934
  W: {
10933
10935
  value: "W",
@@ -10936,7 +10938,7 @@ const proteinAlphabet = {
10936
10938
  hydrophobicity: -0.9,
10937
10939
  colorByFamily: "#FFA500",
10938
10940
  color: "hsl(297.6, 100%, 69%)",
10939
- mass: 204.2
10941
+ mass: 186.2099
10940
10942
  },
10941
10943
  Y: {
10942
10944
  value: "Y",
@@ -10945,7 +10947,7 @@ const proteinAlphabet = {
10945
10947
  hydrophobicity: -1.3,
10946
10948
  colorByFamily: "#FFA500",
10947
10949
  color: "hsl(293.2, 100%, 69%)",
10948
- mass: 181.2
10950
+ mass: 163.17326
10949
10951
  },
10950
10952
  V: {
10951
10953
  value: "V",
@@ -10954,7 +10956,7 @@ const proteinAlphabet = {
10954
10956
  hydrophobicity: 4.2,
10955
10957
  colorByFamily: "#00FFFF",
10956
10958
  color: "hsl(353.6, 100%, 69%)",
10957
- mass: 117.1
10959
+ mass: 99.13106
10958
10960
  },
10959
10961
  "*": {
10960
10962
  value: "*",
@@ -11273,20 +11275,91 @@ const annotationTypes = [
11273
11275
  "primers",
11274
11276
  "guides"
11275
11277
  ];
11276
- function filterSequenceString(sequenceString, additionalValidChars = "", charOverrides) {
11277
- if (sequenceString) {
11278
- return sequenceString.replace(
11279
- new RegExp(
11280
- `[^${charOverrides || `atgcyrswkmbvdhnu${additionalValidChars.split("").join("\\")}`}]`,
11281
- "gi"
11282
- ),
11283
- ""
11278
+ function filterSequenceString(sequenceString, {
11279
+ additionalValidChars = "",
11280
+ isOligo,
11281
+ name,
11282
+ isProtein,
11283
+ isRna,
11284
+ isMixedRnaAndDna,
11285
+ includeStopCodon
11286
+ } = {}) {
11287
+ const acceptedChars = getAcceptedChars({
11288
+ isOligo,
11289
+ isProtein,
11290
+ isRna,
11291
+ isMixedRnaAndDna,
11292
+ includeStopCodon
11293
+ });
11294
+ const replaceChars = getReplaceChars({
11295
+ isOligo,
11296
+ isProtein,
11297
+ isRna,
11298
+ isMixedRnaAndDna
11299
+ });
11300
+ let sanitizedVal = "";
11301
+ const invalidChars = [];
11302
+ const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
11303
+ const warnings = [];
11304
+ const replaceCount = {};
11305
+ sequenceString.split("").forEach((letter) => {
11306
+ const lowerLetter = letter.toLowerCase();
11307
+ if (replaceChars && replaceChars[lowerLetter]) {
11308
+ if (!replaceCount[lowerLetter]) {
11309
+ replaceCount[lowerLetter] = 0;
11310
+ }
11311
+ replaceCount[lowerLetter]++;
11312
+ const isUpper = lowerLetter !== letter;
11313
+ sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
11314
+ } else if (chars.includes(lowerLetter)) {
11315
+ sanitizedVal += letter;
11316
+ } else {
11317
+ invalidChars.push(letter);
11318
+ }
11319
+ });
11320
+ Object.keys(replaceCount).forEach((letter) => {
11321
+ warnings.push(
11322
+ `Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
11323
+ );
11324
+ });
11325
+ if (sequenceString.length !== sanitizedVal.length) {
11326
+ warnings.push(
11327
+ `${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
11284
11328
  );
11285
- } else {
11286
- return sequenceString;
11287
11329
  }
11330
+ if (typeof window !== "undefined" && window.toastr && warnings.length) {
11331
+ warnings.forEach((warning) => {
11332
+ window.toastr.warning(warning);
11333
+ });
11334
+ }
11335
+ return [sanitizedVal, warnings];
11288
11336
  }
11289
11337
  __name(filterSequenceString, "filterSequenceString");
11338
+ function getAcceptedChars({
11339
+ isOligo,
11340
+ isProtein,
11341
+ isRna,
11342
+ isMixedRnaAndDna,
11343
+ includeStopCodon
11344
+ } = {}) {
11345
+ return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11346
+ //just plain old dna
11347
+ ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
11348
+ );
11349
+ }
11350
+ __name(getAcceptedChars, "getAcceptedChars");
11351
+ function getReplaceChars({
11352
+ isOligo,
11353
+ isProtein,
11354
+ isRna,
11355
+ isMixedRnaAndDna
11356
+ } = {}) {
11357
+ return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
11358
+ //just plain old dna
11359
+ {}
11360
+ );
11361
+ }
11362
+ __name(getReplaceChars, "getReplaceChars");
11290
11363
  function tidyUpAnnotation(_annotation, {
11291
11364
  sequenceData = {},
11292
11365
  convertAnnotationsFromAAIndices,
@@ -11415,14 +11488,6 @@ function coerceLocation({
11415
11488
  }
11416
11489
  }
11417
11490
  __name(coerceLocation, "coerceLocation");
11418
- function filterAminoAcidSequenceString(sequenceString, options) {
11419
- options = options || {};
11420
- if (options.includeStopCodon) {
11421
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
11422
- }
11423
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
11424
- }
11425
- __name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
11426
11491
  function getDegenerateDnaStringFromAAString(aaString) {
11427
11492
  return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
11428
11493
  }
@@ -11434,11 +11499,10 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11434
11499
  removeUnwantedChars,
11435
11500
  additionalValidChars,
11436
11501
  noTranslationData,
11437
- charOverrides,
11438
11502
  doNotProvideIdsForAnnotations,
11439
- proteinFilterOptions,
11440
11503
  noCdsTranslations,
11441
- convertAnnotationsFromAAIndices
11504
+ convertAnnotationsFromAAIndices,
11505
+ topLevelSeqData
11442
11506
  } = options;
11443
11507
  let seqData = lodashExports.cloneDeep(pSeqData);
11444
11508
  const response = {
@@ -11468,16 +11532,15 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11468
11532
  }
11469
11533
  if (removeUnwantedChars) {
11470
11534
  if (seqData.isProtein) {
11471
- seqData.proteinSequence = filterAminoAcidSequenceString(
11472
- seqData.proteinSequence,
11473
- __spreadValues({ includeStopCodon: true }, proteinFilterOptions)
11474
- );
11535
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
11536
+ includeStopCodon: true
11537
+ }, topLevelSeqData || seqData));
11538
+ seqData.proteinSequence = newSeq;
11475
11539
  } else {
11476
- seqData.sequence = filterSequenceString(
11477
- seqData.sequence,
11478
- `${additionalValidChars || ""}${seqData.isRna || seqData.isMixedRnaAndDna ? "u" : ""}`,
11479
- charOverrides
11480
- );
11540
+ const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
11541
+ additionalValidChars
11542
+ }, topLevelSeqData || seqData));
11543
+ seqData.sequence = newSeq;
11481
11544
  }
11482
11545
  }
11483
11546
  if (seqData.isProtein) {
@@ -19312,7 +19375,8 @@ function validateSequence(sequence, options = {}) {
19312
19375
  inclusive1BasedEnd,
19313
19376
  additionalValidChars,
19314
19377
  allowOverflowAnnotations,
19315
- coerceFeatureTypes
19378
+ coerceFeatureTypes,
19379
+ includeStopCodon
19316
19380
  } = options;
19317
19381
  [
19318
19382
  "isDNA",
@@ -19362,7 +19426,6 @@ function validateSequence(sequence, options = {}) {
19362
19426
  response.messages.push("No sequence detected");
19363
19427
  sequence.sequence = "";
19364
19428
  }
19365
- let validChars;
19366
19429
  if (sequence.isProtein === void 0 && guessIfProtein) {
19367
19430
  sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19368
19431
  sequence.sequence,
@@ -19370,12 +19433,15 @@ function validateSequence(sequence, options = {}) {
19370
19433
  );
19371
19434
  }
19372
19435
  if (sequence.isProtein) {
19373
- validChars = filterAminoAcidSequenceString(sequence.sequence);
19436
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19437
+ name: sequence.name,
19438
+ isProtein: true,
19439
+ additionalValidChars,
19440
+ includeStopCodon
19441
+ });
19374
19442
  if (validChars !== sequence.sequence) {
19375
19443
  sequence.sequence = validChars;
19376
- response.messages.push(
19377
- "Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
19378
- );
19444
+ response.messages.push(...warnings);
19379
19445
  }
19380
19446
  sequence.type = "PROTEIN";
19381
19447
  sequence.isProtein = true;
@@ -19397,12 +19463,12 @@ function validateSequence(sequence, options = {}) {
19397
19463
  } else {
19398
19464
  sequence.type = "DNA";
19399
19465
  }
19400
- validChars = filterSequenceString(sequence.sequence, additionalValidChars);
19466
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, __spreadValues({
19467
+ additionalValidChars
19468
+ }, sequence));
19401
19469
  if (validChars !== sequence.sequence) {
19402
19470
  sequence.sequence = validChars;
19403
- response.messages.push(
19404
- "Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
19405
- );
19471
+ response.messages.push(...warnings);
19406
19472
  }
19407
19473
  }
19408
19474
  if (!sequence.size) {
package/index.mjs CHANGED
@@ -6168,7 +6168,9 @@ lodash.exports;
6168
6168
  })(lodash, lodash.exports);
6169
6169
  var lodashExports = lodash.exports;
6170
6170
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6171
+ const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6171
6172
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6173
+ const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6172
6174
  const aminoAcidToDegenerateDnaMap = {
6173
6175
  "-": "---",
6174
6176
  ".": "...",
@@ -10773,7 +10775,7 @@ const proteinAlphabet = {
10773
10775
  hydrophobicity: 1.8,
10774
10776
  colorByFamily: "#00FFFF",
10775
10777
  color: "hsl(327.3, 100%, 69%)",
10776
- mass: 89.1
10778
+ mass: 71.0779
10777
10779
  },
10778
10780
  R: {
10779
10781
  value: "R",
@@ -10782,7 +10784,7 @@ const proteinAlphabet = {
10782
10784
  hydrophobicity: -4.5,
10783
10785
  colorByFamily: "#FFC0CB",
10784
10786
  color: "hsl(258.1, 100%, 69%)",
10785
- mass: 174.2
10787
+ mass: 156.18568
10786
10788
  },
10787
10789
  N: {
10788
10790
  value: "N",
@@ -10791,7 +10793,7 @@ const proteinAlphabet = {
10791
10793
  hydrophobicity: -3.5,
10792
10794
  colorByFamily: "#D3D3D3",
10793
10795
  color: "hsl(268.9, 100%, 69%)",
10794
- mass: 132.1
10796
+ mass: 114.10264
10795
10797
  },
10796
10798
  D: {
10797
10799
  value: "D",
@@ -10800,7 +10802,7 @@ const proteinAlphabet = {
10800
10802
  hydrophobicity: -3.5,
10801
10803
  colorByFamily: "#EE82EE",
10802
10804
  color: "hsl(268.9, 100%, 69%)",
10803
- mass: 133.1
10805
+ mass: 115.0874
10804
10806
  },
10805
10807
  C: {
10806
10808
  value: "C",
@@ -10809,7 +10811,7 @@ const proteinAlphabet = {
10809
10811
  hydrophobicity: 2.5,
10810
10812
  colorByFamily: "#FFFF00",
10811
10813
  color: "hsl(335.1, 100%, 69%)",
10812
- mass: 121.2
10814
+ mass: 103.1429
10813
10815
  },
10814
10816
  E: {
10815
10817
  value: "E",
@@ -10818,7 +10820,7 @@ const proteinAlphabet = {
10818
10820
  hydrophobicity: -3.5,
10819
10821
  colorByFamily: "#EE82EE",
10820
10822
  color: "hsl(268.9, 100%, 69%)",
10821
- mass: 147.1
10823
+ mass: 129.11398
10822
10824
  },
10823
10825
  Q: {
10824
10826
  value: "Q",
@@ -10827,7 +10829,7 @@ const proteinAlphabet = {
10827
10829
  hydrophobicity: -3.5,
10828
10830
  colorByFamily: "#D3D3D3",
10829
10831
  color: "hsl(268.9, 100%, 69%)",
10830
- mass: 146.2
10832
+ mass: 128.12922
10831
10833
  },
10832
10834
  G: {
10833
10835
  value: "G",
@@ -10836,7 +10838,7 @@ const proteinAlphabet = {
10836
10838
  hydrophobicity: -0.4,
10837
10839
  colorByFamily: "#00FFFF",
10838
10840
  color: "hsl(303.1, 100%, 69%)",
10839
- mass: 75.1
10841
+ mass: 57.05132
10840
10842
  },
10841
10843
  H: {
10842
10844
  value: "H",
@@ -10845,7 +10847,7 @@ const proteinAlphabet = {
10845
10847
  hydrophobicity: -3.2,
10846
10848
  colorByFamily: "#FFC0CB",
10847
10849
  color: "hsl(272.2, 100%, 69%)",
10848
- mass: 155.2
10850
+ mass: 137.13928
10849
10851
  },
10850
10852
  I: {
10851
10853
  value: "I",
@@ -10854,7 +10856,7 @@ const proteinAlphabet = {
10854
10856
  hydrophobicity: 4.5,
10855
10857
  colorByFamily: "#00FFFF",
10856
10858
  color: "hsl(356.9, 100%, 69%)",
10857
- mass: 131.2
10859
+ mass: 113.15764
10858
10860
  },
10859
10861
  L: {
10860
10862
  value: "L",
@@ -10863,7 +10865,7 @@ const proteinAlphabet = {
10863
10865
  hydrophobicity: 3.8,
10864
10866
  colorByFamily: "#00FFFF",
10865
10867
  color: "hsl(349.4, 100%, 69%)",
10866
- mass: 131.2
10868
+ mass: 113.15764
10867
10869
  },
10868
10870
  K: {
10869
10871
  value: "K",
@@ -10872,7 +10874,7 @@ const proteinAlphabet = {
10872
10874
  hydrophobicity: -3.9,
10873
10875
  colorByFamily: "#FFC0CB",
10874
10876
  color: "hsl(264.7, 100%, 69%)",
10875
- mass: 146.2
10877
+ mass: 128.17228
10876
10878
  },
10877
10879
  M: {
10878
10880
  value: "M",
@@ -10881,7 +10883,7 @@ const proteinAlphabet = {
10881
10883
  hydrophobicity: 1.9,
10882
10884
  colorByFamily: "#FFFF00",
10883
10885
  color: "hsl(328.5, 100%, 69%)",
10884
- mass: 149.2
10886
+ mass: 131.19606
10885
10887
  },
10886
10888
  F: {
10887
10889
  value: "F",
@@ -10890,7 +10892,7 @@ const proteinAlphabet = {
10890
10892
  hydrophobicity: 2.8,
10891
10893
  colorByFamily: "#FFA500",
10892
10894
  color: "hsl(338.4, 100%, 69%)",
10893
- mass: 165.2
10895
+ mass: 147.17386
10894
10896
  },
10895
10897
  P: {
10896
10898
  value: "P",
@@ -10899,7 +10901,7 @@ const proteinAlphabet = {
10899
10901
  hydrophobicity: -1.6,
10900
10902
  colorByFamily: "#00FFFF",
10901
10903
  color: "hsl(289.9, 100%, 69%)",
10902
- mass: 115.1
10904
+ mass: 97.11518
10903
10905
  },
10904
10906
  S: {
10905
10907
  value: "S",
@@ -10908,7 +10910,7 @@ const proteinAlphabet = {
10908
10910
  hydrophobicity: -0.8,
10909
10911
  colorByFamily: "#90EE90",
10910
10912
  color: "hsl(298.6, 100%, 69%)",
10911
- mass: 105.1
10913
+ mass: 87.0773
10912
10914
  },
10913
10915
  T: {
10914
10916
  value: "T",
@@ -10917,7 +10919,7 @@ const proteinAlphabet = {
10917
10919
  hydrophobicity: -0.7,
10918
10920
  colorByFamily: "#90EE90",
10919
10921
  color: "hsl(299.8, 100%, 69%)",
10920
- mass: 119.1
10922
+ mass: 101.10388
10921
10923
  },
10922
10924
  U: {
10923
10925
  value: "U",
@@ -10925,7 +10927,7 @@ const proteinAlphabet = {
10925
10927
  threeLettersName: "Sec",
10926
10928
  colorByFamily: "#FF0000",
10927
10929
  color: "hsl(0, 100%, 69%)",
10928
- mass: 168.1
10930
+ mass: 150.3079
10929
10931
  },
10930
10932
  W: {
10931
10933
  value: "W",
@@ -10934,7 +10936,7 @@ const proteinAlphabet = {
10934
10936
  hydrophobicity: -0.9,
10935
10937
  colorByFamily: "#FFA500",
10936
10938
  color: "hsl(297.6, 100%, 69%)",
10937
- mass: 204.2
10939
+ mass: 186.2099
10938
10940
  },
10939
10941
  Y: {
10940
10942
  value: "Y",
@@ -10943,7 +10945,7 @@ const proteinAlphabet = {
10943
10945
  hydrophobicity: -1.3,
10944
10946
  colorByFamily: "#FFA500",
10945
10947
  color: "hsl(293.2, 100%, 69%)",
10946
- mass: 181.2
10948
+ mass: 163.17326
10947
10949
  },
10948
10950
  V: {
10949
10951
  value: "V",
@@ -10952,7 +10954,7 @@ const proteinAlphabet = {
10952
10954
  hydrophobicity: 4.2,
10953
10955
  colorByFamily: "#00FFFF",
10954
10956
  color: "hsl(353.6, 100%, 69%)",
10955
- mass: 117.1
10957
+ mass: 99.13106
10956
10958
  },
10957
10959
  "*": {
10958
10960
  value: "*",
@@ -11271,20 +11273,91 @@ const annotationTypes = [
11271
11273
  "primers",
11272
11274
  "guides"
11273
11275
  ];
11274
- function filterSequenceString(sequenceString, additionalValidChars = "", charOverrides) {
11275
- if (sequenceString) {
11276
- return sequenceString.replace(
11277
- new RegExp(
11278
- `[^${charOverrides || `atgcyrswkmbvdhnu${additionalValidChars.split("").join("\\")}`}]`,
11279
- "gi"
11280
- ),
11281
- ""
11276
+ function filterSequenceString(sequenceString, {
11277
+ additionalValidChars = "",
11278
+ isOligo,
11279
+ name,
11280
+ isProtein,
11281
+ isRna,
11282
+ isMixedRnaAndDna,
11283
+ includeStopCodon
11284
+ } = {}) {
11285
+ const acceptedChars = getAcceptedChars({
11286
+ isOligo,
11287
+ isProtein,
11288
+ isRna,
11289
+ isMixedRnaAndDna,
11290
+ includeStopCodon
11291
+ });
11292
+ const replaceChars = getReplaceChars({
11293
+ isOligo,
11294
+ isProtein,
11295
+ isRna,
11296
+ isMixedRnaAndDna
11297
+ });
11298
+ let sanitizedVal = "";
11299
+ const invalidChars = [];
11300
+ const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
11301
+ const warnings = [];
11302
+ const replaceCount = {};
11303
+ sequenceString.split("").forEach((letter) => {
11304
+ const lowerLetter = letter.toLowerCase();
11305
+ if (replaceChars && replaceChars[lowerLetter]) {
11306
+ if (!replaceCount[lowerLetter]) {
11307
+ replaceCount[lowerLetter] = 0;
11308
+ }
11309
+ replaceCount[lowerLetter]++;
11310
+ const isUpper = lowerLetter !== letter;
11311
+ sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
11312
+ } else if (chars.includes(lowerLetter)) {
11313
+ sanitizedVal += letter;
11314
+ } else {
11315
+ invalidChars.push(letter);
11316
+ }
11317
+ });
11318
+ Object.keys(replaceCount).forEach((letter) => {
11319
+ warnings.push(
11320
+ `Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
11321
+ );
11322
+ });
11323
+ if (sequenceString.length !== sanitizedVal.length) {
11324
+ warnings.push(
11325
+ `${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
11282
11326
  );
11283
- } else {
11284
- return sequenceString;
11285
11327
  }
11328
+ if (typeof window !== "undefined" && window.toastr && warnings.length) {
11329
+ warnings.forEach((warning) => {
11330
+ window.toastr.warning(warning);
11331
+ });
11332
+ }
11333
+ return [sanitizedVal, warnings];
11286
11334
  }
11287
11335
  __name(filterSequenceString, "filterSequenceString");
11336
+ function getAcceptedChars({
11337
+ isOligo,
11338
+ isProtein,
11339
+ isRna,
11340
+ isMixedRnaAndDna,
11341
+ includeStopCodon
11342
+ } = {}) {
11343
+ return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11344
+ //just plain old dna
11345
+ ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
11346
+ );
11347
+ }
11348
+ __name(getAcceptedChars, "getAcceptedChars");
11349
+ function getReplaceChars({
11350
+ isOligo,
11351
+ isProtein,
11352
+ isRna,
11353
+ isMixedRnaAndDna
11354
+ } = {}) {
11355
+ return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
11356
+ //just plain old dna
11357
+ {}
11358
+ );
11359
+ }
11360
+ __name(getReplaceChars, "getReplaceChars");
11288
11361
  function tidyUpAnnotation(_annotation, {
11289
11362
  sequenceData = {},
11290
11363
  convertAnnotationsFromAAIndices,
@@ -11413,14 +11486,6 @@ function coerceLocation({
11413
11486
  }
11414
11487
  }
11415
11488
  __name(coerceLocation, "coerceLocation");
11416
- function filterAminoAcidSequenceString(sequenceString, options) {
11417
- options = options || {};
11418
- if (options.includeStopCodon) {
11419
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
11420
- }
11421
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
11422
- }
11423
- __name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
11424
11489
  function getDegenerateDnaStringFromAAString(aaString) {
11425
11490
  return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
11426
11491
  }
@@ -11432,11 +11497,10 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11432
11497
  removeUnwantedChars,
11433
11498
  additionalValidChars,
11434
11499
  noTranslationData,
11435
- charOverrides,
11436
11500
  doNotProvideIdsForAnnotations,
11437
- proteinFilterOptions,
11438
11501
  noCdsTranslations,
11439
- convertAnnotationsFromAAIndices
11502
+ convertAnnotationsFromAAIndices,
11503
+ topLevelSeqData
11440
11504
  } = options;
11441
11505
  let seqData = lodashExports.cloneDeep(pSeqData);
11442
11506
  const response = {
@@ -11466,16 +11530,15 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11466
11530
  }
11467
11531
  if (removeUnwantedChars) {
11468
11532
  if (seqData.isProtein) {
11469
- seqData.proteinSequence = filterAminoAcidSequenceString(
11470
- seqData.proteinSequence,
11471
- __spreadValues({ includeStopCodon: true }, proteinFilterOptions)
11472
- );
11533
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
11534
+ includeStopCodon: true
11535
+ }, topLevelSeqData || seqData));
11536
+ seqData.proteinSequence = newSeq;
11473
11537
  } else {
11474
- seqData.sequence = filterSequenceString(
11475
- seqData.sequence,
11476
- `${additionalValidChars || ""}${seqData.isRna || seqData.isMixedRnaAndDna ? "u" : ""}`,
11477
- charOverrides
11478
- );
11538
+ const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
11539
+ additionalValidChars
11540
+ }, topLevelSeqData || seqData));
11541
+ seqData.sequence = newSeq;
11479
11542
  }
11480
11543
  }
11481
11544
  if (seqData.isProtein) {
@@ -19310,7 +19373,8 @@ function validateSequence(sequence, options = {}) {
19310
19373
  inclusive1BasedEnd,
19311
19374
  additionalValidChars,
19312
19375
  allowOverflowAnnotations,
19313
- coerceFeatureTypes
19376
+ coerceFeatureTypes,
19377
+ includeStopCodon
19314
19378
  } = options;
19315
19379
  [
19316
19380
  "isDNA",
@@ -19360,7 +19424,6 @@ function validateSequence(sequence, options = {}) {
19360
19424
  response.messages.push("No sequence detected");
19361
19425
  sequence.sequence = "";
19362
19426
  }
19363
- let validChars;
19364
19427
  if (sequence.isProtein === void 0 && guessIfProtein) {
19365
19428
  sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19366
19429
  sequence.sequence,
@@ -19368,12 +19431,15 @@ function validateSequence(sequence, options = {}) {
19368
19431
  );
19369
19432
  }
19370
19433
  if (sequence.isProtein) {
19371
- validChars = filterAminoAcidSequenceString(sequence.sequence);
19434
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19435
+ name: sequence.name,
19436
+ isProtein: true,
19437
+ additionalValidChars,
19438
+ includeStopCodon
19439
+ });
19372
19440
  if (validChars !== sequence.sequence) {
19373
19441
  sequence.sequence = validChars;
19374
- response.messages.push(
19375
- "Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
19376
- );
19442
+ response.messages.push(...warnings);
19377
19443
  }
19378
19444
  sequence.type = "PROTEIN";
19379
19445
  sequence.isProtein = true;
@@ -19395,12 +19461,12 @@ function validateSequence(sequence, options = {}) {
19395
19461
  } else {
19396
19462
  sequence.type = "DNA";
19397
19463
  }
19398
- validChars = filterSequenceString(sequence.sequence, additionalValidChars);
19464
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, __spreadValues({
19465
+ additionalValidChars
19466
+ }, sequence));
19399
19467
  if (validChars !== sequence.sequence) {
19400
19468
  sequence.sequence = validChars;
19401
- response.messages.push(
19402
- "Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
19403
- );
19469
+ response.messages.push(...warnings);
19404
19470
  }
19405
19471
  }
19406
19472
  if (!sequence.size) {
package/index.umd.js CHANGED
@@ -6172,7 +6172,9 @@ var __async = (__this, __arguments, generator) => {
6172
6172
  })(lodash, lodash.exports);
6173
6173
  var lodashExports = lodash.exports;
6174
6174
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6175
+ const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6175
6176
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6177
+ const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6176
6178
  const aminoAcidToDegenerateDnaMap = {
6177
6179
  "-": "---",
6178
6180
  ".": "...",
@@ -10777,7 +10779,7 @@ var __async = (__this, __arguments, generator) => {
10777
10779
  hydrophobicity: 1.8,
10778
10780
  colorByFamily: "#00FFFF",
10779
10781
  color: "hsl(327.3, 100%, 69%)",
10780
- mass: 89.1
10782
+ mass: 71.0779
10781
10783
  },
10782
10784
  R: {
10783
10785
  value: "R",
@@ -10786,7 +10788,7 @@ var __async = (__this, __arguments, generator) => {
10786
10788
  hydrophobicity: -4.5,
10787
10789
  colorByFamily: "#FFC0CB",
10788
10790
  color: "hsl(258.1, 100%, 69%)",
10789
- mass: 174.2
10791
+ mass: 156.18568
10790
10792
  },
10791
10793
  N: {
10792
10794
  value: "N",
@@ -10795,7 +10797,7 @@ var __async = (__this, __arguments, generator) => {
10795
10797
  hydrophobicity: -3.5,
10796
10798
  colorByFamily: "#D3D3D3",
10797
10799
  color: "hsl(268.9, 100%, 69%)",
10798
- mass: 132.1
10800
+ mass: 114.10264
10799
10801
  },
10800
10802
  D: {
10801
10803
  value: "D",
@@ -10804,7 +10806,7 @@ var __async = (__this, __arguments, generator) => {
10804
10806
  hydrophobicity: -3.5,
10805
10807
  colorByFamily: "#EE82EE",
10806
10808
  color: "hsl(268.9, 100%, 69%)",
10807
- mass: 133.1
10809
+ mass: 115.0874
10808
10810
  },
10809
10811
  C: {
10810
10812
  value: "C",
@@ -10813,7 +10815,7 @@ var __async = (__this, __arguments, generator) => {
10813
10815
  hydrophobicity: 2.5,
10814
10816
  colorByFamily: "#FFFF00",
10815
10817
  color: "hsl(335.1, 100%, 69%)",
10816
- mass: 121.2
10818
+ mass: 103.1429
10817
10819
  },
10818
10820
  E: {
10819
10821
  value: "E",
@@ -10822,7 +10824,7 @@ var __async = (__this, __arguments, generator) => {
10822
10824
  hydrophobicity: -3.5,
10823
10825
  colorByFamily: "#EE82EE",
10824
10826
  color: "hsl(268.9, 100%, 69%)",
10825
- mass: 147.1
10827
+ mass: 129.11398
10826
10828
  },
10827
10829
  Q: {
10828
10830
  value: "Q",
@@ -10831,7 +10833,7 @@ var __async = (__this, __arguments, generator) => {
10831
10833
  hydrophobicity: -3.5,
10832
10834
  colorByFamily: "#D3D3D3",
10833
10835
  color: "hsl(268.9, 100%, 69%)",
10834
- mass: 146.2
10836
+ mass: 128.12922
10835
10837
  },
10836
10838
  G: {
10837
10839
  value: "G",
@@ -10840,7 +10842,7 @@ var __async = (__this, __arguments, generator) => {
10840
10842
  hydrophobicity: -0.4,
10841
10843
  colorByFamily: "#00FFFF",
10842
10844
  color: "hsl(303.1, 100%, 69%)",
10843
- mass: 75.1
10845
+ mass: 57.05132
10844
10846
  },
10845
10847
  H: {
10846
10848
  value: "H",
@@ -10849,7 +10851,7 @@ var __async = (__this, __arguments, generator) => {
10849
10851
  hydrophobicity: -3.2,
10850
10852
  colorByFamily: "#FFC0CB",
10851
10853
  color: "hsl(272.2, 100%, 69%)",
10852
- mass: 155.2
10854
+ mass: 137.13928
10853
10855
  },
10854
10856
  I: {
10855
10857
  value: "I",
@@ -10858,7 +10860,7 @@ var __async = (__this, __arguments, generator) => {
10858
10860
  hydrophobicity: 4.5,
10859
10861
  colorByFamily: "#00FFFF",
10860
10862
  color: "hsl(356.9, 100%, 69%)",
10861
- mass: 131.2
10863
+ mass: 113.15764
10862
10864
  },
10863
10865
  L: {
10864
10866
  value: "L",
@@ -10867,7 +10869,7 @@ var __async = (__this, __arguments, generator) => {
10867
10869
  hydrophobicity: 3.8,
10868
10870
  colorByFamily: "#00FFFF",
10869
10871
  color: "hsl(349.4, 100%, 69%)",
10870
- mass: 131.2
10872
+ mass: 113.15764
10871
10873
  },
10872
10874
  K: {
10873
10875
  value: "K",
@@ -10876,7 +10878,7 @@ var __async = (__this, __arguments, generator) => {
10876
10878
  hydrophobicity: -3.9,
10877
10879
  colorByFamily: "#FFC0CB",
10878
10880
  color: "hsl(264.7, 100%, 69%)",
10879
- mass: 146.2
10881
+ mass: 128.17228
10880
10882
  },
10881
10883
  M: {
10882
10884
  value: "M",
@@ -10885,7 +10887,7 @@ var __async = (__this, __arguments, generator) => {
10885
10887
  hydrophobicity: 1.9,
10886
10888
  colorByFamily: "#FFFF00",
10887
10889
  color: "hsl(328.5, 100%, 69%)",
10888
- mass: 149.2
10890
+ mass: 131.19606
10889
10891
  },
10890
10892
  F: {
10891
10893
  value: "F",
@@ -10894,7 +10896,7 @@ var __async = (__this, __arguments, generator) => {
10894
10896
  hydrophobicity: 2.8,
10895
10897
  colorByFamily: "#FFA500",
10896
10898
  color: "hsl(338.4, 100%, 69%)",
10897
- mass: 165.2
10899
+ mass: 147.17386
10898
10900
  },
10899
10901
  P: {
10900
10902
  value: "P",
@@ -10903,7 +10905,7 @@ var __async = (__this, __arguments, generator) => {
10903
10905
  hydrophobicity: -1.6,
10904
10906
  colorByFamily: "#00FFFF",
10905
10907
  color: "hsl(289.9, 100%, 69%)",
10906
- mass: 115.1
10908
+ mass: 97.11518
10907
10909
  },
10908
10910
  S: {
10909
10911
  value: "S",
@@ -10912,7 +10914,7 @@ var __async = (__this, __arguments, generator) => {
10912
10914
  hydrophobicity: -0.8,
10913
10915
  colorByFamily: "#90EE90",
10914
10916
  color: "hsl(298.6, 100%, 69%)",
10915
- mass: 105.1
10917
+ mass: 87.0773
10916
10918
  },
10917
10919
  T: {
10918
10920
  value: "T",
@@ -10921,7 +10923,7 @@ var __async = (__this, __arguments, generator) => {
10921
10923
  hydrophobicity: -0.7,
10922
10924
  colorByFamily: "#90EE90",
10923
10925
  color: "hsl(299.8, 100%, 69%)",
10924
- mass: 119.1
10926
+ mass: 101.10388
10925
10927
  },
10926
10928
  U: {
10927
10929
  value: "U",
@@ -10929,7 +10931,7 @@ var __async = (__this, __arguments, generator) => {
10929
10931
  threeLettersName: "Sec",
10930
10932
  colorByFamily: "#FF0000",
10931
10933
  color: "hsl(0, 100%, 69%)",
10932
- mass: 168.1
10934
+ mass: 150.3079
10933
10935
  },
10934
10936
  W: {
10935
10937
  value: "W",
@@ -10938,7 +10940,7 @@ var __async = (__this, __arguments, generator) => {
10938
10940
  hydrophobicity: -0.9,
10939
10941
  colorByFamily: "#FFA500",
10940
10942
  color: "hsl(297.6, 100%, 69%)",
10941
- mass: 204.2
10943
+ mass: 186.2099
10942
10944
  },
10943
10945
  Y: {
10944
10946
  value: "Y",
@@ -10947,7 +10949,7 @@ var __async = (__this, __arguments, generator) => {
10947
10949
  hydrophobicity: -1.3,
10948
10950
  colorByFamily: "#FFA500",
10949
10951
  color: "hsl(293.2, 100%, 69%)",
10950
- mass: 181.2
10952
+ mass: 163.17326
10951
10953
  },
10952
10954
  V: {
10953
10955
  value: "V",
@@ -10956,7 +10958,7 @@ var __async = (__this, __arguments, generator) => {
10956
10958
  hydrophobicity: 4.2,
10957
10959
  colorByFamily: "#00FFFF",
10958
10960
  color: "hsl(353.6, 100%, 69%)",
10959
- mass: 117.1
10961
+ mass: 99.13106
10960
10962
  },
10961
10963
  "*": {
10962
10964
  value: "*",
@@ -11275,20 +11277,91 @@ var __async = (__this, __arguments, generator) => {
11275
11277
  "primers",
11276
11278
  "guides"
11277
11279
  ];
11278
/**
 * Remove (or substitute) characters that are not valid for the given kind of
 * sequence and report what was changed.
 *
 * The accepted alphabet comes from `getAcceptedChars` and the substitution
 * table from `getReplaceChars`; both are selected by the sequence-type flags.
 * Matching is case-insensitive and the case of every kept or substituted
 * letter is preserved.
 *
 * Side effect: when `window.toastr` exists, every warning is also shown
 * through `window.toastr.warning`.
 *
 * @param {string} sequenceString - Raw sequence. `null`/`undefined` is treated
 *   as an empty sequence instead of throwing.
 * @param {object} [options]
 * @param {string} [options.additionalValidChars=""] - Extra characters to keep
 *   (compared case-insensitively, taken literally).
 * @param {boolean} [options.isOligo]
 * @param {string} [options.name] - Sequence name, used as a prefix of the
 *   "invalid characters" warning.
 * @param {boolean} [options.isProtein]
 * @param {boolean} [options.isRna]
 * @param {boolean} [options.isMixedRnaAndDna]
 * @param {boolean} [options.includeStopCodon] - Forwarded to `getAcceptedChars`.
 * @returns {[string, string[]]} Tuple of the sanitized sequence and the list
 *   of warning messages (empty when nothing was changed).
 */
function filterSequenceString(sequenceString, {
  additionalValidChars = "",
  isOligo,
  name: name2,
  isProtein,
  isRna,
  isMixedRnaAndDna,
  includeStopCodon
} = {}) {
  // A missing sequence has nothing to sanitize; do not crash on `.split`.
  if (sequenceString === null || sequenceString === void 0) {
    return ["", []];
  }
  const acceptedChars = getAcceptedChars({
    isOligo,
    isProtein,
    isRna,
    isMixedRnaAndDna,
    includeStopCodon
  });
  const replaceChars = getReplaceChars({
    isOligo,
    isProtein,
    isRna,
    isMixedRnaAndDna
  }) || {};
  // The extra characters are used literally (no regex escaping is needed any
  // more) and lower-cased because every letter is lower-cased before lookup.
  const validChars = new Set(
    `${acceptedChars}${String(additionalValidChars || "")}`.toLowerCase().split("")
  );
  const warnings = [];
  const invalidChars = [];
  const replaceCount = {};
  let sanitizedVal = "";
  // Iterating the string directly walks code points, so a character outside
  // the BMP is reported once instead of as two lone surrogates.
  for (const letter of sequenceString) {
    const lowerLetter = letter.toLowerCase();
    const replacement = replaceChars[lowerLetter];
    if (replacement) {
      replaceCount[lowerLetter] = (replaceCount[lowerLetter] || 0) + 1;
      // Keep the case of the letter that is being substituted.
      sanitizedVal += lowerLetter !== letter ? replacement.toUpperCase() : replacement;
    } else if (validChars.has(lowerLetter)) {
      sanitizedVal += letter;
    } else {
      invalidChars.push(letter);
    }
  }
  Object.keys(replaceCount).forEach((letter) => {
    const count = replaceCount[letter];
    warnings.push(
      `Replaced "${letter}" with "${replaceChars[letter]}"${count > 1 ? ` ${count} times` : ""}`
    );
  });
  if (invalidChars.length > 0) {
    // Only the first 100 offenders are listed to keep the message bounded.
    warnings.push(
      `${name2 ? `Sequence ${name2}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
    );
  }
  if (typeof window !== "undefined" && window.toastr && warnings.length) {
    warnings.forEach((warning) => {
      window.toastr.warning(warning);
    });
  }
  return [sanitizedVal, warnings];
}
11291
11339
  __name(filterSequenceString, "filterSequenceString");
11340
/**
 * Build the lower-case alphabet of characters accepted for a sequence type.
 *
 * Precedence of the flags is: protein, then oligo / RNA, then everything else
 * (mixed RNA+DNA and plain DNA share the same alphabet, so `isMixedRnaAndDna`
 * may be passed but does not change the result).
 *
 * @param {object} [options]
 * @param {boolean} [options.isOligo]
 * @param {boolean} [options.isProtein]
 * @param {boolean} [options.isRna]
 * @param {boolean} [options.includeStopCodon] - For proteins only: also accept
 *   "*" and ".".
 * @returns {string} Every accepted character, lower-cased, as one string.
 */
function getAcceptedChars({
  isOligo,
  isProtein,
  isRna,
  includeStopCodon
} = {}) {
  if (isProtein) {
    // No trailing "}" here: a stray brace would make "}" a valid residue.
    return `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}`;
  }
  if (isOligo || isRna) {
    // "t" is accepted as well as the RNA letters.
    return `${ambiguous_rna_letters.toLowerCase()}t`;
  }
  // Mixed RNA/DNA and plain old DNA: union of both nucleotide alphabets.
  return ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase();
}
11352
+ __name(getAcceptedChars, "getAcceptedChars");
11353
/**
 * Return the table of single-character substitutions to apply while
 * sanitizing a sequence, keyed by the lower-case character to replace.
 *
 * Only a plain RNA sequence has substitutions ("t" becomes "u"). The protein
 * and oligo flags take precedence over `isRna`, and every other sequence type
 * (including mixed RNA+DNA and plain DNA) gets an empty table.
 *
 * @param {object} [options]
 * @param {boolean} [options.isOligo]
 * @param {boolean} [options.isProtein]
 * @param {boolean} [options.isRna]
 * @returns {Object<string, string>} Map of lower-case letter to replacement.
 */
function getReplaceChars({
  isOligo,
  isProtein,
  isRna
} = {}) {
  const isPlainRna = Boolean(isRna) && !isProtein && !isOligo;
  return isPlainRna ? { t: "u" } : {};
}
11364
+ __name(getReplaceChars, "getReplaceChars");
11292
11365
  function tidyUpAnnotation(_annotation, {
11293
11366
  sequenceData = {},
11294
11367
  convertAnnotationsFromAAIndices,
@@ -11417,14 +11490,6 @@ var __async = (__this, __arguments, generator) => {
11417
11490
  }
11418
11491
  }
11419
11492
  __name(coerceLocation, "coerceLocation");
11420
- function filterAminoAcidSequenceString(sequenceString, options) {
11421
- options = options || {};
11422
- if (options.includeStopCodon) {
11423
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
11424
- }
11425
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
11426
- }
11427
- __name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
11428
11493
  function getDegenerateDnaStringFromAAString(aaString) {
11429
11494
  return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
11430
11495
  }
@@ -11436,11 +11501,10 @@ var __async = (__this, __arguments, generator) => {
11436
11501
  removeUnwantedChars,
11437
11502
  additionalValidChars,
11438
11503
  noTranslationData,
11439
- charOverrides,
11440
11504
  doNotProvideIdsForAnnotations,
11441
- proteinFilterOptions,
11442
11505
  noCdsTranslations,
11443
- convertAnnotationsFromAAIndices
11506
+ convertAnnotationsFromAAIndices,
11507
+ topLevelSeqData
11444
11508
  } = options;
11445
11509
  let seqData = lodashExports.cloneDeep(pSeqData);
11446
11510
  const response = {
@@ -11470,16 +11534,15 @@ var __async = (__this, __arguments, generator) => {
11470
11534
  }
11471
11535
  if (removeUnwantedChars) {
11472
11536
  if (seqData.isProtein) {
11473
- seqData.proteinSequence = filterAminoAcidSequenceString(
11474
- seqData.proteinSequence,
11475
- __spreadValues({ includeStopCodon: true }, proteinFilterOptions)
11476
- );
11537
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
11538
+ includeStopCodon: true
11539
+ }, topLevelSeqData || seqData));
11540
+ seqData.proteinSequence = newSeq;
11477
11541
  } else {
11478
- seqData.sequence = filterSequenceString(
11479
- seqData.sequence,
11480
- `${additionalValidChars || ""}${seqData.isRna || seqData.isMixedRnaAndDna ? "u" : ""}`,
11481
- charOverrides
11482
- );
11542
+ const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
11543
+ additionalValidChars
11544
+ }, topLevelSeqData || seqData));
11545
+ seqData.sequence = newSeq;
11483
11546
  }
11484
11547
  }
11485
11548
  if (seqData.isProtein) {
@@ -19314,7 +19377,8 @@ var __async = (__this, __arguments, generator) => {
19314
19377
  inclusive1BasedEnd,
19315
19378
  additionalValidChars,
19316
19379
  allowOverflowAnnotations,
19317
- coerceFeatureTypes
19380
+ coerceFeatureTypes,
19381
+ includeStopCodon
19318
19382
  } = options;
19319
19383
  [
19320
19384
  "isDNA",
@@ -19364,7 +19428,6 @@ var __async = (__this, __arguments, generator) => {
19364
19428
  response.messages.push("No sequence detected");
19365
19429
  sequence.sequence = "";
19366
19430
  }
19367
- let validChars;
19368
19431
  if (sequence.isProtein === void 0 && guessIfProtein) {
19369
19432
  sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19370
19433
  sequence.sequence,
@@ -19372,12 +19435,15 @@ var __async = (__this, __arguments, generator) => {
19372
19435
  );
19373
19436
  }
19374
19437
  if (sequence.isProtein) {
19375
- validChars = filterAminoAcidSequenceString(sequence.sequence);
19438
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19439
+ name: sequence.name,
19440
+ isProtein: true,
19441
+ additionalValidChars,
19442
+ includeStopCodon
19443
+ });
19376
19444
  if (validChars !== sequence.sequence) {
19377
19445
  sequence.sequence = validChars;
19378
- response.messages.push(
19379
- "Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
19380
- );
19446
+ response.messages.push(...warnings);
19381
19447
  }
19382
19448
  sequence.type = "PROTEIN";
19383
19449
  sequence.isProtein = true;
@@ -19399,12 +19465,12 @@ var __async = (__this, __arguments, generator) => {
19399
19465
  } else {
19400
19466
  sequence.type = "DNA";
19401
19467
  }
19402
- validChars = filterSequenceString(sequence.sequence, additionalValidChars);
19468
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, __spreadValues({
19469
+ additionalValidChars
19470
+ }, sequence));
19403
19471
  if (validChars !== sequence.sequence) {
19404
19472
  sequence.sequence = validChars;
19405
- response.messages.push(
19406
- "Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
19407
- );
19473
+ response.messages.push(...warnings);
19408
19474
  }
19409
19475
  }
19410
19476
  if (!sequence.size) {
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "@teselagen/bio-parsers",
3
- "version": "0.3.9",
3
+ "version": "0.4.1",
4
4
  "dependencies": {
5
- "@teselagen/sequence-utils": "0.3.7",
5
+ "@teselagen/sequence-utils": "0.3.9",
6
6
  "@teselagen/range-utils": "0.3.7",
7
7
  "@gmod/gff": "^1.2.1",
8
8
  "buffer": "^6.0.3",
@@ -1,7 +1,6 @@
1
1
  import areNonNegativeIntegers from "validate.io-nonnegative-integer-array";
2
2
  import { getFeatureTypes } from "@teselagen/sequence-utils";
3
3
  import {
4
- filterAminoAcidSequenceString,
5
4
  filterSequenceString,
6
5
  guessIfSequenceIsDnaAndNotProtein
7
6
  } from "@teselagen/sequence-utils";
@@ -30,7 +29,8 @@ export default function validateSequence(sequence, options = {}) {
30
29
  inclusive1BasedEnd,
31
30
  additionalValidChars,
32
31
  allowOverflowAnnotations,
33
- coerceFeatureTypes
32
+ coerceFeatureTypes,
33
+ includeStopCodon
34
34
  } = options;
35
35
  [
36
36
  "isDNA",
@@ -84,7 +84,7 @@ export default function validateSequence(sequence, options = {}) {
84
84
  response.messages.push("No sequence detected");
85
85
  sequence.sequence = "";
86
86
  }
87
- let validChars;
87
+
88
88
  if (sequence.isProtein === undefined && guessIfProtein) {
89
89
  sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
90
90
  sequence.sequence,
@@ -93,12 +93,15 @@ export default function validateSequence(sequence, options = {}) {
93
93
  }
94
94
  if (sequence.isProtein) {
95
95
  //tnr: add code to strip invalid protein data..
96
- validChars = filterAminoAcidSequenceString(sequence.sequence);
96
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, {
97
+ name: sequence.name,
98
+ isProtein: true,
99
+ additionalValidChars,
100
+ includeStopCodon
101
+ });
97
102
  if (validChars !== sequence.sequence) {
98
103
  sequence.sequence = validChars;
99
- response.messages.push(
100
- "Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
101
- );
104
+ response.messages.push(...warnings);
102
105
  }
103
106
  sequence.type = "PROTEIN";
104
107
  sequence.isProtein = true;
@@ -126,12 +129,13 @@ export default function validateSequence(sequence, options = {}) {
126
129
  sequence.type = "DNA";
127
130
  }
128
131
 
129
- validChars = filterSequenceString(sequence.sequence, additionalValidChars);
132
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, {
133
+ additionalValidChars,
134
+ ...sequence
135
+ });
130
136
  if (validChars !== sequence.sequence) {
131
137
  sequence.sequence = validChars;
132
- response.messages.push(
133
- "Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
134
- );
138
+ response.messages.push(...warnings);
135
139
  }
136
140
  }
137
141