@teselagen/bio-parsers 0.3.10 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -6170,7 +6170,9 @@ lodash.exports;
6170
6170
  })(lodash, lodash.exports);
6171
6171
  var lodashExports = lodash.exports;
6172
6172
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6173
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6173
6174
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6175
+ const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6174
6176
  const aminoAcidToDegenerateDnaMap = {
6175
6177
  "-": "---",
6176
6178
  ".": "...",
@@ -11273,20 +11275,88 @@ const annotationTypes = [
11273
11275
  "primers",
11274
11276
  "guides"
11275
11277
  ];
11276
- function filterSequenceString(sequenceString, additionalValidChars = "", charOverrides) {
11277
- if (sequenceString) {
11278
- return sequenceString.replace(
11279
- new RegExp(
11280
- `[^${charOverrides || `atgcyrswkmbvdhnu${additionalValidChars.split("").join("\\")}`}]`,
11281
- "gi"
11282
- ),
11283
- ""
11278
+ function filterSequenceString(sequenceString, {
11279
+ additionalValidChars = "",
11280
+ isOligo,
11281
+ name,
11282
+ isProtein,
11283
+ isRna,
11284
+ isMixedRnaAndDna
11285
+ } = {}) {
11286
+ const acceptedChars = getAcceptedChars({
11287
+ isOligo,
11288
+ isProtein,
11289
+ isRna,
11290
+ isMixedRnaAndDna
11291
+ });
11292
+ const replaceChars = getReplaceChars({
11293
+ isOligo,
11294
+ isProtein,
11295
+ isRna,
11296
+ isMixedRnaAndDna
11297
+ });
11298
+ let sanitizedVal = "";
11299
+ const invalidChars = [];
11300
+ const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
11301
+ const warnings = [];
11302
+ const replaceCount = {};
11303
+ sequenceString.split("").forEach((letter) => {
11304
+ const lowerLetter = letter.toLowerCase();
11305
+ if (replaceChars && replaceChars[lowerLetter]) {
11306
+ if (!replaceCount[lowerLetter]) {
11307
+ replaceCount[lowerLetter] = 0;
11308
+ }
11309
+ replaceCount[lowerLetter]++;
11310
+ const isUpper = lowerLetter !== letter;
11311
+ sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
11312
+ } else if (chars.includes(lowerLetter)) {
11313
+ sanitizedVal += letter;
11314
+ } else {
11315
+ invalidChars.push(letter);
11316
+ }
11317
+ });
11318
+ Object.keys(replaceCount).forEach((letter) => {
11319
+ warnings.push(
11320
+ `Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
11321
+ );
11322
+ });
11323
+ if (sequenceString.length !== sanitizedVal.length) {
11324
+ warnings.push(
11325
+ `${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
11284
11326
  );
11285
- } else {
11286
- return sequenceString;
11287
11327
  }
11328
+ if (typeof window !== "undefined" && window.toastr && warnings.length) {
11329
+ warnings.forEach((warning) => {
11330
+ window.toastr.warning(warning);
11331
+ });
11332
+ }
11333
+ return [sanitizedVal, warnings];
11288
11334
  }
11289
11335
  __name(filterSequenceString, "filterSequenceString");
11336
+ function getAcceptedChars({
11337
+ isOligo,
11338
+ isProtein,
11339
+ isRna,
11340
+ isMixedRnaAndDna
11341
+ } = {}) {
11342
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11343
+ //just plain old dna
11344
+ ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
11345
+ );
11346
+ }
11347
+ __name(getAcceptedChars, "getAcceptedChars");
11348
+ function getReplaceChars({
11349
+ isOligo,
11350
+ isProtein,
11351
+ isRna,
11352
+ isMixedRnaAndDna
11353
+ } = {}) {
11354
+ return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
11355
+ //just plain old dna
11356
+ {}
11357
+ );
11358
+ }
11359
+ __name(getReplaceChars, "getReplaceChars");
11290
11360
  function tidyUpAnnotation(_annotation, {
11291
11361
  sequenceData = {},
11292
11362
  convertAnnotationsFromAAIndices,
@@ -11415,14 +11485,6 @@ function coerceLocation({
11415
11485
  }
11416
11486
  }
11417
11487
  __name(coerceLocation, "coerceLocation");
11418
- function filterAminoAcidSequenceString(sequenceString, options) {
11419
- options = options || {};
11420
- if (options.includeStopCodon) {
11421
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
11422
- }
11423
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
11424
- }
11425
- __name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
11426
11488
  function getDegenerateDnaStringFromAAString(aaString) {
11427
11489
  return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
11428
11490
  }
@@ -11431,14 +11493,13 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11431
11493
  const {
11432
11494
  annotationsAsObjects,
11433
11495
  logMessages,
11434
- removeUnwantedChars,
11496
+ doNotRemoveInvalidChars,
11435
11497
  additionalValidChars,
11436
11498
  noTranslationData,
11437
- charOverrides,
11438
11499
  doNotProvideIdsForAnnotations,
11439
- proteinFilterOptions,
11440
11500
  noCdsTranslations,
11441
- convertAnnotationsFromAAIndices
11501
+ convertAnnotationsFromAAIndices,
11502
+ topLevelSeqData
11442
11503
  } = options;
11443
11504
  let seqData = lodashExports.cloneDeep(pSeqData);
11444
11505
  const response = {
@@ -11466,18 +11527,15 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11466
11527
  if (seqData.isRna) {
11467
11528
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
11468
11529
  }
11469
- if (removeUnwantedChars) {
11530
+ if (!doNotRemoveInvalidChars) {
11470
11531
  if (seqData.isProtein) {
11471
- seqData.proteinSequence = filterAminoAcidSequenceString(
11472
- seqData.proteinSequence,
11473
- __spreadValues({ includeStopCodon: true }, proteinFilterOptions)
11474
- );
11532
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
11533
+ seqData.proteinSequence = newSeq;
11475
11534
  } else {
11476
- seqData.sequence = filterSequenceString(
11477
- seqData.sequence,
11478
- `${additionalValidChars || ""}${seqData.isRna || seqData.isMixedRnaAndDna ? "u" : ""}`,
11479
- charOverrides
11480
- );
11535
+ const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
11536
+ additionalValidChars
11537
+ }, topLevelSeqData || seqData));
11538
+ seqData.sequence = newSeq;
11481
11539
  }
11482
11540
  }
11483
11541
  if (seqData.isProtein) {
@@ -19362,7 +19420,6 @@ function validateSequence(sequence, options = {}) {
19362
19420
  response.messages.push("No sequence detected");
19363
19421
  sequence.sequence = "";
19364
19422
  }
19365
- let validChars;
19366
19423
  if (sequence.isProtein === void 0 && guessIfProtein) {
19367
19424
  sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19368
19425
  sequence.sequence,
@@ -19370,12 +19427,14 @@ function validateSequence(sequence, options = {}) {
19370
19427
  );
19371
19428
  }
19372
19429
  if (sequence.isProtein) {
19373
- validChars = filterAminoAcidSequenceString(sequence.sequence);
19430
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19431
+ name: sequence.name,
19432
+ isProtein: true,
19433
+ additionalValidChars
19434
+ });
19374
19435
  if (validChars !== sequence.sequence) {
19375
19436
  sequence.sequence = validChars;
19376
- response.messages.push(
19377
- "Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
19378
- );
19437
+ response.messages.push(...warnings);
19379
19438
  }
19380
19439
  sequence.type = "PROTEIN";
19381
19440
  sequence.isProtein = true;
@@ -19397,12 +19456,12 @@ function validateSequence(sequence, options = {}) {
19397
19456
  } else {
19398
19457
  sequence.type = "DNA";
19399
19458
  }
19400
- validChars = filterSequenceString(sequence.sequence, additionalValidChars);
19459
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, __spreadValues({
19460
+ additionalValidChars
19461
+ }, sequence));
19401
19462
  if (validChars !== sequence.sequence) {
19402
19463
  sequence.sequence = validChars;
19403
- response.messages.push(
19404
- "Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
19405
- );
19464
+ response.messages.push(...warnings);
19406
19465
  }
19407
19466
  }
19408
19467
  if (!sequence.size) {
package/index.mjs CHANGED
@@ -6168,7 +6168,9 @@ lodash.exports;
6168
6168
  })(lodash, lodash.exports);
6169
6169
  var lodashExports = lodash.exports;
6170
6170
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6171
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6171
6172
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6173
+ const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6172
6174
  const aminoAcidToDegenerateDnaMap = {
6173
6175
  "-": "---",
6174
6176
  ".": "...",
@@ -11271,20 +11273,88 @@ const annotationTypes = [
11271
11273
  "primers",
11272
11274
  "guides"
11273
11275
  ];
11274
- function filterSequenceString(sequenceString, additionalValidChars = "", charOverrides) {
11275
- if (sequenceString) {
11276
- return sequenceString.replace(
11277
- new RegExp(
11278
- `[^${charOverrides || `atgcyrswkmbvdhnu${additionalValidChars.split("").join("\\")}`}]`,
11279
- "gi"
11280
- ),
11281
- ""
11276
+ function filterSequenceString(sequenceString, {
11277
+ additionalValidChars = "",
11278
+ isOligo,
11279
+ name,
11280
+ isProtein,
11281
+ isRna,
11282
+ isMixedRnaAndDna
11283
+ } = {}) {
11284
+ const acceptedChars = getAcceptedChars({
11285
+ isOligo,
11286
+ isProtein,
11287
+ isRna,
11288
+ isMixedRnaAndDna
11289
+ });
11290
+ const replaceChars = getReplaceChars({
11291
+ isOligo,
11292
+ isProtein,
11293
+ isRna,
11294
+ isMixedRnaAndDna
11295
+ });
11296
+ let sanitizedVal = "";
11297
+ const invalidChars = [];
11298
+ const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
11299
+ const warnings = [];
11300
+ const replaceCount = {};
11301
+ sequenceString.split("").forEach((letter) => {
11302
+ const lowerLetter = letter.toLowerCase();
11303
+ if (replaceChars && replaceChars[lowerLetter]) {
11304
+ if (!replaceCount[lowerLetter]) {
11305
+ replaceCount[lowerLetter] = 0;
11306
+ }
11307
+ replaceCount[lowerLetter]++;
11308
+ const isUpper = lowerLetter !== letter;
11309
+ sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
11310
+ } else if (chars.includes(lowerLetter)) {
11311
+ sanitizedVal += letter;
11312
+ } else {
11313
+ invalidChars.push(letter);
11314
+ }
11315
+ });
11316
+ Object.keys(replaceCount).forEach((letter) => {
11317
+ warnings.push(
11318
+ `Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
11319
+ );
11320
+ });
11321
+ if (sequenceString.length !== sanitizedVal.length) {
11322
+ warnings.push(
11323
+ `${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
11282
11324
  );
11283
- } else {
11284
- return sequenceString;
11285
11325
  }
11326
+ if (typeof window !== "undefined" && window.toastr && warnings.length) {
11327
+ warnings.forEach((warning) => {
11328
+ window.toastr.warning(warning);
11329
+ });
11330
+ }
11331
+ return [sanitizedVal, warnings];
11286
11332
  }
11287
11333
  __name(filterSequenceString, "filterSequenceString");
11334
+ function getAcceptedChars({
11335
+ isOligo,
11336
+ isProtein,
11337
+ isRna,
11338
+ isMixedRnaAndDna
11339
+ } = {}) {
11340
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11341
+ //just plain old dna
11342
+ ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
11343
+ );
11344
+ }
11345
+ __name(getAcceptedChars, "getAcceptedChars");
11346
+ function getReplaceChars({
11347
+ isOligo,
11348
+ isProtein,
11349
+ isRna,
11350
+ isMixedRnaAndDna
11351
+ } = {}) {
11352
+ return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
11353
+ //just plain old dna
11354
+ {}
11355
+ );
11356
+ }
11357
+ __name(getReplaceChars, "getReplaceChars");
11288
11358
  function tidyUpAnnotation(_annotation, {
11289
11359
  sequenceData = {},
11290
11360
  convertAnnotationsFromAAIndices,
@@ -11413,14 +11483,6 @@ function coerceLocation({
11413
11483
  }
11414
11484
  }
11415
11485
  __name(coerceLocation, "coerceLocation");
11416
- function filterAminoAcidSequenceString(sequenceString, options) {
11417
- options = options || {};
11418
- if (options.includeStopCodon) {
11419
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
11420
- }
11421
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
11422
- }
11423
- __name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
11424
11486
  function getDegenerateDnaStringFromAAString(aaString) {
11425
11487
  return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
11426
11488
  }
@@ -11429,14 +11491,13 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11429
11491
  const {
11430
11492
  annotationsAsObjects,
11431
11493
  logMessages,
11432
- removeUnwantedChars,
11494
+ doNotRemoveInvalidChars,
11433
11495
  additionalValidChars,
11434
11496
  noTranslationData,
11435
- charOverrides,
11436
11497
  doNotProvideIdsForAnnotations,
11437
- proteinFilterOptions,
11438
11498
  noCdsTranslations,
11439
- convertAnnotationsFromAAIndices
11499
+ convertAnnotationsFromAAIndices,
11500
+ topLevelSeqData
11440
11501
  } = options;
11441
11502
  let seqData = lodashExports.cloneDeep(pSeqData);
11442
11503
  const response = {
@@ -11464,18 +11525,15 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11464
11525
  if (seqData.isRna) {
11465
11526
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
11466
11527
  }
11467
- if (removeUnwantedChars) {
11528
+ if (!doNotRemoveInvalidChars) {
11468
11529
  if (seqData.isProtein) {
11469
- seqData.proteinSequence = filterAminoAcidSequenceString(
11470
- seqData.proteinSequence,
11471
- __spreadValues({ includeStopCodon: true }, proteinFilterOptions)
11472
- );
11530
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
11531
+ seqData.proteinSequence = newSeq;
11473
11532
  } else {
11474
- seqData.sequence = filterSequenceString(
11475
- seqData.sequence,
11476
- `${additionalValidChars || ""}${seqData.isRna || seqData.isMixedRnaAndDna ? "u" : ""}`,
11477
- charOverrides
11478
- );
11533
+ const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
11534
+ additionalValidChars
11535
+ }, topLevelSeqData || seqData));
11536
+ seqData.sequence = newSeq;
11479
11537
  }
11480
11538
  }
11481
11539
  if (seqData.isProtein) {
@@ -19360,7 +19418,6 @@ function validateSequence(sequence, options = {}) {
19360
19418
  response.messages.push("No sequence detected");
19361
19419
  sequence.sequence = "";
19362
19420
  }
19363
- let validChars;
19364
19421
  if (sequence.isProtein === void 0 && guessIfProtein) {
19365
19422
  sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19366
19423
  sequence.sequence,
@@ -19368,12 +19425,14 @@ function validateSequence(sequence, options = {}) {
19368
19425
  );
19369
19426
  }
19370
19427
  if (sequence.isProtein) {
19371
- validChars = filterAminoAcidSequenceString(sequence.sequence);
19428
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19429
+ name: sequence.name,
19430
+ isProtein: true,
19431
+ additionalValidChars
19432
+ });
19372
19433
  if (validChars !== sequence.sequence) {
19373
19434
  sequence.sequence = validChars;
19374
- response.messages.push(
19375
- "Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
19376
- );
19435
+ response.messages.push(...warnings);
19377
19436
  }
19378
19437
  sequence.type = "PROTEIN";
19379
19438
  sequence.isProtein = true;
@@ -19395,12 +19454,12 @@ function validateSequence(sequence, options = {}) {
19395
19454
  } else {
19396
19455
  sequence.type = "DNA";
19397
19456
  }
19398
- validChars = filterSequenceString(sequence.sequence, additionalValidChars);
19457
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, __spreadValues({
19458
+ additionalValidChars
19459
+ }, sequence));
19399
19460
  if (validChars !== sequence.sequence) {
19400
19461
  sequence.sequence = validChars;
19401
- response.messages.push(
19402
- "Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
19403
- );
19462
+ response.messages.push(...warnings);
19404
19463
  }
19405
19464
  }
19406
19465
  if (!sequence.size) {
package/index.umd.js CHANGED
@@ -6172,7 +6172,9 @@ var __async = (__this, __arguments, generator) => {
6172
6172
  })(lodash, lodash.exports);
6173
6173
  var lodashExports = lodash.exports;
6174
6174
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6175
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6175
6176
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6177
+ const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6176
6178
  const aminoAcidToDegenerateDnaMap = {
6177
6179
  "-": "---",
6178
6180
  ".": "...",
@@ -11275,20 +11277,88 @@ var __async = (__this, __arguments, generator) => {
11275
11277
  "primers",
11276
11278
  "guides"
11277
11279
  ];
11278
- function filterSequenceString(sequenceString, additionalValidChars = "", charOverrides) {
11279
- if (sequenceString) {
11280
- return sequenceString.replace(
11281
- new RegExp(
11282
- `[^${charOverrides || `atgcyrswkmbvdhnu${additionalValidChars.split("").join("\\")}`}]`,
11283
- "gi"
11284
- ),
11285
- ""
11280
+ function filterSequenceString(sequenceString, {
11281
+ additionalValidChars = "",
11282
+ isOligo,
11283
+ name: name2,
11284
+ isProtein,
11285
+ isRna,
11286
+ isMixedRnaAndDna
11287
+ } = {}) {
11288
+ const acceptedChars = getAcceptedChars({
11289
+ isOligo,
11290
+ isProtein,
11291
+ isRna,
11292
+ isMixedRnaAndDna
11293
+ });
11294
+ const replaceChars = getReplaceChars({
11295
+ isOligo,
11296
+ isProtein,
11297
+ isRna,
11298
+ isMixedRnaAndDna
11299
+ });
11300
+ let sanitizedVal = "";
11301
+ const invalidChars = [];
11302
+ const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
11303
+ const warnings = [];
11304
+ const replaceCount = {};
11305
+ sequenceString.split("").forEach((letter) => {
11306
+ const lowerLetter = letter.toLowerCase();
11307
+ if (replaceChars && replaceChars[lowerLetter]) {
11308
+ if (!replaceCount[lowerLetter]) {
11309
+ replaceCount[lowerLetter] = 0;
11310
+ }
11311
+ replaceCount[lowerLetter]++;
11312
+ const isUpper = lowerLetter !== letter;
11313
+ sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
11314
+ } else if (chars.includes(lowerLetter)) {
11315
+ sanitizedVal += letter;
11316
+ } else {
11317
+ invalidChars.push(letter);
11318
+ }
11319
+ });
11320
+ Object.keys(replaceCount).forEach((letter) => {
11321
+ warnings.push(
11322
+ `Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
11323
+ );
11324
+ });
11325
+ if (sequenceString.length !== sanitizedVal.length) {
11326
+ warnings.push(
11327
+ `${name2 ? `Sequence ${name2}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
11286
11328
  );
11287
- } else {
11288
- return sequenceString;
11289
11329
  }
11330
+ if (typeof window !== "undefined" && window.toastr && warnings.length) {
11331
+ warnings.forEach((warning) => {
11332
+ window.toastr.warning(warning);
11333
+ });
11334
+ }
11335
+ return [sanitizedVal, warnings];
11290
11336
  }
11291
11337
  __name(filterSequenceString, "filterSequenceString");
11338
+ function getAcceptedChars({
11339
+ isOligo,
11340
+ isProtein,
11341
+ isRna,
11342
+ isMixedRnaAndDna
11343
+ } = {}) {
11344
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11345
+ //just plain old dna
11346
+ ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
11347
+ );
11348
+ }
11349
+ __name(getAcceptedChars, "getAcceptedChars");
11350
+ function getReplaceChars({
11351
+ isOligo,
11352
+ isProtein,
11353
+ isRna,
11354
+ isMixedRnaAndDna
11355
+ } = {}) {
11356
+ return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
11357
+ //just plain old dna
11358
+ {}
11359
+ );
11360
+ }
11361
+ __name(getReplaceChars, "getReplaceChars");
11292
11362
  function tidyUpAnnotation(_annotation, {
11293
11363
  sequenceData = {},
11294
11364
  convertAnnotationsFromAAIndices,
@@ -11417,14 +11487,6 @@ var __async = (__this, __arguments, generator) => {
11417
11487
  }
11418
11488
  }
11419
11489
  __name(coerceLocation, "coerceLocation");
11420
- function filterAminoAcidSequenceString(sequenceString, options) {
11421
- options = options || {};
11422
- if (options.includeStopCodon) {
11423
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
11424
- }
11425
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
11426
- }
11427
- __name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
11428
11490
  function getDegenerateDnaStringFromAAString(aaString) {
11429
11491
  return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
11430
11492
  }
@@ -11433,14 +11495,13 @@ var __async = (__this, __arguments, generator) => {
11433
11495
  const {
11434
11496
  annotationsAsObjects,
11435
11497
  logMessages,
11436
- removeUnwantedChars,
11498
+ doNotRemoveInvalidChars,
11437
11499
  additionalValidChars,
11438
11500
  noTranslationData,
11439
- charOverrides,
11440
11501
  doNotProvideIdsForAnnotations,
11441
- proteinFilterOptions,
11442
11502
  noCdsTranslations,
11443
- convertAnnotationsFromAAIndices
11503
+ convertAnnotationsFromAAIndices,
11504
+ topLevelSeqData
11444
11505
  } = options;
11445
11506
  let seqData = lodashExports.cloneDeep(pSeqData);
11446
11507
  const response = {
@@ -11468,18 +11529,15 @@ var __async = (__this, __arguments, generator) => {
11468
11529
  if (seqData.isRna) {
11469
11530
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
11470
11531
  }
11471
- if (removeUnwantedChars) {
11532
+ if (!doNotRemoveInvalidChars) {
11472
11533
  if (seqData.isProtein) {
11473
- seqData.proteinSequence = filterAminoAcidSequenceString(
11474
- seqData.proteinSequence,
11475
- __spreadValues({ includeStopCodon: true }, proteinFilterOptions)
11476
- );
11534
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
11535
+ seqData.proteinSequence = newSeq;
11477
11536
  } else {
11478
- seqData.sequence = filterSequenceString(
11479
- seqData.sequence,
11480
- `${additionalValidChars || ""}${seqData.isRna || seqData.isMixedRnaAndDna ? "u" : ""}`,
11481
- charOverrides
11482
- );
11537
+ const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
11538
+ additionalValidChars
11539
+ }, topLevelSeqData || seqData));
11540
+ seqData.sequence = newSeq;
11483
11541
  }
11484
11542
  }
11485
11543
  if (seqData.isProtein) {
@@ -19364,7 +19422,6 @@ var __async = (__this, __arguments, generator) => {
19364
19422
  response.messages.push("No sequence detected");
19365
19423
  sequence.sequence = "";
19366
19424
  }
19367
- let validChars;
19368
19425
  if (sequence.isProtein === void 0 && guessIfProtein) {
19369
19426
  sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19370
19427
  sequence.sequence,
@@ -19372,12 +19429,14 @@ var __async = (__this, __arguments, generator) => {
19372
19429
  );
19373
19430
  }
19374
19431
  if (sequence.isProtein) {
19375
- validChars = filterAminoAcidSequenceString(sequence.sequence);
19432
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19433
+ name: sequence.name,
19434
+ isProtein: true,
19435
+ additionalValidChars
19436
+ });
19376
19437
  if (validChars !== sequence.sequence) {
19377
19438
  sequence.sequence = validChars;
19378
- response.messages.push(
19379
- "Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
19380
- );
19439
+ response.messages.push(...warnings);
19381
19440
  }
19382
19441
  sequence.type = "PROTEIN";
19383
19442
  sequence.isProtein = true;
@@ -19399,12 +19458,12 @@ var __async = (__this, __arguments, generator) => {
19399
19458
  } else {
19400
19459
  sequence.type = "DNA";
19401
19460
  }
19402
- validChars = filterSequenceString(sequence.sequence, additionalValidChars);
19461
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, __spreadValues({
19462
+ additionalValidChars
19463
+ }, sequence));
19403
19464
  if (validChars !== sequence.sequence) {
19404
19465
  sequence.sequence = validChars;
19405
- response.messages.push(
19406
- "Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
19407
- );
19466
+ response.messages.push(...warnings);
19408
19467
  }
19409
19468
  }
19410
19469
  if (!sequence.size) {
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "@teselagen/bio-parsers",
3
- "version": "0.3.10",
3
+ "version": "0.4.2",
4
4
  "dependencies": {
5
- "@teselagen/sequence-utils": "0.3.8",
5
+ "@teselagen/sequence-utils": "0.3.10",
6
6
  "@teselagen/range-utils": "0.3.7",
7
7
  "@gmod/gff": "^1.2.1",
8
8
  "buffer": "^6.0.3",
@@ -1,7 +1,6 @@
1
1
  import areNonNegativeIntegers from "validate.io-nonnegative-integer-array";
2
2
  import { getFeatureTypes } from "@teselagen/sequence-utils";
3
3
  import {
4
- filterAminoAcidSequenceString,
5
4
  filterSequenceString,
6
5
  guessIfSequenceIsDnaAndNotProtein
7
6
  } from "@teselagen/sequence-utils";
@@ -30,7 +29,7 @@ export default function validateSequence(sequence, options = {}) {
30
29
  inclusive1BasedEnd,
31
30
  additionalValidChars,
32
31
  allowOverflowAnnotations,
33
- coerceFeatureTypes
32
+ coerceFeatureTypes,
34
33
  } = options;
35
34
  [
36
35
  "isDNA",
@@ -84,7 +83,7 @@ export default function validateSequence(sequence, options = {}) {
84
83
  response.messages.push("No sequence detected");
85
84
  sequence.sequence = "";
86
85
  }
87
- let validChars;
86
+
88
87
  if (sequence.isProtein === undefined && guessIfProtein) {
89
88
  sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
90
89
  sequence.sequence,
@@ -93,12 +92,14 @@ export default function validateSequence(sequence, options = {}) {
93
92
  }
94
93
  if (sequence.isProtein) {
95
94
  //tnr: add code to strip invalid protein data..
96
- validChars = filterAminoAcidSequenceString(sequence.sequence);
95
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, {
96
+ name: sequence.name,
97
+ isProtein: true,
98
+ additionalValidChars,
99
+ });
97
100
  if (validChars !== sequence.sequence) {
98
101
  sequence.sequence = validChars;
99
- response.messages.push(
100
- "Import Error: Illegal character(s) detected and removed from amino acid sequence. Allowed characters are: xtgalmfwkqespvicyhrndu"
101
- );
102
+ response.messages.push(...warnings);
102
103
  }
103
104
  sequence.type = "PROTEIN";
104
105
  sequence.isProtein = true;
@@ -126,12 +127,13 @@ export default function validateSequence(sequence, options = {}) {
126
127
  sequence.type = "DNA";
127
128
  }
128
129
 
129
- validChars = filterSequenceString(sequence.sequence, additionalValidChars);
130
+ const [validChars, warnings] = filterSequenceString(sequence.sequence, {
131
+ additionalValidChars,
132
+ ...sequence
133
+ });
130
134
  if (validChars !== sequence.sequence) {
131
135
  sequence.sequence = validChars;
132
- response.messages.push(
133
- "Import Error: Illegal character(s) detected and removed from sequence. Allowed characters are: atgcyrswkmbvdhn"
134
- );
136
+ response.messages.push(...warnings);
135
137
  }
136
138
  }
137
139