@teselagen/sequence-utils 0.3.8 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bioData.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  export const protein_letters: "ACDEFGHIKLMNPQRSTVWY";
2
- export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
2
+ export const protein_letters_withUandX: "ACDEFGHIKLMNPQRSTVWYUX";
3
+ export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
3
4
  export const ambiguous_dna_letters: "GATCRYWSMKHBVDN";
4
5
  export const unambiguous_dna_letters: "GATC";
5
6
  export const ambiguous_rna_letters: "GAUCRYWSMKHBVDN";
@@ -1 +1,24 @@
1
- export default function filterSequenceString(sequenceString: any, additionalValidChars: string | undefined, charOverrides: any): any;
1
+ export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna, }?: {
2
+ additionalValidChars?: string | undefined;
3
+ isOligo: any;
4
+ name: any;
5
+ isProtein: any;
6
+ isRna: any;
7
+ isMixedRnaAndDna: any;
8
+ }): (string | string[])[];
9
+ export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna, }?: {
10
+ isOligo: any;
11
+ isProtein: any;
12
+ isRna: any;
13
+ isMixedRnaAndDna: any;
14
+ }): string;
15
+ export function getReplaceChars({ isOligo, isProtein, isRna, isMixedRnaAndDna }?: {
16
+ isOligo: any;
17
+ isProtein: any;
18
+ isRna: any;
19
+ isMixedRnaAndDna: any;
20
+ }): {
21
+ t?: undefined;
22
+ } | {
23
+ t: string;
24
+ };
package/index.d.ts CHANGED
@@ -20,7 +20,6 @@ export { default as aliasedEnzymesByName } from "./aliasedEnzymesByName";
20
20
  export { default as defaultEnzymesByName } from "./defaultEnzymesByName";
21
21
  export { default as generateSequenceData } from "./generateSequenceData";
22
22
  export { default as generateAnnotations } from "./generateAnnotations";
23
- export { default as filterAminoAcidSequenceString } from "./filterAminoAcidSequenceString";
24
23
  export { default as filterSequenceString } from "./filterSequenceString";
25
24
  export { default as findNearestRangeOfSequenceOverlapToPosition } from "./findNearestRangeOfSequenceOverlapToPosition";
26
25
  export { default as findOrfsInPlasmid } from "./findOrfsInPlasmid";
package/index.js CHANGED
@@ -5999,7 +5999,8 @@ lodash.exports;
5999
5999
  })(lodash, lodash.exports);
6000
6000
  var lodashExports = lodash.exports;
6001
6001
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6002
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
6002
+ const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6003
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6003
6004
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6004
6005
  const unambiguous_dna_letters = "GATC";
6005
6006
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -6067,6 +6068,7 @@ const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.definePrope
6067
6068
  extended_protein_letters,
6068
6069
  extended_protein_values,
6069
6070
  protein_letters,
6071
+ protein_letters_withUandX,
6070
6072
  unambiguous_dna_letters,
6071
6073
  unambiguous_rna_letters
6072
6074
  }, Symbol.toStringTag, { value: "Module" }));
@@ -12321,20 +12323,88 @@ const modifiableTypes = [
12321
12323
  "primers",
12322
12324
  "guides"
12323
12325
  ];
12324
- function filterSequenceString(sequenceString, additionalValidChars = "", charOverrides) {
12325
- if (sequenceString) {
12326
- return sequenceString.replace(
12327
- new RegExp(
12328
- `[^${charOverrides || `atgcyrswkmbvdhnu${additionalValidChars.split("").join("\\")}`}]`,
12329
- "gi"
12330
- ),
12331
- ""
12326
+ function filterSequenceString(sequenceString, {
12327
+ additionalValidChars = "",
12328
+ isOligo,
12329
+ name,
12330
+ isProtein,
12331
+ isRna,
12332
+ isMixedRnaAndDna
12333
+ } = {}) {
12334
+ const acceptedChars = getAcceptedChars({
12335
+ isOligo,
12336
+ isProtein,
12337
+ isRna,
12338
+ isMixedRnaAndDna
12339
+ });
12340
+ const replaceChars = getReplaceChars({
12341
+ isOligo,
12342
+ isProtein,
12343
+ isRna,
12344
+ isMixedRnaAndDna
12345
+ });
12346
+ let sanitizedVal = "";
12347
+ const invalidChars = [];
12348
+ const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
12349
+ const warnings = [];
12350
+ const replaceCount = {};
12351
+ sequenceString.split("").forEach((letter) => {
12352
+ const lowerLetter = letter.toLowerCase();
12353
+ if (replaceChars && replaceChars[lowerLetter]) {
12354
+ if (!replaceCount[lowerLetter]) {
12355
+ replaceCount[lowerLetter] = 0;
12356
+ }
12357
+ replaceCount[lowerLetter]++;
12358
+ const isUpper = lowerLetter !== letter;
12359
+ sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
12360
+ } else if (chars.includes(lowerLetter)) {
12361
+ sanitizedVal += letter;
12362
+ } else {
12363
+ invalidChars.push(letter);
12364
+ }
12365
+ });
12366
+ Object.keys(replaceCount).forEach((letter) => {
12367
+ warnings.push(
12368
+ `Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
12332
12369
  );
12333
- } else {
12334
- return sequenceString;
12370
+ });
12371
+ if (sequenceString.length !== sanitizedVal.length) {
12372
+ warnings.push(
12373
+ `${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
12374
+ );
12375
+ }
12376
+ if (typeof window !== "undefined" && window.toastr && warnings.length) {
12377
+ warnings.forEach((warning) => {
12378
+ window.toastr.warning(warning);
12379
+ });
12335
12380
  }
12381
+ return [sanitizedVal, warnings];
12336
12382
  }
12337
12383
  __name(filterSequenceString, "filterSequenceString");
12384
+ function getAcceptedChars({
12385
+ isOligo,
12386
+ isProtein,
12387
+ isRna,
12388
+ isMixedRnaAndDna
12389
+ } = {}) {
12390
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12391
+ //just plain old dna
12392
+ ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
12393
+ );
12394
+ }
12395
+ __name(getAcceptedChars, "getAcceptedChars");
12396
+ function getReplaceChars({
12397
+ isOligo,
12398
+ isProtein,
12399
+ isRna,
12400
+ isMixedRnaAndDna
12401
+ } = {}) {
12402
+ return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12403
+ //just plain old dna
12404
+ {}
12405
+ );
12406
+ }
12407
+ __name(getReplaceChars, "getReplaceChars");
12338
12408
  function tidyUpAnnotation(_annotation, {
12339
12409
  sequenceData = {},
12340
12410
  convertAnnotationsFromAAIndices,
@@ -12463,14 +12533,6 @@ function coerceLocation({
12463
12533
  }
12464
12534
  }
12465
12535
  __name(coerceLocation, "coerceLocation");
12466
- function filterAminoAcidSequenceString(sequenceString, options) {
12467
- options = options || {};
12468
- if (options.includeStopCodon) {
12469
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
12470
- }
12471
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
12472
- }
12473
- __name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
12474
12536
  function getDegenerateDnaStringFromAAString(aaString) {
12475
12537
  return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
12476
12538
  }
@@ -12479,14 +12541,13 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12479
12541
  const {
12480
12542
  annotationsAsObjects,
12481
12543
  logMessages,
12482
- removeUnwantedChars,
12544
+ doNotRemoveInvalidChars,
12483
12545
  additionalValidChars,
12484
12546
  noTranslationData,
12485
- charOverrides,
12486
12547
  doNotProvideIdsForAnnotations,
12487
- proteinFilterOptions,
12488
12548
  noCdsTranslations,
12489
- convertAnnotationsFromAAIndices
12549
+ convertAnnotationsFromAAIndices,
12550
+ topLevelSeqData
12490
12551
  } = options;
12491
12552
  let seqData = lodashExports.cloneDeep(pSeqData);
12492
12553
  const response = {
@@ -12514,18 +12575,15 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12514
12575
  if (seqData.isRna) {
12515
12576
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
12516
12577
  }
12517
- if (removeUnwantedChars) {
12578
+ if (!doNotRemoveInvalidChars) {
12518
12579
  if (seqData.isProtein) {
12519
- seqData.proteinSequence = filterAminoAcidSequenceString(
12520
- seqData.proteinSequence,
12521
- __spreadValues({ includeStopCodon: true }, proteinFilterOptions)
12522
- );
12580
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
12581
+ seqData.proteinSequence = newSeq;
12523
12582
  } else {
12524
- seqData.sequence = filterSequenceString(
12525
- seqData.sequence,
12526
- `${additionalValidChars || ""}${seqData.isRna || seqData.isMixedRnaAndDna ? "u" : ""}`,
12527
- charOverrides
12528
- );
12583
+ const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
12584
+ additionalValidChars
12585
+ }, topLevelSeqData || seqData));
12586
+ seqData.sequence = newSeq;
12529
12587
  }
12530
12588
  }
12531
12589
  if (seqData.isProtein) {
@@ -22665,7 +22723,6 @@ exports.degenerateRnaToAminoAcidMap = degenerateRnaToAminoAcidMap;
22665
22723
  exports.deleteSequenceDataAtRange = deleteSequenceDataAtRange;
22666
22724
  exports.doesEnzymeChopOutsideOfRecognitionSite = doesEnzymeChopOutsideOfRecognitionSite;
22667
22725
  exports.featureColors = featureColors;
22668
- exports.filterAminoAcidSequenceString = filterAminoAcidSequenceString;
22669
22726
  exports.filterSequenceString = filterSequenceString;
22670
22727
  exports.findNearestRangeOfSequenceOverlapToPosition = findNearestRangeOfSequenceOverlapToPosition;
22671
22728
  exports.findOrfsInPlasmid = findOrfsInPlasmid;
package/index.mjs CHANGED
@@ -5997,7 +5997,8 @@ lodash.exports;
5997
5997
  })(lodash, lodash.exports);
5998
5998
  var lodashExports = lodash.exports;
5999
5999
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6000
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
6000
+ const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6001
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6001
6002
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6002
6003
  const unambiguous_dna_letters = "GATC";
6003
6004
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -6065,6 +6066,7 @@ const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.definePrope
6065
6066
  extended_protein_letters,
6066
6067
  extended_protein_values,
6067
6068
  protein_letters,
6069
+ protein_letters_withUandX,
6068
6070
  unambiguous_dna_letters,
6069
6071
  unambiguous_rna_letters
6070
6072
  }, Symbol.toStringTag, { value: "Module" }));
@@ -12319,20 +12321,88 @@ const modifiableTypes = [
12319
12321
  "primers",
12320
12322
  "guides"
12321
12323
  ];
12322
- function filterSequenceString(sequenceString, additionalValidChars = "", charOverrides) {
12323
- if (sequenceString) {
12324
- return sequenceString.replace(
12325
- new RegExp(
12326
- `[^${charOverrides || `atgcyrswkmbvdhnu${additionalValidChars.split("").join("\\")}`}]`,
12327
- "gi"
12328
- ),
12329
- ""
12324
+ function filterSequenceString(sequenceString, {
12325
+ additionalValidChars = "",
12326
+ isOligo,
12327
+ name,
12328
+ isProtein,
12329
+ isRna,
12330
+ isMixedRnaAndDna
12331
+ } = {}) {
12332
+ const acceptedChars = getAcceptedChars({
12333
+ isOligo,
12334
+ isProtein,
12335
+ isRna,
12336
+ isMixedRnaAndDna
12337
+ });
12338
+ const replaceChars = getReplaceChars({
12339
+ isOligo,
12340
+ isProtein,
12341
+ isRna,
12342
+ isMixedRnaAndDna
12343
+ });
12344
+ let sanitizedVal = "";
12345
+ const invalidChars = [];
12346
+ const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
12347
+ const warnings = [];
12348
+ const replaceCount = {};
12349
+ sequenceString.split("").forEach((letter) => {
12350
+ const lowerLetter = letter.toLowerCase();
12351
+ if (replaceChars && replaceChars[lowerLetter]) {
12352
+ if (!replaceCount[lowerLetter]) {
12353
+ replaceCount[lowerLetter] = 0;
12354
+ }
12355
+ replaceCount[lowerLetter]++;
12356
+ const isUpper = lowerLetter !== letter;
12357
+ sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
12358
+ } else if (chars.includes(lowerLetter)) {
12359
+ sanitizedVal += letter;
12360
+ } else {
12361
+ invalidChars.push(letter);
12362
+ }
12363
+ });
12364
+ Object.keys(replaceCount).forEach((letter) => {
12365
+ warnings.push(
12366
+ `Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
12330
12367
  );
12331
- } else {
12332
- return sequenceString;
12368
+ });
12369
+ if (sequenceString.length !== sanitizedVal.length) {
12370
+ warnings.push(
12371
+ `${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
12372
+ );
12373
+ }
12374
+ if (typeof window !== "undefined" && window.toastr && warnings.length) {
12375
+ warnings.forEach((warning) => {
12376
+ window.toastr.warning(warning);
12377
+ });
12333
12378
  }
12379
+ return [sanitizedVal, warnings];
12334
12380
  }
12335
12381
  __name(filterSequenceString, "filterSequenceString");
12382
+ function getAcceptedChars({
12383
+ isOligo,
12384
+ isProtein,
12385
+ isRna,
12386
+ isMixedRnaAndDna
12387
+ } = {}) {
12388
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12389
+ //just plain old dna
12390
+ ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
12391
+ );
12392
+ }
12393
+ __name(getAcceptedChars, "getAcceptedChars");
12394
+ function getReplaceChars({
12395
+ isOligo,
12396
+ isProtein,
12397
+ isRna,
12398
+ isMixedRnaAndDna
12399
+ } = {}) {
12400
+ return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12401
+ //just plain old dna
12402
+ {}
12403
+ );
12404
+ }
12405
+ __name(getReplaceChars, "getReplaceChars");
12336
12406
  function tidyUpAnnotation(_annotation, {
12337
12407
  sequenceData = {},
12338
12408
  convertAnnotationsFromAAIndices,
@@ -12461,14 +12531,6 @@ function coerceLocation({
12461
12531
  }
12462
12532
  }
12463
12533
  __name(coerceLocation, "coerceLocation");
12464
- function filterAminoAcidSequenceString(sequenceString, options) {
12465
- options = options || {};
12466
- if (options.includeStopCodon) {
12467
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
12468
- }
12469
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
12470
- }
12471
- __name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
12472
12534
  function getDegenerateDnaStringFromAAString(aaString) {
12473
12535
  return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
12474
12536
  }
@@ -12477,14 +12539,13 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12477
12539
  const {
12478
12540
  annotationsAsObjects,
12479
12541
  logMessages,
12480
- removeUnwantedChars,
12542
+ doNotRemoveInvalidChars,
12481
12543
  additionalValidChars,
12482
12544
  noTranslationData,
12483
- charOverrides,
12484
12545
  doNotProvideIdsForAnnotations,
12485
- proteinFilterOptions,
12486
12546
  noCdsTranslations,
12487
- convertAnnotationsFromAAIndices
12547
+ convertAnnotationsFromAAIndices,
12548
+ topLevelSeqData
12488
12549
  } = options;
12489
12550
  let seqData = lodashExports.cloneDeep(pSeqData);
12490
12551
  const response = {
@@ -12512,18 +12573,15 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12512
12573
  if (seqData.isRna) {
12513
12574
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
12514
12575
  }
12515
- if (removeUnwantedChars) {
12576
+ if (!doNotRemoveInvalidChars) {
12516
12577
  if (seqData.isProtein) {
12517
- seqData.proteinSequence = filterAminoAcidSequenceString(
12518
- seqData.proteinSequence,
12519
- __spreadValues({ includeStopCodon: true }, proteinFilterOptions)
12520
- );
12578
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
12579
+ seqData.proteinSequence = newSeq;
12521
12580
  } else {
12522
- seqData.sequence = filterSequenceString(
12523
- seqData.sequence,
12524
- `${additionalValidChars || ""}${seqData.isRna || seqData.isMixedRnaAndDna ? "u" : ""}`,
12525
- charOverrides
12526
- );
12581
+ const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
12582
+ additionalValidChars
12583
+ }, topLevelSeqData || seqData));
12584
+ seqData.sequence = newSeq;
12527
12585
  }
12528
12586
  }
12529
12587
  if (seqData.isProtein) {
@@ -22664,7 +22722,6 @@ export {
22664
22722
  deleteSequenceDataAtRange,
22665
22723
  doesEnzymeChopOutsideOfRecognitionSite,
22666
22724
  featureColors,
22667
- filterAminoAcidSequenceString,
22668
22725
  filterSequenceString,
22669
22726
  findNearestRangeOfSequenceOverlapToPosition,
22670
22727
  findOrfsInPlasmid,
package/index.umd.js CHANGED
@@ -6001,7 +6001,8 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
6001
6001
  })(lodash, lodash.exports);
6002
6002
  var lodashExports = lodash.exports;
6003
6003
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6004
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
6004
+ const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6005
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6005
6006
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6006
6007
  const unambiguous_dna_letters = "GATC";
6007
6008
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -6069,6 +6070,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
6069
6070
  extended_protein_letters,
6070
6071
  extended_protein_values,
6071
6072
  protein_letters,
6073
+ protein_letters_withUandX,
6072
6074
  unambiguous_dna_letters,
6073
6075
  unambiguous_rna_letters
6074
6076
  }, Symbol.toStringTag, { value: "Module" }));
@@ -12323,20 +12325,88 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12323
12325
  "primers",
12324
12326
  "guides"
12325
12327
  ];
12326
- function filterSequenceString(sequenceString, additionalValidChars = "", charOverrides) {
12327
- if (sequenceString) {
12328
- return sequenceString.replace(
12329
- new RegExp(
12330
- `[^${charOverrides || `atgcyrswkmbvdhnu${additionalValidChars.split("").join("\\")}`}]`,
12331
- "gi"
12332
- ),
12333
- ""
12328
+ function filterSequenceString(sequenceString, {
12329
+ additionalValidChars = "",
12330
+ isOligo,
12331
+ name,
12332
+ isProtein,
12333
+ isRna,
12334
+ isMixedRnaAndDna
12335
+ } = {}) {
12336
+ const acceptedChars = getAcceptedChars({
12337
+ isOligo,
12338
+ isProtein,
12339
+ isRna,
12340
+ isMixedRnaAndDna
12341
+ });
12342
+ const replaceChars = getReplaceChars({
12343
+ isOligo,
12344
+ isProtein,
12345
+ isRna,
12346
+ isMixedRnaAndDna
12347
+ });
12348
+ let sanitizedVal = "";
12349
+ const invalidChars = [];
12350
+ const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
12351
+ const warnings = [];
12352
+ const replaceCount = {};
12353
+ sequenceString.split("").forEach((letter) => {
12354
+ const lowerLetter = letter.toLowerCase();
12355
+ if (replaceChars && replaceChars[lowerLetter]) {
12356
+ if (!replaceCount[lowerLetter]) {
12357
+ replaceCount[lowerLetter] = 0;
12358
+ }
12359
+ replaceCount[lowerLetter]++;
12360
+ const isUpper = lowerLetter !== letter;
12361
+ sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
12362
+ } else if (chars.includes(lowerLetter)) {
12363
+ sanitizedVal += letter;
12364
+ } else {
12365
+ invalidChars.push(letter);
12366
+ }
12367
+ });
12368
+ Object.keys(replaceCount).forEach((letter) => {
12369
+ warnings.push(
12370
+ `Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
12334
12371
  );
12335
- } else {
12336
- return sequenceString;
12372
+ });
12373
+ if (sequenceString.length !== sanitizedVal.length) {
12374
+ warnings.push(
12375
+ `${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
12376
+ );
12377
+ }
12378
+ if (typeof window !== "undefined" && window.toastr && warnings.length) {
12379
+ warnings.forEach((warning) => {
12380
+ window.toastr.warning(warning);
12381
+ });
12337
12382
  }
12383
+ return [sanitizedVal, warnings];
12338
12384
  }
12339
12385
  __name(filterSequenceString, "filterSequenceString");
12386
+ function getAcceptedChars({
12387
+ isOligo,
12388
+ isProtein,
12389
+ isRna,
12390
+ isMixedRnaAndDna
12391
+ } = {}) {
12392
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12393
+ //just plain old dna
12394
+ ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
12395
+ );
12396
+ }
12397
+ __name(getAcceptedChars, "getAcceptedChars");
12398
+ function getReplaceChars({
12399
+ isOligo,
12400
+ isProtein,
12401
+ isRna,
12402
+ isMixedRnaAndDna
12403
+ } = {}) {
12404
+ return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12405
+ //just plain old dna
12406
+ {}
12407
+ );
12408
+ }
12409
+ __name(getReplaceChars, "getReplaceChars");
12340
12410
  function tidyUpAnnotation(_annotation, {
12341
12411
  sequenceData = {},
12342
12412
  convertAnnotationsFromAAIndices,
@@ -12465,14 +12535,6 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12465
12535
  }
12466
12536
  }
12467
12537
  __name(coerceLocation, "coerceLocation");
12468
- function filterAminoAcidSequenceString(sequenceString, options) {
12469
- options = options || {};
12470
- if (options.includeStopCodon) {
12471
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
12472
- }
12473
- return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
12474
- }
12475
- __name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
12476
12538
  function getDegenerateDnaStringFromAAString(aaString) {
12477
12539
  return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
12478
12540
  }
@@ -12481,14 +12543,13 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12481
12543
  const {
12482
12544
  annotationsAsObjects,
12483
12545
  logMessages,
12484
- removeUnwantedChars,
12546
+ doNotRemoveInvalidChars,
12485
12547
  additionalValidChars,
12486
12548
  noTranslationData,
12487
- charOverrides,
12488
12549
  doNotProvideIdsForAnnotations,
12489
- proteinFilterOptions,
12490
12550
  noCdsTranslations,
12491
- convertAnnotationsFromAAIndices
12551
+ convertAnnotationsFromAAIndices,
12552
+ topLevelSeqData
12492
12553
  } = options;
12493
12554
  let seqData = lodashExports.cloneDeep(pSeqData);
12494
12555
  const response = {
@@ -12516,18 +12577,15 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12516
12577
  if (seqData.isRna) {
12517
12578
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
12518
12579
  }
12519
- if (removeUnwantedChars) {
12580
+ if (!doNotRemoveInvalidChars) {
12520
12581
  if (seqData.isProtein) {
12521
- seqData.proteinSequence = filterAminoAcidSequenceString(
12522
- seqData.proteinSequence,
12523
- __spreadValues({ includeStopCodon: true }, proteinFilterOptions)
12524
- );
12582
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
12583
+ seqData.proteinSequence = newSeq;
12525
12584
  } else {
12526
- seqData.sequence = filterSequenceString(
12527
- seqData.sequence,
12528
- `${additionalValidChars || ""}${seqData.isRna || seqData.isMixedRnaAndDna ? "u" : ""}`,
12529
- charOverrides
12530
- );
12585
+ const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
12586
+ additionalValidChars
12587
+ }, topLevelSeqData || seqData));
12588
+ seqData.sequence = newSeq;
12531
12589
  }
12532
12590
  }
12533
12591
  if (seqData.isProtein) {
@@ -22667,7 +22725,6 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
22667
22725
  exports2.deleteSequenceDataAtRange = deleteSequenceDataAtRange;
22668
22726
  exports2.doesEnzymeChopOutsideOfRecognitionSite = doesEnzymeChopOutsideOfRecognitionSite;
22669
22727
  exports2.featureColors = featureColors;
22670
- exports2.filterAminoAcidSequenceString = filterAminoAcidSequenceString;
22671
22728
  exports2.filterSequenceString = filterSequenceString;
22672
22729
  exports2.findNearestRangeOfSequenceOverlapToPosition = findNearestRangeOfSequenceOverlapToPosition;
22673
22730
  exports2.findOrfsInPlasmid = findOrfsInPlasmid;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@teselagen/sequence-utils",
3
- "version": "0.3.8",
3
+ "version": "0.3.10",
4
4
  "dependencies": {
5
5
  "@teselagen/range-utils": "0.3.7",
6
6
  "bson-objectid": "^2.0.4",
package/src/bioData.js CHANGED
@@ -1,8 +1,8 @@
1
1
  //Adapted from biopython. Check the BIOPYTHON_LICENSE for licensing info
2
2
 
3
3
  export const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
4
-
5
- export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
4
+ export const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
5
+ export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6
6
  export const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
7
7
  export const unambiguous_dna_letters = "GATC";
8
8
  export const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -29,7 +29,6 @@ export const ambiguous_dna_values = {
29
29
  N: "GATC"
30
30
  };
31
31
 
32
-
33
32
  export const extended_protein_values = {
34
33
  A: "A",
35
34
  B: "ND",
@@ -1,24 +1,113 @@
1
- // this is throwing a weird eslint error
1
+ import {
2
+ ambiguous_dna_letters,
3
+ ambiguous_rna_letters,
4
+ extended_protein_letters,
5
+ } from "./bioData";
2
6
 
3
- //
4
7
  export default function filterSequenceString(
5
8
  sequenceString,
6
- additionalValidChars = "",
7
- charOverrides
9
+ {
10
+ additionalValidChars = "",
11
+ isOligo,
12
+ name,
13
+ isProtein,
14
+ isRna,
15
+ isMixedRnaAndDna,
16
+ } = {}
8
17
  ) {
9
- // ac.throw(ac.string,sequenceString);
10
- if (sequenceString) {
11
- return sequenceString.replace(
12
- new RegExp(
13
- `[^${
14
- charOverrides ||
15
- `atgcyrswkmbvdhnu${additionalValidChars.split("").join("\\")}`
16
- }]`,
17
- "gi"
18
- ),
19
- ""
18
+ const acceptedChars = getAcceptedChars({
19
+ isOligo,
20
+ isProtein,
21
+ isRna,
22
+ isMixedRnaAndDna,
23
+ });
24
+ const replaceChars = getReplaceChars({
25
+ isOligo,
26
+ isProtein,
27
+ isRna,
28
+ isMixedRnaAndDna
29
+ });
30
+
31
+ let sanitizedVal = "";
32
+ const invalidChars = [];
33
+ const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
34
+ const warnings = [];
35
+ const replaceCount = {};
36
+ sequenceString.split("").forEach(letter => {
37
+ const lowerLetter = letter.toLowerCase();
38
+ if (replaceChars && replaceChars[lowerLetter]) {
39
+ if (!replaceCount[lowerLetter]) {
40
+ replaceCount[lowerLetter] = 0;
41
+ }
42
+ replaceCount[lowerLetter]++;
43
+ const isUpper = lowerLetter !== letter;
44
+ sanitizedVal += isUpper
45
+ ? replaceChars[lowerLetter].toUpperCase()
46
+ : replaceChars[lowerLetter];
47
+ } else if (chars.includes(lowerLetter)) {
48
+ sanitizedVal += letter;
49
+ } else {
50
+ invalidChars.push(letter);
51
+ }
52
+ });
53
+ //add replace count warnings
54
+ Object.keys(replaceCount).forEach(letter => {
55
+ warnings.push(
56
+ `Replaced "${letter}" with "${replaceChars[letter]}"${
57
+ replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""
58
+ }`
59
+ );
60
+ });
61
+ if (sequenceString.length !== sanitizedVal.length) {
62
+ warnings.push(
63
+ `${
64
+ name ? `Sequence ${name}: ` : ""
65
+ }Invalid character(s) detected and removed: ${invalidChars
66
+ .slice(0, 100)
67
+ .join(", ")} `
20
68
  );
21
- } else {
22
- return sequenceString;
23
69
  }
70
+ if (typeof window !== "undefined" && window.toastr && warnings.length) {
71
+ warnings.forEach(warning => {
72
+ window.toastr.warning(warning);
73
+ });
74
+ }
75
+
76
+ return [sanitizedVal, warnings];
77
+ }
78
+
79
+ export function getAcceptedChars({
80
+ isOligo,
81
+ isProtein,
82
+ isRna,
83
+ isMixedRnaAndDna,
84
+ } = {}) {
85
+ return isProtein
86
+ ? `${extended_protein_letters.toLowerCase()}}`
87
+ : isOligo
88
+ ? ambiguous_rna_letters.toLowerCase() + "t"
89
+ : isRna
90
+ ? ambiguous_rna_letters.toLowerCase() + "t"
91
+ : isMixedRnaAndDna
92
+ ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
93
+ : //just plain old dna
94
+ ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase();
95
+ }
96
+ export function getReplaceChars({
97
+ isOligo,
98
+ isProtein,
99
+ isRna,
100
+ isMixedRnaAndDna
101
+ } = {}) {
102
+ return isProtein
103
+ ? {}
104
+ // {".": "*"}
105
+ : isOligo
106
+ ? {}
107
+ : isRna
108
+ ? { t: "u" }
109
+ : isMixedRnaAndDna
110
+ ? {}
111
+ : //just plain old dna
112
+ {};
24
113
  }
@@ -1,13 +1,70 @@
1
1
  import filterSequenceString from "./filterSequenceString";
2
+ import { expect } from "vitest";
2
3
 
3
4
  describe("filterSequenceString", () => {
5
+ it("should not filter u's and should convert t's to u's from isOligo=true seqs", () => {
6
+ const [str, warnings] = filterSequenceString("tatuuag--a", {
7
+ isOligo: true
8
+ });
9
+ expect(str).toBe("tatuuaga");
10
+ // expect(warnings[0]).toBe('Replaced "t" with "u" 2 times');
11
+ expect(warnings[0]).toBe(
12
+ "Invalid character(s) detected and removed: -, - "
13
+ );
14
+ });
15
+ it("should not convert u's to t's for isDna (default isDna=true) seqs", () => {
16
+ const [str, warnings] = filterSequenceString("tatuuag--a", {});
17
+ // expect(warnings[0]).toBe('Replaced "u" with "t" 2 times');
18
+ expect(warnings[0]).toBe(
19
+ "Invalid character(s) detected and removed: -, - "
20
+ );
21
+ expect(str).toBe("tatuuaga");
22
+ });
4
23
  it("should filter out unwanted chars", () => {
5
- expect(filterSequenceString("tatag--a")).toBe("tataga");
24
+ const [str, warnings] = filterSequenceString("tatag--a");
25
+ expect(warnings[0]).toBe(
26
+ "Invalid character(s) detected and removed: -, - "
27
+ );
28
+ expect(str).toBe("tataga");
6
29
  });
7
30
  it("should handle additional chars option", () => {
8
- expect(filterSequenceString("tatag--a", "-")).toBe("tatag--a");
31
+ const [str, warnings] = filterSequenceString("tatag--a", {
32
+ additionalValidChars: "-"
33
+ });
34
+ expect(warnings.length).toBe(0);
35
+ expect(str).toBe("tatag--a");
9
36
  });
10
37
  it("should handle additional chars option", () => {
11
- expect(filterSequenceString("tatag--a", "f-q")).toBe("tatag--a");
38
+ const [str, warnings] = filterSequenceString("tatag--a", {
39
+ additionalValidChars: "f-q"
40
+ });
41
+ expect(warnings.length).toBe(0);
42
+ expect(str).toBe("tatag--a");
43
+ });
44
+
45
+ it("when isProtein: true, should filter only valid amino acids by default", () => {
46
+ const [str, warnings] = filterSequenceString(
47
+ 'bbb342"""xtgalmfwkqespvicyhrnd,,../',
48
+ {
49
+ isProtein: true
50
+ }
51
+ );
52
+ // expect(warnings[0]).toBe(`Replaced "." with "*" 2 times`);
53
+ expect(warnings[0]).toBe( 'Invalid character(s) detected and removed: 3, 4, 2, ", ", ", ,, ,, ., ., / ');
54
+ expect(str).toBe("bbbxtgalmfwkqespvicyhrnd");
55
+ });
56
+ it("when isProtein: true, should handle upper case letters", () => {
57
+ const [str, warnings] = filterSequenceString("xtgalmfWKQEspvicyhrnd", {
58
+ isProtein: true
59
+ });
60
+ expect(warnings.length).toBe(0);
61
+ expect(str).toBe("xtgalmfWKQEspvicyhrnd");
62
+ });
63
+ it("when isProtein: true, it should convert . to *", () => {
64
+ const [str] = filterSequenceString('BXZJUO*bbb342"""xtgalbmfwkqespvicyhrnd,,../', {
65
+ isProtein: true,
66
+ });
67
+
68
+ expect(str).toBe("BXZJUO*bbbxtgalbmfwkqespvicyhrnd");
12
69
  });
13
70
  });
package/src/index.js CHANGED
@@ -51,7 +51,6 @@ export { default as aliasedEnzymesByName } from "./aliasedEnzymesByName";
51
51
  export { default as defaultEnzymesByName } from "./defaultEnzymesByName";
52
52
  export { default as generateSequenceData } from "./generateSequenceData";
53
53
  export { default as generateAnnotations } from "./generateAnnotations";
54
- export { default as filterAminoAcidSequenceString } from "./filterAminoAcidSequenceString";
55
54
  export { default as filterSequenceString } from "./filterSequenceString";
56
55
  export { default as findNearestRangeOfSequenceOverlapToPosition } from "./findNearestRangeOfSequenceOverlapToPosition";
57
56
  export { default as findOrfsInPlasmid } from "./findOrfsInPlasmid";
@@ -1,4 +1,3 @@
1
-
2
1
  const proteinAlphabet = {
3
2
  A: {
4
3
  value: "A",
@@ -18,7 +17,7 @@ const proteinAlphabet = {
18
17
  color: "hsl(258.1, 100%, 69%)",
19
18
  mass: 156.18568
20
19
  },
21
-
20
+
22
21
  N: {
23
22
  value: "N",
24
23
  name: "Asparagine",
@@ -46,7 +45,7 @@ const proteinAlphabet = {
46
45
  color: "hsl(335.1, 100%, 69%)",
47
46
  mass: 103.1429
48
47
  },
49
-
48
+
50
49
  E: {
51
50
  value: "E",
52
51
  name: "Glutamic acid",
@@ -75,7 +74,6 @@ const proteinAlphabet = {
75
74
  mass: 57.05132
76
75
  },
77
76
 
78
-
79
77
  H: {
80
78
  value: "H",
81
79
  name: "Histidine",
@@ -114,7 +112,6 @@ const proteinAlphabet = {
114
112
  mass: 128.17228
115
113
  },
116
114
 
117
-
118
115
  M: {
119
116
  value: "M",
120
117
  name: "Methionine",
@@ -6,7 +6,6 @@ import { cloneDeep, flatMap } from "lodash";
6
6
  import { annotationTypes } from "./annotationTypes";
7
7
  import filterSequenceString from "./filterSequenceString";
8
8
  import tidyUpAnnotation from "./tidyUpAnnotation";
9
- import filterAminoAcidSequenceString from "./filterAminoAcidSequenceString";
10
9
  import getDegenerateDnaStringFromAaString from "./getDegenerateDnaStringFromAAString";
11
10
  import { getFeatureTypes } from "./featureTypesAndColors";
12
11
 
@@ -14,14 +13,13 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
14
13
  const {
15
14
  annotationsAsObjects,
16
15
  logMessages,
17
- removeUnwantedChars,
16
+ doNotRemoveInvalidChars,
18
17
  additionalValidChars,
19
18
  noTranslationData,
20
- charOverrides,
21
19
  doNotProvideIdsForAnnotations,
22
- proteinFilterOptions,
23
20
  noCdsTranslations,
24
- convertAnnotationsFromAAIndices
21
+ convertAnnotationsFromAAIndices,
22
+ topLevelSeqData
25
23
  } = options;
26
24
  let seqData = cloneDeep(pSeqData); //sequence is usually immutable, so we clone it and return it
27
25
  const response = {
@@ -54,20 +52,18 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
54
52
  //flip all t's to u's
55
53
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
56
54
  }
57
- if (removeUnwantedChars) {
55
+ if (!doNotRemoveInvalidChars) {
58
56
  if (seqData.isProtein) {
59
- seqData.proteinSequence = filterAminoAcidSequenceString(
60
- seqData.proteinSequence,
61
- { includeStopCodon: true, ...proteinFilterOptions }
62
- );
57
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, {
58
+ ...(topLevelSeqData || seqData)
59
+ });
60
+ seqData.proteinSequence = newSeq;
63
61
  } else {
64
- seqData.sequence = filterSequenceString(
65
- seqData.sequence,
66
- `${additionalValidChars || ""}${
67
- seqData.isRna || seqData.isMixedRnaAndDna ? "u" : "" //if it is rna or mixed, allow u's
68
- }`,
69
- charOverrides
70
- );
62
+ const [newSeq] = filterSequenceString(seqData.sequence, {
63
+ additionalValidChars,
64
+ ...(topLevelSeqData || seqData)
65
+ });
66
+ seqData.sequence = newSeq;
71
67
  }
72
68
  }
73
69
  if (seqData.isProtein) {
@@ -5,7 +5,7 @@ import chaiSubset from "chai-subset";
5
5
  chai.use(chaiSubset);
6
6
  chai.should();
7
7
  describe("tidyUpSequenceData", () => {
8
- it("should remove unwanted chars if passed that option, while handling annotation start,end (and location start,end) truncation correctly", () => {
8
+ it("should remove invalid chars by default, while handling annotation start,end (and location start,end) truncation correctly", () => {
9
9
  const res = tidyUpSequenceData(
10
10
  {
11
11
  sequence: "http://localhost:3344/Standalone",
@@ -26,7 +26,7 @@ describe("tidyUpSequenceData", () => {
26
26
  }
27
27
  ]
28
28
  },
29
- { removeUnwantedChars: true }
29
+
30
30
  );
31
31
  res.should.containSubset({
32
32
  sequence: "httcahstStandan",
@@ -49,15 +49,6 @@ describe("tidyUpSequenceData", () => {
49
49
  ]
50
50
  });
51
51
  });
52
- // const res = tidyUpSequenceData(
53
- // {
54
- // isProtein: true,
55
- // circular: true,
56
- // proteinSequence: "gagiuhwgagalasjglj*.",
57
- // features: [{ start: 3, end: 10 }, { start: 10, end: 20 }]
58
- // },
59
- // { convertAnnotationsFromAAIndices: true, removeUnwantedChars: true }
60
- // );
61
52
 
62
53
  it("should handle a protein sequence being passed in with isProtein set to true", () => {
63
54
  const res = tidyUpSequenceData(
@@ -71,69 +62,24 @@ describe("tidyUpSequenceData", () => {
71
62
  { name: "iDon'tFit", start: 25, end: 35 }
72
63
  ]
73
64
  },
74
- { convertAnnotationsFromAAIndices: true, removeUnwantedChars: true }
65
+ { convertAnnotationsFromAAIndices: true }
75
66
  );
67
+
76
68
  res.should.containSubset({
77
- aminoAcidDataForEachBaseOfDNA: [
78
- {
79
- aminoAcid: {
80
- value: ".",
81
- name: "Gap",
82
- threeLettersName: "Gap"
83
- },
84
- positionInCodon: 0,
85
- aminoAcidIndex: 17,
86
- sequenceIndex: 51,
87
- codonRange: {
88
- start: 51,
89
- end: 53
90
- },
91
- fullCodon: true
92
- },
93
- {
94
- aminoAcid: {
95
- value: ".",
96
- name: "Gap",
97
- threeLettersName: "Gap"
98
- },
99
- positionInCodon: 1,
100
- aminoAcidIndex: 17,
101
- sequenceIndex: 52,
102
- codonRange: {
103
- start: 51,
104
- end: 53
105
- },
106
- fullCodon: true
107
- },
108
- {
109
- aminoAcid: {
110
- value: ".",
111
- name: "Gap",
112
- threeLettersName: "Gap"
113
- },
114
- positionInCodon: 2,
115
- aminoAcidIndex: 17,
116
- sequenceIndex: 53,
117
- codonRange: {
118
- start: 51,
119
- end: 53
120
- },
121
- fullCodon: true
122
- }
123
- ],
69
+ aminoAcidDataForEachBaseOfDNA: [],
124
70
  isProtein: true,
125
- size: 54, //size should refer to the DNA length
126
- proteinSize: 18, //proteinSize should refer to the amino acid length
127
- sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnggnytntrr...", //degenerate sequence
128
- proteinSequence: "gagiuhwgagalasgl*.",
71
+ size: 57, //size should refer to the DNA length
72
+ proteinSize: 19, //proteinSize should refer to the amino acid length
73
+ sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnhtnggnytnhtntrr", //degenerate sequence
74
+ proteinSequence: "gagiuhwgagalasjglj*",
129
75
  circular: false,
130
76
  features: [
131
77
  { start: 9, end: 32, forward: true },
132
- { start: 30, end: 53, forward: true },
78
+ { start: 30, end: 56, forward: true },
133
79
  {
134
80
  name: "iDon'tFit",
135
- start: 51,
136
- end: 53,
81
+ start: 54,
82
+ end: 56,
137
83
  forward: true
138
84
  }
139
85
  ]
@@ -1 +0,0 @@
1
- export default function filterAminoAcidSequenceString(sequenceString: any, options: any): any;
@@ -1 +0,0 @@
1
- export {};
@@ -1,10 +0,0 @@
1
- //
2
- export default function filterAminoAcidSequenceString(sequenceString, options) {
3
- options = options || {};
4
- if (options.includeStopCodon) {
5
- //tnrtodo this maybe needs the stop codon char in it?
6
- return sequenceString?.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
7
- }
8
- // ac.throw(ac.string, sequenceString);
9
- return sequenceString?.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
10
- }
@@ -1,24 +0,0 @@
1
- import assert from "assert";
2
- import filterAminoAcidSequenceString from "./filterAminoAcidSequenceString";
3
- describe("filterAminoAcidSequenceString", () => {
4
- it("should filter only valid amino acids by default", () => {
5
- const filteredString = filterAminoAcidSequenceString(
6
- 'bbb342"""xtgalmfwkqespvicyhrnd,,../'
7
- );
8
- assert.equal(filteredString, "xtgalmfwkqespvicyhrnd");
9
- });
10
- it("should handle upper case letters", () => {
11
- const filteredString = filterAminoAcidSequenceString(
12
- "xtgalmfWKQEspvicyhrnd"
13
- );
14
- assert.equal(filteredString, "xtgalmfWKQEspvicyhrnd");
15
- });
16
- it("should handle the option to includeStopCodon by allowing periods", () => {
17
- const options = { includeStopCodon: true };
18
- const filteredString = filterAminoAcidSequenceString(
19
- 'bbb342"""xtgalmfwkqespvicyhrnd,,../',
20
- options
21
- );
22
- assert.equal(filteredString, "xtgalmfwkqespvicyhrnd..");
23
- });
24
- });