npm - @teselagen/sequence-utils - Versions diffs - 0.3.23 → 0.3.25 - Mend

@teselagen/sequence-utils 0.3.23 → 0.3.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/calculateNebTm.d.ts +3 -2
package/calculateTm.d.ts +7 -1
package/degenerateDnaToAminoAcidMap.d.ts +1 -1
package/degenerateRnaToAminoAcidMap.d.ts +1 -1
package/getAminoAcidDataForEachBaseOfDna.d.ts +13 -3
package/{index.mjs → index.cjs} +2760 -6271
package/index.d.ts +76 -81
package/index.js +2729 -6240
package/{index.umd.js → index.umd.cjs} +2639 -6150
package/insertSequenceDataAtPosition.d.ts +1 -1
package/package.json +1 -1
package/src/addGapsToSeqReads.js +1 -1
package/src/adjustAnnotationsToInsert.js +1 -1
package/src/autoAnnotate.js +1 -1
package/src/calculateNebTm.js +1 -2
package/src/calculateNebTm.test.js +17 -7
package/src/calculateTm.js +54 -17
package/src/calculateTm.test.js +7 -1
package/src/computeDigestFragments.js +1 -1
package/src/cutSequenceByRestrictionEnzyme.js +1 -1
package/src/degenerateDnaToAminoAcidMap.js +1 -1
package/src/degenerateRnaToAminoAcidMap.js +1 -1
package/src/deleteSequenceDataAtRange.test.js +1 -1
package/src/diffUtils.js +1 -1
package/src/diffUtils.test.js +1 -1
package/src/featureTypesAndColors.js +1 -1
package/src/filterSequenceString.js +1 -1
package/src/findSequenceMatches.js +1 -1
package/src/generateSequenceData.test.js +1 -1
package/src/getAminoAcidDataForEachBaseOfDna.js +246 -115
package/src/getAminoAcidDataForEachBaseOfDna.test.js +55 -0
package/src/getComplementSequenceString.js +1 -1
package/src/getDigestFragmentsForRestrictionEnzymes.js +1 -1
package/src/getReverseComplementSequenceAndAnnotations.js +1 -1
package/src/getSequenceDataBetweenRange.js +1 -1
package/src/getVirtualDigest.js +1 -1
package/src/insertSequenceDataAtPositionOrRange.js +1 -1
package/src/insertSequenceDataAtPositionOrRange.test.js +22 -0
package/src/mapAnnotationsToRows.js +1 -1
package/src/prepareCircularViewData.js +1 -1
package/src/rotateSequenceDataToPosition.js +1 -1
package/src/tidyUpAnnotation.js +1 -1
package/src/tidyUpSequenceData.js +9 -2

package/src/getAminoAcidDataForEachBaseOfDna.js CHANGED Viewed

@@ -1,24 +1,80 @@
-import { translateRange, getSequenceWithinRange } from "@teselagen/range-utils";
+import {
+  translateRange,
+  getSequenceWithinRange,
+  flipContainedRange,
+  isPositionWithinRange
+} from "@teselagen/range-utils";
 import revComp from "./getReverseComplementSequenceString";
 import getAA from "./getAminoAcidFromSequenceTriplet";
 //
 import proteinAlphabet from "./proteinAlphabet";
-// ac.throw([ac.string,ac.bool],arguments);
 /**
  * @private
- * Gets aminoAcid data, including position in string and position in codon
- * from the sequenceString and the direction of the translation
+ * Gets the next triplet of bases in the sequenceString
+ * @param  {Number} index The index of the sequenceString to start at
  * @param  {String} sequenceString The dna sequenceString.
- * @param  {boolean} forward Should we find forward facing orfs or reverse facing orfs
- * @param  {boolean} isProteinSequence We're passing in a sequence of AA chars instead of DNA chars (slightly confusing but we'll still use the dna indexing for rendering in OVE)
- * @return [{
-        aminoAcid:
-        positionInCodon:
-      }]
+ * @param  {Object[]} exonRange Array of ranges of the sequenceString that contains the positions of bases corresponding to exons.
+ * @return {Object} The triplet of bases, the number of bases read, and the positions of the codon bases in the sequenceString
+ * @property {String} triplet The triplet of bases
+ * @property {Number} basesRead The number of bases read
+ * @property {Number[]} codonPositions The positions of the codon bases in the sequenceString
  */
-export default function getAminoAcidDataForEachBaseOfDna(
+function getNextTriplet(index, sequenceString, exonRange) {
+  let triplet = "";
+  let internalIndex;
+  // Positions of codons relative to the coding sequence start
+  // including introns.
+  const codonPositions = [];
+  // A function to check if a base is within an exon, defined here
+  // to avoid function creation in the loop (linter error)
+  const isBaseInExon = baseIndex =>
+    exonRange.some(r =>
+      isPositionWithinRange(baseIndex, r, sequenceString.length, true, false)
+    );
+  for (
+    internalIndex = index;
+    internalIndex < sequenceString.length;
+    internalIndex++
+  ) {
+    // We have read three bases into the triplet (this has to be at the top of the loop)
+    if (triplet.length === 3) {
+      break;
+    }
+    // TODO: ask about ranges
+    // The base lies within an exon, so it is added to the triplet (bases outside exons are skipped)
+    if (isBaseInExon(internalIndex)) {
+      // We read a base from the sequenceString
+      triplet += sequenceString[internalIndex];
+      codonPositions.push(internalIndex);
+    }
+  }
+  return { triplet, basesRead: internalIndex - index, codonPositions };
+}
+/**
+  * @private
+  * Returns a series of derived properties from the arguments to getAminoAcidDataForEachBaseOfDna
+  * @param  {String} originalSequenceString The dna sequenceString.
+  * @param  {boolean} forward Whether the translation is in the forward direction.
+  * @param  {Object} optionalSubrangeRange The range of the sequenceString to translate.
+  * @param  {boolean} isProteinSequence Whether the sequenceString is a protein sequence.
+  * @return {Object} The derived properties
+  * @property {String} sequenceString
+  * - If !isProtein: The subsequence within originalSequenceString that will be translated, defined by translationRange. If
+  *   !forward, this will be the reverse complement of the subsequence.
+  * - If isProtein: The originalSequenceString.
+  * @property {Object} translationRange The range of the originalSequenceString that we're translating (if !isProtein), or getting DNA-level
+    info for (if isProtein).
+  * @property {Number} originalSequenceStringLength The length of the full DNA sequence. If !isProtein it's the length of originalSequenceString
+  * @property {Number} sequenceStringLength The length of the DNA sequence that would give the translation.
+  * @property {Object[]} exonRange Array of ranges of the sequenceString that contains the positions of bases corresponding to exons.
+*/
+function getTranslatedSequenceProperties(
   originalSequenceString,
   forward,
   optionalSubrangeRange,
@@ -27,137 +83,212 @@ export default function getAminoAcidDataForEachBaseOfDna(
   const originalSequenceStringLength = isProteinSequence
     ? originalSequenceString.length * 3
     : originalSequenceString.length;
   let sequenceString = originalSequenceString;
-  let startOffset = 0;
+  const translationRange = { start: 0, end: originalSequenceStringLength - 1 };
   if (optionalSubrangeRange) {
     sequenceString = getSequenceWithinRange(
       optionalSubrangeRange,
       originalSequenceString
     );
-    startOffset = optionalSubrangeRange.start;
+    translationRange.start = optionalSubrangeRange.start;
+    translationRange.end = optionalSubrangeRange.end;
   }
   const sequenceStringLength = isProteinSequence
     ? sequenceString.length * 3
     : sequenceString.length;
-  // ac.throw([ac.string,ac.bool],arguments);
-  const aminoAcidDataForEachBaseOfDNA = [];
-  let codonRange;
-  let revCompGapLength = 0;
-  let aminoAcidIndex = 0;
-  if (!forward) {
-    //compute the start of the amino acid sequence, but only if translating in the reverse direction
-    aminoAcidIndex = Math.floor((sequenceStringLength - 1) / 3);
-    //because we're translating in the reverse direction, we need to
-    //check to see if there are untranslated amino acids at the start of the sequenceString
-    revCompGapLength = sequenceStringLength % 3;
-    codonRange = translateRange(
-      {
-        start: 0,
-        end: revCompGapLength - 1
-      },
-      startOffset,
+  if (!isProteinSequence && !forward) {
+    sequenceString = revComp(sequenceString);
+  }
+  // TODO: what to do with protein if this is true?
+  const absoluteExonRange =
+    !isProteinSequence &&
+    optionalSubrangeRange &&
+    optionalSubrangeRange.locations
+      ? optionalSubrangeRange.locations
+      : [translationRange];
+  const exonRange = absoluteExonRange.map(range => {
+    let outputRange = translateRange(
+      range,
+      -translationRange.start,
       originalSequenceStringLength
     );
-    if (revCompGapLength > 0) {
-      for (let i = 0; i < revCompGapLength; i++) {
-        aminoAcidDataForEachBaseOfDNA.push({
-          aminoAcid: getAA("xxx"), //fake xxx triplet returns the ambiguous X amino acid
-          positionInCodon: revCompGapLength - i - 1,
-          aminoAcidIndex,
-          sequenceIndex: codonRange.start + i,
-          codonRange,
-          fullCodon: false
-        });
-      }
-      aminoAcidIndex--;
+    if (!forward) {
+      outputRange = flipContainedRange(
+        outputRange,
+        { start: 0, end: sequenceStringLength - 1 },
+        sequenceStringLength
+      );
     }
+    return outputRange;
+  });
+  return {
+    sequenceString,
+    translationRange,
+    sequenceStringLength,
+    originalSequenceStringLength,
+    exonRange
+  };
+}
+/**
+ * Function to convert the position within the CDS (where A in ATG is 0, and T in ATG is 1)
+ * to the position in the main sequence
+ *
+ * @param  {Number} index The index of the sequenceString to start at
+ * @param  {boolean} forward Whether the translation is in the forward direction.
+ * @param  {Object} translationRange The range of the originalSequenceString that we're translating (if !isProtein), or getting DNA-level
+ * info for (if isProtein).
+ * @param  {Number} mainSequenceLength The length of the full DNA sequence. If !isProtein it's the length of originalSequenceString
+ * @return {Number} The position in the main sequence
+ *
+ */
+function positionInCdsToPositionInMainSequence(
+  index,
+  forward,
+  translationRange,
+  mainSequenceLength
+) {
+  let outputRange = translateRange(
+    { start: index, end: index },
+    translationRange.start,
+    mainSequenceLength
+  );
+  if (!forward) {
+    outputRange = flipContainedRange(
+      outputRange,
+      translationRange,
+      mainSequenceLength
+    );
   }
+  return outputRange.start;
+}
-  //compute the bulk of the sequence
-  for (
-    let index = 2 + revCompGapLength;
-    index < sequenceStringLength;
-    index += 3
-  ) {
+/**
+ * @private
+ * Gets aminoAcid data, including position in string and position in codon
+ * from the sequenceString and the direction of the translation
+ * @param  {String} sequenceString The dna sequenceString.
+ * @param  {boolean} forward Should we find forward facing orfs or reverse facing orfs
+ * @param  {boolean} isProteinSequence We're passing in a sequence of AA chars instead of DNA chars (slightly confusing but we'll still use the dna indexing for rendering in OVE)
+ * @return [{
+        aminoAcid:
+        positionInCodon:
+      }]
+ */
+export default function getAminoAcidDataForEachBaseOfDna(
+  originalSequenceString,
+  forward,
+  optionalSubrangeRange,
+  isProteinSequence
+) {
+  // Obtain derived properties, see getTranslatedSequenceProperties
+  const {
+    sequenceString,
+    translationRange,
+    sequenceStringLength,
+    originalSequenceStringLength,
+    exonRange
+  } = getTranslatedSequenceProperties(
+    originalSequenceString,
+    forward,
+    optionalSubrangeRange,
+    isProteinSequence
+  );
+  const aminoAcidDataForEachBaseOfDNA = [];
+  // Iterate over the DNA sequence length in increments of 3
+  for (let index = 0; index < sequenceStringLength; index += 3) {
     let aminoAcid;
+    const aminoAcidIndex = index / 3;
+    let codonPositionsInCDS;
+    let basesRead;
     if (isProteinSequence) {
-      aminoAcid =
-        proteinAlphabet[sequenceString[(index - 2) / 3].toUpperCase()];
+      codonPositionsInCDS = [0, 1, 2].map(i => index + i);
+      basesRead = 3;
+      aminoAcid = proteinAlphabet[sequenceString[index / 3].toUpperCase()];
     } else {
-      let triplet = sequenceString.slice(index - 2, index + 1);
-      if (!forward) {
-        //we reverse the triplet
-        triplet = revComp(triplet);
-      }
-      aminoAcid = getAA(triplet);
+      // Get the triplet of DNA bases
+      const {
+        triplet,
+        basesRead: _basesRead,
+        codonPositions
+      } = getNextTriplet(index, sequenceString, exonRange);
+      basesRead = _basesRead;
+      codonPositionsInCDS = codonPositions;
+      // If the triplet is not full (fewer than 3 bases were read), use the fake "xxx" triplet to get the gap amino acid
+      aminoAcid = triplet.length === 3 ? getAA(triplet) : getAA("xxx");
     }
-    codonRange = translateRange(
-      {
-        start: index - 2,
-        end: index
-      },
-      startOffset,
-      originalSequenceStringLength
+    const absoluteCodonPositions = codonPositionsInCDS.map(i =>
+      positionInCdsToPositionInMainSequence(
+        i,
+        forward,
+        translationRange,
+        originalSequenceStringLength
+      )
     );
-    aminoAcidDataForEachBaseOfDNA.push({
-      aminoAcid, //gap amino acid
-      positionInCodon: forward ? 0 : 2,
-      aminoAcidIndex,
-      sequenceIndex: codonRange.start,
-      codonRange,
-      fullCodon: true
-    });
-    aminoAcidDataForEachBaseOfDNA.push({
-      aminoAcid, //gap amino acid
-      positionInCodon: 1,
-      aminoAcidIndex,
-      sequenceIndex: codonRange.start + 1,
-      codonRange,
-      fullCodon: true
-    });
-    aminoAcidDataForEachBaseOfDNA.push({
-      aminoAcid, //gap amino acid
-      positionInCodon: forward ? 2 : 0,
-      aminoAcidIndex,
-      sequenceIndex: codonRange.start + 2,
-      codonRange,
-      fullCodon: true
-    });
-    if (forward) {
-      aminoAcidIndex++;
-    } else {
-      aminoAcidIndex--;
-    }
-  }
+    // What should the codon range be if it comprises intron bases?
+    const codonRange = forward
+      ? {
+          start: absoluteCodonPositions[0],
+          end: absoluteCodonPositions[codonPositionsInCDS.length - 1]
+        }
+      : {
+          start: absoluteCodonPositions[codonPositionsInCDS.length - 1],
+          end: absoluteCodonPositions[0]
+        };
-  //compute the end of the sequence
-  //we'll never hit the following logic if translating in the reverse direction
-  const lengthOfEndBpsNotCoveredByAminoAcids =
-    sequenceStringLength - aminoAcidDataForEachBaseOfDNA.length;
-  codonRange = translateRange(
-    {
-      start: sequenceStringLength - lengthOfEndBpsNotCoveredByAminoAcids,
-      end: sequenceStringLength - 1
-    },
-    startOffset,
-    originalSequenceStringLength
-  );
-  for (let j = 0; j < lengthOfEndBpsNotCoveredByAminoAcids; j++) {
-    aminoAcidDataForEachBaseOfDNA.push({
-      aminoAcid: getAA("xxx"), //fake xxx triplet returns the gap amino acid
-      positionInCodon: j,
-      aminoAcidIndex,
-      sequenceIndex: codonRange.start + j,
-      fullCodon: false,
-      codonRange
-    });
+    // Iterate over the positions read
+    let positionInCodon = 0;
+    for (let i = 0; i < basesRead; i++) {
+      const posInCds = i + index;
+      if (codonPositionsInCDS.includes(posInCds)) {
+        aminoAcidDataForEachBaseOfDNA.push({
+          aminoAcid,
+          positionInCodon,
+          aminoAcidIndex,
+          sequenceIndex: absoluteCodonPositions[i],
+          codonRange,
+          fullCodon: codonPositionsInCDS.length === 3
+        });
+        positionInCodon++;
+      } else {
+        // TODO: what should we insert here?
+        aminoAcidDataForEachBaseOfDNA.push({
+          aminoAcid: null,
+          positionInCodon: null,
+          aminoAcidIndex: null,
+          sequenceIndex: positionInCdsToPositionInMainSequence(
+            posInCds,
+            forward,
+            translationRange,
+            originalSequenceStringLength
+          ),
+          codonRange: null,
+          fullCodon: null
+        });
+      }
+    }
+    // Move the index in case intron bases were read
+    index += basesRead - codonPositionsInCDS.length;
   }
   if (sequenceStringLength !== aminoAcidDataForEachBaseOfDNA.length) {
     throw new Error("something went wrong!");
   }
+  // Reverse the array if we're translating in the reverse direction
+  if (!forward) {
+    aminoAcidDataForEachBaseOfDNA.reverse();
+  }
   return aminoAcidDataForEachBaseOfDNA;
 }

package/src/getAminoAcidDataForEachBaseOfDna.test.js CHANGED Viewed

@@ -5,6 +5,7 @@ import getAA from "./getAminoAcidFromSequenceTriplet";
 import assert from "assert";
 let aaData;
+let aaData2;
 describe("getAminoAcidDataForEachBaseOfDna tranlates a", () => {
   //: It gets correct amino acid mapping and position in codon for each basepair in sequence
   it("1 amino acid long sequence", () => {
@@ -419,4 +420,58 @@ describe("getAminoAcidDataForEachBaseOfDna tranlates a", () => {
       }
     ]);
   });
+  it("protein 1 amino acid long sequence", () => {
+    aaData = getAminoAcidDataForEachBaseOfDna("M", true, null, true);
+    aaData2 = getAminoAcidDataForEachBaseOfDna("atg", true, null, false);
+    assert.deepEqual(aaData, aaData2);
+  });
+  it("protein 1 amino acid long sequence in reverse direction", () => {
+    aaData = getAminoAcidDataForEachBaseOfDna("H", false, null, true);
+    aaData2 = getAminoAcidDataForEachBaseOfDna("atg", false, null, false);
+    assert.deepEqual(aaData, aaData2);
+  });
+  it("> 1 amino acid long sequence", () => {
+    aaData = getAminoAcidDataForEachBaseOfDna("MF", true, null, true);
+    aaData2 = getAminoAcidDataForEachBaseOfDna("atgttt", true, null, false);
+    assert.deepEqual(aaData, aaData2);
+  });
+  it("> 1 amino acid long sequence in reverse direction", () => {
+    aaData = getAminoAcidDataForEachBaseOfDna("KH", false, null, true);
+    aaData2 = getAminoAcidDataForEachBaseOfDna("atgttt", false, null, false);
+    assert.deepEqual(aaData, aaData2);
+  });
+  it.skip("protein 1 amino acid long sequence which is a subrange of a larger sequence", () => {
+    aaData = getAminoAcidDataForEachBaseOfDna(
+      "AMA",
+      true,
+      { start: 1, end: 1 },
+      true
+    );
+    aaData2 = getAminoAcidDataForEachBaseOfDna(
+      "xxxatgxxx",
+      true,
+      { start: 3, end: 5 },
+      false
+    );
+    // Unclear what the behavior should be here,
+    // for now it returns the same as the old code (ignores the start and end range)
+    assert.deepEqual(aaData, aaData2);
+  });
+  it.skip("protein 1 amino acid long sequence in reverse direaction which is a subrange of a larger sequence", () => {
+    aaData = getAminoAcidDataForEachBaseOfDna(
+      "AMA",
+      false,
+      { start: 1, end: 1 },
+      true
+    );
+    aaData2 = getAminoAcidDataForEachBaseOfDna(
+      "xxxatgxxx",
+      false,
+      { start: 3, end: 5 },
+      false
+    );
+    // Unclear what the behavior should be here,
+    // for now it returns the same as the old code (ignores the start and end range)
+    assert.deepEqual(aaData, aaData2);
+  });
 });

package/src/getComplementSequenceString.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import DNAComplementMap from "./DNAComplementMap";
-import { merge } from "lodash";
+import { merge } from "lodash-es";
 export default function getComplementSequenceString(sequence, isRna) {
   if (typeof sequence !== "string") return "";

package/src/getDigestFragmentsForRestrictionEnzymes.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import getDigestFragmentsForCutsites from "./getDigestFragmentsForCutsites";
 import cutSequenceByRestrictionEnzyme from "./cutSequenceByRestrictionEnzyme";
-import { flatMap } from "lodash";
+import { flatMap } from "lodash-es";
 export default function getDigestFragmentsForRestrictionEnzymes(
   sequence,

package/src/getReverseComplementSequenceAndAnnotations.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
 import getReverseComplementAnnotation from "./getReverseComplementAnnotation";
 import { annotationTypes } from "./annotationTypes";
-import { map } from "lodash";
+import { map } from "lodash-es";
 import tidyUpSequenceData from "./tidyUpSequenceData";
 import getSequenceDataBetweenRange from "./getSequenceDataBetweenRange";

package/src/getSequenceDataBetweenRange.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { flatMap, extend, forEach, startCase } from "lodash";
+import { flatMap, extend, forEach, startCase } from "lodash-es";
 import { getRangeLength } from "@teselagen/range-utils";
 import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
 import insertSequenceDataAtPosition from "./insertSequenceDataAtPosition";

package/src/getVirtualDigest.js CHANGED Viewed

@@ -1,6 +1,6 @@
 //UNDER CONSTRUCTION
-import { get } from "lodash";
+import { get } from "lodash-es";
 import {
   normalizePositionByRangeLength,

package/src/insertSequenceDataAtPositionOrRange.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { getRangeLength } from "@teselagen/range-utils";
-import { map, cloneDeep } from "lodash";
+import { map, cloneDeep } from "lodash-es";
 import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
 import rotateSequenceDataToPosition from "./rotateSequenceDataToPosition";
 import { adjustRangeToDeletionOfAnotherRange } from "@teselagen/range-utils";

package/src/insertSequenceDataAtPositionOrRange.test.js CHANGED Viewed

@@ -83,6 +83,28 @@ describe("insertSequenceData", () => {
     postInsertSeq.sequence.should.equal("atgatagatagggagaaa");
     postInsertSeq.proteinSequence.should.equal("MIDREK");
   });
+  it("inserts protein seq into a dna seq correctly", () => {
+    const sequenceToInsert = {
+      isProtein: true,
+      sequence: "atagatagg",
+      proteinSequence: "IDR"
+    };
+    const sequenceToInsertInto = {
+      //  012345
+      isProtein: false,
+      sequence: "atgagagagaaa",
+      proteinSequence: "MREK"
+    };
+    const range = { start: 3, end: 5 };
+    const postInsertSeq = insertSequenceDataAtPositionOrRange(
+      sequenceToInsert,
+      sequenceToInsertInto,
+      range
+    );
+    postInsertSeq.sequence.should.equal("atgatagatagggagaaa");
+    postInsertSeq.isProtein.should.equal(false);
+    postInsertSeq.proteinSequence.should.equal("MIDREK");
+  });
   it("inserts characters at correct range and computes the new size correctly", () => {
     const sequenceToInsert = {
       sequence: "rrrrrrr"

package/src/mapAnnotationsToRows.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { each, forEach, startsWith, filter } from "lodash";
+import { each, forEach, startsWith, filter } from "lodash-es";
 import {
   getYOffsetForPotentiallyCircularRange,

package/src/prepareCircularViewData.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { cloneDeep } from "lodash";
+import { cloneDeep } from "lodash-es";
 import { getYOffsetsForPotentiallyCircularRanges } from "@teselagen/range-utils";
 import { annotationTypes } from "./annotationTypes";

package/src/rotateSequenceDataToPosition.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { map } from "lodash";
+import { map } from "lodash-es";
 import { adjustRangeToRotation } from "@teselagen/range-utils";
 import tidyUpSequenceData from "./tidyUpSequenceData";
 import { modifiableTypes } from "./annotationTypes";

package/src/tidyUpAnnotation.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { cloneDeep, get, some } from "lodash";
+import { cloneDeep, get, some } from "lodash-es";
 import { getFeatureToColorMap, getFeatureTypes } from "./featureTypesAndColors";
 import shortid from "shortid";

package/src/tidyUpSequenceData.js CHANGED Viewed

@@ -2,12 +2,13 @@
 import shortid from "shortid";
 import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
-import { cloneDeep, flatMap } from "lodash";
+import { cloneDeep, flatMap } from "lodash-es";
 import { annotationTypes } from "./annotationTypes";
 import filterSequenceString from "./filterSequenceString";
 import tidyUpAnnotation from "./tidyUpAnnotation";
 import getDegenerateDnaStringFromAaString from "./getDegenerateDnaStringFromAAString";
 import { getFeatureTypes } from "./featureTypesAndColors";
+import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
 export default function tidyUpSequenceData(pSeqData, options = {}) {
   const {
@@ -16,6 +17,7 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
     doNotRemoveInvalidChars,
     additionalValidChars,
     noTranslationData,
+    includeProteinSequence,
     doNotProvideIdsForAnnotations,
     noCdsTranslations,
     convertAnnotationsFromAAIndices,
@@ -55,7 +57,8 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
   if (!doNotRemoveInvalidChars) {
     if (seqData.isProtein) {
       const [newSeq] = filterSequenceString(seqData.proteinSequence, {
-        ...(topLevelSeqData || seqData)
+        ...(topLevelSeqData || seqData),
+        isProtein: true
       });
       seqData.proteinSequence = newSeq;
     } else {
@@ -79,6 +82,10 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
       null,
       true
     );
+  } else if (includeProteinSequence) {
+    seqData.proteinSequence = getAminoAcidStringFromSequenceString(
+      seqData.sequence
+    );
   }
   seqData.size = seqData.noSequence ? seqData.size : seqData.sequence.length;