npm - @teselagen/sequence-utils - Versions diffs - 0.3.9 → 0.3.10 - Mend

@teselagen/sequence-utils 0.3.9 → 0.3.10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (11) hide show

package/bioData.d.ts +1 -1
package/filterSequenceString.d.ts +2 -4
package/index.js +8 -13
package/index.mjs +8 -13
package/index.umd.js +8 -13
package/package.json +1 -1
package/src/bioData.js +1 -2
package/src/filterSequenceString.js +3 -7
package/src/filterSequenceString.test.js +6 -8
package/src/tidyUpSequenceData.js +2 -3
package/src/tidyUpSequenceData.test.js +12 -66

package/bioData.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 export const protein_letters: "ACDEFGHIKLMNPQRSTVWY";
 export const protein_letters_withUandX: "ACDEFGHIKLMNPQRSTVWYUX";
-export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
+export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
 export const ambiguous_dna_letters: "GATCRYWSMKHBVDN";
 export const unambiguous_dna_letters: "GATC";
 export const ambiguous_rna_letters: "GAUCRYWSMKHBVDN";

package/filterSequenceString.d.ts CHANGED Viewed

@@ -1,18 +1,16 @@
-export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna, includeStopCodon }?: {
+export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna, }?: {
     additionalValidChars?: string | undefined;
     isOligo: any;
     name: any;
     isProtein: any;
     isRna: any;
     isMixedRnaAndDna: any;
-    includeStopCodon: any;
 }): (string | string[])[];
-export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna, includeStopCodon }?: {
+export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna, }?: {
     isOligo: any;
     isProtein: any;
     isRna: any;
     isMixedRnaAndDna: any;
-    includeStopCodon: any;
 }): string;
 export function getReplaceChars({ isOligo, isProtein, isRna, isMixedRnaAndDna }?: {
     isOligo: any;

package/index.js CHANGED Viewed

@@ -6000,7 +6000,7 @@ lodash.exports;
 var lodashExports = lodash.exports;
 const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
 const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
-const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
+const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
 const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
 const unambiguous_dna_letters = "GATC";
 const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -12329,15 +12329,13 @@ function filterSequenceString(sequenceString, {
   name,
   isProtein,
   isRna,
-  isMixedRnaAndDna,
-  includeStopCodon
+  isMixedRnaAndDna
 } = {}) {
   const acceptedChars = getAcceptedChars({
     isOligo,
     isProtein,
     isRna,
-    isMixedRnaAndDna,
-    includeStopCodon
+    isMixedRnaAndDna
   });
   const replaceChars = getReplaceChars({
     isOligo,
@@ -12387,10 +12385,9 @@ function getAcceptedChars({
   isOligo,
   isProtein,
   isRna,
-  isMixedRnaAndDna,
-  includeStopCodon
+  isMixedRnaAndDna
 } = {}) {
-  return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
+  return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
     //just plain old dna
     ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
   );
@@ -12544,7 +12541,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
   const {
     annotationsAsObjects,
     logMessages,
-    removeUnwantedChars,
+    doNotRemoveInvalidChars,
     additionalValidChars,
     noTranslationData,
     doNotProvideIdsForAnnotations,
@@ -12578,11 +12575,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
   if (seqData.isRna) {
     seqData.sequence = seqData.sequence.replace(/t/gi, "u");
   }
-  if (removeUnwantedChars) {
+  if (!doNotRemoveInvalidChars) {
     if (seqData.isProtein) {
-      const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
-        includeStopCodon: true
-      }, topLevelSeqData || seqData));
+      const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
       seqData.proteinSequence = newSeq;
     } else {
       const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({

package/index.mjs CHANGED Viewed

@@ -5998,7 +5998,7 @@ lodash.exports;
 var lodashExports = lodash.exports;
 const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
 const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
-const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
+const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
 const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
 const unambiguous_dna_letters = "GATC";
 const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -12327,15 +12327,13 @@ function filterSequenceString(sequenceString, {
   name,
   isProtein,
   isRna,
-  isMixedRnaAndDna,
-  includeStopCodon
+  isMixedRnaAndDna
 } = {}) {
   const acceptedChars = getAcceptedChars({
     isOligo,
     isProtein,
     isRna,
-    isMixedRnaAndDna,
-    includeStopCodon
+    isMixedRnaAndDna
   });
   const replaceChars = getReplaceChars({
     isOligo,
@@ -12385,10 +12383,9 @@ function getAcceptedChars({
   isOligo,
   isProtein,
   isRna,
-  isMixedRnaAndDna,
-  includeStopCodon
+  isMixedRnaAndDna
 } = {}) {
-  return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
+  return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
     //just plain old dna
     ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
   );
@@ -12542,7 +12539,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
   const {
     annotationsAsObjects,
     logMessages,
-    removeUnwantedChars,
+    doNotRemoveInvalidChars,
     additionalValidChars,
     noTranslationData,
     doNotProvideIdsForAnnotations,
@@ -12576,11 +12573,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
   if (seqData.isRna) {
     seqData.sequence = seqData.sequence.replace(/t/gi, "u");
   }
-  if (removeUnwantedChars) {
+  if (!doNotRemoveInvalidChars) {
     if (seqData.isProtein) {
-      const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
-        includeStopCodon: true
-      }, topLevelSeqData || seqData));
+      const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
       seqData.proteinSequence = newSeq;
     } else {
       const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({

package/index.umd.js CHANGED Viewed

@@ -6002,7 +6002,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
   var lodashExports = lodash.exports;
   const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
   const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
-  const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
+  const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
   const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
   const unambiguous_dna_letters = "GATC";
   const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -12331,15 +12331,13 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
     name,
     isProtein,
     isRna,
-    isMixedRnaAndDna,
-    includeStopCodon
+    isMixedRnaAndDna
   } = {}) {
     const acceptedChars = getAcceptedChars({
       isOligo,
       isProtein,
       isRna,
-      isMixedRnaAndDna,
-      includeStopCodon
+      isMixedRnaAndDna
     });
     const replaceChars = getReplaceChars({
       isOligo,
@@ -12389,10 +12387,9 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
     isOligo,
     isProtein,
     isRna,
-    isMixedRnaAndDna,
-    includeStopCodon
+    isMixedRnaAndDna
   } = {}) {
-    return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
+    return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
       //just plain old dna
       ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
     );
@@ -12546,7 +12543,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
     const {
       annotationsAsObjects,
       logMessages,
-      removeUnwantedChars,
+      doNotRemoveInvalidChars,
       additionalValidChars,
       noTranslationData,
       doNotProvideIdsForAnnotations,
@@ -12580,11 +12577,9 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
     if (seqData.isRna) {
       seqData.sequence = seqData.sequence.replace(/t/gi, "u");
     }
-    if (removeUnwantedChars) {
+    if (!doNotRemoveInvalidChars) {
       if (seqData.isProtein) {
-        const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
-          includeStopCodon: true
-        }, topLevelSeqData || seqData));
+        const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
         seqData.proteinSequence = newSeq;
       } else {
         const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@teselagen/sequence-utils",
-  "version": "0.3.9",
+  "version": "0.3.10",
   "dependencies": {
     "@teselagen/range-utils": "0.3.7",
     "bson-objectid": "^2.0.4",

package/src/bioData.js CHANGED Viewed

@@ -2,8 +2,7 @@
 export const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
 export const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
-export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
+export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
 export const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
 export const unambiguous_dna_letters = "GATC";
 export const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";

package/src/filterSequenceString.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import {
   ambiguous_dna_letters,
   ambiguous_rna_letters,
-  protein_letters_withUandX
+  extended_protein_letters,
 } from "./bioData";
 export default function filterSequenceString(
@@ -13,7 +13,6 @@ export default function filterSequenceString(
     isProtein,
     isRna,
     isMixedRnaAndDna,
-    includeStopCodon
   } = {}
 ) {
   const acceptedChars = getAcceptedChars({
@@ -21,7 +20,6 @@ export default function filterSequenceString(
     isProtein,
     isRna,
     isMixedRnaAndDna,
-    includeStopCodon
   });
   const replaceChars = getReplaceChars({
     isOligo,
@@ -83,12 +81,9 @@ export function getAcceptedChars({
   isProtein,
   isRna,
   isMixedRnaAndDna,
-  includeStopCodon
 } = {}) {
   return isProtein
-    ? `${protein_letters_withUandX.toLowerCase()}${
-        includeStopCodon ? "*." : ""
-      }}`
+    ? `${extended_protein_letters.toLowerCase()}}`
     : isOligo
     ? ambiguous_rna_letters.toLowerCase() + "t"
     : isRna
@@ -106,6 +101,7 @@ export function getReplaceChars({
 } = {}) {
   return isProtein
     ? {}
+    // {".": "*"}
     : isOligo
     ? {}
     : isRna

package/src/filterSequenceString.test.js CHANGED Viewed

@@ -49,10 +49,9 @@ describe("filterSequenceString", () => {
         isProtein: true
       }
     );
-    expect(warnings[0]).toBe(
-      'Invalid character(s) detected and removed: b, b, b, 3, 4, 2, ", ", ", ,, ,, ., ., / '
-    );
-    expect(str).toBe("xtgalmfwkqespvicyhrnd");
+    // expect(warnings[0]).toBe(`Replaced "." with "*" 2 times`);
+    expect(warnings[0]).toBe(      'Invalid character(s) detected and removed: 3, 4, 2, ", ", ", ,, ,, ., ., / ');
+    expect(str).toBe("bbbxtgalmfwkqespvicyhrnd");
   });
   it("when isProtein: true, should handle upper case letters", () => {
     const [str, warnings] = filterSequenceString("xtgalmfWKQEspvicyhrnd", {
@@ -61,12 +60,11 @@ describe("filterSequenceString", () => {
     expect(warnings.length).toBe(0);
     expect(str).toBe("xtgalmfWKQEspvicyhrnd");
   });
-  it("when isProtein: true, should handle the option to includeStopCodon by allowing periods", () => {
-    const [str] = filterSequenceString('bbb342"""xtgalmfwkqespvicyhrnd,,../', {
+  it("when isProtein: true, it should convert . to *", () => {
+    const [str] = filterSequenceString('BXZJUO*bbb342"""xtgalbmfwkqespvicyhrnd,,../', {
       isProtein: true,
-      includeStopCodon: true
     });
-    expect(str).toBe("xtgalmfwkqespvicyhrnd..");
+    expect(str).toBe("BXZJUO*bbbxtgalbmfwkqespvicyhrnd");
   });
 });

package/src/tidyUpSequenceData.js CHANGED Viewed

@@ -13,7 +13,7 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
   const {
     annotationsAsObjects,
     logMessages,
-    removeUnwantedChars,
+    doNotRemoveInvalidChars,
     additionalValidChars,
     noTranslationData,
     doNotProvideIdsForAnnotations,
@@ -52,10 +52,9 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
     //flip all t's to u's
     seqData.sequence = seqData.sequence.replace(/t/gi, "u");
   }
-  if (removeUnwantedChars) {
+  if (!doNotRemoveInvalidChars) {
     if (seqData.isProtein) {
       const [newSeq] = filterSequenceString(seqData.proteinSequence, {
-        includeStopCodon: true,
         ...(topLevelSeqData || seqData)
       });
       seqData.proteinSequence = newSeq;

package/src/tidyUpSequenceData.test.js CHANGED Viewed

@@ -5,7 +5,7 @@ import chaiSubset from "chai-subset";
 chai.use(chaiSubset);
 chai.should();
 describe("tidyUpSequenceData", () => {
-  it("should remove unwanted chars if passed that option, while handling annotation start,end (and location start,end) truncation correctly", () => {
+  it("should remove invalid chars by default, while handling annotation start,end (and location start,end) truncation correctly", () => {
     const res = tidyUpSequenceData(
       {
         sequence: "http://localhost:3344/Standalone",
@@ -26,7 +26,7 @@ describe("tidyUpSequenceData", () => {
           }
         ]
       },
-      { removeUnwantedChars: true }
     );
     res.should.containSubset({
       sequence: "httcahstStandan",
@@ -49,15 +49,6 @@ describe("tidyUpSequenceData", () => {
       ]
     });
   });
-  // const res = tidyUpSequenceData(
-  //   {
-  //     isProtein: true,
-  //     circular: true,
-  //     proteinSequence: "gagiuhwgagalasjglj*.",
-  //     features: [{ start: 3, end: 10 }, { start: 10, end: 20 }]
-  //   },
-  //   { convertAnnotationsFromAAIndices: true, removeUnwantedChars: true }
-  // );
   it("should handle a protein sequence being passed in with isProtein set to true", () => {
     const res = tidyUpSequenceData(
@@ -71,69 +62,24 @@ describe("tidyUpSequenceData", () => {
           { name: "iDon'tFit", start: 25, end: 35 }
         ]
       },
-      { convertAnnotationsFromAAIndices: true, removeUnwantedChars: true }
+      { convertAnnotationsFromAAIndices: true }
     );
     res.should.containSubset({
-      aminoAcidDataForEachBaseOfDNA: [
-        {
-          aminoAcid: {
-            value: ".",
-            name: "Gap",
-            threeLettersName: "Gap"
-          },
-          positionInCodon: 0,
-          aminoAcidIndex: 17,
-          sequenceIndex: 51,
-          codonRange: {
-            start: 51,
-            end: 53
-          },
-          fullCodon: true
-        },
-        {
-          aminoAcid: {
-            value: ".",
-            name: "Gap",
-            threeLettersName: "Gap"
-          },
-          positionInCodon: 1,
-          aminoAcidIndex: 17,
-          sequenceIndex: 52,
-          codonRange: {
-            start: 51,
-            end: 53
-          },
-          fullCodon: true
-        },
-        {
-          aminoAcid: {
-            value: ".",
-            name: "Gap",
-            threeLettersName: "Gap"
-          },
-          positionInCodon: 2,
-          aminoAcidIndex: 17,
-          sequenceIndex: 53,
-          codonRange: {
-            start: 51,
-            end: 53
-          },
-          fullCodon: true
-        }
-      ],
+      aminoAcidDataForEachBaseOfDNA: [],
       isProtein: true,
-      size: 54, //size should refer to the DNA length
-      proteinSize: 18, //proteinSize should refer to the amino acid length
-      sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnggnytntrr...", //degenerate sequence
-      proteinSequence: "gagiuhwgagalasgl*.",
+      size: 57, //size should refer to the DNA length
+      proteinSize: 19, //proteinSize should refer to the amino acid length
+      sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnhtnggnytnhtntrr", //degenerate sequence
+      proteinSequence: "gagiuhwgagalasjglj*",
       circular: false,
       features: [
         { start: 9, end: 32, forward: true },
-        { start: 30, end: 53, forward: true },
+        { start: 30, end: 56, forward: true },
         {
           name: "iDon'tFit",
-          start: 51,
-          end: 53,
+          start: 54,
+          end: 56,
           forward: true
         }
       ]