@teselagen/sequence-utils 0.1.22 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/index.js +12030 -26126
  2. package/index.mjs +12119 -26124
  3. package/index.umd.js +24056 -38154
  4. package/package.json +2 -2
  5. package/src/DNAComplementMap.js +32 -0
  6. package/src/addGapsToSeqReads.js +417 -0
  7. package/src/addGapsToSeqReads.test.js +358 -0
  8. package/src/adjustAnnotationsToInsert.js +19 -0
  9. package/src/adjustBpsToReplaceOrInsert.js +50 -0
  10. package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
  11. package/src/aliasedEnzymesByName.js +7363 -0
  12. package/src/aminoAcidToDegenerateDnaMap.js +32 -0
  13. package/src/aminoAcidToDegenerateRnaMap.js +32 -0
  14. package/src/aminoAcidToDnaRna.test.js +27 -0
  15. package/src/annotateSingleSeq.js +29 -0
  16. package/src/annotateSingleSeq.test.js +64 -0
  17. package/src/annotationTypes.js +23 -0
  18. package/src/autoAnnotate.js +242 -0
  19. package/src/autoAnnotate.test.js +1039 -0
  20. package/src/bioData.js +431 -0
  21. package/src/calculateNebTa.js +34 -0
  22. package/src/calculateNebTa.test.js +57 -0
  23. package/src/calculateNebTm.js +127 -0
  24. package/src/calculateNebTm.test.js +32 -0
  25. package/src/calculatePercentGC.js +3 -0
  26. package/src/calculatePercentGC.test.js +14 -0
  27. package/src/calculateTm.js +297 -0
  28. package/src/calculateTm.test.js +7 -0
  29. package/src/computeDigestFragments.js +179 -0
  30. package/src/computeDigestFragments.test.js +73 -0
  31. package/src/condensePairwiseAlignmentDifferences.js +85 -0
  32. package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
  33. package/src/convertAACaretPositionOrRangeToDna.js +24 -0
  34. package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
  35. package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
  36. package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
  37. package/src/cutSequenceByRestrictionEnzyme.js +301 -0
  38. package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
  39. package/src/defaultEnzymesByName.js +278 -0
  40. package/src/degenerateDnaToAminoAcidMap.js +5 -0
  41. package/src/degenerateRnaToAminoAcidMap.js +5 -0
  42. package/src/deleteSequenceDataAtRange.js +5 -0
  43. package/src/deleteSequenceDataAtRange.test.js +146 -0
  44. package/src/diffUtils.js +64 -0
  45. package/src/diffUtils.test.js +74 -0
  46. package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
  47. package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
  48. package/src/featureTypesAndColors.js +152 -0
  49. package/src/featureTypesAndColors.test.js +52 -0
  50. package/src/filterAminoAcidSequenceString.js +13 -0
  51. package/src/filterAminoAcidSequenceString.test.js +22 -0
  52. package/src/filterSequenceString.js +22 -0
  53. package/src/filterSequenceString.test.js +13 -0
  54. package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
  55. package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
  56. package/src/findOrfsInPlasmid.js +26 -0
  57. package/src/findSequenceMatches.js +133 -0
  58. package/src/findSequenceMatches.test.js +286 -0
  59. package/src/generateAnnotations.js +34 -0
  60. package/src/generateSequenceData.js +206 -0
  61. package/src/generateSequenceData.test.js +22 -0
  62. package/src/getAllInsertionsInSeqReads.js +83 -0
  63. package/src/getAllInsertionsInSeqReads.test.js +26 -0
  64. package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
  65. package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
  66. package/src/getAminoAcidFromSequenceTriplet.js +22 -0
  67. package/src/getAminoAcidStringFromSequenceString.js +18 -0
  68. package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
  69. package/src/getCodonRangeForAASliver.js +63 -0
  70. package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
  71. package/src/getComplementSequenceAndAnnotations.js +20 -0
  72. package/src/getComplementSequenceString.js +19 -0
  73. package/src/getComplementSequenceString.test.js +13 -0
  74. package/src/getCutsiteType.js +10 -0
  75. package/src/getCutsitesFromSequence.js +17 -0
  76. package/src/getDegenerateDnaStringFromAAString.js +8 -0
  77. package/src/getDegenerateRnaStringFromAAString.js +8 -0
  78. package/src/getDigestFragmentsForCutsites.js +105 -0
  79. package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
  80. package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
  81. package/src/getInsertBetweenVals.js +28 -0
  82. package/src/getInsertBetweenVals.test.js +33 -0
  83. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
  84. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
  85. package/src/getMassOfAaString.js +24 -0
  86. package/src/getMassofAaString.test.js +18 -0
  87. package/src/getOrfsFromSequence.js +124 -0
  88. package/src/getOrfsFromSequence.test.js +210 -0
  89. package/src/getOverlapBetweenTwoSequences.js +30 -0
  90. package/src/getOverlapBetweenTwoSequences.test.js +23 -0
  91. package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
  92. package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
  93. package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
  94. package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
  95. package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
  96. package/src/getReverseComplementAnnotation.js +23 -0
  97. package/src/getReverseComplementAnnotation.test.js +44 -0
  98. package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
  99. package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
  100. package/src/getReverseComplementSequenceString.js +17 -0
  101. package/src/getReverseComplementSequenceString.test.js +11 -0
  102. package/src/getReverseSequenceString.js +12 -0
  103. package/src/getReverseSequenceString.test.js +9 -0
  104. package/src/getSequenceDataBetweenRange.js +131 -0
  105. package/src/getSequenceDataBetweenRange.test.js +474 -0
  106. package/src/getVirtualDigest.js +125 -0
  107. package/src/getVirtualDigest.test.js +134 -0
  108. package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
  109. package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
  110. package/src/index.js +106 -0
  111. package/src/index.test.js +38 -0
  112. package/src/insertGapsIntoRefSeq.js +38 -0
  113. package/src/insertGapsIntoRefSeq.test.js +20 -0
  114. package/src/insertSequenceDataAtPosition.js +2 -0
  115. package/src/insertSequenceDataAtPosition.test.js +75 -0
  116. package/src/insertSequenceDataAtPositionOrRange.js +249 -0
  117. package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
  118. package/src/isEnzymeType2S.js +3 -0
  119. package/src/mapAnnotationsToRows.js +174 -0
  120. package/src/mapAnnotationsToRows.test.js +425 -0
  121. package/src/prepareCircularViewData.js +17 -0
  122. package/src/prepareCircularViewData.test.js +196 -0
  123. package/src/prepareRowData.js +41 -0
  124. package/src/prepareRowData.test.js +36 -0
  125. package/src/prepareRowData_output1.json +391 -0
  126. package/src/proteinAlphabet.js +257 -0
  127. package/src/rotateBpsToPosition.js +13 -0
  128. package/src/rotateBpsToPosition.test.js +6 -0
  129. package/src/rotateSequenceDataToPosition.js +48 -0
  130. package/src/rotateSequenceDataToPosition.test.js +71 -0
  131. package/src/shiftAnnotationsByLen.js +17 -0
  132. package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
  133. package/src/tidyUpAnnotation.js +182 -0
  134. package/src/tidyUpSequenceData.js +169 -0
  135. package/src/tidyUpSequenceData.test.js +332 -0
@@ -0,0 +1,33 @@
1
+ import {ambiguous_dna_letters} from "./bioData";
2
+
3
/**
 * Guess whether a sequence string is DNA/RNA rather than protein.
 *
 * The sequence is considered DNA when the fraction of its characters that are
 * "DNA letters" is strictly greater than `threshold`. Letters are compared
 * case-insensitively. An empty or missing sequence defaults to DNA.
 *
 * @param {string} seq - the sequence to inspect
 * @param {Object} [options]
 * @param {number} [options.threshold=0.9] - fraction of DNA letters that must be exceeded
 * @param {boolean} [options.loose] - when truthy (and no dnaLetters are given),
 *   every letter of `ambiguous_dna_letters` plus "U" counts as a DNA letter
 * @param {string[]|string} [options.dnaLetters] - explicit letters to count as
 *   DNA (e.g. add "N" for data with ambiguous bases); takes precedence over `loose`
 * @returns {boolean} true if the sequence looks like DNA, false if it looks like protein
 */
export default function guessIfSequenceIsDnaAndNotProtein(seq, options = {}) {
  const { threshold = 0.9, loose } = options;
  if (!seq || !seq.length) return true;

  // The parentheses matter: `a || b ? c : d` parses as `(a || b) ? c : d`,
  // which silently ignored caller-supplied dnaLetters.
  const dnaLetters =
    options.dnaLetters ||
    (loose
      ? [...ambiguous_dna_letters.split(""), "U"]
      : ["G", "A", "T", "C", "U"]);

  // Upper-case once so that lookups below are case-insensitive.
  const dnaLetterSet = new Set(
    Array.from(dnaLetters, (letter) => letter.toUpperCase())
  );

  let count = 0;
  for (let index = 0; index < seq.length; index++) {
    if (dnaLetterSet.has(seq[index].toUpperCase())) {
      count += 1;
    }
  }

  // strictly greater than the threshold => DNA, otherwise protein
  return count / seq.length > threshold;
}
@@ -0,0 +1,34 @@
1
+ import chai from "chai";
2
+ import guessIfSequenceIsDnaAndNotProtein from "./guessIfSequenceIsDnaAndNotProtein";
3
+ chai.should();
4
// Table-driven suite: every scenario becomes its own test case. The sequence
// (and the scenario's options, when present) is handed to
// guessIfSequenceIsDnaAndNotProtein and the boolean guess is compared with
// the expected value.
describe("guessIfSequenceIsDnaAndNotProtein", () => {
  const heavilyAmbiguousSeq = "gtatacybctaacn";
  const scenarios = [
    {
      title: "should default to DNA for a length 0 sequecne",
      seq: "",
      expected: true
    },
    {
      title: "should correctly guess that a DNA seq is DNA",
      seq: "gtatacc",
      expected: true
    },
    {
      title:
        "should correctly guess that a DNA seq with some ambiguity is a DNA",
      seq: "gtatacctaacn",
      expected: true
    },
    {
      title:
        "should correctly guess that a seq with lots of ambiguity is a protein when in the default strict mode",
      seq: heavilyAmbiguousSeq,
      options: { loose: false },
      expected: false
    },
    {
      title:
        "should correctly guess that a seq with lots of ambiguity is dna when in the loose mode",
      seq: heavilyAmbiguousSeq,
      options: { loose: true },
      expected: true
    },
    {
      title:
        "should correctly guess that a DNA with lots of ambiguities is dna when the threshold is lower ",
      seq: heavilyAmbiguousSeq,
      options: { threshold: 0.5 },
      expected: true
    },
    {
      title:
        "should correctly guess that a DNA with lots of ambiguity is a dna when the ambiguous letter is included ",
      seq: "gtatanccnnntaacn",
      options: { dnaLetters: ["g", "a", "t", "c", "n"] },
      expected: true
    }
  ];

  scenarios.forEach(({ title, seq, options, expected }) => {
    it(title, () => {
      // an undefined `options` falls back to the function's own default
      guessIfSequenceIsDnaAndNotProtein(seq, options).should.equal(expected);
    });
  });
});
package/src/index.js ADDED
@@ -0,0 +1,106 @@
1
+ import {
2
+ autoAnnotate,
3
+ convertApELikeRegexToRegex,
4
+ convertProteinSeqToDNAIupac,
5
+ } from './autoAnnotate';
6
+
7
+ import {
8
+ genbankFeatureTypes,
9
+ getFeatureToColorMap,
10
+ getFeatureTypes,
11
+ getMergedFeatureMap,
12
+ } from './featureTypesAndColors';
13
+
14
+ export * from './computeDigestFragments';
15
+ export * from './diffUtils';
16
+ export * from './annotationTypes';
17
+
18
+ /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
19
+ //tnr: these are deprecated exports and should no longer be used!
20
+ const FeatureTypes = getFeatureTypes();
21
+ const featureColors = getFeatureToColorMap();
22
+ export {
23
+ getFeatureToColorMap,
24
+ getFeatureTypes,
25
+ genbankFeatureTypes,
26
+ getMergedFeatureMap,
27
+ FeatureTypes,
28
+ featureColors,
29
+ };
30
+
31
+ /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
32
+ export { autoAnnotate };
33
+ export { convertApELikeRegexToRegex };
34
+ export { convertProteinSeqToDNAIupac };
35
+ export * as bioData from './bioData';
36
+ export { default as getAllInsertionsInSeqReads } from './getAllInsertionsInSeqReads';
37
+ export { default as annotateSingleSeq } from './annotateSingleSeq';
38
+ export { default as getDegenerateDnaStringFromAAString } from './getDegenerateDnaStringFromAAString';
39
+ export { default as getDegenerateRnaStringFromAAString } from './getDegenerateRnaStringFromAAString';
40
+ export { default as getVirtualDigest } from './getVirtualDigest';
41
+ export { default as isEnzymeType2S } from './isEnzymeType2S';
42
+ export { default as insertGapsIntoRefSeq } from './insertGapsIntoRefSeq';
43
+ export { default as adjustBpsToReplaceOrInsert } from './adjustBpsToReplaceOrInsert';
44
+ export { default as calculatePercentGC } from './calculatePercentGC';
45
+ export { default as calculateTm } from './calculateTm';
46
+ export { default as cutSequenceByRestrictionEnzyme } from './cutSequenceByRestrictionEnzyme';
47
+ export { default as deleteSequenceDataAtRange } from './deleteSequenceDataAtRange';
48
+ export { default as DNAComplementMap } from './DNAComplementMap';
49
+ export { default as doesEnzymeChopOutsideOfRecognitionSite } from './doesEnzymeChopOutsideOfRecognitionSite';
50
+ export { default as aliasedEnzymesByName } from './aliasedEnzymesByName';
51
+ export { default as defaultEnzymesByName } from './defaultEnzymesByName';
52
+ export { default as generateSequenceData } from './generateSequenceData';
53
+ export { default as generateAnnotations } from './generateAnnotations';
54
+ export { default as filterAminoAcidSequenceString } from './filterAminoAcidSequenceString';
55
+ export { default as filterSequenceString } from './filterSequenceString';
56
+ export { default as findNearestRangeOfSequenceOverlapToPosition } from './findNearestRangeOfSequenceOverlapToPosition';
57
+ export { default as findOrfsInPlasmid } from './findOrfsInPlasmid';
58
+ export { default as findSequenceMatches } from './findSequenceMatches';
59
+ export { default as getAminoAcidDataForEachBaseOfDna } from './getAminoAcidDataForEachBaseOfDna';
60
+ export { default as getAminoAcidFromSequenceTriplet } from './getAminoAcidFromSequenceTriplet';
61
+ export { default as getAminoAcidStringFromSequenceString } from './getAminoAcidStringFromSequenceString';
62
+ export { default as getCodonRangeForAASliver } from './getCodonRangeForAASliver';
63
+ export { default as getComplementAminoAcidStringFromSequenceString } from './getComplementAminoAcidStringFromSequenceString';
64
+ export { default as getComplementSequenceAndAnnotations } from './getComplementSequenceAndAnnotations';
65
+ export { default as getComplementSequenceString } from './getComplementSequenceString';
66
+ export { default as getCutsitesFromSequence } from './getCutsitesFromSequence';
67
+ export { default as getCutsiteType } from './getCutsiteType';
68
+ export { default as getInsertBetweenVals } from './getInsertBetweenVals';
69
+ export { default as getLeftAndRightOfSequenceInRangeGivenPosition } from './getLeftAndRightOfSequenceInRangeGivenPosition';
70
+ export { default as getOrfsFromSequence } from './getOrfsFromSequence';
71
+ export { default as getOverlapBetweenTwoSequences } from './getOverlapBetweenTwoSequences';
72
+ export { default as getPossiblePartsFromSequenceAndEnzymes } from './getPossiblePartsFromSequenceAndEnzymes';
73
+ export { default as getReverseAminoAcidStringFromSequenceString } from './getReverseAminoAcidStringFromSequenceString';
74
+ export { default as getReverseComplementAminoAcidStringFromSequenceString } from './getReverseComplementAminoAcidStringFromSequenceString';
75
+ export { default as getReverseComplementAnnotation } from './getReverseComplementAnnotation';
76
+ export { default as getReverseComplementSequenceAndAnnotations } from './getReverseComplementSequenceAndAnnotations';
77
+ export { default as getReverseComplementSequenceString } from './getReverseComplementSequenceString';
78
+ export { default as getReverseSequenceString } from './getReverseSequenceString';
79
+ export { default as getSequenceDataBetweenRange } from './getSequenceDataBetweenRange';
80
+ export { default as guessIfSequenceIsDnaAndNotProtein } from './guessIfSequenceIsDnaAndNotProtein';
81
+ export { default as insertSequenceDataAtPosition } from './insertSequenceDataAtPosition';
82
+ export { default as insertSequenceDataAtPositionOrRange } from './insertSequenceDataAtPositionOrRange';
83
+ export { default as mapAnnotationsToRows } from './mapAnnotationsToRows';
84
+ export { default as prepareCircularViewData } from './prepareCircularViewData';
85
+ export { default as prepareRowData } from './prepareRowData';
86
+ export { default as proteinAlphabet } from './proteinAlphabet';
87
+ export { default as rotateSequenceDataToPosition } from './rotateSequenceDataToPosition';
88
+ export { default as rotateBpsToPosition } from './rotateBpsToPosition';
89
+ export { default as threeLetterSequenceStringToAminoAcidMap } from './threeLetterSequenceStringToAminoAcidMap';
90
+ export { default as tidyUpSequenceData } from './tidyUpSequenceData';
91
+ export { default as tidyUpAnnotation } from './tidyUpAnnotation';
92
+ export { default as condensePairwiseAlignmentDifferences } from './condensePairwiseAlignmentDifferences';
93
+ export { default as addGapsToSeqReads } from './addGapsToSeqReads';
94
+ export { default as calculateNebTm } from './calculateNebTm';
95
+ export { default as calculateNebTa } from './calculateNebTa';
96
+ export { default as getDigestFragmentsForCutsites } from './getDigestFragmentsForCutsites';
97
+ export { default as getDigestFragmentsForRestrictionEnzymes } from './getDigestFragmentsForRestrictionEnzymes';
98
+ export { default as convertDnaCaretPositionOrRangeToAA } from './convertDnaCaretPositionOrRangeToAA';
99
+ export { default as convertAACaretPositionOrRangeToDna } from './convertAACaretPositionOrRangeToDna';
100
+ export { default as aminoAcidToDegenerateDnaMap } from './aminoAcidToDegenerateDnaMap';
101
+ export { default as aminoAcidToDegenerateRnaMap } from './aminoAcidToDegenerateRnaMap';
102
+ export { default as degenerateDnaToAminoAcidMap } from './degenerateDnaToAminoAcidMap';
103
+ export { default as degenerateRnaToAminoAcidMap } from './degenerateRnaToAminoAcidMap';
104
+ export { default as getMassOfAaString } from './getMassOfAaString';
105
+ export { default as shiftAnnotationsByLen } from './shiftAnnotationsByLen';
106
+ export { default as adjustAnnotationsToInsert } from './adjustAnnotationsToInsert';
@@ -0,0 +1,38 @@
1
+ import * as src from ".";
2
+ import fs from "fs";
3
+
4
// Guards against forgetting to re-export a newly added source file: every
// file in this directory (apart from the ignored ones) must have a truthy
// export on the package index under its own base name.
describe("index.js", () => {
  // File-name fragments that are intentionally not expected as index exports.
  const ignoredFragments = [
    ".test.js",
    "index.js",
    "prepareRowData_output1.json",
    "featureTypesAndColors",
    "diffUtils"
  ];

  it(`should export all functions defined`, () => {
    return new Promise((resolve, reject) => {
      fs.readdir(__dirname, (err, files) => {
        // Surface directory-read failures instead of crashing on `files`.
        if (err) {
          reject(err);
          return;
        }
        const unexported = files.filter((file) => {
          if (ignoredFragments.some((fragment) => file.includes(fragment))) {
            return false;
          }
          return !src[file.replace(".js", "")];
        });
        unexported.forEach((file) => {
          console.info(
            `Uh oh, it looks like you forgot to export (or explicitly ignore) this file:`,
            file
          );
        });
        if (unexported.length) {
          // Reject rather than throw: a throw inside this async callback
          // would escape the promise and never fail the test cleanly.
          reject(
            new Error(
              "Please make sure to export (or ignore) each file! Update index.js to export the file"
            )
          );
          return;
        }
        resolve();
      });
    });
  });
});
@@ -0,0 +1,38 @@
1
+ import getAllInsertionsInSeqReads from "./getAllInsertionsInSeqReads.js";
2
+
3
/**
 * Add gap characters ("-") to a reference sequence wherever the sequencing
 * reads contain insertions.
 *
 * @param {string} refSeq - the reference sequence
 * @param {Array<Object>} seqReads - reads shaped like
 *   [{name, seq, pos, cigar}, ...]; passed straight to getAllInsertionsInSeqReads
 * @returns {string} the reference sequence with gaps, e.g. "GGGA--GA-C--ACC"
 */
export default function insertGapsIntoRefSeq(refSeq, seqReads) {
  // work on an array of single characters: ["A", "T", "C", "G", ...]
  const bases = refSeq.split("");
  const insertions = getAllInsertionsInSeqReads(seqReads);

  insertions.forEach((insertion, idx) => {
    const { bpPos, number } = insertion;

    // one "-" per inserted base
    let gapRun = "";
    while (gapRun.length < number) {
      gapRun += "-";
    }

    // the whole run goes in as a single array element just before bpPos
    bases.splice(bpPos - 1, 0, gapRun);

    // the array grew by one element, so every later insertion shifts by one
    for (const laterInsertion of insertions.slice(idx + 1)) {
      laterInsertion.bpPos += 1;
    }
  });

  return bases.join("");
}
25
+
26
+ // allInsertionsInSeqReads.forEach(insertion => {
27
+ // // adding gap at the bp pos of insertion
28
+ // refSeqWithGaps.splice(insertion - 1, 0, "-");
29
+ // });
30
+ // for (let i = 0; i < allInsertionsInSeqReads.length; i++) {
31
+ // refSeqWithGaps.splice(allInsertionsInSeqReads[i] - 1, 0, "-");
32
+ // for (let innerI = i + 1; innerI < allInsertionsInSeqReads.length; innerI++){
33
+ // if (refSeqWithGaps[i] - 1 !== "-") {
34
+ // // allInsertionsInSeqReads[innerI] += 1;
35
+ // allInsertionsInSeqReads[i + 1] += 1;
36
+ // }
37
+ // }
38
+ // }
@@ -0,0 +1,20 @@
1
+ import insertGapsIntoRefSeq from "./insertGapsIntoRefSeq.js";
2
+
3
// Checks that gaps are added to the reference sequence for the insertions
// found across a set of reads with assorted CIGAR strings.
describe("insert gaps into ref seq from seq reads' insertions", () => {
  it("ref seq with all insertions", () => {
    // each row is [name, seq, pos, cigar]
    const readRows = [
      ["r1", "GATTGAC", 3, "2M2I3M"],
      ["r2", "GAGAGAC", 3, "7M"],
      ["r3", "GGGAGATCAC", 1, "6M1I3M"],
      ["r4", "GATTGAC", 3, "2M2I3M"],
      ["r5", "GAGC", 3, "3M1D1M"],
      ["r6", "GAGCTTACC", 3, "3M1D1M2I3M"],
      ["r7", "GGCATTTCC", 2, "2M3D2M3I2M"],
      ["r8", "GGATTGACATT", 1, "1D3M2I4M2I2D"],
      ["r9", "GGTTTGACCTTT", 1, "2M3I2D1M2D3M3I"]
    ];
    const seqReads = readRows.map(([name, seq, pos, cigar]) => ({
      name,
      seq,
      pos,
      cigar
    }));

    const gappedRefSeq = insertGapsIntoRefSeq("GGGAGACACC", seqReads);

    expect(gappedRefSeq).toEqual("GG---GA--GA-C--A---CC---");
  });
});
@@ -0,0 +1,2 @@
1
+ import insertSequenceDataAtPositionOrRange from "./insertSequenceDataAtPositionOrRange";
2
+ export default insertSequenceDataAtPositionOrRange;
@@ -0,0 +1,75 @@
1
+ //tnr: half finished test.
2
+
3
+
4
+ import chai from "chai";
5
+ import chaiSubset from "chai-subset";
6
+
7
+ import tidyUpSequenceData from "./tidyUpSequenceData";
8
+ import insertSequenceDataAtPosition from "./insertSequenceDataAtPosition";
9
+
10
+ chai.should();
11
+ chai.use(chaiSubset);
12
+
13
// Exercises insertSequenceDataAtPosition with an insert at caret position 0,
// first into an empty sequence and then into a sequence carrying a feature
// with two locations.
describe("insertSequenceData", () => {
  const basesToInsert = "atgagagaga";

  // Tidies both inputs, inserts at caret 0, checks the combined length and
  // hands the pieces back for any further assertions.
  const insertAtStart = (rawExistingSeq) => {
    const insert = tidyUpSequenceData({ sequence: basesToInsert });
    const before = tidyUpSequenceData(rawExistingSeq);
    const after = insertSequenceDataAtPosition(insert, before, 0);
    after.sequence.length.should.equal(
      before.sequence.length + insert.sequence.length
    );
    return { insert, before, after };
  };

  it("inserts characters at correct caret position", () => {
    insertAtStart({});
  });

  it("inserts characters at correct caret position", () => {
    const { insert, before, after } = insertAtStart({
      sequence: "atgagagaga",
      features: [
        {
          start: 0,
          end: 9,
          locations: [{ start: 0, end: 3 }, { start: 5, end: 9 }]
        }
      ]
    });
    const shift = insert.sequence.length;
    const featureBefore = before.features[0];

    after.features.length.should.equal(1);
    const featureAfter = after.features[0];
    featureAfter.start.should.equal(featureBefore.start + shift);

    // both location starts are checked first, then both location ends
    ["start", "end"].forEach((edge) => {
      [0, 1].forEach((locIndex) => {
        featureAfter.locations[locIndex][edge].should.equal(
          featureBefore.locations[locIndex][edge] + shift
        );
      });
    });
  });
});
@@ -0,0 +1,249 @@
1
+ import {getRangeLength} from "@teselagen/range-utils";
2
+ import {map, cloneDeep} from "lodash";
3
+ import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
4
+ import rotateSequenceDataToPosition from "./rotateSequenceDataToPosition";
5
+ import {adjustRangeToDeletionOfAnotherRange} from "@teselagen/range-utils";
6
+ import tidyUpSequenceData from "./tidyUpSequenceData";
7
+ import {modifiableTypes} from "./annotationTypes";
8
+ import adjustBpsToReplaceOrInsert from "./adjustBpsToReplaceOrInsert";
9
+ import adjustAnnotationsToInsert from "./adjustAnnotationsToInsert";
10
+
11
/**
 * Insert sequence data at a caret position, or replace a selected range of an
 * existing sequence with it. Updates the bps, the protein sequence, the sizes,
 * every annotation type listed in `modifiableTypes` and (when present) the
 * chromatogram data.
 *
 * Both inputs are first passed through tidyUpSequenceData; the result is built
 * on a deep clone of the tidied existing data.
 *
 * @param {Object} _sequenceDataToInsert - sequence data being inserted
 * @param {Object} _existingSequenceData - sequence data being inserted into
 * @param {number|{start: number, end: number}} caretPositionOrRange - a caret
 *   position, or a range to replace (start > end is handled as a selection
 *   that wraps around the origin)
 * @param {Object} [options] - forwarded to tidyUpSequenceData
 * @param {boolean} [options.maintainOriginSplit] - if you're inserting around
 *   the origin with n bps selected before the origin, n bps of the new seq
 *   should go in before the origin and the rest at the sequence start
 * @returns {Object} the new sequence data
 */
export default function insertSequenceDataAtPositionOrRange(
  _sequenceDataToInsert,
  _existingSequenceData,
  caretPositionOrRange,
  options = {}
) {
  const { maintainOriginSplit } = options;
  let existingSequenceData = tidyUpSequenceData(_existingSequenceData, options);
  const sequenceDataToInsert = tidyUpSequenceData(
    _sequenceDataToInsert,
    options
  );
  const newSequenceData = cloneDeep(existingSequenceData);
  // a protein insert is measured in bps: 3 per amino acid
  const insertLength = sequenceDataToInsert.proteinSequence
    ? sequenceDataToInsert.proteinSequence.length * 3
    : sequenceDataToInsert.sequence.length;
  let caretPosition = caretPositionOrRange;

  const selectionLength = getRangeLength(
    caretPositionOrRange,
    existingSequenceData.sequence.length
  );
  const isInsertSameLengthAsSelection =
    sequenceDataToInsert.sequence.length === selectionLength;

  if (
    caretPositionOrRange.start > -1 &&
    selectionLength === existingSequenceData.sequence.length
  ) {
    //handle the case where we're deleting everything!
    existingSequenceData = tidyUpSequenceData(
      {
        ...existingSequenceData,
        // empty out every modifiable annotation type. The accumulator itself
        // must be returned; returning the assigned [] made the reduce yield an
        // empty array, so nothing was cleared.
        ...modifiableTypes.reduce((acc, type) => {
          acc[type] = [];
          return acc;
        }, {}),
        sequence: "",
        proteinSequence: "",
        chromatogramData: undefined
      },
      options
    );
    newSequenceData.chromatogramData = undefined;
  } else if (
    newSequenceData.chromatogramData &&
    newSequenceData.chromatogramData.baseTraces
  ) {
    //handle chromatogramData updates
    if (caretPositionOrRange && caretPositionOrRange.start > -1) {
      if (caretPositionOrRange.start > caretPositionOrRange.end) {
        // selection wraps the origin: trim the tail first, then the head
        newSequenceData.chromatogramData = trimChromatogram({
          chromatogramData: newSequenceData.chromatogramData,
          range: {
            start: caretPositionOrRange.start,
            end: newSequenceData.sequence.length
          },
          justBaseCalls: isInsertSameLengthAsSelection
        });
        newSequenceData.chromatogramData = trimChromatogram({
          chromatogramData: newSequenceData.chromatogramData,
          range: {
            start: 0,
            end: caretPositionOrRange.end
          },
          justBaseCalls: isInsertSameLengthAsSelection
        });
      } else {
        newSequenceData.chromatogramData = trimChromatogram({
          chromatogramData: newSequenceData.chromatogramData,
          range: {
            start: caretPositionOrRange.start,
            end: caretPositionOrRange.end
          },
          justBaseCalls: isInsertSameLengthAsSelection
        });
      }
    }
    if (sequenceDataToInsert.sequence) {
      insertIntoChromatogram({
        chromatogramData: newSequenceData.chromatogramData,
        caretPosition:
          caretPositionOrRange.start > -1
            ? caretPositionOrRange.start
            : caretPositionOrRange,
        seqToInsert: sequenceDataToInsert.sequence,
        justBaseCalls: isInsertSameLengthAsSelection
      });
    }
  }

  //update the sequence
  newSequenceData.sequence = adjustBpsToReplaceOrInsert(
    existingSequenceData.sequence,
    sequenceDataToInsert.sequence,
    caretPositionOrRange
  );
  newSequenceData.size = newSequenceData.sequence.length;
  newSequenceData.proteinSequence = adjustBpsToReplaceOrInsert(
    existingSequenceData.proteinSequence,
    sequenceDataToInsert.proteinSequence,
    convertDnaCaretPositionOrRangeToAa(caretPositionOrRange)
  );
  newSequenceData.proteinSize = newSequenceData.proteinSequence.length;

  //handle the insert
  modifiableTypes.forEach(annotationType => {
    let existingAnnotations = existingSequenceData[annotationType];
    //update the annotations:
    //handle the delete if necessary
    if (caretPositionOrRange && caretPositionOrRange.start > -1) {
      //we have a range! so let's delete it!
      const range = caretPositionOrRange;
      // an origin-wrapping selection is replaced at the sequence start
      caretPosition = range.start > range.end ? 0 : range.start;
      //update all annotations for the deletion
      existingAnnotations = adjustAnnotationsToDelete(
        existingAnnotations,
        range,
        existingSequenceData.sequence.length
      );
    }
    //first clear the newSequenceData's annotations
    newSequenceData[annotationType] = [];
    //in two steps adjust the annotations to the insert
    //step 1: make room in the existing annotations for the insert
    newSequenceData[annotationType] = newSequenceData[annotationType].concat(
      adjustAnnotationsToInsert(
        existingAnnotations,
        caretPosition,
        insertLength
      )
    );
    //step 2: shift the inserted annotations to where the insert landed
    newSequenceData[annotationType] = newSequenceData[annotationType].concat(
      adjustAnnotationsToInsert(
        sequenceDataToInsert[annotationType],
        0,
        caretPosition
      )
    );
  });
  if (
    maintainOriginSplit &&
    caretPositionOrRange &&
    caretPositionOrRange.start > caretPositionOrRange.end
  ) {
    //we're replacing around the origin and maintainOriginSplit=true
    //so rotate the resulting seqData n bps
    const caretPosToRotateTo =
      existingSequenceData.sequence.length - caretPositionOrRange.start;
    return rotateSequenceDataToPosition(
      newSequenceData,
      Math.min(caretPosToRotateTo, insertLength)
    );
  }
  return newSequenceData;
}
168
+
169
/**
 * Adjust a list of annotations (and their sub-locations) for the deletion of
 * `range`, dropping every annotation for which the adjustment yields nothing.
 *
 * @param {Array<Object>} annotationsToBeAdjusted - annotations to adjust
 * @param {{start: number, end: number}} range - the range being deleted
 * @param {number} maxLength - passed through to adjustRangeToDeletionOfAnotherRange
 * @returns {Array<Object>} the adjusted, surviving annotations
 */
function adjustAnnotationsToDelete(annotationsToBeAdjusted, range, maxLength) {
  const shrink = span =>
    adjustRangeToDeletionOfAnotherRange(span, range, maxLength);

  const adjusted = map(annotationsToBeAdjusted, annotation => {
    const shrunkAnnotation = shrink(annotation);
    const { locations } = annotation;
    if (!locations) {
      return shrunkAnnotation;
    }

    // keep only the locations that survive the deletion
    const survivingLocations = locations
      .map(loc => shrink(loc))
      .filter(Boolean);
    if (survivingLocations.length === 0) {
      return shrunkAnnotation;
    }

    // the annotation now spans from its first to its last surviving location
    const merged = {
      ...shrunkAnnotation,
      start: survivingLocations[0].start,
      end: survivingLocations[survivingLocations.length - 1].end
    };
    // locations are only written when more than one survives
    if (survivingLocations.length > 1) {
      merged.locations = survivingLocations;
    }
    return merged;
  });

  //filter any fully deleted ranges
  return adjusted.filter(Boolean);
}
193
+
194
/**
 * Insert placeholder chromatogram entries for newly inserted bases.
 * Mutates `chromatogramData` in place.
 *
 * @param {Object} args
 * @param {Object} args.chromatogramData - may hold `baseCalls`, `baseTraces`
 *   and `qualNums` arrays; any that are missing are skipped
 * @param {number} args.caretPosition - index at which to insert
 * @param {string} args.seqToInsert - the bases being inserted
 * @param {boolean} [args.justBaseCalls] - when truthy only `baseCalls` is updated
 * @returns {Object|undefined} the mutated chromatogramData, or undefined when
 *   there is nothing to insert
 */
function insertIntoChromatogram({
  chromatogramData,
  caretPosition,
  seqToInsert,
  justBaseCalls
}) {
  if (!seqToInsert.length) return;

  if (chromatogramData.baseCalls) {
    chromatogramData.baseCalls.splice(
      caretPosition,
      0,
      ...seqToInsert.split("")
    );
  }
  if (justBaseCalls) {
    //return early if just base calls
    return chromatogramData;
  }

  // Every trace gets its OWN zero-filled array. Sharing one array between the
  // a/c/g/t traces would make a later edit of one trace change all four.
  const makeFlatTrace = () => new Array(11).fill(0);

  const baseTracesToInsert = [];
  const qualNumsToInsert = [];
  for (let index = 0; index < seqToInsert.length; index++) {
    qualNumsToInsert.push(0);
    baseTracesToInsert.push({
      aTrace: makeFlatTrace(),
      cTrace: makeFlatTrace(),
      gTrace: makeFlatTrace(),
      tTrace: makeFlatTrace()
    });
  }

  if (chromatogramData.baseTraces) {
    chromatogramData.baseTraces.splice(caretPosition, 0, ...baseTracesToInsert);
  }
  if (chromatogramData.qualNums) {
    chromatogramData.qualNums.splice(caretPosition, 0, ...qualNumsToInsert);
  }

  return chromatogramData;
}
234
+
235
/**
 * Remove the entries for an inclusive range from the chromatogram arrays.
 * Mutates `chromatogramData` in place.
 *
 * @param {Object} args
 * @param {Object} args.chromatogramData - may hold `baseCalls`, `qualNums`,
 *   `baseTraces` and `basePos` arrays; any that are missing are skipped
 * @param {{start: number, end: number}} args.range - inclusive range to remove
 * @param {boolean} [args.justBaseCalls] - when truthy only `baseCalls` is trimmed
 * @returns {Object} the mutated chromatogramData
 */
function trimChromatogram({
  chromatogramData,
  range: { start, end },
  justBaseCalls
}) {
  // the range is inclusive on both ends, hence the + 1
  const removeCount = end - start + 1;
  const tracks = justBaseCalls
    ? ["baseCalls"]
    : ["baseCalls", "qualNums", "baseTraces", "basePos"];

  for (const track of tracks) {
    const values = chromatogramData[track];
    if (values) {
      values.splice(start, removeCount);
    }
  }

  return chromatogramData;
}