@teselagen/sequence-utils 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/index.js +12030 -26126
  2. package/index.mjs +12119 -26124
  3. package/index.umd.js +24056 -38154
  4. package/package.json +2 -2
  5. package/src/DNAComplementMap.js +32 -0
  6. package/src/addGapsToSeqReads.js +417 -0
  7. package/src/addGapsToSeqReads.test.js +358 -0
  8. package/src/adjustAnnotationsToInsert.js +19 -0
  9. package/src/adjustBpsToReplaceOrInsert.js +50 -0
  10. package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
  11. package/src/aliasedEnzymesByName.js +7363 -0
  12. package/src/aminoAcidToDegenerateDnaMap.js +32 -0
  13. package/src/aminoAcidToDegenerateRnaMap.js +32 -0
  14. package/src/aminoAcidToDnaRna.test.js +27 -0
  15. package/src/annotateSingleSeq.js +29 -0
  16. package/src/annotateSingleSeq.test.js +64 -0
  17. package/src/annotationTypes.js +23 -0
  18. package/src/autoAnnotate.js +242 -0
  19. package/src/autoAnnotate.test.js +1039 -0
  20. package/src/bioData.js +431 -0
  21. package/src/calculateNebTa.js +34 -0
  22. package/src/calculateNebTa.test.js +57 -0
  23. package/src/calculateNebTm.js +127 -0
  24. package/src/calculateNebTm.test.js +32 -0
  25. package/src/calculatePercentGC.js +3 -0
  26. package/src/calculatePercentGC.test.js +14 -0
  27. package/src/calculateTm.js +297 -0
  28. package/src/calculateTm.test.js +7 -0
  29. package/src/computeDigestFragments.js +179 -0
  30. package/src/computeDigestFragments.test.js +73 -0
  31. package/src/condensePairwiseAlignmentDifferences.js +85 -0
  32. package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
  33. package/src/convertAACaretPositionOrRangeToDna.js +24 -0
  34. package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
  35. package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
  36. package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
  37. package/src/cutSequenceByRestrictionEnzyme.js +301 -0
  38. package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
  39. package/src/defaultEnzymesByName.js +278 -0
  40. package/src/degenerateDnaToAminoAcidMap.js +5 -0
  41. package/src/degenerateRnaToAminoAcidMap.js +5 -0
  42. package/src/deleteSequenceDataAtRange.js +5 -0
  43. package/src/deleteSequenceDataAtRange.test.js +146 -0
  44. package/src/diffUtils.js +64 -0
  45. package/src/diffUtils.test.js +74 -0
  46. package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
  47. package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
  48. package/src/featureTypesAndColors.js +152 -0
  49. package/src/featureTypesAndColors.test.js +52 -0
  50. package/src/filterAminoAcidSequenceString.js +13 -0
  51. package/src/filterAminoAcidSequenceString.test.js +22 -0
  52. package/src/filterSequenceString.js +22 -0
  53. package/src/filterSequenceString.test.js +13 -0
  54. package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
  55. package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
  56. package/src/findOrfsInPlasmid.js +26 -0
  57. package/src/findSequenceMatches.js +133 -0
  58. package/src/findSequenceMatches.test.js +286 -0
  59. package/src/generateAnnotations.js +34 -0
  60. package/src/generateSequenceData.js +206 -0
  61. package/src/generateSequenceData.test.js +22 -0
  62. package/src/getAllInsertionsInSeqReads.js +83 -0
  63. package/src/getAllInsertionsInSeqReads.test.js +26 -0
  64. package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
  65. package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
  66. package/src/getAminoAcidFromSequenceTriplet.js +22 -0
  67. package/src/getAminoAcidStringFromSequenceString.js +18 -0
  68. package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
  69. package/src/getCodonRangeForAASliver.js +63 -0
  70. package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
  71. package/src/getComplementSequenceAndAnnotations.js +20 -0
  72. package/src/getComplementSequenceString.js +19 -0
  73. package/src/getComplementSequenceString.test.js +13 -0
  74. package/src/getCutsiteType.js +10 -0
  75. package/src/getCutsitesFromSequence.js +17 -0
  76. package/src/getDegenerateDnaStringFromAAString.js +8 -0
  77. package/src/getDegenerateRnaStringFromAAString.js +8 -0
  78. package/src/getDigestFragmentsForCutsites.js +105 -0
  79. package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
  80. package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
  81. package/src/getInsertBetweenVals.js +28 -0
  82. package/src/getInsertBetweenVals.test.js +33 -0
  83. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
  84. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
  85. package/src/getMassOfAaString.js +24 -0
  86. package/src/getMassofAaString.test.js +18 -0
  87. package/src/getOrfsFromSequence.js +124 -0
  88. package/src/getOrfsFromSequence.test.js +210 -0
  89. package/src/getOverlapBetweenTwoSequences.js +30 -0
  90. package/src/getOverlapBetweenTwoSequences.test.js +23 -0
  91. package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
  92. package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
  93. package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
  94. package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
  95. package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
  96. package/src/getReverseComplementAnnotation.js +23 -0
  97. package/src/getReverseComplementAnnotation.test.js +44 -0
  98. package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
  99. package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
  100. package/src/getReverseComplementSequenceString.js +17 -0
  101. package/src/getReverseComplementSequenceString.test.js +11 -0
  102. package/src/getReverseSequenceString.js +12 -0
  103. package/src/getReverseSequenceString.test.js +9 -0
  104. package/src/getSequenceDataBetweenRange.js +131 -0
  105. package/src/getSequenceDataBetweenRange.test.js +474 -0
  106. package/src/getVirtualDigest.js +125 -0
  107. package/src/getVirtualDigest.test.js +134 -0
  108. package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
  109. package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
  110. package/src/index.js +106 -0
  111. package/src/index.test.js +38 -0
  112. package/src/insertGapsIntoRefSeq.js +38 -0
  113. package/src/insertGapsIntoRefSeq.test.js +20 -0
  114. package/src/insertSequenceDataAtPosition.js +2 -0
  115. package/src/insertSequenceDataAtPosition.test.js +75 -0
  116. package/src/insertSequenceDataAtPositionOrRange.js +249 -0
  117. package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
  118. package/src/isEnzymeType2S.js +3 -0
  119. package/src/mapAnnotationsToRows.js +174 -0
  120. package/src/mapAnnotationsToRows.test.js +425 -0
  121. package/src/prepareCircularViewData.js +17 -0
  122. package/src/prepareCircularViewData.test.js +196 -0
  123. package/src/prepareRowData.js +41 -0
  124. package/src/prepareRowData.test.js +36 -0
  125. package/src/prepareRowData_output1.json +391 -0
  126. package/src/proteinAlphabet.js +257 -0
  127. package/src/rotateBpsToPosition.js +13 -0
  128. package/src/rotateBpsToPosition.test.js +6 -0
  129. package/src/rotateSequenceDataToPosition.js +48 -0
  130. package/src/rotateSequenceDataToPosition.test.js +71 -0
  131. package/src/shiftAnnotationsByLen.js +17 -0
  132. package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
  133. package/src/tidyUpAnnotation.js +182 -0
  134. package/src/tidyUpSequenceData.js +169 -0
  135. package/src/tidyUpSequenceData.test.js +332 -0
@@ -0,0 +1,124 @@
1
+ import shortid from "shortid";
2
+ import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
3
+
4
+
5
/**
 * Finds open reading frames (ORFs) on one strand of a sequence.
 *
 * The search runs a look-ahead regex over the sequence (reverse-complemented
 * first when `forward` is false, and doubled when `circular` is true) so that
 * overlapping ORFs in every frame are reported. ORFs that share the same stop
 * codon are collapsed into the longest one; the start positions of the shorter
 * ones are recorded on the survivor as `internalStartCodonIndices`.
 *
 * @param {Object} options
 * @param {string} options.sequence The sequence to search.
 * @param {number} options.minimumOrfSize Minimum ORF length to keep (the length
 *   counts everything from the start codon through the stop codon).
 * @param {boolean} options.forward true to search the sequence as given, false
 *   to search its reverse complement.
 * @param {boolean} options.circular true to also find ORFs that span the origin.
 * @param {boolean} [options.useAdditionalOrfStartCodons] when truthy, G[TU]G and
 *   C[TU]G are accepted as start codons in addition to A[TU]G.
 * @return {Object[]} One annotation per distinct stop codon, each with `start`,
 *   `end`, `length`, `internalStartCodonIndices`, `frame`, `forward`,
 *   `annotationTypePlural`, `isOrf` and `id`. `start`, `end` and
 *   `internalStartCodonIndices` are positions on the sequence that was passed
 *   in; `frame` is computed from the match position on the searched strand.
 */
export default function getOrfsFromSequence(options) {
  const {
    minimumOrfSize,
    forward,
    circular,
    useAdditionalOrfStartCodons
  } = options;
  let sequence = options.sequence;
  const originalSequenceLength = sequence.length;

  if (!forward) {
    // search the opposite strand; coordinates are mapped back at the very end
    sequence = getReverseComplementSequenceString(sequence);
  }
  if (circular) {
    // search a doubled sequence so origin-spanning orfs are matched, then
    // discard the hits that begin in the second copy
    sequence += sequence;
  }

  // the whole orf sits inside a look-ahead so matches are zero-width and
  // overlapping orfs (other frames, internal starts) are all reported
  const re = useAdditionalOrfStartCodons
    ? /(?=((?:A[TU]G|G[TU]G|C[TU]G)(?:.{3})*?(?:[TU]AG|[TU]AA|[TU]GA)))/gi
    : /(?=((?:A[TU]G)(?:.{3})*?(?:[TU]AG|[TU]AA|[TU]GA)))/gi;

  const orfRanges = [];
  let m;
  // eslint-disable-next-line no-cond-assign
  while ((m = re.exec(sequence)) !== null) {
    // zero-width matches do not advance lastIndex on their own
    if (m.index === re.lastIndex) {
      re.lastIndex++;
    }
    const orfLength = m[1].length;
    if (orfLength < minimumOrfSize) {
      continue;
    }
    const start = m.index;
    if (start >= originalSequenceLength) {
      // only reachable for circular searches: this hit begins in the second copy
      continue;
    }
    let end = start + orfLength - 1;
    if (end >= originalSequenceLength) {
      // wrap an origin-spanning end back onto the original sequence
      end -= originalSequenceLength;
    }
    orfRanges.push({
      start,
      end,
      length: orfLength,
      internalStartCodonIndices: [],
      frame: start % 3,
      forward,
      annotationTypePlural: "orfs",
      isOrf: true,
      id: shortid()
    });
  }

  // Pare the orfs down to one per stop codon. Everything in this pass stays in
  // searched-strand coordinates; flipping reverse-strand orfs here (as was done
  // before) left a longer orf that displaced an earlier one un-flipped and fed
  // an already-flipped start into the internal start codon calculation.
  const orfIndexByEnd = {};
  orfRanges.forEach((orf, index) => {
    const existingIndex = orfIndexByEnd[orf.end];
    if (typeof existingIndex === "undefined") {
      orfIndexByEnd[orf.end] = index;
      return;
    }
    let internalOrf = orf;
    let containingOrf = orfRanges[existingIndex];
    if (containingOrf.length < internalOrf.length) {
      // the newcomer is longer (happens for origin-spanning orfs): it becomes
      // the orf we keep for this stop codon
      internalOrf = containingOrf;
      containingOrf = orf;
      orfIndexByEnd[orf.end] = index;
    }
    // report the internal start codon in original-sequence coordinates
    const internalStartCodonIndex = forward
      ? internalOrf.start
      : originalSequenceLength - internalOrf.start - 1;
    containingOrf.internalStartCodonIndices = [
      ...containingOrf.internalStartCodonIndices,
      ...internalOrf.internalStartCodonIndices,
      internalStartCodonIndex
    ];
    // flag the shorter duplicate so it is dropped below
    internalOrf.remove = true;
  });

  const nonDuplicatedOrfRanges = orfRanges.filter(orf => !orf.remove);
  if (!forward) {
    // map the surviving reverse-strand orfs back onto the original sequence
    nonDuplicatedOrfRanges.forEach(orf => {
      const searchedStrandEnd = orf.end;
      orf.end = originalSequenceLength - orf.start - 1;
      orf.start = originalSequenceLength - searchedStrandEnd - 1;
    });
  }
  return nonDuplicatedOrfRanges;
}
@@ -0,0 +1,210 @@
1
+
2
+
3
+ import {expect} from "chai";
4
+
5
+ import getOrfsFromSequence from "./getOrfsFromSequence.js";
6
+ // getOrfsFromSequence(frame, sequence, minimumOrfSize, forward, circular)
7
describe("getOrfsFromSequence", () => {
  // Checks the fields that every test below inspects on a returned orf.
  const expectOrf = (orf, expected) => {
    expect(orf).to.be.an("object");
    expect(orf.start).to.equal(expected.start);
    expect(orf.end).to.equal(expected.end);
    expect(orf.forward).to.equal(expected.forward);
    expect(orf.frame).to.equal(expected.frame);
    expect(orf.isOrf).to.equal(true);
    expect(orf.internalStartCodonIndices).to.deep.equal(
      expected.internalStartCodonIndices
    );
    expect(orf.id).to.be.a("string");
  };

  // This case used to share its title with the next test even though it
  // asserts that nothing is returned; the title now says what is checked.
  it("returns no orfs in reverse direction when none reach minimumOrfSize in a long sequence", () => {
    const orfs = getOrfsFromSequence({
      sequence:
        "gattttaatcactataccaattgagatgggctagtcaatgataattactagtccttttcccgggtgatctgggtatctgtaaattctgctagacctttgctggaaaacttgtaaattctgctagaccctctgtaaattccgctagacctttgtgtgttttttttgtttatattcaagtggttataatttatagaataaagaaagaataaaaaaagataaaaagaatagatcccagccctgtgtataactcactactttagtcagttccgcagtattacaaaaggatgtcgcaaacgctgtttgctcctctacaaaacagaccttaaaaccctaaaggcttaagtagcaccctcgcaagctcgggcaaatcgctgaatattccttttgtctccgaccatcaggcacctgagtcgctgtctttttcgtgacattcagttcgctgcgctcacggctctggcagtgaatgggggtaaatggcactacaggcgccttttatggattcatgcaaggaaactacccataatacaagaaaagcccgtcacgggcttctcagggcgttttatggcgggtctgctatgtggtgctatctgactttttgctgttcagcagttcctgccctctgattttccagtctgaccacttcggattatcccgtgacaggtcattcagactggctaatgcacccagtaaggcagcggtatcatcaacaggcttacccgtcttactgtccctagtgcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcaacgtctcattttcgccagatatcgacgtcttatgacaacttgacggctacatcattcactttttcttcacaaccggcacggaactcgctcgggctggccccggtgcattttttaaatacccgcgagaaatagagttgatcgtcaaaaccaacattgcgaccgacggtggcgataggcatccgggtggtgctcaaaagcagcttcgcctggctgatacgttggtcctcgcgccagcttaagacgctaatccctaactgctggcggaaaagatgtgacagacgcgacggcgacaagcaaacatgctgtgcgacgctggcgatatcaaaattgctgtctgccaggtgatcgctgatgtactgacaagcctcgcgtacccga" +
        "ttatccatcggtggatggagcgactcgttaatcgcttccatgcgccgcagtaacaattgctcaagcagatttatcgccagcagctccgaatagcgcccttccccttgcccggcgttaatgatttgcccaaacaggtcgctgaaatgcggctggtgcgcttcatccgggcgaaagaaccccgtattggcaaatattgacggccagttaagccattcatgccagtaggcgcgcggacgaaagtaaacccactggtgataccattcgcgagcctccggatgacgaccgtagtgatgaatctctcctggcgggaacagcaaaatatcacccggtcggcaaacaaattctcgtccctgatttttcaccaccccctgaccgcgaatggtgagattgagaatataacctttcattcccagcggtcggtcgataaaaaaatcgagataaccgttggcctcaatcggcgttaaacccgccaccagatgggcattaaacgagtatcccggcagcaggggatcattttgcgcttcagccatacttttcatactcccgccattcagagaagaaaccaattgtccatattgcatcagacattgccgtcactgcgtcttttactggctcttctcgctaaccaaaccggtaaccccgcttattaaaagcattctgtaacaaagcgggaccaaagccatgacaaaaacgcgtaacaaaagtgtctataatcacggcagaaaagtccacattgattatttgcacggcgtcacactttgctatgccatagcatttttatccataagattagcggattctacctgacgctttttatcgcaactctctactgtttctccatacccgtttttttgggaatttttaagaaggagatatacatatgagtaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgatgttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgcaacatacggaaaacttacccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactactttctcttatggtgttcaatgcttttcccgttatccggatcatatgaaacggcatgactttttcaagagtgccatgcccgaaggttatgtacaggaacgcactatatctttcaaagatgacgggaactacaagacgcgtgctgaagtcaagtttgaaggtgatacccttgttaatcgtatcgagttaaaaggtattgattttaaagaagatggaaacat",
      minimumOrfSize: 3280,
      forward: false,
      circular: false
    });
    expect(orfs).to.be.length(0);
  });

  it("finds correct orfs in reverse direction in slightly more complex sequence", () => {
    // E S S
    //rrrttarrrcatrrrcatr
    //fatgfffatgffftaafff
    //0123456789012345678
    // S S E
    //
    //E S S
    //ttarrrcatcat
    //atgatgffftaa
    //0123456789012345
    //S S E
    const orfs = getOrfsFromSequence({
      sequence: "ttarrrcatcat",
      minimumOrfSize: 0,
      forward: false,
      circular: false
    });
    expect(orfs).to.be.length(1);
    expectOrf(orfs[0], {
      start: 0,
      end: 11,
      forward: false,
      frame: 0,
      internalStartCodonIndices: [8]
    });
  });

  it("finds correct orfs in reverse direction in simple sequence", () => {
    const orfs = getOrfsFromSequence({
      sequence: "ttacat",
      minimumOrfSize: 0,
      forward: false,
      circular: false
    });
    expect(orfs).to.be.length(1);
    expectOrf(orfs[0], {
      start: 0,
      end: 5,
      forward: false,
      frame: 0,
      internalStartCodonIndices: []
    });
  });

  it("finds correct orfs in slightly more complex sequence", () => {
    const orfs = getOrfsFromSequence({
      sequence: "atgatgffftaa",
      minimumOrfSize: 0,
      forward: true,
      circular: false
    });
    expect(orfs).to.be.length(1);
    expectOrf(orfs[0], {
      start: 0,
      end: 11,
      forward: true,
      frame: 0,
      internalStartCodonIndices: [3]
    });
  });

  it("finds correct orfs in simple sequence", () => {
    const orfs = getOrfsFromSequence({
      sequence: "atgtaa",
      minimumOrfSize: 0,
      forward: true,
      circular: false
    });
    expect(orfs).to.be.length(1);
    expectOrf(orfs[0], {
      start: 0,
      end: 5,
      forward: true,
      frame: 0,
      internalStartCodonIndices: []
    });
  });

  it("it will find additional orfs if useAdditionalOrfStartCodons is set to true in simple sequence", () => {
    // both alternative start codons are exercised with the same expectations
    ["ctgtaa", "gtgtaa"].forEach(sequence => {
      const orfs = getOrfsFromSequence({
        sequence,
        minimumOrfSize: 0,
        forward: true,
        circular: false,
        useAdditionalOrfStartCodons: true
      });
      expect(orfs).to.be.length(1);
      expectOrf(orfs[0], {
        start: 0,
        end: 5,
        forward: true,
        frame: 0,
        internalStartCodonIndices: []
      });
    });
  });

  it("finds correct orfs in simple sequence with different capitalizations", () => {
    const orfs = getOrfsFromSequence({
      sequence: "ATGTAA",
      minimumOrfSize: 0,
      forward: true,
      circular: false
    });
    expect(orfs).to.be.length(1);
    expectOrf(orfs[0], {
      start: 0,
      end: 5,
      forward: true,
      frame: 0,
      internalStartCodonIndices: []
    });
  });

  it("finds a single correct orf in simple circular sequence", () => {
    const orfs = getOrfsFromSequence({
      sequence: "tgtaaa",
      minimumOrfSize: 0,
      forward: true,
      circular: true
    });
    expect(orfs).to.be.length(1);
    expectOrf(orfs[0], {
      start: 5,
      end: 4,
      forward: true,
      frame: 2,
      internalStartCodonIndices: []
    });
  });

  it("finds multiple internal start codons correctly for orfs that span the origin", () => {
    const orfs = getOrfsFromSequence({
      sequence: "tgATGTAAatga",
      minimumOrfSize: 0,
      forward: true,
      circular: true
    });
    expect(orfs).to.be.length(1);
    expectOrf(orfs[0], {
      start: 8,
      end: 7,
      forward: true,
      frame: 2,
      internalStartCodonIndices: [2, 11]
    });
  });

  it("doesnt find orfs in simple sequence with no orfs", () => {
    const orfs = getOrfsFromSequence({
      sequence: "gtgtaa",
      minimumOrfSize: 0,
      forward: true,
      circular: false
    });
    expect(orfs).to.be.an("array");
    expect(orfs).to.be.length(0);
  });
});
@@ -0,0 +1,30 @@
1
+ import {modulatePositionByRange} from "@teselagen/range-utils";
2
+
3
/**
 * Locates one sequence inside another by case-insensitive string equality.
 * The sequence being searched is doubled before the lookup, so a match that
 * runs off its end and continues at its beginning is also found.
 *
 * @param {string} sequenceToFind the sequence to look for
 * @param {string} sequenceToSearchIn the sequence to look in
 * @return {object|null} null when there is no match, otherwise a range object
 *   with .start (index of the first match) and .end (position of the last
 *   matched base, passed through modulatePositionByRange with the range
 *   0..sequenceToSearchIn.length - 1)
 */
export default function getOverlapBetweenTwoSequences(
  sequenceToFind,
  sequenceToSearchIn
) {
  const haystack = sequenceToSearchIn.toLowerCase();
  const needle = sequenceToFind.toLowerCase();
  const start = (haystack + haystack).indexOf(needle);
  if (start === -1) {
    return null;
  }
  const lastMatchedPosition = start + needle.length - 1;
  const searchedRange = { start: 0, end: haystack.length - 1 };
  return {
    start,
    end: modulatePositionByRange(lastMatchedPosition, searchedRange)
  };
}
@@ -0,0 +1,23 @@
1
+ import {expect} from "chai";
2
+ import getOverlapBetweenTwoSequences from "./getOverlapBetweenTwoSequences.js";
3
describe("getOverlapBetweenTwoSequences", () => {
  it("should get the range overlap given a seq and a seq to search in", () => {
    // match that lies entirely inside the searched sequence
    const insideMatch = getOverlapBetweenTwoSequences("gtt", "agttaa");
    expect(insideMatch).to.deep.equal({ start: 1, end: 3 });

    // match that starts at the last base and continues from the first
    const wrappingMatch = getOverlapBetweenTwoSequences("gtt", "ttaaag");
    expect(wrappingMatch).to.deep.equal({ start: 5, end: 1 });
  });

  it("should return null if no overlap is found", () => {
    const noMatch = getOverlapBetweenTwoSequences("gtt", "agattaa");
    expect(noMatch).to.deep.equal(null);
  });

  it("should not care about case sensitivity", () => {
    const mixedCaseMatch = getOverlapBetweenTwoSequences("gTt", "agttaa");
    expect(mixedCaseMatch).to.deep.equal({ start: 1, end: 3 });
  });
});
@@ -0,0 +1,121 @@
1
+ import getComplementSequenceString from "./getComplementSequenceString";
2
+ import {normalizePositionByRangeLength} from "@teselagen/range-utils";
3
+ import cutSequenceByRestrictionEnzyme from "./cutSequenceByRestrictionEnzyme";
4
+
5
/**
 * Builds the list of parts that can be cut out of a sequence by the given
 * restriction enzyme(s).
 *
 * Every enzyme is run through cutSequenceByRestrictionEnzyme and the resulting
 * cutsites are pooled. With no cutsites the result is empty; with exactly one
 * cutsite a single part running from that cutsite back to itself is returned;
 * otherwise every pair of cutsites yields up to two parts (one per direction).
 * A part whose start is after its end is only kept when the sequence is
 * circular.
 *
 * @param {Object} seqData object carrying `.sequence` (string) and `.circular`
 * @param {Object|Object[]} restrictionEnzymes a single enzyme or a list of
 *   enzymes; an empty list yields no parts
 * @return {Object[]} parts as produced by
 *   getPartBetweenEnzymesWithInclusiveOverhangs
 */
export default function getPossiblePartsFromSequenceAndEnzyme(
  seqData,
  restrictionEnzymes
) {
  // A real array is always a list of enzymes, even when empty. The old
  // `.length ? list : [list]` check wrapped an empty array into `[[]]` and
  // handed `[]` to the cutter as if it were an enzyme. Non-array values with a
  // truthy length are still passed through untouched, as before.
  const enzymes =
    Array.isArray(restrictionEnzymes) || restrictionEnzymes.length
      ? restrictionEnzymes
      : [restrictionEnzymes];
  const bps = seqData.sequence;
  const seqLen = bps.length;
  const circular = seqData.circular;

  let cutsites = [];
  enzymes.forEach(enzyme => {
    cutsites = cutsites.concat(
      cutSequenceByRestrictionEnzyme(bps, circular, enzyme)
    );
  });

  if (cutsites.length < 1) {
    return [];
  }
  if (cutsites.length === 1) {
    const onlyCut = cutsites[0];
    return [
      getPartBetweenEnzymesWithInclusiveOverhangs(onlyCut, onlyCut, seqLen)
    ];
  }

  const parts = [];
  pairwise(cutsites).forEach(([cut1, cut2]) => {
    const part1 = getPartBetweenEnzymesWithInclusiveOverhangs(
      cut1,
      cut2,
      seqLen
    );
    const part2 = getPartBetweenEnzymesWithInclusiveOverhangs(
      cut2,
      cut1,
      seqLen
    );
    [part1, part2].forEach(part => {
      //only add origin spanning parts if the sequence is circular
      if (circular || !(part.start > part.end)) {
        parts.push(part);
      }
    });
  });
  return parts;
}
71
+
72
/**
 * Describes the part that runs from cutsite `cut1` to cutsite `cut2`.
 *
 * The part starts at whichever snip of cut1 is flagged as coming first
 * (`topSnipBeforeBottom`) and ends one position before the other snip of cut2,
 * normalized against `seqLen`. Overhang strings are assigned to the top or
 * bottom strand based on the sign of each enzyme's relative offset.
 *
 * @param {Object} cut1 cutsite that opens the part
 * @param {Object} cut2 cutsite that closes the part
 * @param {number} seqLen length handed to normalizePositionByRangeLength
 * @return {Object} part description (0- and 1-based start/end, both cutsites,
 *   their offsets and overhangs)
 */
function getPartBetweenEnzymesWithInclusiveOverhangs(cut1, cut2, seqLen) {
  const firstCutOffset = getEnzymeRelativeOffset(cut1.restrictionEnzyme);
  const secondCutOffset = getEnzymeRelativeOffset(cut2.restrictionEnzyme);

  let start;
  if (cut1.topSnipBeforeBottom) {
    start = cut1.topSnipPosition;
  } else {
    start = cut1.bottomSnipPosition;
  }

  let closingSnipPosition;
  if (cut2.topSnipBeforeBottom) {
    closingSnipPosition = cut2.bottomSnipPosition;
  } else {
    closingSnipPosition = cut2.topSnipPosition;
  }
  const end = normalizePositionByRangeLength(closingSnipPosition - 1, seqLen);

  const part = {
    start,
    start1Based: start + 1,
    end,
    end1Based: end + 1,
    firstCut: cut1,
    //the offset is always counting with 0 being at the top snip position
    firstCutOffset,
    firstCutOverhang: cut1.overhangBps,
    firstCutOverhangTop: "",
    firstCutOverhangBottom: "",
    secondCut: cut2,
    //the offset is always counting with 0 being at the top snip position
    secondCutOffset,
    secondCutOverhang: cut2.overhangBps,
    secondCutOverhangTop: "",
    secondCutOverhangBottom: ""
  };

  // opening cut: positive offset -> top strand, negative -> bottom strand
  if (firstCutOffset > 0) {
    part.firstCutOverhangTop = cut1.overhangBps;
  }
  if (firstCutOffset < 0) {
    part.firstCutOverhangBottom = getComplementSequenceString(cut1.overhangBps);
  }
  // closing cut: the strand assignment is mirrored
  if (secondCutOffset < 0) {
    part.secondCutOverhangTop = cut2.overhangBps;
  }
  if (secondCutOffset > 0) {
    part.secondCutOverhangBottom = getComplementSequenceString(
      cut2.overhangBps
    );
  }
  return part;
}
105
+
106
/**
 * Returns how far an enzyme's bottom snip sits from its top snip
 * (bottomSnipOffset minus topSnipOffset), i.e. measured with the top snip
 * position counting as 0.
 *
 * @param {Object} enzyme object carrying numeric `topSnipOffset` and
 *   `bottomSnipOffset`
 * @return {number} positive, negative or zero offset
 */
function getEnzymeRelativeOffset(enzyme) {
  const bottom = enzyme.bottomSnipOffset;
  const top = enzyme.topSnipOffset;
  return bottom - top;
}
110
+
111
/**
 * Lists every unordered pair of elements from `list`, each as a two-element
 * array `[earlier, later]`. Pairs are ordered by the position of their first
 * element, then by the position of their second.
 *
 * @param {Array} list items to pair up
 * @return {Array[]} all pairs; empty when the list has fewer than two items
 */
function pairwise(list) {
  const pairs = [];
  for (let i = 0; i < list.length - 1; i++) {
    for (let j = i + 1; j < list.length; j++) {
      pairs.push([list[i], list[j]]);
    }
  }
  return pairs;
}