@teselagen/sequence-utils 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/index.js +12030 -26126
  2. package/index.mjs +12119 -26124
  3. package/index.umd.js +24056 -38154
  4. package/package.json +2 -2
  5. package/src/DNAComplementMap.js +32 -0
  6. package/src/addGapsToSeqReads.js +417 -0
  7. package/src/addGapsToSeqReads.test.js +358 -0
  8. package/src/adjustAnnotationsToInsert.js +19 -0
  9. package/src/adjustBpsToReplaceOrInsert.js +50 -0
  10. package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
  11. package/src/aliasedEnzymesByName.js +7363 -0
  12. package/src/aminoAcidToDegenerateDnaMap.js +32 -0
  13. package/src/aminoAcidToDegenerateRnaMap.js +32 -0
  14. package/src/aminoAcidToDnaRna.test.js +27 -0
  15. package/src/annotateSingleSeq.js +29 -0
  16. package/src/annotateSingleSeq.test.js +64 -0
  17. package/src/annotationTypes.js +23 -0
  18. package/src/autoAnnotate.js +242 -0
  19. package/src/autoAnnotate.test.js +1039 -0
  20. package/src/bioData.js +431 -0
  21. package/src/calculateNebTa.js +34 -0
  22. package/src/calculateNebTa.test.js +57 -0
  23. package/src/calculateNebTm.js +127 -0
  24. package/src/calculateNebTm.test.js +32 -0
  25. package/src/calculatePercentGC.js +3 -0
  26. package/src/calculatePercentGC.test.js +14 -0
  27. package/src/calculateTm.js +297 -0
  28. package/src/calculateTm.test.js +7 -0
  29. package/src/computeDigestFragments.js +179 -0
  30. package/src/computeDigestFragments.test.js +73 -0
  31. package/src/condensePairwiseAlignmentDifferences.js +85 -0
  32. package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
  33. package/src/convertAACaretPositionOrRangeToDna.js +24 -0
  34. package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
  35. package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
  36. package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
  37. package/src/cutSequenceByRestrictionEnzyme.js +301 -0
  38. package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
  39. package/src/defaultEnzymesByName.js +278 -0
  40. package/src/degenerateDnaToAminoAcidMap.js +5 -0
  41. package/src/degenerateRnaToAminoAcidMap.js +5 -0
  42. package/src/deleteSequenceDataAtRange.js +5 -0
  43. package/src/deleteSequenceDataAtRange.test.js +146 -0
  44. package/src/diffUtils.js +64 -0
  45. package/src/diffUtils.test.js +74 -0
  46. package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
  47. package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
  48. package/src/featureTypesAndColors.js +152 -0
  49. package/src/featureTypesAndColors.test.js +52 -0
  50. package/src/filterAminoAcidSequenceString.js +13 -0
  51. package/src/filterAminoAcidSequenceString.test.js +22 -0
  52. package/src/filterSequenceString.js +22 -0
  53. package/src/filterSequenceString.test.js +13 -0
  54. package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
  55. package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
  56. package/src/findOrfsInPlasmid.js +26 -0
  57. package/src/findSequenceMatches.js +133 -0
  58. package/src/findSequenceMatches.test.js +286 -0
  59. package/src/generateAnnotations.js +34 -0
  60. package/src/generateSequenceData.js +206 -0
  61. package/src/generateSequenceData.test.js +22 -0
  62. package/src/getAllInsertionsInSeqReads.js +83 -0
  63. package/src/getAllInsertionsInSeqReads.test.js +26 -0
  64. package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
  65. package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
  66. package/src/getAminoAcidFromSequenceTriplet.js +22 -0
  67. package/src/getAminoAcidStringFromSequenceString.js +18 -0
  68. package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
  69. package/src/getCodonRangeForAASliver.js +63 -0
  70. package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
  71. package/src/getComplementSequenceAndAnnotations.js +20 -0
  72. package/src/getComplementSequenceString.js +19 -0
  73. package/src/getComplementSequenceString.test.js +13 -0
  74. package/src/getCutsiteType.js +10 -0
  75. package/src/getCutsitesFromSequence.js +17 -0
  76. package/src/getDegenerateDnaStringFromAAString.js +8 -0
  77. package/src/getDegenerateRnaStringFromAAString.js +8 -0
  78. package/src/getDigestFragmentsForCutsites.js +105 -0
  79. package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
  80. package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
  81. package/src/getInsertBetweenVals.js +28 -0
  82. package/src/getInsertBetweenVals.test.js +33 -0
  83. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
  84. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
  85. package/src/getMassOfAaString.js +24 -0
  86. package/src/getMassofAaString.test.js +18 -0
  87. package/src/getOrfsFromSequence.js +124 -0
  88. package/src/getOrfsFromSequence.test.js +210 -0
  89. package/src/getOverlapBetweenTwoSequences.js +30 -0
  90. package/src/getOverlapBetweenTwoSequences.test.js +23 -0
  91. package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
  92. package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
  93. package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
  94. package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
  95. package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
  96. package/src/getReverseComplementAnnotation.js +23 -0
  97. package/src/getReverseComplementAnnotation.test.js +44 -0
  98. package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
  99. package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
  100. package/src/getReverseComplementSequenceString.js +17 -0
  101. package/src/getReverseComplementSequenceString.test.js +11 -0
  102. package/src/getReverseSequenceString.js +12 -0
  103. package/src/getReverseSequenceString.test.js +9 -0
  104. package/src/getSequenceDataBetweenRange.js +131 -0
  105. package/src/getSequenceDataBetweenRange.test.js +474 -0
  106. package/src/getVirtualDigest.js +125 -0
  107. package/src/getVirtualDigest.test.js +134 -0
  108. package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
  109. package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
  110. package/src/index.js +106 -0
  111. package/src/index.test.js +38 -0
  112. package/src/insertGapsIntoRefSeq.js +38 -0
  113. package/src/insertGapsIntoRefSeq.test.js +20 -0
  114. package/src/insertSequenceDataAtPosition.js +2 -0
  115. package/src/insertSequenceDataAtPosition.test.js +75 -0
  116. package/src/insertSequenceDataAtPositionOrRange.js +249 -0
  117. package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
  118. package/src/isEnzymeType2S.js +3 -0
  119. package/src/mapAnnotationsToRows.js +174 -0
  120. package/src/mapAnnotationsToRows.test.js +425 -0
  121. package/src/prepareCircularViewData.js +17 -0
  122. package/src/prepareCircularViewData.test.js +196 -0
  123. package/src/prepareRowData.js +41 -0
  124. package/src/prepareRowData.test.js +36 -0
  125. package/src/prepareRowData_output1.json +391 -0
  126. package/src/proteinAlphabet.js +257 -0
  127. package/src/rotateBpsToPosition.js +13 -0
  128. package/src/rotateBpsToPosition.test.js +6 -0
  129. package/src/rotateSequenceDataToPosition.js +48 -0
  130. package/src/rotateSequenceDataToPosition.test.js +71 -0
  131. package/src/shiftAnnotationsByLen.js +17 -0
  132. package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
  133. package/src/tidyUpAnnotation.js +182 -0
  134. package/src/tidyUpSequenceData.js +169 -0
  135. package/src/tidyUpSequenceData.test.js +332 -0
@@ -0,0 +1,31 @@
1
+ import assert from "assert";
2
+ import findNearestRangeOfSequenceOverlapToPosition from "./findNearestRangeOfSequenceOverlapToPosition";
3
describe("findNearestRangeOfSequenceOverlapToPosition", () => {
  // Fixture shared by the first two cases.
  const linearSequence =
    "gagagtagagatagagtagagatagagatagagagagagccagcagacgacgagcagcctacgtcatcatagagagagaag";
  // Fixture for the origin case; the expected range there has start > end.
  const wrappingSequence =
    "agagaggagagtagagatagagtagagatagagatagagagagagccagcagacgacgagcagcctacgtcatcatagagagagaagatag";
  // The overlap searched for in every case.
  const overlap = "atagagagag";

  it("should find the nearest overlap range to the given position", () => {
    const nearest = findNearestRangeOfSequenceOverlapToPosition(
      linearSequence,
      overlap,
      17
    );
    assert.equal(nearest.start, 27);
    assert.equal(nearest.end, 36);
  });

  it("should find the nearest overlap range to the given position at the end of the sequence", () => {
    const nearest = findNearestRangeOfSequenceOverlapToPosition(
      linearSequence,
      overlap,
      0
    );
    assert.equal(nearest.start, 68);
    assert.equal(nearest.end, 77);
  });

  it("should find the nearest overlap range even when that range overlaps the origin", () => {
    const nearest = findNearestRangeOfSequenceOverlapToPosition(
      wrappingSequence,
      overlap,
      0
    );
    assert.equal(nearest.start, 87);
    assert.equal(nearest.end, 5);
  });
});
@@ -0,0 +1,26 @@
1
+ import getOrfsFromSequence from "./getOrfsFromSequence.js";
2
+
3
/**
 * Collects open reading frames from both strands of a sequence.
 *
 * Calls getOrfsFromSequence once with `forward: true` and once with
 * `forward: false`, passing the remaining arguments through unchanged, and
 * returns the forward results followed by the reverse results.
 *
 * @param {string} sequence - sequence handed to getOrfsFromSequence
 * @param {boolean} circular - forwarded as the `circular` option
 * @param {number} minimumOrfSize - forwarded as the `minimumOrfSize` option
 * @param {boolean} useAdditionalOrfStartCodons - forwarded as-is
 * @returns {Array} forward-strand ORFs concatenated with reverse-strand ORFs
 */
export default function findOrfsInPlasmid(
  sequence,
  circular,
  minimumOrfSize,
  useAdditionalOrfStartCodons
) {
  // NOTE (from the original author, tnr): the ORFs should be pared down
  // immediately after they are returned from getOrfsFromSequence.
  const [forwardOrfs, reverseOrfs] = [true, false].map(forward =>
    getOrfsFromSequence({
      sequence,
      minimumOrfSize,
      forward,
      circular,
      useAdditionalOrfStartCodons
    })
  );
  return forwardOrfs.concat(reverseOrfs);
}
@@ -0,0 +1,133 @@
1
+ import {modulateRangeBySequenceLength, flipContainedRange} from "@teselagen/range-utils";
2
+ import {reduce, uniqBy} from "lodash";
3
+ import escapeStringRegexp from "escape-string-regexp";
4
+ import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
5
+ import {ambiguous_dna_values, extended_protein_values} from "./bioData";
6
+ import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
7
+
8
/**
 * Finds every range of `sequence` that matches `searchString`.
 *
 * The top strand is always searched. When `options.searchReverseStrand` is
 * truthy the reverse complement is searched as well; those hits are flipped
 * back with flipContainedRange and tagged `bottomStrand: true`.
 *
 * @param {string} sequence - sequence to search
 * @param {string} searchString - text to look for
 * @param {object} [options] - passed through to the per-strand search; this
 *   function itself only reads `searchReverseStrand`
 * @returns {Array<object>} top-strand matches followed by any bottom-strand
 *   matches
 */
export default function findSequenceMatches(
  sequence,
  searchString,
  options = {}
) {
  const topStrandMatches = findSequenceMatchesTopStrand(
    sequence,
    searchString,
    options
  );
  if (!options.searchReverseStrand) {
    return topStrandMatches;
  }

  const sequenceLength = sequence.length;
  const reverseComplement = getReverseComplementSequenceString(sequence);
  const bottomStrandMatches = findSequenceMatchesTopStrand(
    reverseComplement,
    searchString,
    options
  ).map(reverseRange => {
    // Flip each hit within a container spanning the whole sequence.
    const flipped = flipContainedRange(
      reverseRange,
      { start: 0, end: sequenceLength - 1 },
      sequenceLength
    );
    return { ...flipped, bottomStrand: true };
  });

  return [...topStrandMatches, ...bottomStrandMatches];
}
38
+
39
/**
 * Finds every (possibly overlapping) occurrence of `searchString` on a single
 * strand of `sequence`.
 *
 * The search string is regex-escaped, optionally expanded through the
 * ambiguity tables, and run case-insensitively. For circular sequences the
 * sequence is doubled before searching and every hit is passed through
 * modulateRangeBySequenceLength with the original length (presumably wrapping
 * hits that run past the end back onto the origin - confirm against
 * @teselagen/range-utils). For protein searches the sequence is translated
 * in three frames and hits are converted from amino-acid to base-pair
 * coordinates.
 *
 * @param {string} sequence - sequence to search
 * @param {string} searchString - literal (or ambiguous) text to look for
 * @param {object} [options]
 * @param {boolean} [options.isCircular] - search across the origin
 * @param {boolean} [options.isAmbiguous] - expand ambiguity codes
 * @param {boolean} [options.isProteinSequence] - `sequence` is already protein
 * @param {boolean} [options.isProteinSearch] - translate `sequence` before
 *   searching
 * @returns {Array<{start: number, end: number}>} unique matching ranges
 */
function findSequenceMatchesTopStrand(sequence, searchString, options = {}) {
  const {
    isCircular,
    isAmbiguous,
    isProteinSequence,
    isProteinSearch
  } = options;

  let searchStringToUse = escapeStringRegexp(searchString);
  if (isAmbiguous) {
    // Protein alphabets use the extended protein table, DNA the ambiguous
    // DNA table.
    searchStringToUse =
      isProteinSearch || isProteinSequence
        ? convertAmbiguousStringToRegex(searchStringToUse, true)
        : convertAmbiguousStringToRegex(searchStringToUse);
  }
  //short circuit if nothing is actually being searched for (eg searching for "%%")
  if (!searchStringToUse) return [];

  // Doubling the sequence lets a plain regex find hits that span the origin.
  const sequenceToUse = isCircular ? sequence + sequence : sequence;

  let sequencesToCheck = [{ seqToCheck: sequenceToUse, offset: 0 }];
  if (isProteinSearch) {
    // Translate all three reading frames; `offset` is the frame's first base.
    sequencesToCheck = [0, 1, 2].map(offset => ({
      seqToCheck: getAminoAcidStringFromSequenceString(
        sequenceToUse.slice(offset)
      ),
      offset
    }));
  }

  const ranges = [];
  sequencesToCheck.forEach(({ seqToCheck, offset }) => {
    // A fresh regex per frame keeps the stateful `lastIndex` isolated.
    const reg = new RegExp(searchStringToUse, "ig");
    let match;
    /* eslint-disable no-cond-assign*/
    while ((match = reg.exec(seqToCheck)) !== null) {
      // Use the length of the text that actually matched. The ambiguous
      // conversion drops characters it has no table entry for, so
      // searchString.length can exceed the real match length and would
      // produce a range that is too long.
      const range = {
        start: match.index,
        end: match.index + match[0].length - 1
      };
      if (isProteinSearch) {
        // amino-acid index -> base-pair index within the current frame
        range.start = range.start * 3 + offset;
        range.end = range.end * 3 + 2 + offset;
      }
      ranges.push(modulateRangeBySequenceLength(range, sequence.length));
      // Step forward by one so overlapping matches are found too.
      reg.lastIndex = match.index + 1;
    }
    /* eslint-enable no-cond-assign*/
  });

  // The same range can be found more than once (e.g. in both copies of a
  // doubled circular sequence); keep one entry per start/end pair.
  return uniqBy(ranges, e => {
    return e.start + "-" + e.end;
  });
}
112
+
113
/**
 * Builds a regex source string from a search string containing ambiguity
 * codes (e.g. N = A or T or C or G, R = A or G).
 *
 * Each character is upper-cased and looked up in `extended_protein_values`
 * (when `isProtein` is truthy) or `ambiguous_dna_values`. Characters with no
 * entry are skipped. A one-character expansion is appended as-is; a longer
 * expansion is wrapped in `[...]` so it matches any one of its members.
 *
 * @param {string} string - search string to convert
 * @param {boolean} [isProtein] - use the protein table instead of the DNA one
 * @returns {string} regex source
 */
function convertAmbiguousStringToRegex(string, isProtein) {
  const lookupTable = isProtein ? extended_protein_values : ambiguous_dna_values;
  return reduce(
    string,
    (pattern, symbol) => {
      const expansion = lookupTable[symbol.toUpperCase()];
      if (!expansion) {
        return pattern;
      }
      const piece = expansion.length === 1 ? expansion : `[${expansion}]`;
      return pattern + piece;
    },
    ""
  );
}
@@ -0,0 +1,286 @@
1
+ import findSequenceMatches from "./findSequenceMatches";
2
+
3
+ describe("findSequenceMatches", () => {
4
+ it("ambiguous protein sequence with * as stop codon", () => {
5
+ expect(
6
+ findSequenceMatches("mmhlrl*", "Mxxlrl*", {
7
+ isAmbiguous: true,
8
+ isProteinSequence: true /* isProteinSearch: true */
9
+ })
10
+ ).toEqual([
11
+ {
12
+ start: 0,
13
+ end: 6
14
+ }
15
+ ]);
16
+ expect(
17
+ findSequenceMatches("mmhlrl*", "mx", {
18
+ isAmbiguous: true,
19
+ isProteinSequence: true /* isProteinSearch: true */
20
+ })
21
+ ).toEqual([
22
+ {
23
+ start: 0,
24
+ end: 1
25
+ },
26
+ {
27
+ start: 1,
28
+ end: 2
29
+ }
30
+ ]);
31
+ });
32
+ it("protein sequence with * as stop codon", () => {
33
+ expect(
34
+ findSequenceMatches("mmhlrl*", "mMh", {
35
+ isProteinSequence: true /* isProteinSearch: true */
36
+ })
37
+ ).toEqual([
38
+ {
39
+ start: 0,
40
+ end: 2
41
+ }
42
+ ]);
43
+ expect(
44
+ findSequenceMatches("mmhlrl*", "Mmhlrl*", {
45
+ isProteinSequence: true /* isProteinSearch: true */
46
+ })
47
+ ).toEqual([
48
+ {
49
+ start: 0,
50
+ end: 6
51
+ }
52
+ ]);
53
+ });
54
+ it("returns an empty array when nothing matches", () => {
55
+ expect([]).toEqual(findSequenceMatches("atg", "xtag"));
56
+ });
57
+ it("handles various weird characters", () => {
58
+ expect([]).toEqual(findSequenceMatches("atg", " . xt ** ag $#@@!"));
59
+ });
60
+ it("returns matches for non-circular, non-ambiguous, dna searches", () => {
61
+ expect([
62
+ {
63
+ start: 1,
64
+ end: 1
65
+ }
66
+ ]).toEqual(findSequenceMatches("atg", "t"));
67
+ expect([
68
+ {
69
+ start: 2,
70
+ end: 3
71
+ },
72
+ {
73
+ start: 3,
74
+ end: 4
75
+ },
76
+ {
77
+ start: 7,
78
+ end: 8
79
+ }
80
+ ]).toEqual(findSequenceMatches("atgggaagg", "gg"));
81
+ //atgggaagg
82
+ //012345678
83
+ });
84
+ it("returns matches for circular, non-ambiguous, dna searches", () => {
85
+ const matches = findSequenceMatches("atg", "ga", { isCircular: true });
86
+ expect(matches).toEqual([
87
+ {
88
+ start: 2,
89
+ end: 0
90
+ }
91
+ ]);
92
+ });
93
+ it("returns matches for circular, non-ambiguous, dna searches on bottom strand that cross origin", () => {
94
+ const matches = findSequenceMatches("atga", "ttc", {
95
+ isCircular: true,
96
+ searchReverseStrand: true
97
+ });
98
+ expect(matches).toEqual([
99
+ {
100
+ bottomStrand: true,
101
+ start: 2,
102
+ end: 0
103
+ }
104
+ ]);
105
+ });
106
+ it("returns matches for a long circular, non-ambiguous, dna searches", () => {
107
+ const matches = findSequenceMatches(
108
+ "gacgtcttatgacaacttgacggctacatcattcactttttcttcacaaccggcacggaactcgctcgggctggccccggtgcattttttaaatacccgcgagaaatagagttgatcgtcaaaaccaacattgcgaccgacggtggcgataggcatccgggtggtgctcaaaagcagcttcgcctggctgatacgttggtcctcgcgccagcttaagacgctaatccctaactgctggcggaaaagatgtgacagacgcgacggcgacaagcaaacatgctgtgcgacgctggcgatatcaaaattgctgtctgccaggtgatcgctgatgtactgacaagcctcgcgtacccgattatccatcggtggatggagcgactcgttaatcgcttccatgcgccgcagtaacaattgctcaagcagatttatcgccagcagctccgaatagcgcccttccccttgcccggcgttaatgatttgcccaaacaggtcgctgaaatgcggctggtgcgcttcatccgggcgaaagaaccccgtattggcaaatattgacggccagttaagccattcatgccagtaggcgcgcggacgaaagtaaacccactggtgataccattcgcgagcctccggatgacgaccgtagtgatgaatctctcctggcgggaacagcaaaatatcacccggtcggcaaacaaattctcgtccctgatttttcaccaccccctgaccgcgaatggtgagattgagaatataacctttcattcccagcggtcggtcgataaaaaaatcgagataaccgttggcctcaatcggcgttaaacccgccaccagatgggcattaaacgagtatcccggcagcaggggatcattttgcgcttcagccatacttttcatactcccgccattcagagaagaaaccaattgtccatattgcatcagacattgccgtcactgcgtcttttactggctcttctcgctaaccaaaccggtaaccccgcttattaaaagcattctgtaacaaagcgggaccaaagccatgacaaaaacgcgtaacaaaagtgtctataatcacggcagaaaagtccacattgattatttgcacggcgtcacactttgctatgccatagcatttttatccataagattagcggattctacctgacgctttttatcgcaactctctactgtttctccatacccgtttttttgggaatttttaagaaggagatatacatatgagtaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgatgttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgcaacatacggaaaacttacccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactactttctcttatggtgttcaatgcttttcccgttatccggatcatatgaaacggcatgactttttcaagagtgccatgcccgaaggttatgtacaggaacgcactatatctttcaaagatgacgggaactacaagacgcgtgctgaagtcaagtttgaaggtgatacccttgttaatcgtatcgagttaaaaggtattgattttaaagaagatggaaacattctcggacacaaactcgaatacaactataactcacacaatgtatacatcacggcagacaaacaaaagaatggaatcaaagctaacttcaaaattcgccacaacattgaagatggatctgttcaactagcagaccattatcaacaaaatactccaattggcgatggccctgtccttttaccagacaaccattacctgtcgacacaatctgccctttcgaaagatcccaacgaaaagcgtgaccacatggtccttcttgagtttgtaactgctgctgggattacacatggcatggatgagctcggcggcggcggcagcaaggtctacggcaaggaacagtttttgcggatgcgccag
agcatgttccccgatcgctaaatcgagtaaggatctccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatacctagggtacgggttttgctgcccgcaaacgggctgttctggtgttgctagtttgttatcagaatcgcagatccggcttcagccggtttgccggctgaaagcgctatttcttccagaattgccatgattttttccccacgggaggcgtcactggctcccgtgttgtcggcagctttgattcgataagcagcatcgcctgtttcaggctgtctatgtgtgactgttgagctgtaacaagttgtctcaggtgttcaatttcatgttctagttgctttgttttactggtttcacctgttctattaggtgttacatgctgttcatctgttacattgtcgatctgttcatggtgaacagctttgaatgcaccaaaaactcgtaaaagctctgatgtatctatcttttttacaccgttttcatctgtgcatatggacagttttccctttgatatgtaacggtgaacagttgttctacttttgtttgttagtcttgatgcttcactgatagatacaagagccataagaacctcagatccttccgtatttagccagtatgttctctagtgtggttcgttgtttttgcgtgagccatgagaacgaaccattgagatcatacttactttgcatgtcactcaaaaattttgcctcaaaactggtgagctgaatttttgcagttaaagcatcgtgtagtgtttttcttagtccgttatgtaggtaggaatctgatgtaatggttgttggtattttgtcaccattcatttttatctggttgttctcaagttcggttacgagatccatttgtctatctagttcaacttggaaaatcaacgtatcagtcgggcggcctcgcttatcaaccaccaatttcatattgctgtaagtgtttaaatctttacttattggtttcaaaacccattggttaagccttttaaactcatggtagttattttcaagcattaacatgaacttaaattcatcaaggctaatctctatatttgccttgtgagttttcttttgtgttagttcttttaataaccactcataaatcctcatagagtatttgttttcaaaagacttaacatgttccagattatattttatgaatttttttaactggaaaagataaggcaatatctcttcactaaaaactaattctaatttttcgcttgagaacttggcatagtttgtccactggaaaatctcaaagcctttaaccaaaggattcctgatttccacagttctcgtcatcagctctctggttgctttagctaatacaccataagcattttccctactgatgttcatcatctgagcgtattggttataagtgaacgataccgtccgttctttccttgtagggttttcaatcgtggggttgagtagtgccacacagcataaaattagcttggtttcatgctccgttaagtcatagcgactaatcgctagttcatttgctttgaaaacaactaattcagacatacatctcaattggtctaggtgattttaatcactataccaattgagatgggctagtcaatgataattactagtccttttcccgggtgatctgggtatctgtaaattctgctagacctttgctggaaaacttgtaaattctgctagaccctctgtaaattccgctagacctttgtgtgttttttttgtttatattcaagtggttataatttatagaataaagaaagaataaaaaaagataaaaagaatagatcccagccctgtgtataactcactactttagtcagttccgcagtattacaaaaggatgtcgcaaacgctgtttgctcctctacaaaacagaccttaaaaccctaaaggctt
aagtagcaccctcgcaagctcgggcaaatcgctgaatattccttttgtctccgaccatcaggcacctgagtcgctgtctttttcgtgacattcagttcgctgcgctcacggctctggcagtgaatgggggtaaatggcactacaggcgccttttatggattcatgcaaggaaactacccataatacaagaaaagcccgtcacgggcttctcagggcgttttatggcgggtctgctatgtggtgctatctgactttttgctgttcagcagttcctgccctctgattttccagtctgaccacttcggattatcccgtgacaggtcattcagactggctaatgcacccagtaaggcagcggtatcatcaacaggcttacccgtcttactgtccctagtgcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcaacgtctcattttcgccagatatc",
109
+ "atgagacg",
110
+ { isCircular: true, searchReverseStrand: true }
111
+ );
112
+ expect(matches).toEqual([
113
+ {
114
+ bottomStrand: true,
115
+ end: 5284,
116
+ start: 5277
117
+ }
118
+ ]);
119
+ });
120
+ it("non-ambiguous, dna searches for nothing results in empty array", () => {
121
+ const matches = findSequenceMatches("atg", "*", {});
122
+ expect(matches).toEqual([]);
123
+ });
124
+ it("ambiguous, protein searches for nothing results in empty array", () => {
125
+ const matches = findSequenceMatches("atg", "*", {
126
+ isProteinSearch: true,
127
+ isAmbiguous: true
128
+ });
129
+ expect(matches).toEqual([]);
130
+ });
131
+ it("ambiguous, dna searches for nothing results in empty array", () => {
132
+ const matches = findSequenceMatches("atg", "*", { isAmbiguous: true });
133
+ expect(matches).toEqual([]);
134
+ });
135
// The search string here is empty (not "*"), which exercises the
// "nothing to search for" short circuit in ambiguous DNA mode.
it("ambiguous, dna searches for an empty string results in empty array", () => {
  const matches = findSequenceMatches("atg", "", { isAmbiguous: true });
  expect(matches).toEqual([]);
});
139
// "atgtaa" translates to "M*", so the whole sequence (bases 0-5) matches.
it("AA search with * as stop codon matches the stop codon", () => {
  expect(
    findSequenceMatches("atgtaa", "M*", { isProteinSearch: true })
  ).toEqual([
    {
      start: 0,
      end: 5
    }
  ]);
});
149
// Only one stop codon follows the M in "atgtaaccc", so "M**" must not match.
it("AA search with two consecutive * returns no matches when the sequence has only one stop codon", () => {
  expect(
    findSequenceMatches("atgtaaccc", "M**", { isProteinSearch: true })
  ).toEqual([]);
});
154
+ it("works with ambiguous AA", () => {
155
+ expect(
156
+ findSequenceMatches("atgatg", "MX", {
157
+ isProteinSearch: true,
158
+ isAmbiguous: true
159
+ })
160
+ ).toEqual([
161
+ {
162
+ start: 0,
163
+ end: 5
164
+ }
165
+ ]);
166
+ });
167
+ it("works with ambiguous AA with * in search string", () => {
168
+ expect(
169
+ findSequenceMatches("atgtaa", "M*", {
170
+ isProteinSearch: true,
171
+ isAmbiguous: true
172
+ })
173
+ ).toEqual([
174
+ {
175
+ start: 0,
176
+ end: 5
177
+ }
178
+ ]);
179
+ });
180
+ it("returns matches for non-circular, non-ambiguous, AA searches", () => {
181
+ expect(findSequenceMatches("atg", "M", { isProteinSearch: true })).toEqual([
182
+ {
183
+ start: 0,
184
+ end: 2
185
+ }
186
+ ]);
187
+ expect(
188
+ findSequenceMatches("TTTATGAGT", "MS", { isProteinSearch: true })
189
+ ).toEqual([
190
+ {
191
+ start: 3,
192
+ end: 8
193
+ }
194
+ ]);
195
+ expect(
196
+ findSequenceMatches("TTATGAGT", "MS", { isProteinSearch: true })
197
+ ).toEqual([
198
+ {
199
+ start: 2,
200
+ end: 7
201
+ }
202
+ ]);
203
+ expect(
204
+ findSequenceMatches("TTTTATGAGT", "MS", { isProteinSearch: true })
205
+ ).toEqual([
206
+ {
207
+ start: 4,
208
+ end: 9
209
+ }
210
+ ]);
211
+
212
+ // 0 1 2
213
+ // P T R
214
+ // 012 345 678
215
+ // ATG ATG ATG
216
+ });
217
+ it("returns matches for non-circular, ambiguous, dna searches", () => {
218
+ const matches = findSequenceMatches("atg", "m", { isAmbiguous: true });
219
+ expect(matches).toEqual([
220
+ {
221
+ start: 0,
222
+ end: 0
223
+ }
224
+ ]);
225
+ expect(findSequenceMatches("atg", "n", { isAmbiguous: true })).toEqual([
226
+ {
227
+ start: 0,
228
+ end: 0
229
+ },
230
+ {
231
+ start: 1,
232
+ end: 1
233
+ },
234
+ {
235
+ start: 2,
236
+ end: 2
237
+ }
238
+ ]);
239
+ expect(
240
+ findSequenceMatches("atgcctcc", "ccnnc", { isAmbiguous: true })
241
+ ).toEqual([
242
+ {
243
+ start: 3,
244
+ end: 7
245
+ }
246
+ ]);
247
+ });
248
+ it("returns matches for both strands for non-circular, ambiguous, dna searches", () => {
249
+ const matches = findSequenceMatches("atg", "m", {
250
+ isAmbiguous: true,
251
+ searchReverseStrand: true
252
+ });
253
+ expect(matches).toEqual([
254
+ {
255
+ start: 0,
256
+ end: 0
257
+ },
258
+ { bottomStrand: true, end: 2, start: 2 },
259
+ { bottomStrand: true, end: 1, start: 1 }
260
+ ]);
261
+ expect(
262
+ findSequenceMatches("atg", "n", {
263
+ isAmbiguous: true,
264
+ searchReverseStrand: true
265
+ })
266
+ ).toEqual([
267
+ { end: 0, start: 0 },
268
+ { end: 1, start: 1 },
269
+ { end: 2, start: 2 },
270
+ { bottomStrand: true, end: 2, start: 2 },
271
+ { bottomStrand: true, end: 1, start: 1 },
272
+ { bottomStrand: true, end: 0, start: 0 }
273
+ ]);
274
+ expect(
275
+ findSequenceMatches("atgcctcc", "ccnnc", {
276
+ isAmbiguous: true,
277
+ searchReverseStrand: true
278
+ })
279
+ ).toEqual([
280
+ {
281
+ start: 3,
282
+ end: 7
283
+ }
284
+ ]);
285
+ });
286
+ });
@@ -0,0 +1,34 @@
1
+ import {generateRandomRange} from "@teselagen/range-utils";
2
+ import shortid from "shortid";
3
+
4
/**
 * Generates random annotations and returns them keyed by their id.
 *
 * @param {number} numberOfAnnotationsToGenerate - how many annotations to make
 * @param {number} start - forwarded to generateAnnotation
 * @param {number} end - forwarded to generateAnnotation
 * @param {number} maxLength - forwarded to generateAnnotation
 * @returns {object} map of annotation id to annotation
 */
function generateAnnotations(
  numberOfAnnotationsToGenerate,
  start,
  end,
  maxLength
) {
  const annotationsById = {};
  let generatedCount = 0;
  while (generatedCount < numberOfAnnotationsToGenerate) {
    const created = generateAnnotation(start, end, maxLength);
    annotationsById[created.id] = created;
    generatedCount += 1;
  }
  return annotationsById;
}
17
+
18
/**
 * Builds one random "misc_feature" annotation.
 *
 * The range fields come from generateRandomRange(start, end, maxLength). On
 * top of them the annotation gets a random numeric name (as a string), a
 * fresh shortid, a random strand direction and an empty notes object.
 *
 * @param {number} start - forwarded to generateRandomRange
 * @param {number} end - forwarded to generateRandomRange
 * @param {number} maxLength - forwarded to generateRandomRange
 * @returns {object} the generated annotation
 */
function generateAnnotation(start, end, maxLength) {
  const randomRange = generateRandomRange(start, end, maxLength);
  const name = getRandomInt(0, 100000).toString();
  const id = shortid();
  const forward = Math.random() > 0.5;
  return {
    ...randomRange,
    name,
    type: "misc_feature",
    id,
    forward,
    notes: {}
  };
}
29
+
30
/**
 * Returns a random integer in the half-open interval [min, max).
 *
 * @param {number} min - inclusive lower bound
 * @param {number} max - exclusive upper bound
 * @returns {number} min plus a random whole offset smaller than (max - min)
 */
function getRandomInt(min, max) {
  const span = max - min;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
33
+
34
+ export default generateAnnotations;