@teselagen/sequence-utils 0.1.22 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/index.js +12030 -26126
  2. package/index.mjs +12119 -26124
  3. package/index.umd.js +24056 -38154
  4. package/package.json +4 -3
  5. package/src/DNAComplementMap.js +32 -0
  6. package/src/addGapsToSeqReads.js +417 -0
  7. package/src/addGapsToSeqReads.test.js +358 -0
  8. package/src/adjustAnnotationsToInsert.js +19 -0
  9. package/src/adjustBpsToReplaceOrInsert.js +50 -0
  10. package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
  11. package/src/aliasedEnzymesByName.js +7363 -0
  12. package/src/aminoAcidToDegenerateDnaMap.js +32 -0
  13. package/src/aminoAcidToDegenerateRnaMap.js +32 -0
  14. package/src/aminoAcidToDnaRna.test.js +27 -0
  15. package/src/annotateSingleSeq.js +29 -0
  16. package/src/annotateSingleSeq.test.js +64 -0
  17. package/src/annotationTypes.js +23 -0
  18. package/src/autoAnnotate.js +242 -0
  19. package/src/autoAnnotate.test.js +1039 -0
  20. package/src/bioData.js +431 -0
  21. package/src/calculateNebTa.js +34 -0
  22. package/src/calculateNebTa.test.js +57 -0
  23. package/src/calculateNebTm.js +127 -0
  24. package/src/calculateNebTm.test.js +32 -0
  25. package/src/calculatePercentGC.js +3 -0
  26. package/src/calculatePercentGC.test.js +14 -0
  27. package/src/calculateTm.js +297 -0
  28. package/src/calculateTm.test.js +7 -0
  29. package/src/computeDigestFragments.js +179 -0
  30. package/src/computeDigestFragments.test.js +73 -0
  31. package/src/condensePairwiseAlignmentDifferences.js +85 -0
  32. package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
  33. package/src/convertAACaretPositionOrRangeToDna.js +24 -0
  34. package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
  35. package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
  36. package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
  37. package/src/cutSequenceByRestrictionEnzyme.js +301 -0
  38. package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
  39. package/src/defaultEnzymesByName.js +278 -0
  40. package/src/degenerateDnaToAminoAcidMap.js +5 -0
  41. package/src/degenerateRnaToAminoAcidMap.js +5 -0
  42. package/src/deleteSequenceDataAtRange.js +5 -0
  43. package/src/deleteSequenceDataAtRange.test.js +146 -0
  44. package/src/diffUtils.js +64 -0
  45. package/src/diffUtils.test.js +74 -0
  46. package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
  47. package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
  48. package/src/featureTypesAndColors.js +152 -0
  49. package/src/featureTypesAndColors.test.js +52 -0
  50. package/src/filterAminoAcidSequenceString.js +13 -0
  51. package/src/filterAminoAcidSequenceString.test.js +22 -0
  52. package/src/filterSequenceString.js +22 -0
  53. package/src/filterSequenceString.test.js +13 -0
  54. package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
  55. package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
  56. package/src/findOrfsInPlasmid.js +26 -0
  57. package/src/findSequenceMatches.js +133 -0
  58. package/src/findSequenceMatches.test.js +286 -0
  59. package/src/generateAnnotations.js +34 -0
  60. package/src/generateSequenceData.js +206 -0
  61. package/src/generateSequenceData.test.js +22 -0
  62. package/src/getAllInsertionsInSeqReads.js +83 -0
  63. package/src/getAllInsertionsInSeqReads.test.js +26 -0
  64. package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
  65. package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
  66. package/src/getAminoAcidFromSequenceTriplet.js +22 -0
  67. package/src/getAminoAcidStringFromSequenceString.js +18 -0
  68. package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
  69. package/src/getCodonRangeForAASliver.js +63 -0
  70. package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
  71. package/src/getComplementSequenceAndAnnotations.js +20 -0
  72. package/src/getComplementSequenceString.js +19 -0
  73. package/src/getComplementSequenceString.test.js +13 -0
  74. package/src/getCutsiteType.js +10 -0
  75. package/src/getCutsitesFromSequence.js +17 -0
  76. package/src/getDegenerateDnaStringFromAAString.js +8 -0
  77. package/src/getDegenerateRnaStringFromAAString.js +8 -0
  78. package/src/getDigestFragmentsForCutsites.js +105 -0
  79. package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
  80. package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
  81. package/src/getInsertBetweenVals.js +28 -0
  82. package/src/getInsertBetweenVals.test.js +33 -0
  83. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
  84. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
  85. package/src/getMassOfAaString.js +24 -0
  86. package/src/getMassofAaString.test.js +18 -0
  87. package/src/getOrfsFromSequence.js +124 -0
  88. package/src/getOrfsFromSequence.test.js +210 -0
  89. package/src/getOverlapBetweenTwoSequences.js +30 -0
  90. package/src/getOverlapBetweenTwoSequences.test.js +23 -0
  91. package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
  92. package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
  93. package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
  94. package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
  95. package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
  96. package/src/getReverseComplementAnnotation.js +23 -0
  97. package/src/getReverseComplementAnnotation.test.js +44 -0
  98. package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
  99. package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
  100. package/src/getReverseComplementSequenceString.js +17 -0
  101. package/src/getReverseComplementSequenceString.test.js +11 -0
  102. package/src/getReverseSequenceString.js +12 -0
  103. package/src/getReverseSequenceString.test.js +9 -0
  104. package/src/getSequenceDataBetweenRange.js +131 -0
  105. package/src/getSequenceDataBetweenRange.test.js +474 -0
  106. package/src/getVirtualDigest.js +125 -0
  107. package/src/getVirtualDigest.test.js +134 -0
  108. package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
  109. package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
  110. package/src/index.js +106 -0
  111. package/src/index.test.js +38 -0
  112. package/src/insertGapsIntoRefSeq.js +38 -0
  113. package/src/insertGapsIntoRefSeq.test.js +20 -0
  114. package/src/insertSequenceDataAtPosition.js +2 -0
  115. package/src/insertSequenceDataAtPosition.test.js +75 -0
  116. package/src/insertSequenceDataAtPositionOrRange.js +249 -0
  117. package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
  118. package/src/isEnzymeType2S.js +3 -0
  119. package/src/mapAnnotationsToRows.js +174 -0
  120. package/src/mapAnnotationsToRows.test.js +425 -0
  121. package/src/prepareCircularViewData.js +17 -0
  122. package/src/prepareCircularViewData.test.js +196 -0
  123. package/src/prepareRowData.js +41 -0
  124. package/src/prepareRowData.test.js +36 -0
  125. package/src/prepareRowData_output1.json +391 -0
  126. package/src/proteinAlphabet.js +257 -0
  127. package/src/rotateBpsToPosition.js +13 -0
  128. package/src/rotateBpsToPosition.test.js +6 -0
  129. package/src/rotateSequenceDataToPosition.js +48 -0
  130. package/src/rotateSequenceDataToPosition.test.js +71 -0
  131. package/src/shiftAnnotationsByLen.js +17 -0
  132. package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
  133. package/src/tidyUpAnnotation.js +182 -0
  134. package/src/tidyUpSequenceData.js +169 -0
  135. package/src/tidyUpSequenceData.test.js +332 -0
@@ -0,0 +1,208 @@
1
+ import chai from "chai";
2
+ import chaiSubset from "chai-subset";
3
+ import getPossiblePartsFromSequenceAndEnzymes from "./getPossiblePartsFromSequenceAndEnzymes";
4
+ import enzymeList from "./aliasedEnzymesByName";
5
+ chai.should();
6
+ chai.use(chaiSubset);
7
describe("getPossiblePartsFromSequenceAndEnzymes", () => {
  // For reference, the bamhi enzyme definition:
  // "bamhi": {
  //   "name": "bamhi",
  //   "site": "ggatcc",
  //   "forwardRegex": "g{2}atc{2}",
  //   "reverseRegex": "g{2}atc{2}",
  //   "topSnipOffset": 1,
  //   "bottomSnipOffset": 5,
  //   "usForward": 0,
  //   "usReverse": 0
  // },

  // Extra cut-site fields that some of the expectations below also pin down.
  const noUpstreamSnips = {
    upstreamTopBeforeBottom: false,
    upstreamTopSnip: null,
    upstreamBottomSnip: null
  };
  // Builds a cut-site expectation that additionally asserts the upstream fields.
  const withNoUpstreamSnips = cutsite =>
    Object.assign({}, cutsite, noUpstreamSnips);

  it("cuts using a single palindromic enzyme", () => {
    const plasmid = {
      sequence:
        "tggttgtagtagttagttgatgttatagggatcctgtagtatttatgtagtagtatgatgtagagtagtagtggatcctattatatata",
      circular: true
    };
    const parts = getPossiblePartsFromSequenceAndEnzymes(plasmid, [
      enzymeList["bamhi"]
    ]);

    // eslint-disable-next-line no-unused-expressions
    parts.should.be.an("array");
    parts.length.should.equal(2);

    const firstPart = parts[0];
    firstPart.start.should.equal(29);
    firstPart.end.should.equal(76);
    firstPart.firstCutOffset.should.equal(4);
    firstPart.firstCutOverhang.should.equal("gatc");
    firstPart.firstCutOverhangTop.should.equal("gatc");
    firstPart.secondCutOffset.should.equal(4);
    firstPart.secondCutOverhang.should.equal("gatc");
    firstPart.secondCutOverhangTop.should.equal("");

    const secondPart = parts[1];
    secondPart.start.should.equal(73);
    secondPart.end.should.equal(32);

    // The two cut sites expected in this sequence; each part is bounded by both.
    const cutAt28 = {
      start: 28,
      end: 33,
      topSnipPosition: 29,
      bottomSnipPosition: 33,
      topSnipBeforeBottom: true,
      overhangBps: "gatc",
      forward: true
    };
    const cutAt72 = {
      start: 72,
      end: 77,
      topSnipPosition: 73,
      bottomSnipPosition: 77,
      topSnipBeforeBottom: true,
      overhangBps: "gatc",
      forward: true
    };
    // Offsets and overhangs are expected to be the same for both parts.
    const overhangs = {
      firstCutOffset: 4,
      firstCutOverhang: "gatc",
      firstCutOverhangTop: "gatc",
      firstCutOverhangBottom: "",
      secondCutOffset: 4,
      secondCutOverhang: "gatc",
      secondCutOverhangTop: "",
      secondCutOverhangBottom: "ctag"
    };

    parts.should.containSubset([
      Object.assign(
        {
          start: 29,
          end: 76,
          start1Based: 30,
          end1Based: 77,
          firstCut: cutAt28,
          secondCut: cutAt72
        },
        overhangs
      ),
      Object.assign(
        {
          start: 73,
          end: 32,
          start1Based: 74,
          end1Based: 33,
          firstCut: withNoUpstreamSnips(cutAt72),
          secondCut: withNoUpstreamSnips(cutAt28)
        },
        overhangs
      )
    ]);
  });

  it("cuts using two golden gate enzymes", () => {
    const plasmid = {
      //          sapi ->
      sequence:
        "tggttgtagtGCTCTTCagttagttgatgttatagggatcctgtagtatttatgtagtaGGAGACCtatgatgtagggtcatcagtagtagtggatcctattatatata",
      // accaacatcacgagaagtcaatcaactacaatatccctaggacatcataaatacatcatcctctggatactacatcCCAGAGtcatcatcacctaggataatatatat
      //                                                                              <- bsai
      circular: true
    };
    const parts = getPossiblePartsFromSequenceAndEnzymes(plasmid, [
      enzymeList["sapi"],
      enzymeList["bsai"]
    ]);
    parts.length.should.equal(2);

    // Cut site expected with forward: true.
    const forwardStrandCut = withNoUpstreamSnips({
      start: 10,
      end: 20,
      topSnipPosition: 18,
      bottomSnipPosition: 21,
      topSnipBeforeBottom: true,
      overhangBps: "gtt",
      forward: true
    });
    // Cut site expected with forward: false.
    const reverseStrandCut = withNoUpstreamSnips({
      start: 55,
      end: 65,
      topSnipPosition: 55,
      bottomSnipPosition: 59,
      topSnipBeforeBottom: true,
      overhangBps: "agta",
      forward: false
    });

    parts.should.containSubset([
      {
        start: 18,
        end: 58,
        start1Based: 19,
        end1Based: 59,
        firstCut: forwardStrandCut,
        firstCutOffset: 3,
        firstCutOverhang: "gtt",
        firstCutOverhangTop: "gtt",
        firstCutOverhangBottom: "",
        secondCut: reverseStrandCut,
        secondCutOffset: 4,
        secondCutOverhang: "agta",
        secondCutOverhangTop: "",
        secondCutOverhangBottom: "tcat"
      },
      {
        start: 55,
        end: 20,
        start1Based: 56,
        end1Based: 21,
        firstCut: reverseStrandCut,
        firstCutOffset: 4,
        firstCutOverhang: "agta",
        firstCutOverhangTop: "agta",
        firstCutOverhangBottom: "",
        secondCut: forwardStrandCut,
        secondCutOffset: 3,
        secondCutOverhang: "gtt",
        secondCutOverhangTop: "",
        secondCutOverhangBottom: "caa"
      }
    ]);
  });
});
@@ -0,0 +1,20 @@
1
+ import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
2
+
3
/**
 * Builds an amino acid string from the per-base amino acid data that
 * getAminoAcidDataForEachBaseOfDna returns when called with `false` as its
 * second argument.
 *
 * Entries whose `fullCodon` flag is falsy are ignored. Every other entry
 * writes its `aminoAcid.value` at position `aminoAcidIndex`, and the
 * collected values are joined into one string.
 *
 * @param {string} sequenceString - Sequence passed through to getAminoAcidDataForEachBaseOfDna.
 * @returns {string} The joined amino acid values ("" when no entry has a full codon).
 */
export default function getReverseAminoAcidStringFromSequenceString(
  sequenceString
) {
  const residuesByIndex = [];
  getAminoAcidDataForEachBaseOfDna(sequenceString, false).forEach(baseData => {
    if (baseData.fullCodon) {
      // Several bases share one aminoAcidIndex; they simply overwrite the same slot.
      residuesByIndex[baseData.aminoAcidIndex] = baseData.aminoAcid.value;
    }
  });
  return residuesByIndex.join("");
}
@@ -0,0 +1,11 @@
1
+ import getReverseAminoAcidStringFromSequenceString from "./getReverseAminoAcidStringFromSequenceString";
2
+ import assert from "assert";
3
+
4
describe("getReverseAminoAcidStringFromSequenceString", () => {
  it("computes a aa string from dna", () => {
    // assert.strictEqual takes (actual, expected): the computed value goes first
    // so that a failure report labels the two sides correctly.
    assert.strictEqual(getReverseAminoAcidStringFromSequenceString("cat"), "M");
    assert.strictEqual(getReverseAminoAcidStringFromSequenceString("atg"), "H");
    assert.strictEqual(
      getReverseAminoAcidStringFromSequenceString("atgatg"),
      "HH"
    );
    // Fewer than three bases: no full codon, so nothing is translated.
    assert.strictEqual(getReverseAminoAcidStringFromSequenceString("at"), "");
  });
});
@@ -0,0 +1,7 @@
1
+
2
+ import getAminoAcidStringFromSequenceString from './getAminoAcidStringFromSequenceString';
3
+ import getReverseComplementSequenceString from './getReverseComplementSequenceString';
4
+
5
/**
 * Reverse complements `sequenceString` and then converts the result into an
 * amino acid string.
 *
 * @param {string} sequenceString - Sequence to reverse complement and translate.
 * @returns {*} Whatever getAminoAcidStringFromSequenceString returns for the reverse complement.
 */
export default function getReverseComplementAminoAcidStringFromSequenceString(
  sequenceString
) {
  const reverseComplement = getReverseComplementSequenceString(sequenceString);
  return getAminoAcidStringFromSequenceString(reverseComplement);
}
@@ -0,0 +1,23 @@
1
/**
 * Returns a copy of `annotation` as it would sit on the reverse complement of
 * a sequence of `sequenceLength` bases. The input annotation is not modified.
 *
 * This assumes the entire sequence (or subsequence) is being reverse
 * complemented, i.e. positions are mirrored around the whole length:
 *
 *   0123456789
 *   -feature--   normal                (start = 1, end = 7, length = 10)
 *   --erutaef-   reverse complemented  (start = 2 = 10 - (7 + 1),
 *                                       end   = 8 = 10 - (1 + 1))
 *
 * `forward` is negated, so an annotation without a `forward` value comes back
 * with `forward: true`. `strand` becomes -1 only when it was exactly 1; any
 * other value (including a missing one) becomes 1.
 *
 * @param {Object} annotation - Annotation with 0-based inclusive `start` and `end`.
 * @param {number} sequenceLength - Length of the sequence being reverse complemented.
 * @returns {Object} New annotation with mirrored `start`/`end` and flipped `forward`/`strand`.
 */
export default function getReverseComplementAnnotation(
  annotation,
  sequenceLength
) {
  const { start, end, forward, strand } = annotation;
  const mirrored = {
    // the old end becomes the new start, and vice versa
    start: sequenceLength - (end + 1),
    end: sequenceLength - (start + 1),
    forward: !forward,
    strand: strand === 1 ? -1 : 1
  };
  return Object.assign({}, annotation, mirrored);
}
@@ -0,0 +1,44 @@
1
+ import chai from "chai";
2
+ import chaiSubset from "chai-subset";
3
+ import getReverseComplementAnnotation from "./getReverseComplementAnnotation";
4
+ chai.should();
5
+ chai.use(chaiSubset);
6
describe("getReverseComplementAnnotation", () => {
  // Both cases mirror an annotation on a 10 base sequence.
  const sequenceLength = 10;

  it("reverse complements an annotation ", () => {
    //0123456789
    //---abc----  normal
    //----cba---  reverse complemented
    const annotation = { start: 3, end: 5 };
    const expected = { start: 4, end: 6, forward: true, strand: 1 };

    getReverseComplementAnnotation(
      annotation,
      sequenceLength
    ).should.deep.equal(expected);
  });

  it("reverse complements an annotation crossing origin", () => {
    //0123456789
    //cde-----ab  normal
    //ba-----edc  reverse complemented
    const annotation = { start: 8, end: 2, strand: 1 };
    const expected = { start: 7, end: 1, forward: true, strand: -1 };

    getReverseComplementAnnotation(
      annotation,
      sequenceLength
    ).should.deep.equal(expected);
  });
});
@@ -0,0 +1,38 @@
1
+ import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
2
+ import getReverseComplementAnnotation from "./getReverseComplementAnnotation";
3
+ import { annotationTypes } from "./annotationTypes";
4
+ import {map} from "lodash";
5
+ import tidyUpSequenceData from "./tidyUpSequenceData";
6
+
7
+
8
+ import getSequenceDataBetweenRange from "./getSequenceDataBetweenRange";
9
+
10
+ // ac.throw([ac.string,ac.bool],arguments);
11
/**
 * Reverse complements a sequence object together with its annotations.
 *
 * Steps, in order:
 *   1. cut the input down to `options.range` via getSequenceDataBetweenRange,
 *   2. run the result through tidyUpSequenceData,
 *   3. reverse complement the sequence string,
 *   4. for every annotation type that is present on the tidied object, map
 *      each annotation through getReverseComplementAnnotation using the
 *      tidied sequence length,
 *   5. run the assembled object through tidyUpSequenceData again.
 *
 * NOTE(review): the function name is spelled "Annoations"; it is the default
 * export, so importers are not affected by the spelling.
 *
 * @param {Object} pSeqObj - Sequence data to reverse complement.
 * @param {Object} [options={}] - Forwarded to tidyUpSequenceData; `options.range` selects the part to use.
 * @returns {Object} The value returned by the final tidyUpSequenceData call.
 */
export default function getReverseComplementSequenceAndAnnoations(
  pSeqObj,
  options = {}
) {
  const rangedSeqObj = getSequenceDataBetweenRange(pSeqObj, options.range);
  const seqObj = tidyUpSequenceData(rangedSeqObj, options);

  const flippedSequence = getReverseComplementSequenceString(seqObj.sequence);
  const seqLength = seqObj.sequence.length;

  // Only annotation types that exist on the tidied object get an entry.
  const flippedAnnotations = {};
  annotationTypes.forEach(type => {
    if (!seqObj[type]) {
      return;
    }
    flippedAnnotations[type] = map(seqObj[type], annotation =>
      getReverseComplementAnnotation(annotation, seqLength)
    );
  });

  const newSeqObj = Object.assign(
    {},
    seqObj,
    { sequence: flippedSequence },
    flippedAnnotations
  );
  return tidyUpSequenceData(newSeqObj, options);
}
@@ -0,0 +1,105 @@
1
+ import chai from "chai";
2
+ import chaiSubset from "chai-subset";
3
+ import getReverseComplementSequenceAndAnnotations from "./getReverseComplementSequenceAndAnnotations";
4
+ chai.should();
5
+ chai.use(chaiSubset);
6
describe("getReverseComplementSequenceAndAnnotations", () => {
  // Whole-sequence case: no range option, one plain feature and one feature
  // that wraps around the origin of the circular sequence.
  it("reverse complements an annotation ", () => {
    const newSeq = getReverseComplementSequenceAndAnnotations({
      sequence: "aaatttcccg",
      circular: true,
      features: [
        { start: 3, end: 5 },
        { start: 8, end: 2 }
      ]
    });
    newSeq.should.containSubset({
      sequence: "cgggaaattt",
      features: [
        { start: 4, end: 6, forward: true },
        { start: 7, end: 1, forward: true }
      ]
    });
  });

  // Range 0-9 keeps the first 10 bases; the feature lies fully inside it.
  it("handles a range option correctly and reverse complements a subset of the sequence ", () => {
    const newSeq = getReverseComplementSequenceAndAnnotations(
      {
        sequence: "aaatttcccgttt",
        circular: true,
        features: [{ start: 3, end: 5 }]
      },
      { range: { start: 0, end: 9 } }
    );
    newSeq.should.containSubset({
      sequence: "cgggaaattt",
      features: [{ start: 4, end: 6, forward: true }]
    });
  });

  // Range 6-2 wraps around the origin; only bases 6-7 of the feature fall in it.
  it("handles a range option correctly and reverse complements a subset of the sequence across the origin ", () => {
    const newSeq = getReverseComplementSequenceAndAnnotations(
      {
        sequence: "aaatttcccgttt",
        //         0123456789012
        //         rrr   rrrrrrr   <- range
        //            fffff        <- feature
        circular: true,
        features: [{ start: 3, end: 7 }]
      },
      { range: { start: 6, end: 2 } }
    );
    newSeq.should.containSubset({
      sequence: "tttaaacggg",
      features: [{ start: 8, end: 9, forward: true }]
    });
  });

  // Same kind of origin-spanning range, but the input has no annotations.
  // The title differs from the case above so a failure report identifies
  // which of the two cases broke.
  it("handles a range across the origin for a sequence without annotations", () => {
    const newSeq = getReverseComplementSequenceAndAnnotations(
      {
        sequence: "cccttt"
        //         012345
        //         rr  rr   <- range
      },
      { range: { start: 4, end: 1 } }
    );
    newSeq.should.containSubset({
      sequence: "ggaa"
    });
  });
});
@@ -0,0 +1,17 @@
1
+ import DNAComplementMap from "./DNAComplementMap";
2
+
3
+
4
+ // ac.throw([ac.string,ac.bool],arguments);
5
/**
 * Returns the reverse complement of `sequence`.
 *
 * The input is walked from its last character to its first and each
 * character is replaced by its DNAComplementMap entry. A character with no
 * (or a falsy) entry is copied through unchanged instead of raising an error.
 *
 * @param {string} sequence - Sequence string to reverse complement.
 * @returns {string} The reverse complemented string ("" for an empty input).
 */
export default function getReverseComplementSequenceString(sequence) {
  let reverseComplement = "";
  let position = sequence.length;
  while (position-- > 0) {
    const base = sequence[position];
    // fall back to the original character when the map has no complement for it
    reverseComplement += DNAComplementMap[base] || base;
  }
  return reverseComplement;
}
@@ -0,0 +1,11 @@
1
+ import chai from "chai";
2
+ import chaiSubset from "chai-subset";
3
+ import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
4
+ chai.should();
5
+ chai.use(chaiSubset);
6
// The suite and case titles name the function under test
// (getReverseComplementSequenceString) so that a failure report points at
// the right module.
describe("getReverseComplementSequenceString", () => {
  it("reverse complements a sequence string that contains u bases", () => {
    const newSeq = getReverseComplementSequenceString("uuuucccttt");
    newSeq.should.eq("aaagggaaaa");
  });
});
@@ -0,0 +1,12 @@
1
/**
 * Returns `sequence` with its characters in reverse order. No complementing
 * is done; every character is copied through as-is.
 *
 * @param {string} sequence - Sequence string to reverse.
 * @returns {string} The reversed string ("" for an empty input).
 */
export default function getReverseSequenceString(sequence) {
  let reverseSequenceString = "";
  // Walk from the last character to the first, appending each one.
  for (let i = sequence.length - 1; i >= 0; i--) {
    reverseSequenceString += sequence[i];
  }
  return reverseSequenceString;
}
@@ -0,0 +1,9 @@
1
+ import chai from "chai";
2
+ import getReverseSequenceString from "./getReverseSequenceString";
3
+ chai.should();
4
// The suite and case titles name the function under test
// (getReverseSequenceString) so that a failure report points at the right
// module.
describe("getReverseSequenceString", () => {
  it("reverses a sequence string without complementing it", () => {
    const newSeq = getReverseSequenceString("uuuucccttt");
    newSeq.should.eq("tttcccuuuu");
  });
});
@@ -0,0 +1,131 @@
1
+ import {flatMap, extend, forEach, startCase} from "lodash";
2
+ import {getRangeLength} from "@teselagen/range-utils";
3
+ import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
4
+ import insertSequenceDataAtPosition from "./insertSequenceDataAtPosition";
5
+ import {getSequenceWithinRange, getZeroedRangeOverlaps} from "@teselagen/range-utils";
6
+ import tidyUpSequenceData from "./tidyUpSequenceData";
7
+ import { annotationTypes } from "./annotationTypes";
8
+
9
/**
 * Returns the part of `seqData` (sequence, protein sequence and annotations)
 * that lies within `range`.
 *
 * When `range` is falsy the input object is returned untouched. Otherwise the
 * input is tidied, its `filtered<Type>` keys are dropped, and a new object is
 * assembled from the sliced sequence, the sliced protein sequence and the
 * annotations trimmed to the range. `circular` is carried over only when the
 * range length equals the full sequence length; in every other case it is
 * set to false.
 *
 * A range flagged with `overlapsSelf` is handled by inserting the tidied
 * sequence data into the sliced sequence at `range.start`, re-slicing that
 * extended data from `range.end + 1` to `range.end`, and then straightening
 * out annotations that span the whole result.
 *
 * @param {Object} seqData - Sequence data to slice.
 * @param {Object} [range] - Range to keep (`start`, `end`, optional `overlapsSelf`).
 * @param {Object} [options={}] - Forwarded to tidyUpSequenceData. `options.exclude[type]`
 *   empties that annotation type; `options.excludePartial[type]` drops annotations of
 *   that type that are only partially inside the range.
 * @returns {Object} `seqData` itself when no range is given, otherwise the tidied slice.
 */
export default function getSequenceDataBetweenRange(
  seqData,
  range,
  options = {}
) {
  if (!range) {
    return seqData;
  }

  const { exclude = {}, excludePartial = {} } = options;
  const seqDataToUse = tidyUpSequenceData(seqData, options);
  annotationTypes.forEach(type => {
    delete seqDataToUse[`filtered${startCase(type)}`];
  });

  // NOTE(review): the range length is measured against the raw seqData
  // sequence while every other length here comes from the tidied copy -
  // confirm that this is intended.
  const rangeCoversWholeSequence =
    seqDataToUse.sequence.length ===
    getRangeLength(range, seqData.sequence.length);
  const slicedFields = {
    circular: rangeCoversWholeSequence ? seqDataToUse.circular : false,
    sequence: getSequenceWithinRange(range, seqDataToUse.sequence),
    proteinSequence: getSequenceWithinRange(
      convertDnaCaretPositionOrRangeToAa(range),
      seqDataToUse.proteinSequence
    )
  };

  const slicedAnnotations = {};
  annotationTypes.forEach(type => {
    // excluded types are emptied without looking at their annotations
    slicedAnnotations[type] = exclude[type]
      ? []
      : getAnnotationsBetweenRange(
          seqDataToUse[type],
          range,
          seqDataToUse.sequence.length,
          excludePartial[type]
        );
  });

  const seqDataToReturn = extend(
    {},
    seqDataToUse,
    slicedFields,
    slicedAnnotations
  );

  if (!range.overlapsSelf) {
    return tidyUpSequenceData(seqDataToReturn, options);
  }

  const extendedSeqData = insertSequenceDataAtPosition(
    { sequence: seqDataToReturn.sequence },
    seqDataToUse,
    range.start
  );
  const wrappedRange = { start: range.end + 1, end: range.end };
  const toRet = getSequenceDataBetweenRange(
    extendedSeqData,
    wrappedRange,
    options
  );

  // Annotations flagged overlapsSelf that start at 0 and are exactly as long
  // as the requested range match it completely, so they no longer need to
  // overlap themselves.
  annotationTypes.forEach(type => {
    forEach(toRet[type], ann => {
      const matchesRangeCompletely =
        ann.overlapsSelf &&
        ann.start === 0 &&
        getRangeLength(ann, seqDataToUse.sequence.length) ===
          getRangeLength(range, seqDataToUse.sequence.length);
      if (matchesRangeCompletely) {
        ann.overlapsSelf = false;
        ann.end = toRet.sequence.length - 1;
      }
    });
  });
  return tidyUpSequenceData(toRet, options);
}
83
+
84
/**
 * Trims a list of annotations down to the pieces that overlap `range`.
 *
 * Each annotation is replaced by one copy per overlap reported by
 * getZeroedRangeOverlaps, with the overlap's fields laid over the copy. An
 * annotation with `locations` has those trimmed the same way first, and the
 * resulting copies take their `start`/`end` from the first/last remaining
 * location; a single remaining location is dropped in favour of the plain
 * start/end.
 *
 * NOTE: the trimmed locations are written back onto the annotation object
 * that was passed in, not just onto the returned copies.
 *
 * @param {Array} annotationsToBeAdjusted - Annotations (or locations) to trim.
 * @param {Object} range - Range the annotations are trimmed to.
 * @param {number} maxLength - Sequence length the ranges are measured against.
 * @param {boolean} [shouldExcludePartial] - When truthy, annotations that are not
 *   fully contained in the range are dropped instead of being trimmed.
 * @returns {Array} The trimmed annotation copies.
 */
function getAnnotationsBetweenRange(
  annotationsToBeAdjusted,
  range,
  maxLength,
  shouldExcludePartial
) {
  const trimmed = flatMap(annotationsToBeAdjusted, annotation => {
    const hasLocations = annotation.locations && annotation.locations.length;
    if (hasLocations) {
      annotation.locations = getAnnotationsBetweenRange(
        annotation.locations,
        range,
        maxLength,
        shouldExcludePartial
      );
    }

    // one copy of the annotation per overlap with the range (there can be several)
    const overlaps = getZeroedRangeOverlaps(annotation, range, maxLength).map(
      overlap => extend({}, annotation, overlap)
    );
    if (!shouldExcludePartial) {
      return overlaps;
    }

    // more than one overlap means the annotation was split, i.e. it is partial
    if (overlaps.length > 1) {
      return [];
    }
    // a single overlap that is shorter than the annotation is partial as well
    const onlyOverlap = overlaps[0];
    const isShortened =
      onlyOverlap &&
      getRangeLength(onlyOverlap, maxLength) !==
        getRangeLength(annotation, maxLength);
    return isShortened ? [] : overlaps;
  });

  return trimmed.map(annotation => {
    const { locations } = annotation;
    if (locations && locations.length) {
      // the outer bounds follow the first and last remaining location
      annotation.start = locations[0].start;
      annotation.end = locations[locations.length - 1].end;

      if (locations.length === 1) {
        delete annotation.locations;
      }
    }
    return annotation;
  });
}