@teselagen/sequence-utils 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/index.js +12030 -26126
  2. package/index.mjs +12119 -26124
  3. package/index.umd.js +24056 -38154
  4. package/package.json +2 -2
  5. package/src/DNAComplementMap.js +32 -0
  6. package/src/addGapsToSeqReads.js +417 -0
  7. package/src/addGapsToSeqReads.test.js +358 -0
  8. package/src/adjustAnnotationsToInsert.js +19 -0
  9. package/src/adjustBpsToReplaceOrInsert.js +50 -0
  10. package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
  11. package/src/aliasedEnzymesByName.js +7363 -0
  12. package/src/aminoAcidToDegenerateDnaMap.js +32 -0
  13. package/src/aminoAcidToDegenerateRnaMap.js +32 -0
  14. package/src/aminoAcidToDnaRna.test.js +27 -0
  15. package/src/annotateSingleSeq.js +29 -0
  16. package/src/annotateSingleSeq.test.js +64 -0
  17. package/src/annotationTypes.js +23 -0
  18. package/src/autoAnnotate.js +242 -0
  19. package/src/autoAnnotate.test.js +1039 -0
  20. package/src/bioData.js +431 -0
  21. package/src/calculateNebTa.js +34 -0
  22. package/src/calculateNebTa.test.js +57 -0
  23. package/src/calculateNebTm.js +127 -0
  24. package/src/calculateNebTm.test.js +32 -0
  25. package/src/calculatePercentGC.js +3 -0
  26. package/src/calculatePercentGC.test.js +14 -0
  27. package/src/calculateTm.js +297 -0
  28. package/src/calculateTm.test.js +7 -0
  29. package/src/computeDigestFragments.js +179 -0
  30. package/src/computeDigestFragments.test.js +73 -0
  31. package/src/condensePairwiseAlignmentDifferences.js +85 -0
  32. package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
  33. package/src/convertAACaretPositionOrRangeToDna.js +24 -0
  34. package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
  35. package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
  36. package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
  37. package/src/cutSequenceByRestrictionEnzyme.js +301 -0
  38. package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
  39. package/src/defaultEnzymesByName.js +278 -0
  40. package/src/degenerateDnaToAminoAcidMap.js +5 -0
  41. package/src/degenerateRnaToAminoAcidMap.js +5 -0
  42. package/src/deleteSequenceDataAtRange.js +5 -0
  43. package/src/deleteSequenceDataAtRange.test.js +146 -0
  44. package/src/diffUtils.js +64 -0
  45. package/src/diffUtils.test.js +74 -0
  46. package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
  47. package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
  48. package/src/featureTypesAndColors.js +152 -0
  49. package/src/featureTypesAndColors.test.js +52 -0
  50. package/src/filterAminoAcidSequenceString.js +13 -0
  51. package/src/filterAminoAcidSequenceString.test.js +22 -0
  52. package/src/filterSequenceString.js +22 -0
  53. package/src/filterSequenceString.test.js +13 -0
  54. package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
  55. package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
  56. package/src/findOrfsInPlasmid.js +26 -0
  57. package/src/findSequenceMatches.js +133 -0
  58. package/src/findSequenceMatches.test.js +286 -0
  59. package/src/generateAnnotations.js +34 -0
  60. package/src/generateSequenceData.js +206 -0
  61. package/src/generateSequenceData.test.js +22 -0
  62. package/src/getAllInsertionsInSeqReads.js +83 -0
  63. package/src/getAllInsertionsInSeqReads.test.js +26 -0
  64. package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
  65. package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
  66. package/src/getAminoAcidFromSequenceTriplet.js +22 -0
  67. package/src/getAminoAcidStringFromSequenceString.js +18 -0
  68. package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
  69. package/src/getCodonRangeForAASliver.js +63 -0
  70. package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
  71. package/src/getComplementSequenceAndAnnotations.js +20 -0
  72. package/src/getComplementSequenceString.js +19 -0
  73. package/src/getComplementSequenceString.test.js +13 -0
  74. package/src/getCutsiteType.js +10 -0
  75. package/src/getCutsitesFromSequence.js +17 -0
  76. package/src/getDegenerateDnaStringFromAAString.js +8 -0
  77. package/src/getDegenerateRnaStringFromAAString.js +8 -0
  78. package/src/getDigestFragmentsForCutsites.js +105 -0
  79. package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
  80. package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
  81. package/src/getInsertBetweenVals.js +28 -0
  82. package/src/getInsertBetweenVals.test.js +33 -0
  83. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
  84. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
  85. package/src/getMassOfAaString.js +24 -0
  86. package/src/getMassofAaString.test.js +18 -0
  87. package/src/getOrfsFromSequence.js +124 -0
  88. package/src/getOrfsFromSequence.test.js +210 -0
  89. package/src/getOverlapBetweenTwoSequences.js +30 -0
  90. package/src/getOverlapBetweenTwoSequences.test.js +23 -0
  91. package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
  92. package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
  93. package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
  94. package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
  95. package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
  96. package/src/getReverseComplementAnnotation.js +23 -0
  97. package/src/getReverseComplementAnnotation.test.js +44 -0
  98. package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
  99. package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
  100. package/src/getReverseComplementSequenceString.js +17 -0
  101. package/src/getReverseComplementSequenceString.test.js +11 -0
  102. package/src/getReverseSequenceString.js +12 -0
  103. package/src/getReverseSequenceString.test.js +9 -0
  104. package/src/getSequenceDataBetweenRange.js +131 -0
  105. package/src/getSequenceDataBetweenRange.test.js +474 -0
  106. package/src/getVirtualDigest.js +125 -0
  107. package/src/getVirtualDigest.test.js +134 -0
  108. package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
  109. package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
  110. package/src/index.js +106 -0
  111. package/src/index.test.js +38 -0
  112. package/src/insertGapsIntoRefSeq.js +38 -0
  113. package/src/insertGapsIntoRefSeq.test.js +20 -0
  114. package/src/insertSequenceDataAtPosition.js +2 -0
  115. package/src/insertSequenceDataAtPosition.test.js +75 -0
  116. package/src/insertSequenceDataAtPositionOrRange.js +249 -0
  117. package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
  118. package/src/isEnzymeType2S.js +3 -0
  119. package/src/mapAnnotationsToRows.js +174 -0
  120. package/src/mapAnnotationsToRows.test.js +425 -0
  121. package/src/prepareCircularViewData.js +17 -0
  122. package/src/prepareCircularViewData.test.js +196 -0
  123. package/src/prepareRowData.js +41 -0
  124. package/src/prepareRowData.test.js +36 -0
  125. package/src/prepareRowData_output1.json +391 -0
  126. package/src/proteinAlphabet.js +257 -0
  127. package/src/rotateBpsToPosition.js +13 -0
  128. package/src/rotateBpsToPosition.test.js +6 -0
  129. package/src/rotateSequenceDataToPosition.js +48 -0
  130. package/src/rotateSequenceDataToPosition.test.js +71 -0
  131. package/src/shiftAnnotationsByLen.js +17 -0
  132. package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
  133. package/src/tidyUpAnnotation.js +182 -0
  134. package/src/tidyUpSequenceData.js +169 -0
  135. package/src/tidyUpSequenceData.test.js +332 -0
@@ -0,0 +1,206 @@
1
+ // this is throwing a weird eslint error
2
+
3
+ //
4
+
5
+ import generateAnnotations from "./generateAnnotations";
6
+
7
/**
 * Builds a randomly generated sequence-data object (handy for demos and tests).
 *
 * @param {Object} [options]
 * @param {boolean} [options.isProtein] When truthy a protein sequence is
 *   generated instead of a DNA one, `circular` is forced to false and
 *   `translations` / `primers` are left undefined.
 * @param {number} [options.sequenceLength=1000] Length handed to the sequence
 *   generator and used to bound the generated annotations.
 * @param {number} [options.numFeatures] Number of features to request; any
 *   falsy value (including 0) falls back to 10.
 * @param {number} [options.numParts] Number of parts to request; falsy -> 10.
 * @param {number} [options.numPrimers] Number of primers to request; falsy -> 10.
 * @param {number} [options.numTranslations] Number of translations to request;
 *   falsy -> 5.
 * @returns {Object} An object with `circular`, `name`, `description`,
 *   `isProtein`, `sequence`, `proteinSequence`, `translations`, `features`,
 *   `primers` and `parts`. Exactly one of `sequence` / `proteinSequence` holds
 *   a generated string; the other holds the falsy result of the `&&` guard.
 */
export default function generateSequenceData({
  isProtein,
  sequenceLength = 1000,
  numFeatures,
  numParts,
  numPrimers,
  numTranslations
} = {}) {
  const proteinSequence = isProtein && generateSequence(sequenceLength, true);
  const sequence = !isProtein && generateSequence(sequenceLength);

  // Arguments handed to generateAnnotations are (count, 0, sequenceLength - 1, X).
  // NOTE(review): presumably (count, start, end, maxLength) - confirm against
  // generateAnnotations.
  const lastIndex = sequenceLength - 1;
  const defaultMaxLength = sequenceLength / 3;

  return {
    circular: isProtein ? false : Math.random() > 0.5,
    // Math.random has to be *called*: multiplying the function object itself
    // yields NaN, which would make every name "p-NaN".
    name: "p-" + Math.floor(Math.random() * 100),
    description: "",
    isProtein,
    sequence,
    proteinSequence,
    translations: isProtein
      ? undefined
      : generateAnnotations(
          numTranslations || 5,
          0,
          lastIndex,
          defaultMaxLength
        ),
    features: generateAnnotations(
      numFeatures || 10,
      0,
      lastIndex,
      defaultMaxLength
    ),
    primers: isProtein
      ? undefined
      : generateAnnotations(numPrimers || 10, 0, lastIndex, 50),
    parts: generateAnnotations(numParts || 10, 0, lastIndex, defaultMaxLength)
  };
}
50
+
51
+ // export default tidyUpSequenceData(exampleData)
52
+
53
/**
 * Produces a random sequence string.
 *
 * @param {number} [m=9] Number of characters to generate.
 * @param {boolean} [isProtein] When truthy, characters are drawn from the 20
 *   standard amino-acid one-letter codes (upper case); otherwise from the
 *   lower-case DNA bases "gatc".
 * @returns {string} A string of length `m` (empty when `m` is 0 or negative).
 */
function generateSequence(m = 9, isProtein) {
  // The alphabet must never be empty: charAt() on an empty string returns "",
  // which would silently produce a zero-length sequence whatever `m` is.
  const alphabet = isProtein ? "ACDEFGHIKLMNPQRSTVWY" : "gatc";
  let s = "";
  for (let i = 0; i < m; i++) {
    s += alphabet.charAt(Math.floor(Math.random() * alphabet.length));
  }
  return s;
}
61
+
62
+ // tnr: this is used to generate a very large, multi-featured sequence
63
+ // var string = "ggggcccccgggggccc";
64
+ // var reallyLongFakeSequence = "";
65
+ // for (var i = 1; i < 100000; i++) {
66
+ // reallyLongFakeSequence += string;
67
+ // if (i % 100 === 0) {
68
+ // reallyLongFakeSequence += 'taafatg';
69
+ // sequenceData.features.push({
70
+ // id: i,
71
+ // start: parseInt(i * 10),
72
+ // end: parseInt(i * 10 + 100),
73
+ // name: 'cooljim',
74
+ // color: 'green',
75
+ // forward: true,
76
+ // annotationType: "feature"
77
+ // });
78
+ // }
79
+ // }
80
+ // sequenceData.sequence += reallyLongFakeSequence;
81
+ //
82
+ // export default function() {
83
+ // var baseSeqData = {
84
+ //
85
+ // }
86
+ // function seqGen() {
87
+ //
88
+ // }
89
+ // }
90
+ // "features" : [
91
+ // {
92
+ // "name" : "1",
93
+ // "type" : "misc_feature",
94
+ // "start" : 1,
95
+ // "end" : 1,
96
+ // "strand" : 1,
97
+ // "notes" : [],
98
+ // "color": 'blue'
99
+ // },
100
+ // {
101
+ // "name" : "2",
102
+ // "type" : "misc_feature",
103
+ // "start" : 1,
104
+ // "end" : 1,
105
+ // "strand" : 1,
106
+ // "notes" : [],
107
+ // "color": 'blue'
108
+ // },
109
+ // {
110
+ // "name" : "3",
111
+ // "type" : "misc_feature",
112
+ // "start" : 1,
113
+ // "end" : 1,
114
+ // "strand" : 1,
115
+ // "notes" : [],
116
+ // "color": 'blue'
117
+ // },
118
+ // {
119
+ // "name" : "4",
120
+ // "type" : "misc_feature",
121
+ // "start" : 1,
122
+ // "end" : 14,
123
+ // "strand" : 1,
124
+ // "notes" : [],
125
+ // "color": 'blue'
126
+ // },
127
+ // {
128
+ // "name" : "5",
129
+ // "type" : "misc_feature",
130
+ // "start" : 1,
131
+ // "end" : 1,
132
+ // "strand" : 1,
133
+ // "notes" : [],
134
+ // "color": 'blue'
135
+ // },
136
+ // {
137
+ // "name" : "6",
138
+ // "type" : "misc_feature",
139
+ // "id" : "5590c1978fafgw979df000a4f02c7a",
140
+ // "start" : 4,
141
+ // "end" : 6,
142
+ // "strand" : 1,
143
+ // "notes" : [],
144
+ // "color": 'orange'
145
+ // },
146
+ // {
147
+ // "name" : "housemouserousepouse",
148
+ // "type" : "misc_feature",
149
+ // "id" : "5590c197897fs9df000a4f02c7a",
150
+ // "start" : 4,
151
+ // "end" : 6,
152
+ // "strand" : 1,
153
+ // "notes" : [],
154
+ // "color": 'orange'
155
+ // },
156
+ // {
157
+ // "name" : "housemouserousepouse",
158
+ // "type" : "misc_feature",
159
+ // "id" : "5590c1978979dasdfaf000a4f02c7a",
160
+ // "start" : 4,
161
+ // "end" : 6,
162
+ // "strand" : 1,
163
+ // "notes" : [],
164
+ // "color": 'orange'
165
+ // },
166
+ // {
167
+ // "name" : "housemouserousepouse",
168
+ // "type" : "misc_feature",
169
+ // "id" : "5590c197faas8979df000a4f02c7a",
170
+ // "start" : 4,
171
+ // "end" : 6,
172
+ // "strand" : 1,
173
+ // "notes" : [],
174
+ // "color": 'orange'
175
+ // },
176
+ // {
177
+ // "name" : "housemouserousepouse",
178
+ // "type" : "misc_feature",
179
+ // "id" : "5590c1978979df000a4f02c7aasd",
180
+ // "start" : 4,
181
+ // "end" : 6,
182
+ // "strand" : 1,
183
+ // "notes" : [],
184
+ // "color": 'orange'
185
+ // },
186
+ // {
187
+ // "name" : "house",
188
+ // "type" : "misc_feature",
189
+ // "id" : "5590c1978979df000a4f02c7b",
190
+ // "start" : 70,
191
+ // "end" : 90,
192
+ // "strand" : 1,
193
+ // "notes" : [],
194
+ // "color": 'green'
195
+ // },
196
+ // {
197
+ // "name" : "weer",
198
+ // "type" : "misc_feature",
199
+ // "id" : "5590c1d88979df000a4f02f5c",
200
+ // "start" : 3,
201
+ // "end" : 69,
202
+ // "strand" : 1,
203
+ // "notes" : [],
204
+ // "color": 'red'
205
+ // }
206
+ // ],
@@ -0,0 +1,22 @@
1
+ import generateSequenceData from "./generateSequenceData";
2
+ import chai from "chai";
3
+ import chaiSubset from "chai-subset";
4
+ import {map} from "lodash";
5
+
6
chai.should();
chai.use(chaiSubset);

describe("generateSequenceData", () => {
  it("should generate some nice random data", () => {
    // the generated dna sequence has exactly the requested length
    const { sequence } = generateSequenceData({ sequenceLength: 100 });
    sequence.length.should.equal(100);
  });

  it("numFeatures should work", () => {
    // the requested number of features is honoured
    const options = { sequenceLength: 100, numFeatures: 100 };
    const { features } = generateSequenceData(options);
    map(features).length.should.equal(100);
  });
});
@@ -0,0 +1,83 @@
1
+ // seqReads should be an array of objects [{name, seq, pos, cigar}, {name, seq, pos, cigar}, ...]
2
/**
 * Collects the insertions reported by a set of aligned sequencing reads.
 *
 * Each read's CIGAR string is walked from left to right. M (match) and
 * D (deletion) operations advance a running position that starts at the
 * read's `pos`; an I (insertion) operation is recorded at the current running
 * position and does not advance it. When several insertions land on the same
 * position only the largest count is kept.
 *
 * @param {Array<{pos: number, cigar: string}>} seqReads Aligned reads, e.g.
 *   [{name, seq, pos, cigar}, ...]. Only `pos` and `cigar` are read here.
 * @returns {Array<{bpPos: number, number: number}>} One entry per distinct
 *   insertion position, sorted by ascending `bpPos` (same base as `pos`);
 *   `number` is the largest insertion count seen at that position.
 */
export default function getAllInsertionsInSeqReads(seqReads) {
  // bpPos -> largest insertion count seen at that position
  const largestInsertionByBpPos = new Map();

  seqReads.forEach(seqRead => {
    // Split the cigar at M, D or I, e.g. "2M3I39M3D" -> ["2M", "3I", "39M", "3D"].
    // match() returns null when the string holds none of those operations
    // (e.g. "*" for an unaligned read); such a read has no insertions, so fall
    // back to an empty list rather than throwing on `null.length`.
    const cigarOps = seqRead.cigar.match(/([0-9]*[MDI])/g) || [];

    // Running position; carried along instead of re-summing every preceding
    // operation for each insertion.
    let bpPos = seqRead.pos;
    cigarOps.forEach(op => {
      const count = Number(op.slice(0, -1));
      if (op.slice(-1) !== "I") {
        bpPos += count;
        return;
      }
      const largestSoFar = largestInsertionByBpPos.get(bpPos);
      if (largestSoFar === undefined || count > largestSoFar) {
        largestInsertionByBpPos.set(bpPos, count);
      }
    });
  });

  return [...largestInsertionByBpPos.entries()]
    .map(([bpPos, number]) => ({ bpPos, number }))
    .sort((a, b) => a.bpPos - b.bpPos);
}
56
+
57
+ // function getAllInsertionsInSeqReads(seqReads) {
58
+ // let allInsertionBpPosInSeqReads = [];
59
+ // seqReads.forEach(seqRead => {
60
+ // // split cigar string at M, D, or I (match, deletion, or insertion)
61
+ // // ["2M", "3I", "39M", "3D"...]
62
+ // const splitSeqRead = seqRead.cigar.match(/([0-9]*[MDI])/g)
63
+ // splitSeqRead.forEach(component => {
64
+ // // keeping bpPos 1-based
65
+ // let bpPosOfInsertion = seqRead.pos;
66
+ // if (component.slice(-1) === "I") {
67
+ // const numberOfInsertions = Number(component.slice(0, -1));
68
+ // const componentIndex = splitSeqRead.indexOf(component);
69
+ // for (let i = 0; i < componentIndex; i++) {
70
+ // const previousComponentNumber = Number(splitSeqRead[i].slice(0, -1));
71
+ // bpPosOfInsertion += previousComponentNumber;
72
+ // }
73
+ // for (let i = 1; i <= numberOfInsertions; i++) {
74
+ // allInsertionBpPosInSeqReads.push(bpPosOfInsertion - i);
75
+ // }
76
+ // }
77
+ // });
78
+ // });
79
+ // // allInsertionBpPosInSeqReads should be an array of bp pos [6, 15, 9, 2, 23...]
80
+ // // remove duplicates, organize in ascending order
81
+ // const uniqueInsertionBpPos = [...new Set(allInsertionBpPosInSeqReads)].sort(function(a, b) { return a - b });
82
+ // return uniqueInsertionBpPos;
83
+ // }
@@ -0,0 +1,26 @@
1
+ import getAllInsertionsInSeqReads from "./getAllInsertionsInSeqReads.js";
2
+
3
describe("get bp pos of all insertions in seq reads after bowtie2 alignment", () => {
  it("allInsertionsInSeqReads should be an array of objects [{bpPos: bp pos of insertion, number: # of insertions}, {bpPos, number}, ...]", () => {
    // fixture rows: [name, seq, pos, cigar]
    const seqReads = [
      ["r1", "GATTGAC", 3, "2M2I3M"],
      ["r2", "GAGAGAC", 3, "7M"],
      ["r3", "GGGAGATCAC", 1, "6M1I3M"],
      ["r4", "GATTGAC", 3, "2M2I3M"],
      ["r5", "GAGC", 3, "3M1D1M"],
      ["r6", "GAGCTTACC", 3, "3M1D1M2I3M"],
      ["r7", "GGCATTTCC", 2, "2M3D2M3I2M"],
      ["r8", "GGATTGACATT", 1, "1D3M2I4M2I2D"],
      ["r9", "GGTTTGACCTTT", 1, "2M3I2D1M2D3M3I"]
    ].map(([name, seq, pos, cigar]) => ({ name, seq, pos, cigar }));

    // expected rows: [bpPos, number], ascending by bpPos
    const expectedInsertions = [
      [3, 3],
      [5, 2],
      [7, 1],
      [8, 2],
      [9, 3],
      [11, 3]
    ].map(([bpPos, number]) => ({ bpPos, number }));

    expect(getAllInsertionsInSeqReads(seqReads)).toEqual(expectedInsertions);
  });
});
@@ -0,0 +1,163 @@
1
+ import {translateRange, getSequenceWithinRange} from "@teselagen/range-utils";
2
+ import revComp from "./getReverseComplementSequenceString";
3
+ import getAA from "./getAminoAcidFromSequenceTriplet";
4
+
5
+ //
6
+ import proteinAlphabet from "./proteinAlphabet";
7
+
8
+ // ac.throw([ac.string,ac.bool],arguments);
9
/**
 * @private
 * Builds one record per base of the (sub)sequence describing which amino acid
 * that base belongs to and where it sits inside its codon.
 *
 * @param {String} originalSequenceString The dna sequence, or a sequence of
 *   amino acid characters when `isProteinSequence` is set.
 * @param {boolean} forward Truthy to translate left-to-right; falsy to
 *   translate the reverse strand (triplets are reverse-complemented and the
 *   amino acid index counts down).
 * @param {{start: number, end: number}} [optionalSubrangeRange] When given,
 *   only the part of the sequence returned by getSequenceWithinRange is
 *   processed and positions are shifted back by `start` via translateRange.
 * @param {boolean} [isProteinSequence] The input is made of AA characters;
 *   every character is treated as spanning three bases so dna indexing is
 *   still used in the output.
 * @return {Array<Object>} One entry per base:
 *   { aminoAcid, positionInCodon, aminoAcidIndex, sequenceIndex, codonRange, fullCodon }.
 *   Bases that do not belong to a complete triplet get `fullCodon: false` and
 *   the amino acid returned by getAA("xxx").
 * @throws {Error} If the number of entries does not match the sequence length.
 */
export default function getAminoAcidDataForEachBaseOfDna(
  originalSequenceString,
  forward,
  optionalSubrangeRange,
  isProteinSequence
) {
  // every protein character stands for three dna positions
  const basesPerChar = isProteinSequence ? 3 : 1;
  const fullLength = originalSequenceString.length * basesPerChar;

  const hasSubrange = Boolean(optionalSubrangeRange);
  const sequenceString = hasSubrange
    ? getSequenceWithinRange(optionalSubrangeRange, originalSequenceString)
    : originalSequenceString;
  const startOffset = hasSubrange ? optionalSubrangeRange.start : 0;
  const translatedLength = sequenceString.length * basesPerChar;

  // maps a range local to sequenceString back onto the original sequence
  const toOriginalRange = (start, end) =>
    translateRange({ start, end }, startOffset, fullLength);

  const perBaseData = [];
  let aminoAcidIndex = 0;
  let leadingGapLength = 0;

  if (!forward) {
    // reverse translation: amino acids are numbered from the far end, and any
    // bases that do not fill a triplet sit at the START of the string
    aminoAcidIndex = Math.floor((translatedLength - 1) / 3);
    leadingGapLength = translatedLength % 3;
    const gapRange = toOriginalRange(0, leadingGapLength - 1);

    if (leadingGapLength > 0) {
      for (let i = 0; i < leadingGapLength; i++) {
        perBaseData.push({
          aminoAcid: getAA("xxx"), // placeholder triplet for an incomplete codon
          positionInCodon: leadingGapLength - i - 1,
          aminoAcidIndex,
          sequenceIndex: gapRange.start + i,
          codonRange: gapRange,
          fullCodon: false
        });
      }
      aminoAcidIndex--;
    }
  }

  // complete codons
  const codonPositions = forward ? [0, 1, 2] : [2, 1, 0];
  const aminoAcidStep = forward ? 1 : -1;
  for (
    let codonStart = leadingGapLength;
    codonStart + 2 < translatedLength;
    codonStart += 3
  ) {
    let aminoAcid;
    if (isProteinSequence) {
      aminoAcid = proteinAlphabet[sequenceString[codonStart / 3].toUpperCase()];
    } else {
      const triplet = sequenceString.slice(codonStart, codonStart + 3);
      aminoAcid = getAA(forward ? triplet : revComp(triplet));
    }

    const codonRange = toOriginalRange(codonStart, codonStart + 2);
    codonPositions.forEach((positionInCodon, offset) => {
      perBaseData.push({
        aminoAcid,
        positionInCodon,
        aminoAcidIndex,
        sequenceIndex: codonRange.start + offset,
        codonRange,
        fullCodon: true
      });
    });
    aminoAcidIndex += aminoAcidStep;
  }

  // bases left over at the END of the string (only possible when translating
  // forward; in reverse the leftovers were handled up front)
  const trailingGapLength = translatedLength - perBaseData.length;
  const tailRange = toOriginalRange(
    translatedLength - trailingGapLength,
    translatedLength - 1
  );
  for (let j = 0; j < trailingGapLength; j++) {
    perBaseData.push({
      aminoAcid: getAA("xxx"), // placeholder triplet for an incomplete codon
      positionInCodon: j,
      aminoAcidIndex,
      sequenceIndex: tailRange.start + j,
      fullCodon: false,
      codonRange: tailRange
    });
  }

  // sanity check: exactly one record per base
  if (translatedLength !== perBaseData.length) {
    throw new Error("something went wrong!");
  }
  return perBaseData;
}