@teselagen/sequence-utils 0.1.22 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. package/index.js +12030 -26126
  2. package/index.mjs +12119 -26124
  3. package/index.umd.js +24056 -38154
  4. package/package.json +4 -3
  5. package/src/DNAComplementMap.js +32 -0
  6. package/src/addGapsToSeqReads.js +417 -0
  7. package/src/addGapsToSeqReads.test.js +358 -0
  8. package/src/adjustAnnotationsToInsert.js +19 -0
  9. package/src/adjustBpsToReplaceOrInsert.js +50 -0
  10. package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
  11. package/src/aliasedEnzymesByName.js +7363 -0
  12. package/src/aminoAcidToDegenerateDnaMap.js +32 -0
  13. package/src/aminoAcidToDegenerateRnaMap.js +32 -0
  14. package/src/aminoAcidToDnaRna.test.js +27 -0
  15. package/src/annotateSingleSeq.js +29 -0
  16. package/src/annotateSingleSeq.test.js +64 -0
  17. package/src/annotationTypes.js +23 -0
  18. package/src/autoAnnotate.js +242 -0
  19. package/src/autoAnnotate.test.js +1039 -0
  20. package/src/bioData.js +431 -0
  21. package/src/calculateNebTa.js +34 -0
  22. package/src/calculateNebTa.test.js +57 -0
  23. package/src/calculateNebTm.js +127 -0
  24. package/src/calculateNebTm.test.js +32 -0
  25. package/src/calculatePercentGC.js +3 -0
  26. package/src/calculatePercentGC.test.js +14 -0
  27. package/src/calculateTm.js +297 -0
  28. package/src/calculateTm.test.js +7 -0
  29. package/src/computeDigestFragments.js +179 -0
  30. package/src/computeDigestFragments.test.js +73 -0
  31. package/src/condensePairwiseAlignmentDifferences.js +85 -0
  32. package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
  33. package/src/convertAACaretPositionOrRangeToDna.js +24 -0
  34. package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
  35. package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
  36. package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
  37. package/src/cutSequenceByRestrictionEnzyme.js +301 -0
  38. package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
  39. package/src/defaultEnzymesByName.js +278 -0
  40. package/src/degenerateDnaToAminoAcidMap.js +5 -0
  41. package/src/degenerateRnaToAminoAcidMap.js +5 -0
  42. package/src/deleteSequenceDataAtRange.js +5 -0
  43. package/src/deleteSequenceDataAtRange.test.js +146 -0
  44. package/src/diffUtils.js +64 -0
  45. package/src/diffUtils.test.js +74 -0
  46. package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
  47. package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
  48. package/src/featureTypesAndColors.js +152 -0
  49. package/src/featureTypesAndColors.test.js +52 -0
  50. package/src/filterAminoAcidSequenceString.js +13 -0
  51. package/src/filterAminoAcidSequenceString.test.js +22 -0
  52. package/src/filterSequenceString.js +22 -0
  53. package/src/filterSequenceString.test.js +13 -0
  54. package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
  55. package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
  56. package/src/findOrfsInPlasmid.js +26 -0
  57. package/src/findSequenceMatches.js +133 -0
  58. package/src/findSequenceMatches.test.js +286 -0
  59. package/src/generateAnnotations.js +34 -0
  60. package/src/generateSequenceData.js +206 -0
  61. package/src/generateSequenceData.test.js +22 -0
  62. package/src/getAllInsertionsInSeqReads.js +83 -0
  63. package/src/getAllInsertionsInSeqReads.test.js +26 -0
  64. package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
  65. package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
  66. package/src/getAminoAcidFromSequenceTriplet.js +22 -0
  67. package/src/getAminoAcidStringFromSequenceString.js +18 -0
  68. package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
  69. package/src/getCodonRangeForAASliver.js +63 -0
  70. package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
  71. package/src/getComplementSequenceAndAnnotations.js +20 -0
  72. package/src/getComplementSequenceString.js +19 -0
  73. package/src/getComplementSequenceString.test.js +13 -0
  74. package/src/getCutsiteType.js +10 -0
  75. package/src/getCutsitesFromSequence.js +17 -0
  76. package/src/getDegenerateDnaStringFromAAString.js +8 -0
  77. package/src/getDegenerateRnaStringFromAAString.js +8 -0
  78. package/src/getDigestFragmentsForCutsites.js +105 -0
  79. package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
  80. package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
  81. package/src/getInsertBetweenVals.js +28 -0
  82. package/src/getInsertBetweenVals.test.js +33 -0
  83. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
  84. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
  85. package/src/getMassOfAaString.js +24 -0
  86. package/src/getMassofAaString.test.js +18 -0
  87. package/src/getOrfsFromSequence.js +124 -0
  88. package/src/getOrfsFromSequence.test.js +210 -0
  89. package/src/getOverlapBetweenTwoSequences.js +30 -0
  90. package/src/getOverlapBetweenTwoSequences.test.js +23 -0
  91. package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
  92. package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
  93. package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
  94. package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
  95. package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
  96. package/src/getReverseComplementAnnotation.js +23 -0
  97. package/src/getReverseComplementAnnotation.test.js +44 -0
  98. package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
  99. package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
  100. package/src/getReverseComplementSequenceString.js +17 -0
  101. package/src/getReverseComplementSequenceString.test.js +11 -0
  102. package/src/getReverseSequenceString.js +12 -0
  103. package/src/getReverseSequenceString.test.js +9 -0
  104. package/src/getSequenceDataBetweenRange.js +131 -0
  105. package/src/getSequenceDataBetweenRange.test.js +474 -0
  106. package/src/getVirtualDigest.js +125 -0
  107. package/src/getVirtualDigest.test.js +134 -0
  108. package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
  109. package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
  110. package/src/index.js +106 -0
  111. package/src/index.test.js +38 -0
  112. package/src/insertGapsIntoRefSeq.js +38 -0
  113. package/src/insertGapsIntoRefSeq.test.js +20 -0
  114. package/src/insertSequenceDataAtPosition.js +2 -0
  115. package/src/insertSequenceDataAtPosition.test.js +75 -0
  116. package/src/insertSequenceDataAtPositionOrRange.js +249 -0
  117. package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
  118. package/src/isEnzymeType2S.js +3 -0
  119. package/src/mapAnnotationsToRows.js +174 -0
  120. package/src/mapAnnotationsToRows.test.js +425 -0
  121. package/src/prepareCircularViewData.js +17 -0
  122. package/src/prepareCircularViewData.test.js +196 -0
  123. package/src/prepareRowData.js +41 -0
  124. package/src/prepareRowData.test.js +36 -0
  125. package/src/prepareRowData_output1.json +391 -0
  126. package/src/proteinAlphabet.js +257 -0
  127. package/src/rotateBpsToPosition.js +13 -0
  128. package/src/rotateBpsToPosition.test.js +6 -0
  129. package/src/rotateSequenceDataToPosition.js +48 -0
  130. package/src/rotateSequenceDataToPosition.test.js +71 -0
  131. package/src/shiftAnnotationsByLen.js +17 -0
  132. package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
  133. package/src/tidyUpAnnotation.js +182 -0
  134. package/src/tidyUpSequenceData.js +169 -0
  135. package/src/tidyUpSequenceData.test.js +332 -0
@@ -0,0 +1,257 @@
1
/**
 * Lookup table of amino-acid (and gap/stop/ambiguity) symbols keyed by their
 * single-character code.
 *
 * Fields found on the entries:
 *  - value:            the single-character code (same as the key)
 *  - name:             display name
 *  - threeLettersName: three-letter abbreviation (for the ambiguous symbols
 *                      B/J/X/Z this holds the list of residues it may stand for)
 *  - hydrophobicity:   numeric hydrophobicity; the values appear to follow the
 *                      Kyte-Doolittle scale (not present on U, stop, gap or
 *                      ambiguous entries)
 *  - colorByFamily:    hex colour shared by residues of the same family
 *  - color:            per-residue HSL colour
 *  - mass:             residue mass; the numbers look like free amino acid
 *                      molecular weights in Da (0 for stop, gap and ambiguous
 *                      symbols) - TODO confirm units with consumers
 *  - isAmbiguous / aliases: only set on the ambiguity codes B, J, X and Z
 */
const proteinAlphabet = {
  A: {
    value: "A",
    name: "Alanine",
    threeLettersName: "Ala",
    hydrophobicity: 1.8,
    colorByFamily: "#00FFFF",
    color: "hsl(327.3, 100%, 69%)",
    mass: 89.1
  },
  R: {
    value: "R",
    name: "Arginine",
    threeLettersName: "Arg",
    hydrophobicity: -4.5,
    colorByFamily: "#FFC0CB",
    color: "hsl(258.1, 100%, 69%)",
    mass: 174.2
  },
  N: {
    value: "N",
    name: "Asparagine",
    threeLettersName: "Asn",
    hydrophobicity: -3.5,
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    mass: 132.1
  },
  D: {
    value: "D",
    name: "Aspartic acid",
    threeLettersName: "Asp",
    hydrophobicity: -3.5,
    colorByFamily: "#EE82EE",
    color: "hsl(268.9, 100%, 69%)",
    mass: 133.1
  },
  C: {
    value: "C",
    name: "Cysteine",
    threeLettersName: "Cys",
    hydrophobicity: 2.5,
    colorByFamily: "#FFFF00",
    color: "hsl(335.1, 100%, 69%)",
    mass: 121.2
  },
  E: {
    value: "E",
    name: "Glutamic acid",
    threeLettersName: "Glu",
    hydrophobicity: -3.5,
    colorByFamily: "#EE82EE",
    color: "hsl(268.9, 100%, 69%)",
    mass: 147.1
  },
  Q: {
    value: "Q",
    name: "Glutamine",
    threeLettersName: "Gln",
    hydrophobicity: -3.5,
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    mass: 146.2
  },
  G: {
    value: "G",
    name: "Glycine",
    threeLettersName: "Gly",
    hydrophobicity: -0.4,
    colorByFamily: "#00FFFF",
    color: "hsl(303.1, 100%, 69%)",
    mass: 75.1
  },
  H: {
    value: "H",
    name: "Histidine",
    threeLettersName: "His",
    hydrophobicity: -3.2,
    colorByFamily: "#FFC0CB",
    color: "hsl(272.2, 100%, 69%)",
    mass: 155.2
  },
  I: {
    value: "I",
    // Previously "Isoleucine " with a stray trailing space, which leaked into
    // any UI label or exact string comparison using this name.
    name: "Isoleucine",
    threeLettersName: "Ile",
    hydrophobicity: 4.5,
    colorByFamily: "#00FFFF",
    color: "hsl(356.9, 100%, 69%)",
    mass: 131.2
  },
  L: {
    value: "L",
    name: "Leucine",
    threeLettersName: "Leu",
    hydrophobicity: 3.8,
    colorByFamily: "#00FFFF",
    color: "hsl(349.4, 100%, 69%)",
    mass: 131.2
  },
  K: {
    value: "K",
    name: "Lysine",
    threeLettersName: "Lys",
    hydrophobicity: -3.9,
    colorByFamily: "#FFC0CB",
    color: "hsl(264.7, 100%, 69%)",
    mass: 146.2
  },
  M: {
    value: "M",
    name: "Methionine",
    threeLettersName: "Met",
    hydrophobicity: 1.9,
    colorByFamily: "#FFFF00",
    color: "hsl(328.5, 100%, 69%)",
    mass: 149.2
  },
  F: {
    value: "F",
    name: "Phenylalanine",
    threeLettersName: "Phe",
    hydrophobicity: 2.8,
    colorByFamily: "#FFA500",
    color: "hsl(338.4, 100%, 69%)",
    mass: 165.2
  },
  P: {
    value: "P",
    name: "Proline",
    threeLettersName: "Pro",
    hydrophobicity: -1.6,
    colorByFamily: "#00FFFF",
    color: "hsl(289.9, 100%, 69%)",
    mass: 115.1
  },
  S: {
    value: "S",
    name: "Serine",
    threeLettersName: "Ser",
    hydrophobicity: -0.8,
    colorByFamily: "#90EE90",
    color: "hsl(298.6, 100%, 69%)",
    mass: 105.1
  },
  T: {
    value: "T",
    name: "Threonine",
    threeLettersName: "Thr",
    hydrophobicity: -0.7,
    colorByFamily: "#90EE90",
    color: "hsl(299.8, 100%, 69%)",
    mass: 119.1
  },
  U: {
    value: "U",
    name: "Selenocysteine",
    threeLettersName: "Sec",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 168.1
  },
  W: {
    value: "W",
    name: "Tryptophan",
    threeLettersName: "Trp",
    hydrophobicity: -0.9,
    colorByFamily: "#FFA500",
    color: "hsl(297.6, 100%, 69%)",
    mass: 204.2
  },
  Y: {
    value: "Y",
    name: "Tyrosine",
    threeLettersName: "Tyr",
    hydrophobicity: -1.3,
    colorByFamily: "#FFA500",
    color: "hsl(293.2, 100%, 69%)",
    mass: 181.2
  },
  V: {
    value: "V",
    name: "Valine",
    threeLettersName: "Val",
    hydrophobicity: 4.2,
    colorByFamily: "#00FFFF",
    color: "hsl(353.6, 100%, 69%)",
    mass: 117.1
  },
  "*": {
    value: "*",
    name: "Stop",
    threeLettersName: "Stop",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  ".": {
    //tnr: this is actually a deletion/gap character (previously we had this as a stop character which is incorrect) https://www.dnabaser.com/articles/IUPAC%20ambiguity%20codes.html
    value: ".",
    name: "Gap",
    threeLettersName: "Gap",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  "-": {
    value: "-",
    name: "Gap",
    threeLettersName: "Gap",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  B: {
    value: "B",
    threeLettersName: "ND",
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    isAmbiguous: true,
    name: "B",
    aliases: "ND",
    mass: 0
  },
  J: {
    value: "J",
    threeLettersName: "IL",
    colorByFamily: "#00FFFF",
    color: "hsl(352, 100%, 69%)",
    isAmbiguous: true,
    name: "J",
    aliases: "IL",
    mass: 0
  },
  X: {
    value: "X",
    threeLettersName: "ACDEFGHIKLMNPQRSTVWY",
    colorByFamily: "#FFFFFF",
    color: "hsl(60, 100%, 69%)",
    isAmbiguous: true,
    name: "X",
    aliases: "ACDEFGHIKLMNPQRSTVWY",
    mass: 0
  },
  Z: {
    value: "Z",
    threeLettersName: "QE",
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    isAmbiguous: true,
    name: "Z",
    aliases: "QE",
    mass: 0
  }
};
256
+
257
+ export default proteinAlphabet
@@ -0,0 +1,13 @@
1
/**
 * Rotates a sequence string so that the character currently at
 * `caretPosition` becomes the first character of the result.
 *
 * The position is wrapped into the range [0, bps.length), so negative values
 * and values larger than the sequence length are accepted.
 *
 * Implemented with string slicing rather than
 * `Array.prototype.push.apply(arr, removed)`: `apply` spreads every removed
 * element as a separate call argument, which can throw a RangeError once the
 * rotation offset reaches the engine's argument limit (easily hit by long
 * sequences).
 *
 * @param {string} bps - the sequence to rotate
 * @param {number} caretPosition - index that should become position 0
 * @returns {string} the rotated sequence
 */
export default function rotateBpsToPosition(bps, caretPosition) {
  const offset = normalizeRotation(caretPosition, bps.length);
  // A NaN offset (empty sequence or non-numeric position) is treated as 0 by
  // slice, i.e. the sequence is returned unrotated.
  return bps.slice(offset) + bps.slice(0, offset);
}

/**
 * Wraps `count` into [0, length) using a floored modulo so that negative
 * counts rotate from the end. Yields NaN when `length` is 0.
 */
function normalizeRotation(count, length) {
  return count - length * Math.floor(count / length);
}
@@ -0,0 +1,6 @@
1
+ import rotateBpsToPosition from './rotateBpsToPosition';
2
// Smoke test: rotating to index 4 moves the tail starting at that index to the front.
describe("rotateBpsToPosition", () => {
  it("should rotate Bps To Position correctly ", () => {
    const rotated = rotateBpsToPosition("atgaccc", 4);
    expect(rotated).toEqual("cccatga");
  });
});
@@ -0,0 +1,48 @@
1
+ import {map} from "lodash";
2
+ import {adjustRangeToRotation} from "@teselagen/range-utils";
3
+ import tidyUpSequenceData from "./tidyUpSequenceData";
4
+ import {modifiableTypes} from "./annotationTypes";
5
+ import rotateBpsToPosition from "./rotateBpsToPosition";
6
+
7
/**
 * Rotates sequence data so that `caretPosition` becomes the new origin.
 *
 * The input is first passed through `tidyUpSequenceData`; the bases of the
 * returned object are rotated and every annotation collection listed in
 * `modifiableTypes` is re-mapped to the rotated coordinates.
 *
 * @param {object} sequenceData - sequence data accepted by tidyUpSequenceData
 * @param {number} caretPosition - index that should become position 0
 * @param {object} [options] - forwarded unchanged to tidyUpSequenceData
 * @returns {object} the tidied, rotated sequence data
 */
export default function rotateSequenceDataToPosition(
  sequenceData,
  caretPosition,
  options
) {
  const rotated = tidyUpSequenceData(sequenceData, options);

  // Rotate the bases first; the length used below is read from the rotated string.
  rotated.sequence = rotateBpsToPosition(rotated.sequence, caretPosition);

  // Then move every annotation collection into the rotated coordinate system.
  modifiableTypes.forEach(annotationType => {
    const annotations = rotated[annotationType];
    rotated[annotationType] = adjustAnnotationsToRotation(
      annotations,
      caretPosition,
      rotated.sequence.length
    );
  });

  return rotated;
}
32
+
33
/**
 * Re-maps each annotation, and each of its sub-locations when present, with
 * `adjustRangeToRotation`.
 *
 * @param {Array|object} annotationsToBeAdjusted - collection iterated with lodash `map`
 * @param {number} positionToRotateTo - rotation origin
 * @param {number} maxLength - sequence length
 * @returns {Array} new annotation objects carrying the adjusted range fields
 */
function adjustAnnotationsToRotation(
  annotationsToBeAdjusted,
  positionToRotateTo,
  maxLength
) {
  const rotateRange = range =>
    adjustRangeToRotation(range, positionToRotateTo, maxLength);

  const adjusted = map(annotationsToBeAdjusted, annotation => {
    const rotatedAnnotation = rotateRange(annotation);
    // `locations` is always written, as undefined when the annotation has none.
    const locations = annotation.locations
      ? annotation.locations.map(location => rotateRange(location))
      : undefined;
    return { ...rotatedAnnotation, locations };
  });

  //filter any fully deleted ranges
  return adjusted.filter(range => Boolean(range));
}
@@ -0,0 +1,71 @@
1
+ import rotateSequenceDataToPosition from "./rotateSequenceDataToPosition";
2
describe("rotateSequenceDataToPosition", () => {
  it("should rotate vanilla sequence data correctly", () => {
    const input = {
      //         0123456
      sequence: "atgaccc"
    };
    const newData = rotateSequenceDataToPosition(input, 4);
    expect(newData.sequence).toEqual("cccatga");
  });

  it("should rotate sequence data with features correctly", () => {
    const input = {
      //         0123456
      sequence: "atgaccc",
      features: [
        { start: 4, end: 4 },
        // origin-spanning feature
        { start: 1, end: 0 },
        {
          start: 2,
          end: 6,
          locations: [
            { start: 2, end: 3 },
            { start: 4, end: 6 }
          ]
        }
      ]
    };
    const expectedFeatures = [
      { start: 0, end: 0 },
      { start: 4, end: 3 },
      {
        start: 5,
        end: 2,
        locations: [
          { start: 5, end: 6 },
          { start: 0, end: 2 }
        ]
      }
    ];

    const newData = rotateSequenceDataToPosition(input, 4);

    expect(newData.sequence).toEqual("cccatga");
    expect(newData.features).toMatchObject(expectedFeatures);
  });
});
@@ -0,0 +1,17 @@
1
+ import {modifiableTypes} from "./annotationTypes";
2
+ import adjustAnnotationsToInsert from "./adjustAnnotationsToInsert";
3
+
4
/**
 * Replaces every annotation collection on `seqData` that is listed in
 * `modifiableTypes` with the result of `adjustAnnotationsToInsert`.
 *
 * `seqData` is modified in place and nothing is returned.
 *
 * @param {object} args
 * @param {object} args.seqData - sequence data whose annotation collections are reassigned
 * @param {number} args.caretPosition - forwarded to adjustAnnotationsToInsert
 * @param {number} args.insertLength - forwarded to adjustAnnotationsToInsert
 */
export default function shiftAnnotationsByLen({
  seqData,
  caretPosition,
  insertLength
}) {
  const shift = annotations =>
    adjustAnnotationsToInsert(annotations, caretPosition, insertLength);

  modifiableTypes.forEach(annotationType => {
    seqData[annotationType] = shift(seqData[annotationType]);
  });
}
@@ -0,0 +1,106 @@
1
+ import proteinAlphabet from "./proteinAlphabet";
2
+
3
/**
 * Maps a lower-case codon (DNA spelling with "t" and RNA spelling with "u")
 * to its entry in `proteinAlphabet`. Gap triplets ("..." and "---") map to
 * the corresponding gap entries.
 *
 * The RNA stop codons (uaa, uag, uga) were missing even though every other
 * amino acid already listed its "u" spellings; they are included so RNA input
 * translates stops the same way DNA input does.
 */
const threeLetterSequenceStringToAminoAcidMap = {
  // Alanine
  gct: proteinAlphabet.A,
  gcc: proteinAlphabet.A,
  gca: proteinAlphabet.A,
  gcg: proteinAlphabet.A,
  gcu: proteinAlphabet.A,
  // Arginine
  cgt: proteinAlphabet.R,
  cgc: proteinAlphabet.R,
  cga: proteinAlphabet.R,
  cgg: proteinAlphabet.R,
  aga: proteinAlphabet.R,
  agg: proteinAlphabet.R,
  cgu: proteinAlphabet.R,
  // Asparagine
  aat: proteinAlphabet.N,
  aac: proteinAlphabet.N,
  aau: proteinAlphabet.N,
  // Aspartic acid
  gat: proteinAlphabet.D,
  gac: proteinAlphabet.D,
  gau: proteinAlphabet.D,
  // Cysteine
  tgt: proteinAlphabet.C,
  tgc: proteinAlphabet.C,
  ugu: proteinAlphabet.C,
  ugc: proteinAlphabet.C,
  // Glutamic acid
  gaa: proteinAlphabet.E,
  gag: proteinAlphabet.E,
  // Glutamine
  caa: proteinAlphabet.Q,
  cag: proteinAlphabet.Q,
  // Glycine
  ggt: proteinAlphabet.G,
  ggc: proteinAlphabet.G,
  gga: proteinAlphabet.G,
  ggg: proteinAlphabet.G,
  ggu: proteinAlphabet.G,
  // Histidine
  cat: proteinAlphabet.H,
  cac: proteinAlphabet.H,
  cau: proteinAlphabet.H,
  // Isoleucine
  att: proteinAlphabet.I,
  atc: proteinAlphabet.I,
  ata: proteinAlphabet.I,
  auu: proteinAlphabet.I,
  auc: proteinAlphabet.I,
  aua: proteinAlphabet.I,
  // Leucine
  ctt: proteinAlphabet.L,
  ctc: proteinAlphabet.L,
  cta: proteinAlphabet.L,
  ctg: proteinAlphabet.L,
  tta: proteinAlphabet.L,
  ttg: proteinAlphabet.L,
  cuu: proteinAlphabet.L,
  cuc: proteinAlphabet.L,
  cua: proteinAlphabet.L,
  cug: proteinAlphabet.L,
  uua: proteinAlphabet.L,
  uug: proteinAlphabet.L,
  // Lysine
  aaa: proteinAlphabet.K,
  aag: proteinAlphabet.K,
  // Methionine
  atg: proteinAlphabet.M,
  aug: proteinAlphabet.M,
  // Phenylalanine
  ttt: proteinAlphabet.F,
  ttc: proteinAlphabet.F,
  uuu: proteinAlphabet.F,
  uuc: proteinAlphabet.F,
  // Proline
  cct: proteinAlphabet.P,
  ccc: proteinAlphabet.P,
  cca: proteinAlphabet.P,
  ccg: proteinAlphabet.P,
  ccu: proteinAlphabet.P,
  // Serine
  tct: proteinAlphabet.S,
  tcc: proteinAlphabet.S,
  tca: proteinAlphabet.S,
  tcg: proteinAlphabet.S,
  agt: proteinAlphabet.S,
  agc: proteinAlphabet.S,
  ucu: proteinAlphabet.S,
  ucc: proteinAlphabet.S,
  uca: proteinAlphabet.S,
  ucg: proteinAlphabet.S,
  agu: proteinAlphabet.S,
  // Threonine
  act: proteinAlphabet.T,
  acc: proteinAlphabet.T,
  aca: proteinAlphabet.T,
  acg: proteinAlphabet.T,
  acu: proteinAlphabet.T,
  // Tryptophan
  tgg: proteinAlphabet.W,
  ugg: proteinAlphabet.W,
  // Tyrosine
  tat: proteinAlphabet.Y,
  tac: proteinAlphabet.Y,
  uau: proteinAlphabet.Y,
  uac: proteinAlphabet.Y,
  // Valine
  gtt: proteinAlphabet.V,
  gtc: proteinAlphabet.V,
  gta: proteinAlphabet.V,
  gtg: proteinAlphabet.V,
  guu: proteinAlphabet.V,
  guc: proteinAlphabet.V,
  gua: proteinAlphabet.V,
  gug: proteinAlphabet.V,
  // Stop codons (DNA and RNA spellings)
  taa: proteinAlphabet["*"],
  tag: proteinAlphabet["*"],
  tga: proteinAlphabet["*"],
  uaa: proteinAlphabet["*"],
  uag: proteinAlphabet["*"],
  uga: proteinAlphabet["*"],
  // Gap triplets
  "...": proteinAlphabet["."],
  "---": proteinAlphabet["-"]
};
105
+
106
+ export default threeLetterSequenceStringToAminoAcidMap;
@@ -0,0 +1,182 @@
1
+ import {cloneDeep, get, some} from "lodash";
2
+ import {getFeatureToColorMap, getFeatureTypes} from "./featureTypesAndColors";
3
+ import shortid from "shortid";
4
+
5
/**
 * Normalises a single annotation: ensures a name and id, coerces start/end
 * (and those of every sub-location) into range, normalises strand/forward,
 * validates the type, parses JSON notes and fills in a default colour.
 *
 * @param {object} _annotation - annotation to tidy; cloned unless `mutative` is set
 * @param {object} [options] - now optional; previously omitting it threw a
 *   TypeError while destructuring
 * @param {object} [options.sequenceData={}] - `size`, `circular` and `isProtein` are read from it
 * @param {boolean} [options.convertAnnotationsFromAAIndices] - treat start/end as amino-acid indices
 * @param {string} [options.annotationType] - stored on the result as `annotationTypePlural`
 * @param {boolean} [options.provideNewIdsForAnnotations] - always assign a fresh id
 * @param {boolean} [options.doNotProvideIdsForAnnotations] - never assign an id when one is missing
 * @param {string[]} [options.messages=[]] - warnings are appended to this array
 * @param {boolean} [options.mutative] - modify `_annotation` in place instead of cloning it
 * @param {boolean} [options.allowNonStandardGenbankTypes] - accept types outside the known list
 * @param {string[]} [options.featureTypes] - accepted types; defaults to getFeatureTypes()
 * @returns {object|false} the tidied annotation, or false when `_annotation`
 *   is not an object
 */
export default function tidyUpAnnotation(
  _annotation,
  {
    sequenceData = {},
    convertAnnotationsFromAAIndices,
    annotationType,
    provideNewIdsForAnnotations,
    doNotProvideIdsForAnnotations,
    messages = [],
    mutative,
    allowNonStandardGenbankTypes,
    featureTypes
  } = {}
) {
  const { size, circular, isProtein } = sequenceData;
  if (!_annotation || typeof _annotation !== "object") {
    messages.push("Invalid annotation detected and removed");
    return false;
  }
  let annotation = _annotation;
  if (!mutative) {
    annotation = cloneDeep(_annotation);
  }
  annotation.annotationTypePlural = annotationType;

  if (!annotation.name || typeof annotation.name !== "string") {
    messages.push(
      'Unable to detect valid name for annotation, setting name to "Untitled annotation"'
    );
    annotation.name = "Untitled annotation";
  }
  if (provideNewIdsForAnnotations) {
    annotation.id = shortid();
  }
  // An id of 0 is a valid id and must not be replaced.
  if (!annotation.id && annotation.id !== 0 && !doNotProvideIdsForAnnotations) {
    annotation.id = shortid();
    messages.push(
      "Unable to detect valid ID for annotation, setting ID to " + annotation.id
    );
  }

  //run this for the annotation itself
  coerceLocation({
    isProtein,
    location: annotation,
    convertAnnotationsFromAAIndices,
    size,
    messages,
    circular,
    name: annotation.name
  });
  //and for each location
  annotation.locations &&
    annotation.locations.forEach(location => {
      coerceLocation({
        isProtein,
        location,
        convertAnnotationsFromAAIndices,
        size,
        messages,
        circular,
        name: annotation.name
      });
    });

  // Protein annotations are always forward; otherwise accept the common
  // boolean/string/number spellings of "forward".
  if (
    isProtein ||
    annotation.forward === true ||
    annotation.forward === "true" ||
    annotation.strand === 1 ||
    annotation.strand === "1" ||
    annotation.strand === "+"
  ) {
    annotation.forward = true;
    annotation.strand = 1;
  } else {
    annotation.forward = false;
    annotation.strand = -1;
  }
  if (
    !annotation.type ||
    typeof annotation.type !== "string" ||
    !some(featureTypes || getFeatureTypes(), featureType => {
      if (featureType.toLowerCase() === annotation.type.toLowerCase()) {
        annotation.type = featureType; //this makes sure the annotation.type is being set to the exact value of the accepted featureType
        return true;
      }
      // NOTE(review): when non-standard types are allowed, this accepts the
      // type on the first non-matching entry, so a known type that only
      // differs in casing may not be normalised - confirm this is intended.
      if (
        allowNonStandardGenbankTypes ||
        (typeof window !== "undefined" &&
          get(window, "tg_allowNonStandardGenbankTypes")) ||
        (typeof global !== "undefined" &&
          get(global, "tg_allowNonStandardGenbankTypes"))
      )
        return true;
      return false;
    })
  ) {
    messages.push(
      "Invalid annotation type detected: " +
        annotation.type +
        " for " +
        annotation.name +
        ". set type to misc_feature"
    );
    annotation.type = "misc_feature";
  }
  if (annotation.notes && typeof annotation.notes === "string") {
    // Notes may arrive JSON-encoded; on a parse failure the raw string is kept.
    try {
      annotation.notes = JSON.parse(annotation.notes);
    } catch (error) {
      console.info(
        `warning 33y00a0912 - couldn't parse notes for ${annotation.name ||
          ""} ${annotation.notes}:`,
        error
      );
    }
  }

  if (!annotation.color) {
    annotation.color = getFeatureToColorMap()[annotation.type];
  }
  return annotation;
}
129
+
130
/**
 * Coerces `location.start` / `location.end` to integers and clamps them into
 * the sequence, mutating `location` in place and appending a warning to
 * `messages` for every correction made.
 *
 * Warnings use the `name` argument. The previous implementation read
 * `location.name`, which is undefined for sub-locations, so their warnings
 * said "detected for undefined".
 *
 * @param {object} args
 * @param {object} args.location - object with `start` and `end` (annotation or sub-location)
 * @param {boolean} [args.convertAnnotationsFromAAIndices] - convert amino-acid indices to base indices
 * @param {number} args.size - sequence length
 * @param {boolean} [args.isProtein] - changes the fallback used for an invalid start
 * @param {string[]} args.messages - warnings are appended to this array
 * @param {boolean} [args.circular] - only an explicit `false` triggers the wrap-around check
 * @param {string} args.name - annotation name used in warnings
 */
function coerceLocation({
  location,
  convertAnnotationsFromAAIndices,
  size,
  isProtein,
  messages,
  circular,
  name
}) {
  location.start = parseInt(location.start, 10);
  location.end = parseInt(location.end, 10);

  if (convertAnnotationsFromAAIndices) {
    // One amino acid spans three bases: first base of the start codon up to
    // the last base of the end codon.
    location.start = location.start * 3;
    location.end = location.end * 3 + 2;
  }

  const lastIndex = size - 1;
  // The negated form also rejects NaN (unparsable input or missing size).
  const isOutOfRange = position => !(position >= 0 && position <= lastIndex);

  if (isOutOfRange(location.start)) {
    messages.push(
      "Invalid annotation start: " +
        location.start +
        " detected for " +
        name +
        " and set to size: " +
        size
    );
    location.start = size - (isProtein ? 3 : 1);
  }
  if (isOutOfRange(location.end)) {
    messages.push(
      "Invalid annotation end: " +
        location.end +
        " detected for " +
        name +
        " and set to seq size: " +
        size
    );
    location.end = lastIndex;
  }
  if (location.start > location.end && circular === false) {
    messages.push(
      "Invalid circular annotation detected for " + name + ". end set to 1"
    );
    // NOTE(review): the message says "end set to 1" but the end is set to
    // `size`, one past the last valid index; kept as-is pending confirmation
    // of the intended value.
    location.end = size;
  }
}