@teselagen/sequence-utils 0.3.37 → 0.3.38-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. package/DNAComplementMap.d.ts +1 -1
  2. package/addGapsToSeqReads.d.ts +16 -3
  3. package/adjustAnnotationsToInsert.d.ts +2 -1
  4. package/adjustBpsToReplaceOrInsert.d.ts +2 -1
  5. package/aliasedEnzymesByName.d.ts +37 -1
  6. package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
  7. package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
  8. package/annotateSingleSeq.d.ts +5 -4
  9. package/annotationTypes.d.ts +2 -2
  10. package/autoAnnotate.d.ts +17 -8
  11. package/bioData.d.ts +10 -58
  12. package/calculateEndStability.d.ts +1 -1
  13. package/calculateNebTa.d.ts +6 -1
  14. package/calculateNebTm.d.ts +6 -4
  15. package/calculatePercentGC.d.ts +1 -1
  16. package/calculateSantaLuciaTm.d.ts +28 -114
  17. package/calculateTm.d.ts +13 -1
  18. package/computeDigestFragments.d.ts +30 -24
  19. package/condensePairwiseAlignmentDifferences.d.ts +1 -1
  20. package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
  21. package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
  22. package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
  23. package/defaultEnzymesByName.d.ts +2 -1
  24. package/degenerateDnaToAminoAcidMap.d.ts +1 -1
  25. package/degenerateRnaToAminoAcidMap.d.ts +1 -1
  26. package/deleteSequenceDataAtRange.d.ts +2 -1
  27. package/diffUtils.d.ts +9 -7
  28. package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
  29. package/featureTypesAndColors.d.ts +19 -6
  30. package/filterSequenceString.d.ts +14 -10
  31. package/findApproxMatches.d.ts +7 -1
  32. package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
  33. package/findOrfsInPlasmid.d.ts +2 -11
  34. package/findSequenceMatches.d.ts +11 -1
  35. package/generateAnnotations.d.ts +2 -1
  36. package/generateSequenceData.d.ts +8 -13
  37. package/getAllInsertionsInSeqReads.d.ts +11 -1
  38. package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
  39. package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
  40. package/getAminoAcidStringFromSequenceString.d.ts +3 -1
  41. package/getCodonRangeForAASliver.d.ts +3 -4
  42. package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  43. package/getComplementSequenceAndAnnotations.d.ts +5 -1
  44. package/getComplementSequenceString.d.ts +1 -1
  45. package/getCutsiteType.d.ts +2 -1
  46. package/getCutsitesFromSequence.d.ts +2 -1
  47. package/getDegenerateDnaStringFromAAString.d.ts +1 -1
  48. package/getDegenerateRnaStringFromAAString.d.ts +1 -1
  49. package/getDigestFragmentsForCutsites.d.ts +4 -1
  50. package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
  51. package/getInsertBetweenVals.d.ts +2 -1
  52. package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
  53. package/getOrfsFromSequence.d.ts +17 -11
  54. package/getOverlapBetweenTwoSequences.d.ts +2 -1
  55. package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
  56. package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
  57. package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  58. package/getReverseComplementAnnotation.d.ts +11 -1
  59. package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
  60. package/getReverseComplementSequenceString.d.ts +1 -1
  61. package/getReverseSequenceString.d.ts +1 -1
  62. package/getSequenceDataBetweenRange.d.ts +9 -1
  63. package/getVirtualDigest.d.ts +11 -10
  64. package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
  65. package/index.cjs +732 -483
  66. package/index.d.ts +8 -5
  67. package/index.js +732 -483
  68. package/index.umd.cjs +732 -483
  69. package/insertGapsIntoRefSeq.d.ts +2 -1
  70. package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
  71. package/isEnzymeType2S.d.ts +2 -1
  72. package/mapAnnotationsToRows.d.ts +9 -1
  73. package/package.json +9 -6
  74. package/prepareCircularViewData.d.ts +2 -1
  75. package/prepareRowData.d.ts +7 -3
  76. package/proteinAlphabet.d.ts +1 -1
  77. package/rotateBpsToPosition.d.ts +1 -1
  78. package/rotateSequenceDataToPosition.d.ts +3 -1
  79. package/shiftAnnotationsByLen.d.ts +4 -3
  80. package/src/DNAComplementMap.ts +32 -0
  81. package/src/addGapsToSeqReads.ts +436 -0
  82. package/src/adjustAnnotationsToInsert.ts +20 -0
  83. package/src/adjustBpsToReplaceOrInsert.ts +73 -0
  84. package/src/aliasedEnzymesByName.ts +7366 -0
  85. package/src/aminoAcidToDegenerateDnaMap.ts +32 -0
  86. package/src/aminoAcidToDegenerateRnaMap.ts +32 -0
  87. package/src/annotateSingleSeq.ts +37 -0
  88. package/src/annotationTypes.ts +23 -0
  89. package/src/autoAnnotate.test.js +0 -1
  90. package/src/autoAnnotate.ts +290 -0
  91. package/src/bioData.ts +65 -0
  92. package/src/calculateEndStability.ts +91 -0
  93. package/src/calculateNebTa.ts +46 -0
  94. package/src/calculateNebTm.ts +132 -0
  95. package/src/calculatePercentGC.ts +3 -0
  96. package/src/calculateSantaLuciaTm.ts +184 -0
  97. package/src/calculateTm.ts +242 -0
  98. package/src/computeDigestFragments.ts +238 -0
  99. package/src/condensePairwiseAlignmentDifferences.ts +85 -0
  100. package/src/convertAACaretPositionOrRangeToDna.ts +28 -0
  101. package/src/convertDnaCaretPositionOrRangeToAA.ts +28 -0
  102. package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
  103. package/src/defaultEnzymesByName.ts +280 -0
  104. package/src/degenerateDnaToAminoAcidMap.ts +5 -0
  105. package/src/degenerateRnaToAminoAcidMap.ts +5 -0
  106. package/src/deleteSequenceDataAtRange.ts +13 -0
  107. package/src/diffUtils.ts +80 -0
  108. package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
  109. package/src/featureTypesAndColors.ts +167 -0
  110. package/src/filterSequenceString.ts +153 -0
  111. package/src/findApproxMatches.ts +58 -0
  112. package/src/findNearestRangeOfSequenceOverlapToPosition.ts +43 -0
  113. package/src/findOrfsInPlasmid.js +6 -1
  114. package/src/findOrfsInPlasmid.ts +31 -0
  115. package/src/findSequenceMatches.ts +154 -0
  116. package/src/generateAnnotations.ts +39 -0
  117. package/src/generateSequenceData.ts +212 -0
  118. package/src/getAllInsertionsInSeqReads.ts +100 -0
  119. package/src/getAminoAcidDataForEachBaseOfDna.ts +305 -0
  120. package/src/getAminoAcidFromSequenceTriplet.ts +27 -0
  121. package/src/getAminoAcidStringFromSequenceString.ts +36 -0
  122. package/src/getCodonRangeForAASliver.ts +73 -0
  123. package/src/getComplementAminoAcidStringFromSequenceString.ts +10 -0
  124. package/src/getComplementSequenceAndAnnotations.ts +25 -0
  125. package/src/getComplementSequenceString.ts +23 -0
  126. package/src/getCutsiteType.ts +18 -0
  127. package/src/getCutsitesFromSequence.ts +22 -0
  128. package/src/getDegenerateDnaStringFromAAString.ts +15 -0
  129. package/src/getDegenerateRnaStringFromAAString.ts +15 -0
  130. package/src/getDigestFragmentsForCutsites.ts +126 -0
  131. package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
  132. package/src/getInsertBetweenVals.ts +31 -0
  133. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +40 -0
  134. package/src/getMassOfAaString.ts +29 -0
  135. package/src/getOrfsFromSequence.ts +132 -0
  136. package/src/getOverlapBetweenTwoSequences.ts +30 -0
  137. package/src/getPossiblePartsFromSequenceAndEnzymes.ts +149 -0
  138. package/src/getReverseAminoAcidStringFromSequenceString.ts +22 -0
  139. package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +10 -0
  140. package/src/getReverseComplementAnnotation.ts +33 -0
  141. package/src/getReverseComplementSequenceAndAnnotations.ts +46 -0
  142. package/src/getReverseComplementSequenceString.ts +18 -0
  143. package/src/getReverseSequenceString.ts +12 -0
  144. package/src/getSequenceDataBetweenRange.ts +154 -0
  145. package/src/getVirtualDigest.ts +139 -0
  146. package/src/guessIfSequenceIsDnaAndNotProtein.ts +39 -0
  147. package/src/index.test.ts +43 -0
  148. package/src/index.ts +111 -0
  149. package/src/insertGapsIntoRefSeq.ts +43 -0
  150. package/src/insertSequenceDataAtPosition.ts +2 -0
  151. package/src/insertSequenceDataAtPositionOrRange.ts +328 -0
  152. package/src/isEnzymeType2S.ts +5 -0
  153. package/src/mapAnnotationsToRows.ts +256 -0
  154. package/src/prepareCircularViewData.ts +24 -0
  155. package/src/prepareRowData.ts +61 -0
  156. package/src/prepareRowData_output1.json +1 -0
  157. package/src/proteinAlphabet.ts +271 -0
  158. package/src/rotateBpsToPosition.ts +12 -0
  159. package/src/rotateSequenceDataToPosition.ts +54 -0
  160. package/src/shiftAnnotationsByLen.ts +24 -0
  161. package/src/threeLetterSequenceStringToAminoAcidMap.ts +198 -0
  162. package/src/tidyUpAnnotation.ts +205 -0
  163. package/src/tidyUpSequenceData.ts +213 -0
  164. package/src/types.ts +109 -0
  165. package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
  166. package/tidyUpAnnotation.d.ts +13 -11
  167. package/tidyUpSequenceData.d.ts +15 -1
  168. package/types.d.ts +105 -0
@@ -0,0 +1,271 @@
/**
 * Lookup table keyed by single-letter amino-acid code.
 *
 * Every entry carries:
 *  - `value`            the single-letter code (identical to its key)
 *  - `name`             display name
 *  - `threeLettersName` three-letter abbreviation (for the ambiguous codes
 *                       B/J/X/Z this holds the list of residues they stand for)
 *  - `colorByFamily`    hex color string
 *  - `color`            hsl color string
 *  - `mass`             numeric mass; 0 for stop, gap and ambiguous entries
 *
 * `hydrophobicity` is only present on the 20 standard residues; it is absent
 * for O, U, the stop/gap symbols and the ambiguous codes. Ambiguous codes
 * additionally carry `isAmbiguous: true` and an `aliases` string.
 */
const proteinAlphabet = {
  A: {
    value: "A",
    name: "Alanine",
    threeLettersName: "Ala",
    hydrophobicity: 1.8,
    colorByFamily: "#00FFFF",
    color: "hsl(327.3, 100%, 69%)",
    mass: 71.0779
  },
  R: {
    value: "R",
    name: "Arginine",
    threeLettersName: "Arg",
    hydrophobicity: -4.5,
    colorByFamily: "#FFC0CB",
    color: "hsl(258.1, 100%, 69%)",
    mass: 156.18568
  },

  N: {
    value: "N",
    name: "Asparagine",
    threeLettersName: "Asn",
    hydrophobicity: -3.5,
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    mass: 114.10264
  },
  D: {
    value: "D",
    name: "Aspartic acid",
    threeLettersName: "Asp",
    hydrophobicity: -3.5,
    colorByFamily: "#EE82EE",
    color: "hsl(268.9, 100%, 69%)",
    mass: 115.0874
  },
  C: {
    value: "C",
    name: "Cysteine",
    threeLettersName: "Cys",
    hydrophobicity: 2.5,
    colorByFamily: "#FFFF00",
    color: "hsl(335.1, 100%, 69%)",
    mass: 103.1429
  },

  E: {
    value: "E",
    name: "Glutamic acid",
    threeLettersName: "Glu",
    hydrophobicity: -3.5,
    colorByFamily: "#EE82EE",
    color: "hsl(268.9, 100%, 69%)",
    mass: 129.11398
  },
  Q: {
    value: "Q",
    name: "Glutamine",
    threeLettersName: "Gln",
    hydrophobicity: -3.5,
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    mass: 128.12922
  },
  G: {
    value: "G",
    name: "Glycine",
    threeLettersName: "Gly",
    hydrophobicity: -0.4,
    colorByFamily: "#00FFFF",
    color: "hsl(303.1, 100%, 69%)",
    mass: 57.05132
  },

  H: {
    value: "H",
    name: "Histidine",
    threeLettersName: "His",
    hydrophobicity: -3.2,
    colorByFamily: "#FFC0CB",
    color: "hsl(272.2, 100%, 69%)",
    mass: 137.13928
  },

  I: {
    value: "I",
    // No trailing whitespace: this string is shown to users and compared as-is.
    name: "Isoleucine",
    threeLettersName: "Ile",
    hydrophobicity: 4.5,
    colorByFamily: "#00FFFF",
    color: "hsl(356.9, 100%, 69%)",
    mass: 113.15764
  },
  L: {
    value: "L",
    name: "Leucine",
    threeLettersName: "Leu",
    hydrophobicity: 3.8,
    colorByFamily: "#00FFFF",
    color: "hsl(349.4, 100%, 69%)",
    mass: 113.15764
  },
  K: {
    value: "K",
    name: "Lysine",
    threeLettersName: "Lys",
    hydrophobicity: -3.9,
    colorByFamily: "#FFC0CB",
    color: "hsl(264.7, 100%, 69%)",
    mass: 128.17228
  },

  O: {
    value: "O",
    name: "Pyrrolysine",
    threeLettersName: "Pyl",
    colorByFamily: "#FFC0CB",
    color: "hsl(264.7, 100%, 69%)",
    mass: 255.313
  },

  M: {
    value: "M",
    name: "Methionine",
    threeLettersName: "Met",
    hydrophobicity: 1.9,
    colorByFamily: "#FFFF00",
    color: "hsl(328.5, 100%, 69%)",
    mass: 131.19606
  },
  F: {
    value: "F",
    name: "Phenylalanine",
    threeLettersName: "Phe",
    hydrophobicity: 2.8,
    colorByFamily: "#FFA500",
    color: "hsl(338.4, 100%, 69%)",
    mass: 147.17386
  },
  P: {
    value: "P",
    name: "Proline",
    threeLettersName: "Pro",
    hydrophobicity: -1.6,
    colorByFamily: "#00FFFF",
    color: "hsl(289.9, 100%, 69%)",
    mass: 97.11518
  },
  S: {
    value: "S",
    name: "Serine",
    threeLettersName: "Ser",
    hydrophobicity: -0.8,
    colorByFamily: "#90EE90",
    color: "hsl(298.6, 100%, 69%)",
    mass: 87.0773
  },
  T: {
    value: "T",
    name: "Threonine",
    threeLettersName: "Thr",
    hydrophobicity: -0.7,
    colorByFamily: "#90EE90",
    color: "hsl(299.8, 100%, 69%)",
    mass: 101.10388
  },
  U: {
    value: "U",
    name: "Selenocysteine",
    threeLettersName: "Sec",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 150.3079
  },
  W: {
    value: "W",
    name: "Tryptophan",
    threeLettersName: "Trp",
    hydrophobicity: -0.9,
    colorByFamily: "#FFA500",
    color: "hsl(297.6, 100%, 69%)",
    mass: 186.2099
  },
  Y: {
    value: "Y",
    name: "Tyrosine",
    threeLettersName: "Tyr",
    hydrophobicity: -1.3,
    colorByFamily: "#FFA500",
    color: "hsl(293.2, 100%, 69%)",
    mass: 163.17326
  },
  V: {
    value: "V",
    name: "Valine",
    threeLettersName: "Val",
    hydrophobicity: 4.2,
    colorByFamily: "#00FFFF",
    color: "hsl(353.6, 100%, 69%)",
    mass: 99.13106
  },
  "*": {
    value: "*",
    name: "Stop",
    threeLettersName: "Stop",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  ".": {
    //tnr: this is actually a deletion/gap character (previously we had this as a stop character which is incorrect) https://www.dnabaser.com/articles/IUPAC%20ambiguity%20codes.html
    value: ".",
    name: "Gap",
    threeLettersName: "Gap",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  "-": {
    value: "-",
    name: "Gap",
    threeLettersName: "Gap",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  B: {
    value: "B",
    threeLettersName: "ND",
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    isAmbiguous: true,
    name: "B",
    aliases: "ND",
    mass: 0
  },
  J: {
    value: "J",
    threeLettersName: "IL",
    colorByFamily: "#00FFFF",
    color: "hsl(352, 100%, 69%)",
    isAmbiguous: true,
    name: "J",
    aliases: "IL",
    mass: 0
  },
  X: {
    value: "X",
    threeLettersName: "ACDEFGHIKLMNPQRSTVWY",
    colorByFamily: "#FFFFFF",
    color: "hsl(60, 100%, 69%)",
    isAmbiguous: true,
    name: "X",
    aliases: "ACDEFGHIKLMNPQRSTVWY",
    mass: 0
  },
  Z: {
    value: "Z",
    threeLettersName: "QE",
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    isAmbiguous: true,
    name: "Z",
    aliases: "QE",
    mass: 0
  }
};

export default proteinAlphabet;
@@ -0,0 +1,12 @@
/**
 * Rotates a sequence string so that the character currently at
 * `caretPosition` becomes the first character of the result.
 *
 * `caretPosition` is wrapped into the range `[0, bps.length)`, so negative
 * values and values past the end of the string are accepted
 * (e.g. `-1` moves the last character to the front).
 *
 * Implemented with string slicing rather than spreading an array of
 * characters into `push`, because spreading a very long array as call
 * arguments throws a `RangeError` on large sequences.
 *
 * @param bps - the sequence string to rotate
 * @param caretPosition - index that should become position 0
 * @returns the rotated string (same length as `bps`)
 */
export default function rotateBpsToPosition(
  bps: string,
  caretPosition: number
): string {
  const length = bps.length;
  // Nothing to rotate; also avoids dividing by zero below.
  if (length === 0) return "";
  // Floored modulo: always lands in [0, length), even for negative input.
  const offset = caretPosition - length * Math.floor(caretPosition / length);
  return bps.slice(offset) + bps.slice(0, offset);
}
@@ -0,0 +1,54 @@
1
+ import { map } from "lodash-es";
2
+ import { adjustRangeToRotation } from "@teselagen/range-utils";
3
+ import tidyUpSequenceData, {
4
+ TidyUpSequenceDataOptions
5
+ } from "./tidyUpSequenceData";
6
+ import { modifiableTypes } from "./annotationTypes";
7
+ import rotateBpsToPosition from "./rotateBpsToPosition";
8
+ import { SequenceData, Annotation } from "./types";
9
+
/**
 * Rotates a sequence and its annotations so that `caretPosition` becomes the
 * new origin.
 *
 * The input is first passed through `tidyUpSequenceData` (with
 * `doNotRemoveInvalidChars: true` unless `options` overrides it); the object
 * it returns is then updated and returned.
 *
 * @param sequenceData - sequence data to rotate
 * @param caretPosition - position that should become index 0
 * @param options - extra options forwarded to `tidyUpSequenceData`
 * @returns the tidied sequence data with rotated bases and annotations
 */
export default function rotateSequenceDataToPosition(
  sequenceData: SequenceData,
  caretPosition: number,
  options: TidyUpSequenceDataOptions = {}
) {
  const rotated = tidyUpSequenceData(sequenceData, {
    doNotRemoveInvalidChars: true,
    ...options
  });

  // Rotate the bases themselves.
  rotated.sequence = rotateBpsToPosition(rotated.sequence, caretPosition);

  // Rotation does not change the length, so read it once for all annotation types.
  const sequenceLength = rotated.sequence.length;

  // Shift every modifiable annotation list by the same rotation.
  modifiableTypes.forEach(annotationType => {
    const annotations = rotated[annotationType] as Annotation[];
    rotated[annotationType] = adjustAnnotationsToRotation(
      annotations,
      caretPosition,
      sequenceLength
    );
  });

  return rotated;
}
38
+
/**
 * Applies `adjustRangeToRotation` to each annotation and, when present, to
 * each entry of its `locations` array.
 *
 * @param annotationsToBeAdjusted - annotations to shift
 * @param positionToRotateTo - rotation offset passed to `adjustRangeToRotation`
 * @param maxLength - sequence length passed to `adjustRangeToRotation`
 * @returns a new array of adjusted annotations
 */
function adjustAnnotationsToRotation(
  annotationsToBeAdjusted: Annotation[],
  positionToRotateTo: number,
  maxLength: number
) {
  const adjusted = map(annotationsToBeAdjusted, annotation => {
    const rotatedLocations = annotation.locations
      ? annotation.locations.map(location =>
          adjustRangeToRotation(location, positionToRotateTo, maxLength)
        )
      : undefined;
    const rotatedAnnotation = adjustRangeToRotation(
      annotation,
      positionToRotateTo,
      maxLength
    );
    // `locations` is always written last so it overrides whatever was copied over.
    return { ...rotatedAnnotation, locations: rotatedLocations };
  });
  // The mapper always yields an object, so this guard never drops anything here;
  // it is retained as a safety net.
  return adjusted.filter(entry => Boolean(entry));
}
@@ -0,0 +1,24 @@
1
+ import { modifiableTypes } from "./annotationTypes";
2
+ import adjustAnnotationsToInsert from "./adjustAnnotationsToInsert";
3
+ import { SequenceData } from "./types";
4
+
/**
 * Shifts every modifiable annotation list on `seqData` to account for an
 * insert of `insertLength` at `caretPosition`.
 *
 * Mutates `seqData` in place: each annotation list that exists is replaced by
 * the result of `adjustAnnotationsToInsert`. Annotation types that are missing
 * (falsy) on `seqData` are left untouched. Returns nothing.
 */
export default function shiftAnnotationsByLen({
  seqData,
  caretPosition,
  insertLength
}: {
  seqData: SequenceData;
  caretPosition: number;
  insertLength: number;
}) {
  modifiableTypes.forEach(annotationType => {
    const current = seqData[annotationType];
    // Skip annotation types this sequence does not have.
    if (!current) return;
    seqData[annotationType] = adjustAnnotationsToInsert(
      current,
      caretPosition,
      insertLength
    );
  });
}
@@ -0,0 +1,198 @@
1
+ import proteinAlphabet from "./proteinAlphabet";
2
+
/** Shape of the values stored in the codon lookup table. */
type CodonTableEntry = {
  value: string;
  name: string;
  threeLettersName: string;
  hydrophobicity?: number;
  colorByFamily: string;
  color: string;
  mass: number;
  isAmbiguous?: boolean;
  aliases?: string;
};

/**
 * Unambiguous codons grouped by the amino acid they map to, written as
 * space-separated lowercase triplets. Both DNA (t) and RNA (u) spellings are
 * listed explicitly. The two trailing rows map the gap triplets.
 * Row and codon order determine the key insertion order of the table below.
 */
const codonGroups: [CodonTableEntry, string][] = [
  [proteinAlphabet.A, "gct gcc gca gcg gcu"],
  [proteinAlphabet.R, "cgt cgc cga cgg aga agg cgu"],
  [proteinAlphabet.N, "aat aac aau"],
  [proteinAlphabet.D, "gat gac gau"],
  [proteinAlphabet.C, "tgt tgc ugu ugc"],
  [proteinAlphabet.E, "gaa gag"],
  [proteinAlphabet.Q, "caa cag"],
  [proteinAlphabet.G, "ggt ggc gga ggg ggu"],
  [proteinAlphabet.H, "cat cac cau"],
  [proteinAlphabet.I, "att atc ata auu auc aua"],
  [proteinAlphabet.L, "ctt ctc cta ctg tta ttg cuu cuc cua cug uua uug"],
  [proteinAlphabet.K, "aaa aag"],
  [proteinAlphabet.M, "atg aug"],
  [proteinAlphabet.F, "ttt ttc uuu uuc"],
  [proteinAlphabet.P, "cct ccc cca ccg ccu"],
  [proteinAlphabet.S, "tct tcc tca tcg agt agc ucu ucc uca ucg agu"],
  [proteinAlphabet.T, "act acc aca acg acu"],
  [proteinAlphabet.W, "tgg ugg"],
  [proteinAlphabet.Y, "tat tac uau uac"],
  [proteinAlphabet.V, "gtt gtc gta gtg guu guc gua gug"],
  [proteinAlphabet["*"], "taa tag tga uaa uag uga"],
  [proteinAlphabet["."], "..."],
  [proteinAlphabet["-"], "---"]
];

/**
 * Seed table: lowercase three-letter codon -> amino-acid entry from
 * `proteinAlphabet`. Contains only the explicitly listed codons above.
 */
const initThreeLetterSequenceStringToAminoAcidMap: Record<
  string,
  CodonTableEntry
> = {};
for (const [aminoAcid, codons] of codonGroups) {
  for (const codon of codons.split(" ")) {
    initThreeLetterSequenceStringToAminoAcidMap[codon] = aminoAcid;
  }
}
121
+
// IUPAC nucleotide codes (DNA/RNA) with U awareness.
// Each row is [code, concrete bases it can stand for]; any code that can be T
// also lists U. Row order defines the key order of `IUPAC`.
const iupacRows: [string, string][] = [
  ["A", "A"],
  ["C", "C"],
  ["G", "G"],
  ["T", "T"],
  ["U", "U"],

  ["R", "AG"],
  ["Y", "CTU"],
  ["K", "GTU"],
  ["M", "AC"],
  ["S", "GC"],
  ["W", "ATU"],
  ["B", "CGTU"],
  ["D", "AGTU"],
  ["H", "ACTU"],
  ["V", "ACG"],
  ["N", "ACGTU"],
  ["X", "ACGTU"]
];

/** Uppercase nucleotide code -> list of single-letter bases it expands to. */
const IUPAC: Record<string, string[]> = {};
for (const [code, bases] of iupacRows) {
  IUPAC[code] = bases.split("");
}
143
+
/**
 * Resolves a (possibly ambiguous) three-letter codon to a single amino acid.
 *
 * Each character is expanded through `IUPAC` (characters without an entry
 * stand for themselves), every concrete combination is looked up in
 * `initThreeLetterSequenceStringToAminoAcidMap`, and the shared amino acid is
 * returned only when all combinations agree.
 *
 * @param threeLetterCodon - codon in any letter case
 * @returns the amino-acid entry, or `null` when any combination is unknown or
 *          the combinations map to different amino acids
 */
function expandAndResolve(threeLetterCodon: string) {
  const optionsPerPosition = threeLetterCodon
    .toUpperCase()
    .split("")
    .map(letter => IUPAC[letter] || [letter]);

  // Cartesian product of the per-position options, built one position at a time.
  let candidates = [""];
  optionsPerPosition.forEach(bases => {
    const extended: string[] = [];
    candidates.forEach(prefix => {
      bases.forEach(base => {
        extended.push(prefix + base);
      });
    });
    candidates = extended;
  });

  let resolved = null;
  for (const candidate of candidates) {
    const key = candidate.toLowerCase();
    // Try the codon as written, then its all-DNA and all-RNA spellings.
    const hit =
      initThreeLetterSequenceStringToAminoAcidMap[key] ??
      initThreeLetterSequenceStringToAminoAcidMap[key.replace(/u/g, "t")] ??
      initThreeLetterSequenceStringToAminoAcidMap[key.replace(/t/g, "u")];

    // An unknown concrete codon makes the whole expansion unresolvable.
    if (!hit) return null;

    if (!resolved) {
      resolved = hit;
      continue;
    }
    // Two expansions disagree: the ambiguous codon has no single translation.
    if (resolved.value !== hit.value) return null;
  }
  return resolved;
}
178
+
/**
 * Extends `initThreeLetterSequenceStringToAminoAcidMap` in place with every
 * three-letter combination of `IUPAC` codes that `expandAndResolve` can map
 * to a single amino acid, and returns that same object.
 *
 * Keys already present are left untouched; combinations that do not resolve
 * are not added.
 */
function getCodonToAminoAcidMap() {
  const codonTable = initThreeLetterSequenceStringToAminoAcidMap;
  const symbols = Object.keys(IUPAC);

  // Enumerate all IUPAC 3-mers: first letter outermost, third letter innermost.
  symbols.forEach(first => {
    symbols.forEach(second => {
      symbols.forEach(third => {
        const codon = first + second + third;
        const key = codon.toLowerCase();
        if (codonTable[key]) return;
        const aminoAcid = expandAndResolve(codon);
        if (aminoAcid) codonTable[key] = aminoAcid;
      });
    });
  });

  return codonTable;
}

/** Lowercase codon (including resolvable IUPAC-ambiguous codons) -> amino-acid entry. */
const threeLetterSequenceStringToAminoAcidMap = getCodonToAminoAcidMap();

export default threeLetterSequenceStringToAminoAcidMap;