@teselagen/sequence-utils 0.3.38-beta.3 → 0.3.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/DNAComplementMap.d.ts +1 -1
  2. package/README.md +2 -8
  3. package/addGapsToSeqReads.d.ts +3 -16
  4. package/adjustAnnotationsToInsert.d.ts +1 -2
  5. package/adjustBpsToReplaceOrInsert.d.ts +1 -2
  6. package/aliasedEnzymesByName.d.ts +1 -37
  7. package/aminoAcidToDegenerateDnaMap.d.ts +31 -1
  8. package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
  9. package/annotateSingleSeq.d.ts +4 -5
  10. package/annotationTypes.d.ts +2 -2
  11. package/autoAnnotate.d.ts +8 -17
  12. package/bioData.d.ts +58 -10
  13. package/calculateEndStability.d.ts +1 -1
  14. package/calculateNebTa.d.ts +1 -6
  15. package/calculateNebTm.d.ts +4 -6
  16. package/calculatePercentGC.d.ts +1 -1
  17. package/calculateSantaLuciaTm.d.ts +114 -28
  18. package/calculateTm.d.ts +1 -13
  19. package/computeDigestFragments.d.ts +24 -30
  20. package/condensePairwiseAlignmentDifferences.d.ts +1 -1
  21. package/convertAACaretPositionOrRangeToDna.d.ts +1 -2
  22. package/convertDnaCaretPositionOrRangeToAA.d.ts +1 -2
  23. package/cutSequenceByRestrictionEnzyme.d.ts +1 -2
  24. package/defaultEnzymesByName.d.ts +1 -2
  25. package/degenerateDnaToAminoAcidMap.d.ts +1 -1
  26. package/degenerateRnaToAminoAcidMap.d.ts +1 -1
  27. package/deleteSequenceDataAtRange.d.ts +1 -2
  28. package/diffUtils.d.ts +7 -9
  29. package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +1 -2
  30. package/featureTypesAndColors.d.ts +6 -19
  31. package/filterSequenceString.d.ts +10 -14
  32. package/findApproxMatches.d.ts +1 -7
  33. package/findNearestRangeOfSequenceOverlapToPosition.d.ts +1 -2
  34. package/findOrfsInPlasmid.d.ts +11 -2
  35. package/findSequenceMatches.d.ts +1 -11
  36. package/generateAnnotations.d.ts +1 -2
  37. package/generateSequenceData.d.ts +13 -8
  38. package/getAllInsertionsInSeqReads.d.ts +1 -11
  39. package/getAminoAcidDataForEachBaseOfDna.d.ts +5 -6
  40. package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
  41. package/getAminoAcidStringFromSequenceString.d.ts +1 -3
  42. package/getCodonRangeForAASliver.d.ts +4 -3
  43. package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  44. package/getComplementSequenceAndAnnotations.d.ts +1 -5
  45. package/getComplementSequenceString.d.ts +1 -1
  46. package/getCutsiteType.d.ts +1 -2
  47. package/getCutsitesFromSequence.d.ts +1 -2
  48. package/getDegenerateDnaStringFromAAString.d.ts +1 -1
  49. package/getDegenerateRnaStringFromAAString.d.ts +1 -1
  50. package/getDigestFragmentsForCutsites.d.ts +1 -4
  51. package/getDigestFragmentsForRestrictionEnzymes.d.ts +1 -8
  52. package/getInsertBetweenVals.d.ts +1 -2
  53. package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +1 -2
  54. package/getOrfsFromSequence.d.ts +11 -17
  55. package/getOverlapBetweenTwoSequences.d.ts +1 -2
  56. package/getPossiblePartsFromSequenceAndEnzymes.d.ts +1 -18
  57. package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
  58. package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  59. package/getReverseComplementAnnotation.d.ts +1 -11
  60. package/getReverseComplementSequenceAndAnnotations.d.ts +1 -5
  61. package/getReverseComplementSequenceString.d.ts +1 -1
  62. package/getReverseSequenceString.d.ts +1 -1
  63. package/getSequenceDataBetweenRange.d.ts +1 -9
  64. package/getVirtualDigest.d.ts +10 -11
  65. package/guessIfSequenceIsDnaAndNotProtein.d.ts +1 -5
  66. package/index.cjs +503 -734
  67. package/index.d.ts +5 -8
  68. package/index.js +503 -734
  69. package/index.umd.cjs +503 -734
  70. package/insertGapsIntoRefSeq.d.ts +1 -2
  71. package/insertSequenceDataAtPositionOrRange.d.ts +1 -10
  72. package/isEnzymeType2S.d.ts +1 -2
  73. package/mapAnnotationsToRows.d.ts +1 -9
  74. package/package.json +9 -12
  75. package/prepareCircularViewData.d.ts +1 -2
  76. package/prepareRowData.d.ts +3 -7
  77. package/proteinAlphabet.d.ts +1 -1
  78. package/rotateBpsToPosition.d.ts +1 -1
  79. package/rotateSequenceDataToPosition.d.ts +1 -3
  80. package/shiftAnnotationsByLen.d.ts +3 -4
  81. package/src/autoAnnotate.test.js +1 -0
  82. package/src/filterSequenceString.js +7 -4
  83. package/src/getSequenceDataBetweenRange.js +11 -2
  84. package/src/getSequenceDataBetweenRange.test.js +42 -0
  85. package/src/prepareRowData_output1.json +0 -1
  86. package/src/tidyUpSequenceData.js +5 -2
  87. package/threeLetterSequenceStringToAminoAcidMap.d.ts +921 -11
  88. package/tidyUpAnnotation.d.ts +11 -13
  89. package/tidyUpSequenceData.d.ts +1 -15
  90. package/src/DNAComplementMap.ts +0 -32
  91. package/src/addGapsToSeqReads.ts +0 -436
  92. package/src/adjustAnnotationsToInsert.ts +0 -20
  93. package/src/adjustBpsToReplaceOrInsert.ts +0 -73
  94. package/src/aliasedEnzymesByName.ts +0 -7366
  95. package/src/aminoAcidToDegenerateDnaMap.ts +0 -32
  96. package/src/aminoAcidToDegenerateRnaMap.ts +0 -32
  97. package/src/annotateSingleSeq.ts +0 -37
  98. package/src/annotationTypes.ts +0 -23
  99. package/src/autoAnnotate.ts +0 -290
  100. package/src/bioData.ts +0 -65
  101. package/src/calculateEndStability.ts +0 -91
  102. package/src/calculateNebTa.ts +0 -46
  103. package/src/calculateNebTm.ts +0 -132
  104. package/src/calculatePercentGC.ts +0 -3
  105. package/src/calculateSantaLuciaTm.ts +0 -184
  106. package/src/calculateTm.ts +0 -242
  107. package/src/computeDigestFragments.ts +0 -238
  108. package/src/condensePairwiseAlignmentDifferences.ts +0 -85
  109. package/src/convertAACaretPositionOrRangeToDna.ts +0 -28
  110. package/src/convertDnaCaretPositionOrRangeToAA.ts +0 -28
  111. package/src/cutSequenceByRestrictionEnzyme.ts +0 -345
  112. package/src/defaultEnzymesByName.ts +0 -280
  113. package/src/degenerateDnaToAminoAcidMap.ts +0 -5
  114. package/src/degenerateRnaToAminoAcidMap.ts +0 -5
  115. package/src/deleteSequenceDataAtRange.ts +0 -13
  116. package/src/diffUtils.ts +0 -80
  117. package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +0 -16
  118. package/src/featureTypesAndColors.ts +0 -167
  119. package/src/filterSequenceString.ts +0 -153
  120. package/src/findApproxMatches.ts +0 -58
  121. package/src/findNearestRangeOfSequenceOverlapToPosition.ts +0 -43
  122. package/src/findOrfsInPlasmid.ts +0 -31
  123. package/src/findSequenceMatches.ts +0 -154
  124. package/src/generateAnnotations.ts +0 -39
  125. package/src/generateSequenceData.ts +0 -212
  126. package/src/getAllInsertionsInSeqReads.ts +0 -100
  127. package/src/getAminoAcidDataForEachBaseOfDna.ts +0 -305
  128. package/src/getAminoAcidFromSequenceTriplet.ts +0 -27
  129. package/src/getAminoAcidStringFromSequenceString.ts +0 -36
  130. package/src/getCodonRangeForAASliver.ts +0 -73
  131. package/src/getComplementAminoAcidStringFromSequenceString.ts +0 -10
  132. package/src/getComplementSequenceAndAnnotations.ts +0 -25
  133. package/src/getComplementSequenceString.ts +0 -23
  134. package/src/getCutsiteType.ts +0 -18
  135. package/src/getCutsitesFromSequence.ts +0 -22
  136. package/src/getDegenerateDnaStringFromAAString.ts +0 -15
  137. package/src/getDegenerateRnaStringFromAAString.ts +0 -15
  138. package/src/getDigestFragmentsForCutsites.ts +0 -126
  139. package/src/getDigestFragmentsForRestrictionEnzymes.ts +0 -50
  140. package/src/getInsertBetweenVals.ts +0 -31
  141. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +0 -40
  142. package/src/getMassOfAaString.ts +0 -29
  143. package/src/getOrfsFromSequence.ts +0 -132
  144. package/src/getOverlapBetweenTwoSequences.ts +0 -30
  145. package/src/getPossiblePartsFromSequenceAndEnzymes.ts +0 -149
  146. package/src/getReverseAminoAcidStringFromSequenceString.ts +0 -22
  147. package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +0 -10
  148. package/src/getReverseComplementAnnotation.ts +0 -33
  149. package/src/getReverseComplementSequenceAndAnnotations.ts +0 -46
  150. package/src/getReverseComplementSequenceString.ts +0 -18
  151. package/src/getReverseSequenceString.ts +0 -12
  152. package/src/getSequenceDataBetweenRange.ts +0 -154
  153. package/src/getVirtualDigest.ts +0 -139
  154. package/src/guessIfSequenceIsDnaAndNotProtein.ts +0 -39
  155. package/src/index.test.ts +0 -43
  156. package/src/index.ts +0 -111
  157. package/src/insertGapsIntoRefSeq.ts +0 -43
  158. package/src/insertSequenceDataAtPosition.ts +0 -2
  159. package/src/insertSequenceDataAtPositionOrRange.ts +0 -328
  160. package/src/isEnzymeType2S.ts +0 -5
  161. package/src/mapAnnotationsToRows.ts +0 -256
  162. package/src/prepareCircularViewData.ts +0 -24
  163. package/src/prepareRowData.ts +0 -61
  164. package/src/proteinAlphabet.ts +0 -271
  165. package/src/rotateBpsToPosition.ts +0 -12
  166. package/src/rotateSequenceDataToPosition.ts +0 -54
  167. package/src/shiftAnnotationsByLen.ts +0 -24
  168. package/src/threeLetterSequenceStringToAminoAcidMap.ts +0 -198
  169. package/src/tidyUpAnnotation.ts +0 -205
  170. package/src/tidyUpSequenceData.ts +0 -213
  171. package/src/types.ts +0 -109
  172. package/types.d.ts +0 -105
@@ -1,271 +0,0 @@
1
/**
 * Lookup table of amino-acid (and related symbol) metadata keyed by the
 * one-letter code.
 *
 * Every entry carries:
 * - `value`: the one-letter code (identical to the entry's key)
 * - `name`: display name
 * - `threeLettersName`: three-letter abbreviation (for ambiguity codes this
 *   holds the list of one-letter codes the symbol can stand for)
 * - `colorByFamily` / `color`: CSS colour strings
 * - `mass`: numeric mass; 0 for stop, gap and ambiguity symbols
 *   (presumably residue mass in Daltons - confirm with callers)
 *
 * Optional fields:
 * - `hydrophobicity`: absent for O, U, stop, gap and ambiguity symbols
 *   (values look like the Kyte-Doolittle scale - confirm)
 * - `isAmbiguous` / `aliases`: present only on the ambiguity codes B, J, X, Z
 */
const proteinAlphabet = {
  A: {
    value: "A",
    name: "Alanine",
    threeLettersName: "Ala",
    hydrophobicity: 1.8,
    colorByFamily: "#00FFFF",
    color: "hsl(327.3, 100%, 69%)",
    mass: 71.0779
  },
  R: {
    value: "R",
    name: "Arginine",
    threeLettersName: "Arg",
    hydrophobicity: -4.5,
    colorByFamily: "#FFC0CB",
    color: "hsl(258.1, 100%, 69%)",
    mass: 156.18568
  },

  N: {
    value: "N",
    name: "Asparagine",
    threeLettersName: "Asn",
    hydrophobicity: -3.5,
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    mass: 114.10264
  },
  D: {
    value: "D",
    name: "Aspartic acid",
    threeLettersName: "Asp",
    hydrophobicity: -3.5,
    colorByFamily: "#EE82EE",
    color: "hsl(268.9, 100%, 69%)",
    mass: 115.0874
  },
  C: {
    value: "C",
    name: "Cysteine",
    threeLettersName: "Cys",
    hydrophobicity: 2.5,
    colorByFamily: "#FFFF00",
    color: "hsl(335.1, 100%, 69%)",
    mass: 103.1429
  },

  E: {
    value: "E",
    name: "Glutamic acid",
    threeLettersName: "Glu",
    hydrophobicity: -3.5,
    colorByFamily: "#EE82EE",
    color: "hsl(268.9, 100%, 69%)",
    mass: 129.11398
  },
  Q: {
    value: "Q",
    name: "Glutamine",
    threeLettersName: "Gln",
    hydrophobicity: -3.5,
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    mass: 128.12922
  },
  G: {
    value: "G",
    name: "Glycine",
    threeLettersName: "Gly",
    hydrophobicity: -0.4,
    colorByFamily: "#00FFFF",
    color: "hsl(303.1, 100%, 69%)",
    mass: 57.05132
  },

  H: {
    value: "H",
    name: "Histidine",
    threeLettersName: "His",
    hydrophobicity: -3.2,
    colorByFamily: "#FFC0CB",
    color: "hsl(272.2, 100%, 69%)",
    mass: 137.13928
  },

  I: {
    value: "I",
    // Previously "Isoleucine " with a trailing space, which leaked into
    // displayed labels and broke exact-name comparisons.
    name: "Isoleucine",
    threeLettersName: "Ile",
    hydrophobicity: 4.5,
    colorByFamily: "#00FFFF",
    color: "hsl(356.9, 100%, 69%)",
    mass: 113.15764
  },
  L: {
    value: "L",
    name: "Leucine",
    threeLettersName: "Leu",
    hydrophobicity: 3.8,
    colorByFamily: "#00FFFF",
    color: "hsl(349.4, 100%, 69%)",
    mass: 113.15764
  },
  K: {
    value: "K",
    name: "Lysine",
    threeLettersName: "Lys",
    hydrophobicity: -3.9,
    colorByFamily: "#FFC0CB",
    color: "hsl(264.7, 100%, 69%)",
    mass: 128.17228
  },

  O: {
    value: "O",
    name: "Pyrrolysine",
    threeLettersName: "Pyl",
    colorByFamily: "#FFC0CB",
    color: "hsl(264.7, 100%, 69%)",
    mass: 255.313
  },

  M: {
    value: "M",
    name: "Methionine",
    threeLettersName: "Met",
    hydrophobicity: 1.9,
    colorByFamily: "#FFFF00",
    color: "hsl(328.5, 100%, 69%)",
    mass: 131.19606
  },
  F: {
    value: "F",
    name: "Phenylalanine",
    threeLettersName: "Phe",
    hydrophobicity: 2.8,
    colorByFamily: "#FFA500",
    color: "hsl(338.4, 100%, 69%)",
    mass: 147.17386
  },
  P: {
    value: "P",
    name: "Proline",
    threeLettersName: "Pro",
    hydrophobicity: -1.6,
    colorByFamily: "#00FFFF",
    color: "hsl(289.9, 100%, 69%)",
    mass: 97.11518
  },
  S: {
    value: "S",
    name: "Serine",
    threeLettersName: "Ser",
    hydrophobicity: -0.8,
    colorByFamily: "#90EE90",
    color: "hsl(298.6, 100%, 69%)",
    mass: 87.0773
  },
  T: {
    value: "T",
    name: "Threonine",
    threeLettersName: "Thr",
    hydrophobicity: -0.7,
    colorByFamily: "#90EE90",
    color: "hsl(299.8, 100%, 69%)",
    mass: 101.10388
  },
  U: {
    value: "U",
    name: "Selenocysteine",
    threeLettersName: "Sec",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 150.3079
  },
  W: {
    value: "W",
    name: "Tryptophan",
    threeLettersName: "Trp",
    hydrophobicity: -0.9,
    colorByFamily: "#FFA500",
    color: "hsl(297.6, 100%, 69%)",
    mass: 186.2099
  },
  Y: {
    value: "Y",
    name: "Tyrosine",
    threeLettersName: "Tyr",
    hydrophobicity: -1.3,
    colorByFamily: "#FFA500",
    color: "hsl(293.2, 100%, 69%)",
    mass: 163.17326
  },
  V: {
    value: "V",
    name: "Valine",
    threeLettersName: "Val",
    hydrophobicity: 4.2,
    colorByFamily: "#00FFFF",
    color: "hsl(353.6, 100%, 69%)",
    mass: 99.13106
  },
  "*": {
    value: "*",
    name: "Stop",
    threeLettersName: "Stop",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  ".": {
    //tnr: this is actually a deletion/gap character (previously we had this as a stop character which is incorrect) https://www.dnabaser.com/articles/IUPAC%20ambiguity%20codes.html
    value: ".",
    name: "Gap",
    threeLettersName: "Gap",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  "-": {
    value: "-",
    name: "Gap",
    threeLettersName: "Gap",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  // Ambiguity codes: `aliases` lists the one-letter codes each symbol may represent.
  B: {
    value: "B",
    threeLettersName: "ND",
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    isAmbiguous: true,
    name: "B",
    aliases: "ND",
    mass: 0
  },
  J: {
    value: "J",
    threeLettersName: "IL",
    colorByFamily: "#00FFFF",
    color: "hsl(352, 100%, 69%)",
    isAmbiguous: true,
    name: "J",
    aliases: "IL",
    mass: 0
  },
  X: {
    value: "X",
    threeLettersName: "ACDEFGHIKLMNPQRSTVWY",
    colorByFamily: "#FFFFFF",
    color: "hsl(60, 100%, 69%)",
    isAmbiguous: true,
    name: "X",
    aliases: "ACDEFGHIKLMNPQRSTVWY",
    mass: 0
  },
  Z: {
    value: "Z",
    threeLettersName: "QE",
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    isAmbiguous: true,
    name: "Z",
    aliases: "QE",
    mass: 0
  }
};
270
-
271
- export default proteinAlphabet;
@@ -1,12 +0,0 @@
1
/**
 * Rotates a sequence string so that the character at `caretPosition` becomes
 * the first character; everything before it is moved to the end.
 *
 * The position wraps around the sequence length, so negative positions and
 * positions past the end are accepted. An empty string is returned unchanged.
 *
 * Implemented with string slicing rather than array `push(...spread)`: the
 * spread form passes every moved character as a call argument and throws a
 * RangeError on very long sequences.
 *
 * @param bps - the sequence to rotate
 * @param caretPosition - index that should become the new start of the sequence
 * @returns the rotated sequence
 */
export default function rotateBpsToPosition(
  bps: string,
  caretPosition: number
): string {
  const offset = normalizeRotation(caretPosition, bps.length);
  return bps.slice(offset) + bps.slice(0, offset);
}

/**
 * Wraps `count` into the range [0, length] using floor-based modulo so that
 * negative counts rotate from the end. Returns 0 when there is nothing to
 * rotate or the arithmetic is not finite (NaN / Infinity input).
 */
function normalizeRotation(count: number, length: number): number {
  if (length === 0) return 0;
  const wrapped = count - length * Math.floor(count / length);
  return Number.isFinite(wrapped) ? wrapped : 0;
}
@@ -1,54 +0,0 @@
1
- import { map } from "lodash-es";
2
- import { adjustRangeToRotation } from "@teselagen/range-utils";
3
- import tidyUpSequenceData, {
4
- TidyUpSequenceDataOptions
5
- } from "./tidyUpSequenceData";
6
- import { modifiableTypes } from "./annotationTypes";
7
- import rotateBpsToPosition from "./rotateBpsToPosition";
8
- import { SequenceData, Annotation } from "./types";
9
-
10
/**
 * Returns a tidied copy of `sequenceData` whose sequence string and
 * modifiable annotations have been rotated so that `caretPosition` becomes
 * the start of the sequence.
 *
 * @param sequenceData - the sequence data to rotate
 * @param caretPosition - position that should become the new origin
 * @param options - forwarded to `tidyUpSequenceData`; `doNotRemoveInvalidChars`
 *   defaults to `true` here but can be overridden by the caller
 * @returns the object produced by `tidyUpSequenceData`, with rotation applied
 */
export default function rotateSequenceDataToPosition(
  sequenceData: SequenceData,
  caretPosition: number,
  options: TidyUpSequenceDataOptions = {}
) {
  const tidyOptions = { doNotRemoveInvalidChars: true, ...options };
  const rotated = tidyUpSequenceData(sequenceData, tidyOptions);

  // Rotate the bases first; the annotation pass below uses the resulting length.
  rotated.sequence = rotateBpsToPosition(rotated.sequence, caretPosition);

  // Shift every modifiable annotation type by the same rotation.
  for (const annotationType of modifiableTypes) {
    rotated[annotationType] = adjustAnnotationsToRotation(
      rotated[annotationType] as Annotation[],
      caretPosition,
      rotated.sequence.length
    );
  }

  return rotated;
}
38
-
39
/**
 * Applies `adjustRangeToRotation` to each annotation and, when present, to
 * each entry of its `locations` array.
 *
 * @param annotationsToBeAdjusted - annotations to rotate
 * @param positionToRotateTo - position that becomes the new origin
 * @param maxLength - sequence length passed through to `adjustRangeToRotation`
 * @returns a new array of adjusted annotations; `locations` is `undefined`
 *   on annotations that had none
 */
function adjustAnnotationsToRotation(
  annotationsToBeAdjusted: Annotation[],
  positionToRotateTo: number,
  maxLength: number
) {
  const adjusted = map(annotationsToBeAdjusted, annotation => {
    const rotatedAnnotation = adjustRangeToRotation(
      annotation,
      positionToRotateTo,
      maxLength
    );
    const rotatedLocations = annotation.locations
      ? annotation.locations.map(location =>
          adjustRangeToRotation(location, positionToRotateTo, maxLength)
        )
      : undefined;
    return { ...rotatedAnnotation, locations: rotatedLocations };
  });
  // Kept from the original: drops any falsy entries (fully deleted ranges).
  return adjusted.filter(entry => Boolean(entry));
}
@@ -1,24 +0,0 @@
1
- import { modifiableTypes } from "./annotationTypes";
2
- import adjustAnnotationsToInsert from "./adjustAnnotationsToInsert";
3
- import { SequenceData } from "./types";
4
-
5
/**
 * Shifts the annotations of `seqData` in place to account for an insertion
 * of `insertLength` at `caretPosition`.
 *
 * Each modifiable annotation type present on `seqData` is replaced with the
 * result of `adjustAnnotationsToInsert`; annotation types that are missing
 * are left untouched. Nothing is returned - `seqData` itself is mutated.
 */
export default function shiftAnnotationsByLen({
  seqData,
  caretPosition,
  insertLength
}: {
  seqData: SequenceData;
  caretPosition: number;
  insertLength: number;
}) {
  for (const annotationType of modifiableTypes) {
    const current = seqData[annotationType];
    if (!current) continue;
    seqData[annotationType] = adjustAnnotationsToInsert(
      current,
      caretPosition,
      insertLength
    );
  }
}
@@ -1,198 +0,0 @@
1
- import proteinAlphabet from "./proteinAlphabet";
2
-
3
// Unambiguous codons grouped by the `proteinAlphabet` key they translate to.
// Codons are lowercase and space-separated, with both the DNA (t) and RNA (u)
// spellings listed. The order of rows and codons determines the insertion
// order of the resulting map, so keep it stable.
const codonsByAminoAcid: ReadonlyArray<
  readonly [keyof typeof proteinAlphabet, string]
> = [
  ["A", "gct gcc gca gcg gcu"],
  ["R", "cgt cgc cga cgg aga agg cgu"],
  ["N", "aat aac aau"],
  ["D", "gat gac gau"],
  ["C", "tgt tgc ugu ugc"],
  ["E", "gaa gag"],
  ["Q", "caa cag"],
  ["G", "ggt ggc gga ggg ggu"],
  ["H", "cat cac cau"],
  ["I", "att atc ata auu auc aua"],
  ["L", "ctt ctc cta ctg tta ttg cuu cuc cua cug uua uug"],
  ["K", "aaa aag"],
  ["M", "atg aug"],
  ["F", "ttt ttc uuu uuc"],
  ["P", "cct ccc cca ccg ccu"],
  ["S", "tct tcc tca tcg agt agc ucu ucc uca ucg agu"],
  ["T", "act acc aca acg acu"],
  ["W", "tgg ugg"],
  ["Y", "tat tac uau uac"],
  ["V", "gtt gtc gta gtg guu guc gua gug"],
  ["*", "taa tag tga uaa uag uga"],
  [".", "..."],
  ["-", "---"]
];

// Maps each lowercase unambiguous codon (plus the "..." and "---" gap
// triplets) to its `proteinAlphabet` entry.
const initThreeLetterSequenceStringToAminoAcidMap: Record<
  string,
  {
    value: string;
    name: string;
    threeLettersName: string;
    hydrophobicity?: number;
    colorByFamily: string;
    color: string;
    mass: number;
    isAmbiguous?: boolean;
    aliases?: string;
  }
> = {};

for (const [aminoAcidKey, codons] of codonsByAminoAcid) {
  for (const codon of codons.split(" ")) {
    initThreeLetterSequenceStringToAminoAcidMap[codon] =
      proteinAlphabet[aminoAcidKey];
  }
}
121
-
122
// IUPAC nucleotide codes (DNA/RNA) with U awareness.
// Each row is [code, concrete bases the code can stand for]; wherever T is a
// possibility U is listed as well. Row order defines the key order of IUPAC.
const iupacExpansions: ReadonlyArray<readonly [string, string]> = [
  ["A", "A"],
  ["C", "C"],
  ["G", "G"],
  ["T", "T"],
  ["U", "U"],
  ["R", "AG"],
  ["Y", "CTU"],
  ["K", "GTU"],
  ["M", "AC"],
  ["S", "GC"],
  ["W", "ATU"],
  ["B", "CGTU"],
  ["D", "AGTU"],
  ["H", "ACTU"],
  ["V", "ACG"],
  ["N", "ACGTU"],
  ["X", "ACGTU"]
];

const IUPAC: Record<string, string[]> = {};
for (const [code, bases] of iupacExpansions) {
  IUPAC[code] = bases.split("");
}
143
-
144
/**
 * Resolves a possibly ambiguous codon to a single amino acid.
 *
 * Every character is expanded through `IUPAC` (characters without an entry
 * stand for themselves), all concrete combinations are generated, and each
 * one is looked up in `initThreeLetterSequenceStringToAminoAcidMap` - first
 * as-is, then with u replaced by t, then with t replaced by u.
 *
 * @returns the shared amino-acid entry when every combination translates to
 *   the same `value`; `null` when any combination is unknown or when two
 *   combinations disagree
 */
function expandAndResolve(threeLetterCodon: string) {
  // Cartesian product of the per-position base sets, built left to right.
  const concreteCodons = threeLetterCodon
    .toUpperCase()
    .split("")
    .map(symbol => IUPAC[symbol] || [symbol])
    .reduce<string[]>(
      (prefixes, bases) => {
        const extended: string[] = [];
        prefixes.forEach(prefix =>
          bases.forEach(base => extended.push(prefix + base))
        );
        return extended;
      },
      [""]
    );

  let resolved = null;
  for (const concrete of concreteCodons) {
    const key = concrete.toLowerCase();
    const candidate =
      initThreeLetterSequenceStringToAminoAcidMap[key] ??
      initThreeLetterSequenceStringToAminoAcidMap[key.replace(/u/g, "t")] ??
      initThreeLetterSequenceStringToAminoAcidMap[key.replace(/t/g, "u")];

    // An untranslatable combination makes the whole codon unresolvable.
    if (!candidate) return null;

    if (!resolved) {
      resolved = candidate;
    } else if (resolved.value !== candidate.value) {
      // Two combinations translate differently: genuinely ambiguous.
      return null;
    }
  }
  return resolved;
}
178
-
179
/**
 * Extends `initThreeLetterSequenceStringToAminoAcidMap` in place with every
 * three-character combination of IUPAC codes that `expandAndResolve` can
 * translate to a single amino acid, and returns that same object.
 *
 * Codons already present in the map are left as they are.
 */
function getCodonToAminoAcidMap() {
  const codonMap = initThreeLetterSequenceStringToAminoAcidMap;
  const symbols = Object.keys(IUPAC);

  // Walk all IUPAC 3-mers in key order (first position varies slowest).
  symbols.forEach(first =>
    symbols.forEach(second =>
      symbols.forEach(third => {
        const ambiguousCodon = first + second + third;
        const key = ambiguousCodon.toLowerCase();
        if (codonMap[key]) return;
        const resolved = expandAndResolve(ambiguousCodon);
        if (resolved) codonMap[key] = resolved;
      })
    )
  );

  return codonMap;
}

// Built once at module load; this is the object the module exports.
const threeLetterSequenceStringToAminoAcidMap = getCodonToAminoAcidMap();
197
-
198
- export default threeLetterSequenceStringToAminoAcidMap;