@teselagen/sequence-utils 0.3.37 → 0.3.38-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DNAComplementMap.d.ts +1 -1
- package/addGapsToSeqReads.d.ts +16 -3
- package/adjustAnnotationsToInsert.d.ts +2 -1
- package/adjustBpsToReplaceOrInsert.d.ts +2 -1
- package/aliasedEnzymesByName.d.ts +37 -1
- package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
- package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
- package/annotateSingleSeq.d.ts +5 -4
- package/annotationTypes.d.ts +2 -2
- package/autoAnnotate.d.ts +17 -8
- package/bioData.d.ts +10 -58
- package/calculateEndStability.d.ts +1 -1
- package/calculateNebTa.d.ts +6 -1
- package/calculateNebTm.d.ts +6 -4
- package/calculatePercentGC.d.ts +1 -1
- package/calculateSantaLuciaTm.d.ts +28 -114
- package/calculateTm.d.ts +13 -1
- package/computeDigestFragments.d.ts +30 -24
- package/condensePairwiseAlignmentDifferences.d.ts +1 -1
- package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
- package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
- package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
- package/defaultEnzymesByName.d.ts +2 -1
- package/degenerateDnaToAminoAcidMap.d.ts +1 -1
- package/degenerateRnaToAminoAcidMap.d.ts +1 -1
- package/deleteSequenceDataAtRange.d.ts +2 -1
- package/diffUtils.d.ts +9 -7
- package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
- package/featureTypesAndColors.d.ts +19 -6
- package/filterSequenceString.d.ts +14 -10
- package/findApproxMatches.d.ts +7 -1
- package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
- package/findOrfsInPlasmid.d.ts +2 -11
- package/findSequenceMatches.d.ts +11 -1
- package/generateAnnotations.d.ts +2 -1
- package/generateSequenceData.d.ts +8 -13
- package/getAllInsertionsInSeqReads.d.ts +11 -1
- package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
- package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
- package/getAminoAcidStringFromSequenceString.d.ts +3 -1
- package/getCodonRangeForAASliver.d.ts +3 -4
- package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getComplementSequenceAndAnnotations.d.ts +5 -1
- package/getComplementSequenceString.d.ts +1 -1
- package/getCutsiteType.d.ts +2 -1
- package/getCutsitesFromSequence.d.ts +2 -1
- package/getDegenerateDnaStringFromAAString.d.ts +1 -1
- package/getDegenerateRnaStringFromAAString.d.ts +1 -1
- package/getDigestFragmentsForCutsites.d.ts +4 -1
- package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
- package/getInsertBetweenVals.d.ts +2 -1
- package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
- package/getOrfsFromSequence.d.ts +17 -11
- package/getOverlapBetweenTwoSequences.d.ts +2 -1
- package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
- package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAnnotation.d.ts +11 -1
- package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
- package/getReverseComplementSequenceString.d.ts +1 -1
- package/getReverseSequenceString.d.ts +1 -1
- package/getSequenceDataBetweenRange.d.ts +9 -1
- package/getVirtualDigest.d.ts +11 -10
- package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
- package/index.cjs +732 -483
- package/index.d.ts +8 -5
- package/index.js +732 -483
- package/index.umd.cjs +732 -483
- package/insertGapsIntoRefSeq.d.ts +2 -1
- package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
- package/isEnzymeType2S.d.ts +2 -1
- package/mapAnnotationsToRows.d.ts +9 -1
- package/package.json +9 -6
- package/prepareCircularViewData.d.ts +2 -1
- package/prepareRowData.d.ts +7 -3
- package/proteinAlphabet.d.ts +1 -1
- package/rotateBpsToPosition.d.ts +1 -1
- package/rotateSequenceDataToPosition.d.ts +3 -1
- package/shiftAnnotationsByLen.d.ts +4 -3
- package/src/DNAComplementMap.ts +32 -0
- package/src/addGapsToSeqReads.ts +436 -0
- package/src/adjustAnnotationsToInsert.ts +20 -0
- package/src/adjustBpsToReplaceOrInsert.ts +73 -0
- package/src/aliasedEnzymesByName.ts +7366 -0
- package/src/aminoAcidToDegenerateDnaMap.ts +32 -0
- package/src/aminoAcidToDegenerateRnaMap.ts +32 -0
- package/src/annotateSingleSeq.ts +37 -0
- package/src/annotationTypes.ts +23 -0
- package/src/autoAnnotate.test.js +0 -1
- package/src/autoAnnotate.ts +290 -0
- package/src/bioData.ts +65 -0
- package/src/calculateEndStability.ts +91 -0
- package/src/calculateNebTa.ts +46 -0
- package/src/calculateNebTm.ts +132 -0
- package/src/calculatePercentGC.ts +3 -0
- package/src/calculateSantaLuciaTm.ts +184 -0
- package/src/calculateTm.ts +242 -0
- package/src/computeDigestFragments.ts +238 -0
- package/src/condensePairwiseAlignmentDifferences.ts +85 -0
- package/src/convertAACaretPositionOrRangeToDna.ts +28 -0
- package/src/convertDnaCaretPositionOrRangeToAA.ts +28 -0
- package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
- package/src/defaultEnzymesByName.ts +280 -0
- package/src/degenerateDnaToAminoAcidMap.ts +5 -0
- package/src/degenerateRnaToAminoAcidMap.ts +5 -0
- package/src/deleteSequenceDataAtRange.ts +13 -0
- package/src/diffUtils.ts +80 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
- package/src/featureTypesAndColors.ts +167 -0
- package/src/filterSequenceString.ts +153 -0
- package/src/findApproxMatches.ts +58 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.ts +43 -0
- package/src/findOrfsInPlasmid.js +6 -1
- package/src/findOrfsInPlasmid.ts +31 -0
- package/src/findSequenceMatches.ts +154 -0
- package/src/generateAnnotations.ts +39 -0
- package/src/generateSequenceData.ts +212 -0
- package/src/getAllInsertionsInSeqReads.ts +100 -0
- package/src/getAminoAcidDataForEachBaseOfDna.ts +305 -0
- package/src/getAminoAcidFromSequenceTriplet.ts +27 -0
- package/src/getAminoAcidStringFromSequenceString.ts +36 -0
- package/src/getCodonRangeForAASliver.ts +73 -0
- package/src/getComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getComplementSequenceAndAnnotations.ts +25 -0
- package/src/getComplementSequenceString.ts +23 -0
- package/src/getCutsiteType.ts +18 -0
- package/src/getCutsitesFromSequence.ts +22 -0
- package/src/getDegenerateDnaStringFromAAString.ts +15 -0
- package/src/getDegenerateRnaStringFromAAString.ts +15 -0
- package/src/getDigestFragmentsForCutsites.ts +126 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
- package/src/getInsertBetweenVals.ts +31 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +40 -0
- package/src/getMassOfAaString.ts +29 -0
- package/src/getOrfsFromSequence.ts +132 -0
- package/src/getOverlapBetweenTwoSequences.ts +30 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.ts +149 -0
- package/src/getReverseAminoAcidStringFromSequenceString.ts +22 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getReverseComplementAnnotation.ts +33 -0
- package/src/getReverseComplementSequenceAndAnnotations.ts +46 -0
- package/src/getReverseComplementSequenceString.ts +18 -0
- package/src/getReverseSequenceString.ts +12 -0
- package/src/getSequenceDataBetweenRange.ts +154 -0
- package/src/getVirtualDigest.ts +139 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.ts +39 -0
- package/src/index.test.ts +43 -0
- package/src/index.ts +111 -0
- package/src/insertGapsIntoRefSeq.ts +43 -0
- package/src/insertSequenceDataAtPosition.ts +2 -0
- package/src/insertSequenceDataAtPositionOrRange.ts +328 -0
- package/src/isEnzymeType2S.ts +5 -0
- package/src/mapAnnotationsToRows.ts +256 -0
- package/src/prepareCircularViewData.ts +24 -0
- package/src/prepareRowData.ts +61 -0
- package/src/prepareRowData_output1.json +1 -0
- package/src/proteinAlphabet.ts +271 -0
- package/src/rotateBpsToPosition.ts +12 -0
- package/src/rotateSequenceDataToPosition.ts +54 -0
- package/src/shiftAnnotationsByLen.ts +24 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.ts +198 -0
- package/src/tidyUpAnnotation.ts +205 -0
- package/src/tidyUpSequenceData.ts +213 -0
- package/src/types.ts +109 -0
- package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
- package/tidyUpAnnotation.d.ts +13 -11
- package/tidyUpSequenceData.d.ts +15 -1
- package/types.d.ts +105 -0
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
/**
 * Lookup table of amino-acid metadata keyed by single-letter code.
 *
 * Every entry carries:
 *  - value:            the single-letter code (identical to the key)
 *  - name:             display name
 *  - threeLettersName: three-letter abbreviation (for the ambiguity codes
 *                      B/J/X/Z this holds the list of letters they stand for)
 *  - colorByFamily:    hex colour used when colouring residues by family
 *  - color:            HSL colour string
 *  - mass:             numeric mass; 0 for stop, gap and ambiguity entries
 *                      (NOTE(review): values look like average residue masses
 *                      in Daltons — confirm the unit against the consumers)
 *
 * Optional fields:
 *  - hydrophobicity:   absent for O, U, stop, gap and ambiguity entries
 *                      (NOTE(review): values appear to follow the
 *                      Kyte-Doolittle scale — confirm)
 *  - isAmbiguous / aliases: only set on the ambiguity codes B, J, X and Z
 */
const proteinAlphabet = {
  A: {
    value: "A",
    name: "Alanine",
    threeLettersName: "Ala",
    hydrophobicity: 1.8,
    colorByFamily: "#00FFFF",
    color: "hsl(327.3, 100%, 69%)",
    mass: 71.0779
  },
  R: {
    value: "R",
    name: "Arginine",
    threeLettersName: "Arg",
    hydrophobicity: -4.5,
    colorByFamily: "#FFC0CB",
    color: "hsl(258.1, 100%, 69%)",
    mass: 156.18568
  },

  N: {
    value: "N",
    name: "Asparagine",
    threeLettersName: "Asn",
    hydrophobicity: -3.5,
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    mass: 114.10264
  },
  D: {
    value: "D",
    name: "Aspartic acid",
    threeLettersName: "Asp",
    hydrophobicity: -3.5,
    colorByFamily: "#EE82EE",
    color: "hsl(268.9, 100%, 69%)",
    mass: 115.0874
  },
  C: {
    value: "C",
    name: "Cysteine",
    threeLettersName: "Cys",
    hydrophobicity: 2.5,
    colorByFamily: "#FFFF00",
    color: "hsl(335.1, 100%, 69%)",
    mass: 103.1429
  },

  E: {
    value: "E",
    name: "Glutamic acid",
    threeLettersName: "Glu",
    hydrophobicity: -3.5,
    colorByFamily: "#EE82EE",
    color: "hsl(268.9, 100%, 69%)",
    mass: 129.11398
  },
  Q: {
    value: "Q",
    name: "Glutamine",
    threeLettersName: "Gln",
    hydrophobicity: -3.5,
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    mass: 128.12922
  },
  G: {
    value: "G",
    name: "Glycine",
    threeLettersName: "Gly",
    hydrophobicity: -0.4,
    colorByFamily: "#00FFFF",
    color: "hsl(303.1, 100%, 69%)",
    mass: 57.05132
  },

  H: {
    value: "H",
    name: "Histidine",
    threeLettersName: "His",
    hydrophobicity: -3.2,
    colorByFamily: "#FFC0CB",
    color: "hsl(272.2, 100%, 69%)",
    mass: 137.13928
  },

  I: {
    value: "I",
    // Trailing space removed from the name so it matches the other entries
    // and does not leak stray whitespace into labels or comparisons.
    name: "Isoleucine",
    threeLettersName: "Ile",
    hydrophobicity: 4.5,
    colorByFamily: "#00FFFF",
    color: "hsl(356.9, 100%, 69%)",
    mass: 113.15764
  },
  L: {
    value: "L",
    name: "Leucine",
    threeLettersName: "Leu",
    hydrophobicity: 3.8,
    colorByFamily: "#00FFFF",
    color: "hsl(349.4, 100%, 69%)",
    mass: 113.15764
  },
  K: {
    value: "K",
    name: "Lysine",
    threeLettersName: "Lys",
    hydrophobicity: -3.9,
    colorByFamily: "#FFC0CB",
    color: "hsl(264.7, 100%, 69%)",
    mass: 128.17228
  },

  O: {
    value: "O",
    name: "Pyrrolysine",
    threeLettersName: "Pyl",
    colorByFamily: "#FFC0CB",
    color: "hsl(264.7, 100%, 69%)",
    mass: 255.313
  },

  M: {
    value: "M",
    name: "Methionine",
    threeLettersName: "Met",
    hydrophobicity: 1.9,
    colorByFamily: "#FFFF00",
    color: "hsl(328.5, 100%, 69%)",
    mass: 131.19606
  },
  F: {
    value: "F",
    name: "Phenylalanine",
    threeLettersName: "Phe",
    hydrophobicity: 2.8,
    colorByFamily: "#FFA500",
    color: "hsl(338.4, 100%, 69%)",
    mass: 147.17386
  },
  P: {
    value: "P",
    name: "Proline",
    threeLettersName: "Pro",
    hydrophobicity: -1.6,
    colorByFamily: "#00FFFF",
    color: "hsl(289.9, 100%, 69%)",
    mass: 97.11518
  },
  S: {
    value: "S",
    name: "Serine",
    threeLettersName: "Ser",
    hydrophobicity: -0.8,
    colorByFamily: "#90EE90",
    color: "hsl(298.6, 100%, 69%)",
    mass: 87.0773
  },
  T: {
    value: "T",
    name: "Threonine",
    threeLettersName: "Thr",
    hydrophobicity: -0.7,
    colorByFamily: "#90EE90",
    color: "hsl(299.8, 100%, 69%)",
    mass: 101.10388
  },
  U: {
    value: "U",
    name: "Selenocysteine",
    threeLettersName: "Sec",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 150.3079
  },
  W: {
    value: "W",
    name: "Tryptophan",
    threeLettersName: "Trp",
    hydrophobicity: -0.9,
    colorByFamily: "#FFA500",
    color: "hsl(297.6, 100%, 69%)",
    mass: 186.2099
  },
  Y: {
    value: "Y",
    name: "Tyrosine",
    threeLettersName: "Tyr",
    hydrophobicity: -1.3,
    colorByFamily: "#FFA500",
    color: "hsl(293.2, 100%, 69%)",
    mass: 163.17326
  },
  V: {
    value: "V",
    name: "Valine",
    threeLettersName: "Val",
    hydrophobicity: 4.2,
    colorByFamily: "#00FFFF",
    color: "hsl(353.6, 100%, 69%)",
    mass: 99.13106
  },
  "*": {
    value: "*",
    name: "Stop",
    threeLettersName: "Stop",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  ".": {
    //tnr: this is actually a deletion/gap character (previously we had this as a stop character which is incorrect) https://www.dnabaser.com/articles/IUPAC%20ambiguity%20codes.html
    value: ".",
    name: "Gap",
    threeLettersName: "Gap",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  "-": {
    value: "-",
    name: "Gap",
    threeLettersName: "Gap",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  // Ambiguity codes: `aliases` lists the single-letter codes each one may stand for.
  B: {
    value: "B",
    threeLettersName: "ND",
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    isAmbiguous: true,
    name: "B",
    aliases: "ND",
    mass: 0
  },
  J: {
    value: "J",
    threeLettersName: "IL",
    colorByFamily: "#00FFFF",
    color: "hsl(352, 100%, 69%)",
    isAmbiguous: true,
    name: "J",
    aliases: "IL",
    mass: 0
  },
  X: {
    value: "X",
    threeLettersName: "ACDEFGHIKLMNPQRSTVWY",
    colorByFamily: "#FFFFFF",
    color: "hsl(60, 100%, 69%)",
    isAmbiguous: true,
    name: "X",
    aliases: "ACDEFGHIKLMNPQRSTVWY",
    mass: 0
  },
  Z: {
    value: "Z",
    threeLettersName: "QE",
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    isAmbiguous: true,
    name: "Z",
    aliases: "QE",
    mass: 0
  }
};
|
|
270
|
+
|
|
271
|
+
export default proteinAlphabet;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * Rotates a sequence string so that the character at `caretPosition`
 * becomes the first character; everything before it is moved to the end.
 *
 * `caretPosition` is wrapped modulo the sequence length, so negative values
 * and values beyond the end of the sequence are accepted.
 *
 * @param bps - the sequence string to rotate
 * @param caretPosition - index that should become position 0
 * @returns the rotated string (same length as `bps`)
 */
export default function rotateBpsToPosition(
  bps: string,
  caretPosition: number
) {
  const offset = normalizeRotation(caretPosition, bps.length);
  // Plain string slicing is used instead of `arr.push(...arr.splice(0, n))`:
  // spreading passes every element as a separate call argument, which throws
  // a RangeError once the rotated part is longer than the engine's argument
  // limit (easily reached by long sequences).
  return bps.slice(offset) + bps.slice(0, offset);
}

/**
 * Wraps `count` into the range [0, length) using a floored modulo.
 * For `length === 0` the result is NaN, which `String.prototype.slice`
 * treats as 0, so an empty sequence rotates to an empty sequence.
 */
function normalizeRotation(count: number, length: number): number {
  return count - length * Math.floor(count / length);
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { map } from "lodash-es";
|
|
2
|
+
import { adjustRangeToRotation } from "@teselagen/range-utils";
|
|
3
|
+
import tidyUpSequenceData, {
|
|
4
|
+
TidyUpSequenceDataOptions
|
|
5
|
+
} from "./tidyUpSequenceData";
|
|
6
|
+
import { modifiableTypes } from "./annotationTypes";
|
|
7
|
+
import rotateBpsToPosition from "./rotateBpsToPosition";
|
|
8
|
+
import { SequenceData, Annotation } from "./types";
|
|
9
|
+
|
|
10
|
+
/**
 * Rotates a sequence and its annotations so that `caretPosition` becomes
 * the new origin.
 *
 * The input is first passed through `tidyUpSequenceData`; the object that
 * call returns is the one that gets modified and returned.
 *
 * @param sequenceData - sequence data to rotate
 * @param caretPosition - position that should become index 0
 * @param options - forwarded to `tidyUpSequenceData`; spread after the
 *   `doNotRemoveInvalidChars: true` default, so a caller may override it
 * @returns the tidied sequence data with rotated bases and annotations
 */
export default function rotateSequenceDataToPosition(
  sequenceData: SequenceData,
  caretPosition: number,
  options: TidyUpSequenceDataOptions = {}
) {
  const rotated = tidyUpSequenceData(sequenceData, {
    doNotRemoveInvalidChars: true,
    ...options
  });

  // Rotate the bases themselves.
  rotated.sequence = rotateBpsToPosition(rotated.sequence, caretPosition);

  // Rotation keeps the length unchanged, so it can be read once up front.
  const sequenceLength = rotated.sequence.length;

  // Shift every modifiable annotation collection to the new origin.
  modifiableTypes.forEach(annotationType => {
    rotated[annotationType] = adjustAnnotationsToRotation(
      rotated[annotationType] as Annotation[],
      caretPosition,
      sequenceLength
    );
  });

  return rotated;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Produces rotated copies of the given annotations.
 *
 * Each annotation is passed through `adjustRangeToRotation`; when it has a
 * `locations` array, every location is adjusted the same way, otherwise
 * `locations` is set to `undefined` on the result.
 *
 * @param annotationsToBeAdjusted - annotations to rotate (lodash `map` is
 *   used, so a nullish collection yields an empty array)
 * @param positionToRotateTo - position that becomes the new origin
 * @param maxLength - sequence length passed to `adjustRangeToRotation`
 */
function adjustAnnotationsToRotation(
  annotationsToBeAdjusted: Annotation[],
  positionToRotateTo: number,
  maxLength: number
) {
  const adjusted = map(annotationsToBeAdjusted, annotation => {
    const rotatedAnnotation = adjustRangeToRotation(
      annotation,
      positionToRotateTo,
      maxLength
    );
    const rotatedLocations = annotation.locations
      ? annotation.locations.map(location =>
          adjustRangeToRotation(location, positionToRotateTo, maxLength)
        )
      : undefined;
    return {
      ...rotatedAnnotation,
      locations: rotatedLocations
    };
  });
  // Drop any falsy entries (kept from the original as a safety net).
  return adjusted.filter(entry => Boolean(entry));
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { modifiableTypes } from "./annotationTypes";
|
|
2
|
+
import adjustAnnotationsToInsert from "./adjustAnnotationsToInsert";
|
|
3
|
+
import { SequenceData } from "./types";
|
|
4
|
+
|
|
5
|
+
/**
 * Shifts every modifiable annotation collection on `seqData` to account for
 * an insertion of `insertLength` at `caretPosition`.
 *
 * `seqData` is modified in place; nothing is returned. Annotation types that
 * are missing (falsy) on `seqData` are left untouched.
 *
 * @param seqData - sequence data whose annotation collections are replaced
 * @param caretPosition - position of the insertion
 * @param insertLength - length of the inserted segment
 */
export default function shiftAnnotationsByLen({
  seqData,
  caretPosition,
  insertLength
}: {
  seqData: SequenceData;
  caretPosition: number;
  insertLength: number;
}) {
  modifiableTypes.forEach(annotationType => {
    const currentAnnotations = seqData[annotationType];
    if (!currentAnnotations) {
      return;
    }
    seqData[annotationType] = adjustAnnotationsToInsert(
      currentAnnotations,
      caretPosition,
      insertLength
    );
  });
}
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import proteinAlphabet from "./proteinAlphabet";
|
|
2
|
+
|
|
3
|
+
// Seed table mapping lower-case, unambiguous codons to their proteinAlphabet
// entry. Both DNA (t) and RNA (u) spellings are listed; codons without a
// t/u base appear once. The gap triplets "..." and "---" map to the gap
// entries. This object is later extended in place with ambiguous and mixed
// t/u codons by getCodonToAminoAcidMap.
const initThreeLetterSequenceStringToAminoAcidMap: Record<
  string,
  {
    value: string;
    name: string;
    threeLettersName: string;
    hydrophobicity?: number;
    colorByFamily: string;
    color: string;
    mass: number;
    isAmbiguous?: boolean;
    aliases?: string;
  }
> = {
  // Alanine
  gct: proteinAlphabet.A,
  gcc: proteinAlphabet.A,
  gca: proteinAlphabet.A,
  gcg: proteinAlphabet.A,
  gcu: proteinAlphabet.A,
  // Arginine
  cgt: proteinAlphabet.R,
  cgc: proteinAlphabet.R,
  cga: proteinAlphabet.R,
  cgg: proteinAlphabet.R,
  aga: proteinAlphabet.R,
  agg: proteinAlphabet.R,
  cgu: proteinAlphabet.R,
  // Asparagine
  aat: proteinAlphabet.N,
  aac: proteinAlphabet.N,
  aau: proteinAlphabet.N,
  // Aspartic acid
  gat: proteinAlphabet.D,
  gac: proteinAlphabet.D,
  gau: proteinAlphabet.D,
  // Cysteine
  tgt: proteinAlphabet.C,
  tgc: proteinAlphabet.C,
  ugu: proteinAlphabet.C,
  ugc: proteinAlphabet.C,
  // Glutamic acid
  gaa: proteinAlphabet.E,
  gag: proteinAlphabet.E,
  // Glutamine
  caa: proteinAlphabet.Q,
  cag: proteinAlphabet.Q,
  // Glycine
  ggt: proteinAlphabet.G,
  ggc: proteinAlphabet.G,
  gga: proteinAlphabet.G,
  ggg: proteinAlphabet.G,
  ggu: proteinAlphabet.G,
  // Histidine
  cat: proteinAlphabet.H,
  cac: proteinAlphabet.H,
  cau: proteinAlphabet.H,
  // Isoleucine
  att: proteinAlphabet.I,
  atc: proteinAlphabet.I,
  ata: proteinAlphabet.I,
  auu: proteinAlphabet.I,
  auc: proteinAlphabet.I,
  aua: proteinAlphabet.I,
  // Leucine
  ctt: proteinAlphabet.L,
  ctc: proteinAlphabet.L,
  cta: proteinAlphabet.L,
  ctg: proteinAlphabet.L,
  tta: proteinAlphabet.L,
  ttg: proteinAlphabet.L,
  cuu: proteinAlphabet.L,
  cuc: proteinAlphabet.L,
  cua: proteinAlphabet.L,
  cug: proteinAlphabet.L,
  uua: proteinAlphabet.L,
  uug: proteinAlphabet.L,
  // Lysine
  aaa: proteinAlphabet.K,
  aag: proteinAlphabet.K,
  // Methionine
  atg: proteinAlphabet.M,
  aug: proteinAlphabet.M,
  // Phenylalanine
  ttt: proteinAlphabet.F,
  ttc: proteinAlphabet.F,
  uuu: proteinAlphabet.F,
  uuc: proteinAlphabet.F,
  // Proline
  cct: proteinAlphabet.P,
  ccc: proteinAlphabet.P,
  cca: proteinAlphabet.P,
  ccg: proteinAlphabet.P,
  ccu: proteinAlphabet.P,
  // Serine
  tct: proteinAlphabet.S,
  tcc: proteinAlphabet.S,
  tca: proteinAlphabet.S,
  tcg: proteinAlphabet.S,
  agt: proteinAlphabet.S,
  agc: proteinAlphabet.S,
  ucu: proteinAlphabet.S,
  ucc: proteinAlphabet.S,
  uca: proteinAlphabet.S,
  ucg: proteinAlphabet.S,
  agu: proteinAlphabet.S,
  // Threonine
  act: proteinAlphabet.T,
  acc: proteinAlphabet.T,
  aca: proteinAlphabet.T,
  acg: proteinAlphabet.T,
  acu: proteinAlphabet.T,
  // Tryptophan
  tgg: proteinAlphabet.W,
  ugg: proteinAlphabet.W,
  // Tyrosine
  tat: proteinAlphabet.Y,
  tac: proteinAlphabet.Y,
  uau: proteinAlphabet.Y,
  uac: proteinAlphabet.Y,
  // Valine
  gtt: proteinAlphabet.V,
  gtc: proteinAlphabet.V,
  gta: proteinAlphabet.V,
  gtg: proteinAlphabet.V,
  guu: proteinAlphabet.V,
  guc: proteinAlphabet.V,
  gua: proteinAlphabet.V,
  gug: proteinAlphabet.V,
  // Stop codons
  taa: proteinAlphabet["*"],
  tag: proteinAlphabet["*"],
  tga: proteinAlphabet["*"],
  uaa: proteinAlphabet["*"],
  uag: proteinAlphabet["*"],
  uga: proteinAlphabet["*"],
  // Gap triplets
  "...": proteinAlphabet["."],
  "---": proteinAlphabet["-"]
};
|
|
121
|
+
|
|
122
|
+
// IUPAC nucleotide codes (DNA/RNA) with U awareness
|
|
123
|
+
// Every code that can stand for thymine also stands for uracil, so the
// T/U pair is spelled once and spread into each entry that needs it.
const T_OR_U = ["T", "U"];

// Maps each upper-case IUPAC nucleotide code to the concrete bases it may
// represent. Key order matters: it drives the enumeration order used when
// the ambiguous codon table is generated.
const IUPAC: Record<string, string[]> = {
  // unambiguous bases
  A: ["A"],
  C: ["C"],
  G: ["G"],
  T: ["T"],
  U: ["U"],

  // two-base codes
  R: ["A", "G"],
  Y: ["C", ...T_OR_U],
  K: ["G", ...T_OR_U],
  M: ["A", "C"],
  S: ["G", "C"],
  W: ["A", ...T_OR_U],
  // three-base codes
  B: ["C", "G", ...T_OR_U],
  D: ["A", "G", ...T_OR_U],
  H: ["A", "C", ...T_OR_U],
  V: ["A", "C", "G"],
  // any base
  N: ["A", "C", "G", ...T_OR_U],
  X: ["A", "C", "G", ...T_OR_U]
};
|
|
143
|
+
|
|
144
|
+
/**
 * Resolves a possibly ambiguous codon to a single amino-acid entry.
 *
 * Each character is expanded through the IUPAC table (characters that are
 * not in the table stand for themselves), every concrete combination is
 * looked up in the seed codon table, and the shared entry is returned only
 * when all combinations agree on the same `value`.
 *
 * @param threeLetterCodon - codon made of IUPAC nucleotide codes, any case
 * @returns the common amino-acid entry, or null when any combination is
 *   unknown or the combinations disagree
 */
function expandAndResolve(threeLetterCodon: string) {
  const basesPerPosition = threeLetterCodon
    .toUpperCase()
    .split("")
    .map(symbol => IUPAC[symbol] || [symbol]);

  // Cartesian product of the per-position alternatives, built left to right.
  const concreteCodons = basesPerPosition.reduce<string[]>(
    (prefixes, bases) => {
      const extended: string[] = [];
      prefixes.forEach(prefix => {
        bases.forEach(base => {
          extended.push(prefix + base);
        });
      });
      return extended;
    },
    [""]
  );

  let resolved = null;
  for (const concreteCodon of concreteCodons) {
    const key = concreteCodon.toLowerCase();
    // Try the codon as written, then its all-DNA and all-RNA spellings so
    // that mixed t/u codons still resolve.
    const match =
      initThreeLetterSequenceStringToAminoAcidMap[key] ??
      initThreeLetterSequenceStringToAminoAcidMap[key.replace(/u/g, "t")] ??
      initThreeLetterSequenceStringToAminoAcidMap[key.replace(/t/g, "u")];

    if (!match) {
      return null;
    }
    if (!resolved) {
      resolved = match;
      continue;
    }
    if (resolved.value !== match.value) {
      return null;
    }
  }
  return resolved;
}
|
|
178
|
+
|
|
179
|
+
/**
 * Extends the seed codon table with every three-character combination of
 * IUPAC codes that resolves to exactly one amino acid.
 *
 * The seed table is modified in place and the same object is returned.
 * Entries already present in the table are never overwritten.
 */
function getCodonToAminoAcidMap() {
  const codonTable = initThreeLetterSequenceStringToAminoAcidMap;
  const symbols = Object.keys(IUPAC);

  // Enumerate all IUPAC 3-mers.
  for (const first of symbols) {
    for (const second of symbols) {
      for (const third of symbols) {
        const triplet = first + second + third;
        const key = triplet.toLowerCase();
        if (!codonTable[key]) {
          const resolved = expandAndResolve(triplet);
          if (resolved) {
            codonTable[key] = resolved;
          }
        }
      }
    }
  }

  return codonTable;
}
|
|
195
|
+
|
|
196
|
+
const threeLetterSequenceStringToAminoAcidMap = getCodonToAminoAcidMap();
|
|
197
|
+
|
|
198
|
+
export default threeLetterSequenceStringToAminoAcidMap;
|