@teselagen/sequence-utils 0.1.22 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +4 -3
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
/**
 * Lookup table of amino-acid (and related) single-letter codes.
 *
 * Each key is the single-letter code and each entry carries:
 *  - value: the single-letter code (always equal to the key)
 *  - name / threeLettersName: display names
 *  - hydrophobicity: numeric score (absent for U, stop, gap and ambiguity codes)
 *  - colorByFamily / color: CSS colour strings
 *  - mass: numeric mass (0 for stop, gap and ambiguity codes)
 *  - isAmbiguous / aliases: only on ambiguity codes (B, J, X, Z); `aliases`
 *    lists the single-letter codes the ambiguity code may stand for
 */
const proteinAlphabet = {
  A: {
    value: "A",
    name: "Alanine",
    threeLettersName: "Ala",
    hydrophobicity: 1.8,
    colorByFamily: "#00FFFF",
    color: "hsl(327.3, 100%, 69%)",
    mass: 89.1
  },
  R: {
    value: "R",
    name: "Arginine",
    threeLettersName: "Arg",
    hydrophobicity: -4.5,
    colorByFamily: "#FFC0CB",
    color: "hsl(258.1, 100%, 69%)",
    mass: 174.2
  },
  N: {
    value: "N",
    name: "Asparagine",
    threeLettersName: "Asn",
    hydrophobicity: -3.5,
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    mass: 132.1
  },
  D: {
    value: "D",
    name: "Aspartic acid",
    threeLettersName: "Asp",
    hydrophobicity: -3.5,
    colorByFamily: "#EE82EE",
    color: "hsl(268.9, 100%, 69%)",
    mass: 133.1
  },
  C: {
    value: "C",
    name: "Cysteine",
    threeLettersName: "Cys",
    hydrophobicity: 2.5,
    colorByFamily: "#FFFF00",
    color: "hsl(335.1, 100%, 69%)",
    mass: 121.2
  },
  E: {
    value: "E",
    name: "Glutamic acid",
    threeLettersName: "Glu",
    hydrophobicity: -3.5,
    colorByFamily: "#EE82EE",
    color: "hsl(268.9, 100%, 69%)",
    mass: 147.1
  },
  Q: {
    value: "Q",
    name: "Glutamine",
    threeLettersName: "Gln",
    hydrophobicity: -3.5,
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    mass: 146.2
  },
  G: {
    value: "G",
    name: "Glycine",
    threeLettersName: "Gly",
    hydrophobicity: -0.4,
    colorByFamily: "#00FFFF",
    color: "hsl(303.1, 100%, 69%)",
    mass: 75.1
  },
  H: {
    value: "H",
    name: "Histidine",
    threeLettersName: "His",
    hydrophobicity: -3.2,
    colorByFamily: "#FFC0CB",
    color: "hsl(272.2, 100%, 69%)",
    mass: 155.2
  },
  I: {
    value: "I",
    // Previously "Isoleucine " (trailing space), which leaked into displayed names.
    name: "Isoleucine",
    threeLettersName: "Ile",
    hydrophobicity: 4.5,
    colorByFamily: "#00FFFF",
    color: "hsl(356.9, 100%, 69%)",
    mass: 131.2
  },
  L: {
    value: "L",
    name: "Leucine",
    threeLettersName: "Leu",
    hydrophobicity: 3.8,
    colorByFamily: "#00FFFF",
    color: "hsl(349.4, 100%, 69%)",
    mass: 131.2
  },
  K: {
    value: "K",
    name: "Lysine",
    threeLettersName: "Lys",
    hydrophobicity: -3.9,
    colorByFamily: "#FFC0CB",
    color: "hsl(264.7, 100%, 69%)",
    mass: 146.2
  },
  M: {
    value: "M",
    name: "Methionine",
    threeLettersName: "Met",
    hydrophobicity: 1.9,
    colorByFamily: "#FFFF00",
    color: "hsl(328.5, 100%, 69%)",
    mass: 149.2
  },
  F: {
    value: "F",
    name: "Phenylalanine",
    threeLettersName: "Phe",
    hydrophobicity: 2.8,
    colorByFamily: "#FFA500",
    color: "hsl(338.4, 100%, 69%)",
    mass: 165.2
  },
  P: {
    value: "P",
    name: "Proline",
    threeLettersName: "Pro",
    hydrophobicity: -1.6,
    colorByFamily: "#00FFFF",
    color: "hsl(289.9, 100%, 69%)",
    mass: 115.1
  },
  S: {
    value: "S",
    name: "Serine",
    threeLettersName: "Ser",
    hydrophobicity: -0.8,
    colorByFamily: "#90EE90",
    color: "hsl(298.6, 100%, 69%)",
    mass: 105.1
  },
  T: {
    value: "T",
    name: "Threonine",
    threeLettersName: "Thr",
    hydrophobicity: -0.7,
    colorByFamily: "#90EE90",
    color: "hsl(299.8, 100%, 69%)",
    mass: 119.1
  },
  U: {
    value: "U",
    name: "Selenocysteine",
    threeLettersName: "Sec",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 168.1
  },
  W: {
    value: "W",
    name: "Tryptophan",
    threeLettersName: "Trp",
    hydrophobicity: -0.9,
    colorByFamily: "#FFA500",
    color: "hsl(297.6, 100%, 69%)",
    mass: 204.2
  },
  Y: {
    value: "Y",
    name: "Tyrosine",
    threeLettersName: "Tyr",
    hydrophobicity: -1.3,
    colorByFamily: "#FFA500",
    color: "hsl(293.2, 100%, 69%)",
    mass: 181.2
  },
  V: {
    value: "V",
    name: "Valine",
    threeLettersName: "Val",
    hydrophobicity: 4.2,
    colorByFamily: "#00FFFF",
    color: "hsl(353.6, 100%, 69%)",
    mass: 117.1
  },
  "*": {
    value: "*",
    name: "Stop",
    threeLettersName: "Stop",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  ".": {
    //tnr: this is actually a deletion/gap character (previously we had this as a stop character which is incorrect) https://www.dnabaser.com/articles/IUPAC%20ambiguity%20codes.html
    value: ".",
    name: "Gap",
    threeLettersName: "Gap",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  "-": {
    value: "-",
    name: "Gap",
    threeLettersName: "Gap",
    colorByFamily: "#FF0000",
    color: "hsl(0, 100%, 69%)",
    mass: 0
  },
  B: {
    value: "B",
    threeLettersName: "ND",
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    isAmbiguous: true,
    name: "B",
    aliases: "ND",
    mass: 0
  },
  J: {
    value: "J",
    threeLettersName: "IL",
    colorByFamily: "#00FFFF",
    color: "hsl(352, 100%, 69%)",
    isAmbiguous: true,
    name: "J",
    aliases: "IL",
    mass: 0
  },
  X: {
    value: "X",
    threeLettersName: "ACDEFGHIKLMNPQRSTVWY",
    colorByFamily: "#FFFFFF",
    color: "hsl(60, 100%, 69%)",
    isAmbiguous: true,
    name: "X",
    aliases: "ACDEFGHIKLMNPQRSTVWY",
    mass: 0
  },
  Z: {
    value: "Z",
    threeLettersName: "QE",
    colorByFamily: "#D3D3D3",
    color: "hsl(268.9, 100%, 69%)",
    isAmbiguous: true,
    name: "Z",
    aliases: "QE",
    mass: 0
  }
};
|
|
256
|
+
|
|
257
|
+
export default proteinAlphabet
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
 * Rotate a sequence string so that the base at `caretPosition` becomes the
 * first base of the result; everything before it is appended to the end.
 *
 * The rotation is done with string slices rather than by spreading the bases
 * into `Array.prototype.push.apply`, which passes one argument per base and
 * can throw a RangeError on very long sequences.
 *
 * @param {string} bps - the sequence to rotate
 * @param {number} caretPosition - index that should become position 0; values
 *   outside `[0, bps.length)` (including negatives) wrap around
 * @returns {string} the rotated sequence (same length as `bps`)
 */
export default function rotateBpsToPosition(bps, caretPosition) {
  const offset = normalizeRotationOffset(caretPosition, bps.length);
  // A NaN offset (e.g. empty sequence or non-numeric position) is treated as 0
  // by `slice`, which leaves the sequence unrotated.
  return bps.slice(offset) + bps.slice(0, offset);
}

/**
 * Wrap `count` into the range `[0, length)` using a floored modulo, so that
 * negative rotations count back from the end.
 *
 * @param {number} count - requested rotation
 * @param {number} length - length of the sequence being rotated
 * @returns {number} the wrapped offset, or NaN when it cannot be computed
 */
function normalizeRotationOffset(count, length) {
  return count - length * Math.floor(count / length);
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import {map} from "lodash";
|
|
2
|
+
import {adjustRangeToRotation} from "@teselagen/range-utils";
|
|
3
|
+
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
4
|
+
import {modifiableTypes} from "./annotationTypes";
|
|
5
|
+
import rotateBpsToPosition from "./rotateBpsToPosition";
|
|
6
|
+
|
|
7
|
+
/**
 * Rotate a sequence and its annotations so that `caretPosition` becomes the
 * new origin.
 *
 * The input is first passed through `tidyUpSequenceData(sequenceData, options)`;
 * the object it returns is the one that gets updated and returned.
 *
 * @param {object} sequenceData - sequence data containing at least `sequence`
 * @param {number} caretPosition - position to rotate to
 * @param {object} [options] - forwarded untouched to `tidyUpSequenceData`
 * @returns {object} the tidied sequence data with a rotated `sequence` and
 *   rotated annotations for every type in `modifiableTypes`
 */
export default function rotateSequenceDataToPosition(
  sequenceData,
  caretPosition,
  options
) {
  const rotatedData = tidyUpSequenceData(sequenceData, options);

  // Rotate the bases themselves.
  rotatedData.sequence = rotateBpsToPosition(
    rotatedData.sequence,
    caretPosition
  );

  // Rotate each modifiable annotation collection by the same amount.
  modifiableTypes.forEach(annotationType => {
    const annotations = rotatedData[annotationType];
    rotatedData[annotationType] = adjustAnnotationsToRotation(
      annotations,
      caretPosition,
      rotatedData.sequence.length
    );
  });

  return rotatedData;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Apply `adjustRangeToRotation` to every annotation and to each of its
 * `locations` (when present).
 *
 * @param {Array|object} annotationsToBeAdjusted - collection iterated with lodash `map`
 * @param {number} positionToRotateTo - rotation passed to `adjustRangeToRotation`
 * @param {number} maxLength - sequence length passed to `adjustRangeToRotation`
 * @returns {Array} new annotation objects; `locations` is `undefined` when the
 *   source annotation had none
 */
function adjustAnnotationsToRotation(
  annotationsToBeAdjusted,
  positionToRotateTo,
  maxLength
) {
  const rotateRange = range =>
    adjustRangeToRotation(range, positionToRotateTo, maxLength);

  const rotatedAnnotations = map(annotationsToBeAdjusted, annotation => {
    const rotatedAnnotation = rotateRange(annotation);
    let rotatedLocations;
    if (annotation.locations) {
      rotatedLocations = annotation.locations.map(location =>
        rotateRange(location)
      );
    }
    return { ...rotatedAnnotation, locations: rotatedLocations };
  });

  // The mapper above always returns an object, so this filter does not remove
  // anything; it is retained as a defensive guard.
  return rotatedAnnotations.filter(entry => Boolean(entry));
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import rotateSequenceDataToPosition from "./rotateSequenceDataToPosition";
|
|
2
|
+
describe("rotateSequenceDataToPosition", () => {
  // Shared fixture: both cases rotate the same 7 bp sequence to index 4.
  // index guide:    0123456
  const sequence = "atgaccc";
  const rotateTo = 4;
  const expectedSequence = "cccatga";

  it("should rotate vanilla sequence data correctly", () => {
    const newData = rotateSequenceDataToPosition({ sequence }, rotateTo);

    expect(newData.sequence).toEqual(expectedSequence);
  });

  it("should rotate sequence data with features correctly", () => {
    const features = [
      { start: 4, end: 4 },
      // start > end: a feature that wraps around the origin
      { start: 1, end: 0 },
      {
        start: 2,
        end: 6,
        locations: [
          { start: 2, end: 3 },
          { start: 4, end: 6 }
        ]
      }
    ];

    const newData = rotateSequenceDataToPosition(
      { sequence, features },
      rotateTo
    );

    expect(newData.sequence).toEqual(expectedSequence);
    expect(newData.features).toMatchObject([
      { start: 0, end: 0 },
      { start: 4, end: 3 },
      {
        start: 5,
        end: 2,
        locations: [
          { start: 5, end: 6 },
          { start: 0, end: 2 }
        ]
      }
    ]);
  });
});
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import {modifiableTypes} from "./annotationTypes";
|
|
2
|
+
import adjustAnnotationsToInsert from "./adjustAnnotationsToInsert";
|
|
3
|
+
|
|
4
|
+
/**
 * Replace every modifiable annotation collection on `seqData` with the result
 * of `adjustAnnotationsToInsert` for the given insert.
 *
 * Mutates `seqData` in place and returns nothing.
 *
 * @param {object} args
 * @param {object} args.seqData - sequence data whose annotation collections are replaced
 * @param {number} args.caretPosition - forwarded to `adjustAnnotationsToInsert`
 * @param {number} args.insertLength - forwarded to `adjustAnnotationsToInsert`
 * @returns {void}
 */
export default function shiftAnnotationsByLen({
  seqData,
  caretPosition,
  insertLength
}) {
  const shiftForInsert = annotations =>
    adjustAnnotationsToInsert(annotations, caretPosition, insertLength);

  modifiableTypes.forEach(annotationType => {
    seqData[annotationType] = shiftForInsert(seqData[annotationType]);
  });
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import proteinAlphabet from "./proteinAlphabet";
|
|
2
|
+
|
|
3
|
+
/**
 * Maps a lowercase three-letter codon (DNA `t` or RNA `u` spelling) to its
 * `proteinAlphabet` entry. "..." and "---" map to the two gap entries.
 *
 * The RNA stop codons (uaa / uag / uga) are included so that RNA input is
 * covered the same way as DNA input, matching the `u` variants already listed
 * for every amino acid.
 */
const threeLetterSequenceStringToAminoAcidMap = {
  gct: proteinAlphabet.A,
  gcc: proteinAlphabet.A,
  gca: proteinAlphabet.A,
  gcg: proteinAlphabet.A,
  gcu: proteinAlphabet.A,
  cgt: proteinAlphabet.R,
  cgc: proteinAlphabet.R,
  cga: proteinAlphabet.R,
  cgg: proteinAlphabet.R,
  aga: proteinAlphabet.R,
  agg: proteinAlphabet.R,
  cgu: proteinAlphabet.R,
  aat: proteinAlphabet.N,
  aac: proteinAlphabet.N,
  aau: proteinAlphabet.N,
  gat: proteinAlphabet.D,
  gac: proteinAlphabet.D,
  gau: proteinAlphabet.D,
  tgt: proteinAlphabet.C,
  tgc: proteinAlphabet.C,
  ugu: proteinAlphabet.C,
  ugc: proteinAlphabet.C,
  gaa: proteinAlphabet.E,
  gag: proteinAlphabet.E,
  caa: proteinAlphabet.Q,
  cag: proteinAlphabet.Q,
  ggt: proteinAlphabet.G,
  ggc: proteinAlphabet.G,
  gga: proteinAlphabet.G,
  ggg: proteinAlphabet.G,
  ggu: proteinAlphabet.G,
  cat: proteinAlphabet.H,
  cac: proteinAlphabet.H,
  cau: proteinAlphabet.H,
  att: proteinAlphabet.I,
  atc: proteinAlphabet.I,
  ata: proteinAlphabet.I,
  auu: proteinAlphabet.I,
  auc: proteinAlphabet.I,
  aua: proteinAlphabet.I,
  ctt: proteinAlphabet.L,
  ctc: proteinAlphabet.L,
  cta: proteinAlphabet.L,
  ctg: proteinAlphabet.L,
  tta: proteinAlphabet.L,
  ttg: proteinAlphabet.L,
  cuu: proteinAlphabet.L,
  cuc: proteinAlphabet.L,
  cua: proteinAlphabet.L,
  cug: proteinAlphabet.L,
  uua: proteinAlphabet.L,
  uug: proteinAlphabet.L,
  aaa: proteinAlphabet.K,
  aag: proteinAlphabet.K,
  atg: proteinAlphabet.M,
  aug: proteinAlphabet.M,
  ttt: proteinAlphabet.F,
  ttc: proteinAlphabet.F,
  uuu: proteinAlphabet.F,
  uuc: proteinAlphabet.F,
  cct: proteinAlphabet.P,
  ccc: proteinAlphabet.P,
  cca: proteinAlphabet.P,
  ccg: proteinAlphabet.P,
  ccu: proteinAlphabet.P,
  tct: proteinAlphabet.S,
  tcc: proteinAlphabet.S,
  tca: proteinAlphabet.S,
  tcg: proteinAlphabet.S,
  agt: proteinAlphabet.S,
  agc: proteinAlphabet.S,
  ucu: proteinAlphabet.S,
  ucc: proteinAlphabet.S,
  uca: proteinAlphabet.S,
  ucg: proteinAlphabet.S,
  agu: proteinAlphabet.S,
  act: proteinAlphabet.T,
  acc: proteinAlphabet.T,
  aca: proteinAlphabet.T,
  acg: proteinAlphabet.T,
  acu: proteinAlphabet.T,
  tgg: proteinAlphabet.W,
  ugg: proteinAlphabet.W,
  tat: proteinAlphabet.Y,
  tac: proteinAlphabet.Y,
  uau: proteinAlphabet.Y,
  uac: proteinAlphabet.Y,
  gtt: proteinAlphabet.V,
  gtc: proteinAlphabet.V,
  gta: proteinAlphabet.V,
  gtg: proteinAlphabet.V,
  guu: proteinAlphabet.V,
  guc: proteinAlphabet.V,
  gua: proteinAlphabet.V,
  gug: proteinAlphabet.V,
  taa: proteinAlphabet["*"],
  tag: proteinAlphabet["*"],
  tga: proteinAlphabet["*"],
  // RNA spellings of the three stop codons.
  uaa: proteinAlphabet["*"],
  uag: proteinAlphabet["*"],
  uga: proteinAlphabet["*"],
  "...": proteinAlphabet["."],
  "---": proteinAlphabet["-"]
};
|
|
105
|
+
|
|
106
|
+
export default threeLetterSequenceStringToAminoAcidMap;
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
import {cloneDeep, get, some} from "lodash";
|
|
2
|
+
import {getFeatureToColorMap, getFeatureTypes} from "./featureTypesAndColors";
|
|
3
|
+
import shortid from "shortid";
|
|
4
|
+
|
|
5
|
+
/**
 * Normalise a single annotation: name, id, start/end (and each entry of
 * `locations`), strand/forward, type, notes and colour.
 *
 * @param {object} _annotation - the annotation to tidy; cloned unless `mutative`
 * @param {object} [options] - all fields optional; omitting the object entirely
 *   is allowed
 * @param {object} [options.sequenceData={}] - `size`, `circular` and `isProtein`
 *   are read from it
 * @param {boolean} [options.convertAnnotationsFromAAIndices] - forwarded to `coerceLocation`
 * @param {string} [options.annotationType] - stored as `annotationTypePlural`
 * @param {boolean} [options.provideNewIdsForAnnotations] - always assign a fresh id
 * @param {boolean} [options.doNotProvideIdsForAnnotations] - never fill in a missing id
 * @param {string[]} [options.messages=[]] - receives a message for each correction made
 * @param {boolean} [options.mutative] - modify `_annotation` in place instead of a deep clone
 * @param {boolean} [options.allowNonStandardGenbankTypes] - accept types not in the feature type list
 * @param {string[]} [options.featureTypes] - accepted types; defaults to `getFeatureTypes()`
 * @returns {object|false} the tidied annotation, or `false` when `_annotation`
 *   is not an object
 */
export default function tidyUpAnnotation(
  _annotation,
  {
    sequenceData = {},
    convertAnnotationsFromAAIndices,
    annotationType,
    provideNewIdsForAnnotations,
    doNotProvideIdsForAnnotations,
    messages = [],
    mutative,
    allowNonStandardGenbankTypes,
    featureTypes
    // Defaulting the whole options object lets callers omit it; previously
    // that threw a TypeError while destructuring `undefined`.
  } = {}
) {
  const { size, circular, isProtein } = sequenceData;
  if (!_annotation || typeof _annotation !== "object") {
    messages.push("Invalid annotation detected and removed");
    return false;
  }
  let annotation = _annotation;
  if (!mutative) {
    annotation = cloneDeep(_annotation);
  }
  annotation.annotationTypePlural = annotationType;

  if (!annotation.name || typeof annotation.name !== "string") {
    messages.push(
      'Unable to detect valid name for annotation, setting name to "Untitled annotation"'
    );
    annotation.name = "Untitled annotation";
  }
  if (provideNewIdsForAnnotations) {
    annotation.id = shortid();
  }
  // An id of 0 is considered valid and is left alone.
  if (!annotation.id && annotation.id !== 0 && !doNotProvideIdsForAnnotations) {
    annotation.id = shortid();
    messages.push(
      "Unable to detect valid ID for annotation, setting ID to " + annotation.id
    );
  }

  //run this for the annotation itself
  coerceLocation({
    isProtein,
    location: annotation,
    convertAnnotationsFromAAIndices,
    size,
    messages,
    circular,
    name: annotation.name
  });
  //and for each location
  annotation.locations &&
    annotation.locations.forEach(location => {
      coerceLocation({
        isProtein,
        location,
        convertAnnotationsFromAAIndices,
        size,
        messages,
        circular,
        name: annotation.name
      });
    });

  // Any recognised "forward" marker (or a protein sequence) yields the forward
  // strand; everything else is treated as reverse.
  if (
    isProtein ||
    annotation.forward === true ||
    annotation.forward === "true" ||
    annotation.strand === 1 ||
    annotation.strand === "1" ||
    annotation.strand === "+"
  ) {
    annotation.forward = true;
    annotation.strand = 1;
  } else {
    annotation.forward = false;
    annotation.strand = -1;
  }
  if (
    !annotation.type ||
    typeof annotation.type !== "string" ||
    !some(featureTypes || getFeatureTypes(), featureType => {
      if (featureType.toLowerCase() === annotation.type.toLowerCase()) {
        annotation.type = featureType; //this makes sure the annotation.type is being set to the exact value of the accepted featureType
        return true;
      }
      // Non-standard types are accepted when enabled via the option or via a
      // `tg_allowNonStandardGenbankTypes` flag on `window` / `global`.
      if (
        allowNonStandardGenbankTypes ||
        (typeof window !== "undefined" &&
          get(window, "tg_allowNonStandardGenbankTypes")) ||
        (typeof global !== "undefined" &&
          get(global, "tg_allowNonStandardGenbankTypes"))
      )
        return true;
      return false;
    })
  ) {
    messages.push(
      "Invalid annotation type detected: " +
        annotation.type +
        " for " +
        annotation.name +
        ". set type to misc_feature"
    );
    annotation.type = "misc_feature";
  }
  if (annotation.notes && typeof annotation.notes === "string") {
    // Notes supplied as a string are parsed as JSON; on failure the string is
    // kept as-is and the problem is only logged.
    try {
      annotation.notes = JSON.parse(annotation.notes);
    } catch (error) {
      console.info(
        `warning 33y00a0912 - couldn't parse notes for ${annotation.name ||
          ""} ${annotation.notes}:`,
        error
      );
    }
  }

  if (!annotation.color) {
    annotation.color = getFeatureToColorMap()[annotation.type];
  }
  return annotation;
}
|
|
129
|
+
|
|
130
|
+
/**
 * Coerce `location.start` / `location.end` to integers and clamp them into the
 * sequence, mutating `location` in place and recording every correction in
 * `messages`.
 *
 * @param {object} args
 * @param {object} args.location - object whose `start` and `end` are rewritten
 * @param {boolean} [args.convertAnnotationsFromAAIndices] - when truthy, start
 *   and end are converted with `start * 3` and `end * 3 + 2`
 * @param {number} args.size - sequence length; `size - 1` is the last valid index
 * @param {boolean} [args.isProtein] - an invalid start is reset to `size - 3`
 *   instead of `size - 1`
 * @param {string[]} args.messages - receives one message per correction
 * @param {boolean} [args.circular] - only an explicit `false` triggers the
 *   wrap-around (start > end) correction
 * @param {string} args.name - annotation name used in the messages
 * @returns {void}
 */
function coerceLocation({
  location,
  convertAnnotationsFromAAIndices,
  size,
  isProtein,
  messages,
  circular,
  name
}) {
  location.start = parseInt(location.start, 10);
  location.end = parseInt(location.end, 10);

  if (convertAnnotationsFromAAIndices) {
    location.start = location.start * 3;
    location.end = location.end * 3 + 2;
  }

  const lastIndex = size - 1;

  // `!(x <= lastIndex)` is true both when x is too large and when x is NaN
  // (e.g. parseInt failed), so a separate `>` comparison is not needed.
  if (location.start < 0 || !(location.start <= lastIndex)) {
    messages.push(
      "Invalid annotation start: " +
        location.start +
        " detected for " +
        // Use the `name` argument: sub-locations have no `name` of their own,
        // so `location.name` produced "undefined" in the message.
        name +
        " and set to size: " +
        size
    );
    location.start = size - (isProtein ? 3 : 1);
  }
  if (location.end < 0 || !(location.end <= lastIndex)) {
    messages.push(
      "Invalid annotation end: " +
        location.end +
        " detected for " +
        name +
        " and set to seq size: " +
        size
    );
    location.end = lastIndex;
  }
  if (location.start > location.end && circular === false) {
    // A range that wraps past the origin is not valid on a linear sequence;
    // extend it to the last base. (Index `size - 1` is shown to users as `size`.)
    messages.push(
      "Invalid circular annotation detected for " + name + ". end set to " + size
    );
    location.end = lastIndex;
  }
}
|