@teselagen/sequence-utils 0.1.22 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +4 -3
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
package/src/bioData.js
ADDED
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
//Adapted from biopython. Check the BIOPYTHON_LICENSE for licensing info
|
|
2
|
+
|
|
3
|
+
// One-letter codes of the 20 amino acids that also appear as keys of protein_letters_1to3
// (which additionally lists O and U).
export const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
4
|
+
|
|
5
|
+
// protein_letters followed by the extra one-letter codes B, X, Z, J, U, O and the
// symbols ".", "*" and "-". The letter codes are explained in the notes below.
export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
|
|
6
|
+
|
|
7
|
+
// # B = "Asx"; aspartic acid or asparagine (D or N)
|
|
8
|
+
// # X = "Xxx"; unknown or 'other' amino acid
|
|
9
|
+
// # Z = "Glx"; glutamic acid or glutamine (E or Q)
|
|
10
|
+
// # http://www.chem.qmul.ac.uk/iupac/AminoAcid/A2021.html#AA212
|
|
11
|
+
// #
|
|
12
|
+
// # J = "Xle"; leucine or isoleucine (L or I, used in NMR)
|
|
13
|
+
// # Mentioned in http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html
|
|
14
|
+
// # Also the International Nucleotide Sequence Database Collaboration (INSDC)
|
|
15
|
+
// # (i.e. GenBank, EMBL, DDBJ) adopted this in 2006
|
|
16
|
+
// # http://www.ddbj.nig.ac.jp/insdc/icm2006-e.html
|
|
17
|
+
// #
|
|
18
|
+
// # Xle (J); Leucine or Isoleucine
|
|
19
|
+
// # The residue abbreviations, Xle (the three-letter abbreviation) and J
|
|
20
|
+
// # (the one-letter abbreviation) are reserved for the case that cannot
|
|
21
|
+
// # experimentally distinguish leucine from isoleucine.
|
|
22
|
+
// #
|
|
23
|
+
// # U = "Sec"; selenocysteine
|
|
24
|
+
// # http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html
|
|
25
|
+
// #
|
|
26
|
+
// # O = "Pyl"; pyrrolysine
|
|
27
|
+
// # http://www.chem.qmul.ac.uk/iubmb/newsletter/2009.html#item35
|
|
28
|
+
|
|
29
|
+
/**
 * Lookup from a one-letter amino acid code to its three-letter abbreviation.
 * Covers the 20 letters of `protein_letters` plus O (Pyl) and U (Sec).
 */
export const protein_letters_1to3 = {
  "A": "Ala", "C": "Cys", "D": "Asp", "E": "Glu", "F": "Phe",
  "G": "Gly", "H": "His", "I": "Ile", "K": "Lys", "L": "Leu",
  "M": "Met", "N": "Asn", "P": "Pro", "Q": "Gln", "R": "Arg",
  "S": "Ser", "T": "Thr", "V": "Val", "W": "Trp", "Y": "Tyr",
  "O": "Pyl", "U": "Sec"
};
|
|
53
|
+
|
|
54
|
+
// module.exports.protein_letters_1to3_extended = dict(
|
|
55
|
+
// list(protein_letters_1to3.items()) +
|
|
56
|
+
// list(
|
|
57
|
+
// {
|
|
58
|
+
// B: "Asx",
|
|
59
|
+
// X: "Xaa",
|
|
60
|
+
// Z: "Glx",
|
|
61
|
+
// J: "Xle",
|
|
62
|
+
// U: "Sel",
|
|
63
|
+
// O: "Pyl"
|
|
64
|
+
// }.items()
|
|
65
|
+
// )
|
|
66
|
+
// );
|
|
67
|
+
|
|
68
|
+
// DNA alphabet: G, A, T, C plus the ambiguity codes R, Y, W, S, M, K, H, B, V, D, N
// (their expansions are the entries of ambiguous_dna_values).
export const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
69
|
+
|
|
70
|
+
// The four unambiguous DNA base letters.
export const unambiguous_dna_letters = "GATC";
|
|
71
|
+
// RNA alphabet: G, A, U, C plus the ambiguity codes R, Y, W, S, M, K, H, B, V, D, N
// (their expansions are the entries of ambiguous_rna_values).
export const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
72
|
+
// The four unambiguous RNA base letters.
export const unambiguous_rna_letters = "GAUC";
|
|
73
|
+
|
|
74
|
+
// # B == 5-bromouridine
|
|
75
|
+
// # D == 5,6-dihydrouridine
|
|
76
|
+
// # S == thiouridine
|
|
77
|
+
// # W == wyosine
|
|
78
|
+
// G, A, T, C followed by the extra letters B, D, S and W.
// NOTE(review): B/D/S/W here are not the ambiguity codes used in ambiguous_dna_values;
// the neighbouring notes describe them as modified bases - confirm before mixing the two alphabets.
export const extended_dna_letters = "GATCBDSW";
|
|
79
|
+
|
|
80
|
+
// # are there extended forms?
|
|
81
|
+
// # extended_rna_letters = "GAUCBDSW"
|
|
82
|
+
|
|
83
|
+
/**
 * Expansion table for DNA letters: each key maps to a string holding every
 * base the letter can stand for. "." maps to itself.
 */
export const ambiguous_dna_values = {
  ".": ".",
  // single bases stand for themselves
  "A": "A", "C": "C", "G": "G", "T": "T",
  // codes covering two bases
  "M": "AC", "R": "AG", "W": "AT", "S": "CG", "Y": "CT", "K": "GT",
  // codes covering three bases
  "V": "ACG", "H": "ACT", "D": "AGT", "B": "CGT",
  // codes covering all four bases
  "X": "GATC", "N": "GATC"
};
|
|
102
|
+
|
|
103
|
+
/**
 * Expansion table for RNA letters: each key maps to a string holding every
 * base the letter can stand for. "." maps to itself.
 */
export const ambiguous_rna_values = {
  ".": ".",
  // single bases stand for themselves
  "A": "A", "C": "C", "G": "G", "U": "U",
  // codes covering two bases
  "M": "AC", "R": "AG", "W": "AU", "S": "CG", "Y": "CU", "K": "GU",
  // codes covering three bases
  "V": "ACG", "H": "ACU", "D": "AGU", "B": "CGU",
  // codes covering all four bases
  "X": "GAUC", "N": "GAUC"
};
|
|
122
|
+
|
|
123
|
+
/**
 * Complement lookup for DNA letters, ambiguity codes included.
 * W, S, X, N and "." map to themselves.
 */
export const ambiguous_dna_complement = {
  ".": ".",
  // plain base pairs
  "A": "T", "C": "G", "G": "C", "T": "A",
  // two-base codes
  "M": "K", "R": "Y", "W": "W", "S": "S", "Y": "R", "K": "M",
  // three-base codes
  "V": "B", "H": "D", "D": "H", "B": "V",
  // any-base codes
  "X": "X", "N": "N"
};
|
|
142
|
+
|
|
143
|
+
/**
 * Complement lookup for RNA letters, ambiguity codes included.
 * W, S, X, N and "." map to themselves.
 */
export const ambiguous_rna_complement = {
  ".": ".",
  // plain base pairs
  "A": "U", "C": "G", "G": "C", "U": "A",
  // two-base codes
  "M": "K", "R": "Y", "W": "W", "S": "S", "Y": "R", "K": "M",
  // three-base codes
  "V": "B", "H": "D", "D": "H", "B": "V",
  // any-base codes
  "X": "X", "N": "N"
};
|
|
162
|
+
|
|
163
|
+
// # Mass data taken from PubChem
|
|
164
|
+
|
|
165
|
+
// # Average masses of monophosphate deoxy nucleotides
|
|
166
|
+
/** Average mass of each deoxynucleotide monophosphate, keyed by base letter. */
export const unambiguous_dna_weights = {
  "A": 331.2218, "C": 307.1971, "G": 347.2212, "T": 322.2085
};
|
|
172
|
+
|
|
173
|
+
// # Monoisotopic masses of monophosphate deoxy nucleotides
|
|
174
|
+
/** Monoisotopic mass of each deoxynucleotide monophosphate, keyed by base letter. */
export const monoisotopic_unambiguous_dna_weights = {
  "A": 331.06817, "C": 307.056936, "G": 347.063084, "T": 322.056602
};
|
|
180
|
+
|
|
181
|
+
// module.exports.unambiguous_dna_weight_ranges = _make_ranges(
|
|
182
|
+
// unambiguous_dna_weights
|
|
183
|
+
// );
|
|
184
|
+
|
|
185
|
+
/**
 * Mass of each RNA nucleotide, keyed by base letter.
 * NOTE(review): presumably average masses of the monophosphates, mirroring
 * unambiguous_dna_weights - confirm against the data source.
 */
export const unambiguous_rna_weights = {
  "A": 347.2212, "C": 323.1965, "G": 363.2206, "U": 324.1813
};
|
|
191
|
+
|
|
192
|
+
/** Monoisotopic mass of each RNA nucleotide, keyed by base letter. */
export const monoisotopic_unambiguous_rna_weights = {
  "A": 347.063084, "C": 323.051851, "G": 363.057999, "U": 324.035867
};
|
|
198
|
+
|
|
199
|
+
// module.exports.unambiguous_rna_weight_ranges = _make_ranges(
|
|
200
|
+
// unambiguous_rna_weights
|
|
201
|
+
// );
|
|
202
|
+
|
|
203
|
+
// def _make_ambiguous_ranges(mydict, weight_table):
|
|
204
|
+
// range_d = {}
|
|
205
|
+
// avg_d = {}
|
|
206
|
+
// for letter, values in mydict.items():
|
|
207
|
+
// # Following line is a quick hack to skip undefined weights for U and O
|
|
208
|
+
// if len(values) == 1 and values[0] not in weight_table:
|
|
209
|
+
// continue
|
|
210
|
+
|
|
211
|
+
// weights = [weight_table.get(x) for x in values]
|
|
212
|
+
// range_d[letter] = (min(weights), max(weights))
|
|
213
|
+
// total_w = 0.0
|
|
214
|
+
// for w in weights:
|
|
215
|
+
// total_w = total_w + w
|
|
216
|
+
// avg_d[letter] = total_w / len(weights)
|
|
217
|
+
// return range_d, avg_d
|
|
218
|
+
|
|
219
|
+
// ambiguous_dna_weight_ranges, avg_ambiguous_dna_weights = \
|
|
220
|
+
// _make_ambiguous_ranges(ambiguous_dna_values,
|
|
221
|
+
// unambiguous_dna_weights)
|
|
222
|
+
|
|
223
|
+
// ambiguous_rna_weight_ranges, avg_ambiguous_rna_weights = \
|
|
224
|
+
// _make_ambiguous_ranges(ambiguous_rna_values,
|
|
225
|
+
// unambiguous_rna_weights)
|
|
226
|
+
|
|
227
|
+
/**
 * Mass of each amino acid keyed by one-letter code (the 20 letters of
 * `protein_letters` plus O and U), listed alphabetically.
 * NOTE(review): presumably average masses, in contrast to
 * monoisotopic_protein_weights - confirm against the data source.
 */
export const protein_weights = {
  "A": 89.0932, "C": 121.1582, "D": 133.1027, "E": 147.1293,
  "F": 165.1891, "G": 75.0666, "H": 155.1546, "I": 131.1729,
  "K": 146.1876, "L": 131.1729, "M": 149.2113, "N": 132.1179,
  "O": 255.3134, "P": 115.1305, "Q": 146.1445, "R": 174.201,
  "S": 105.0926, "T": 119.1192, "U": 168.0532, "V": 117.1463,
  "W": 204.2252, "Y": 181.1885
};
|
|
251
|
+
|
|
252
|
+
/**
 * Monoisotopic mass of each amino acid keyed by one-letter code (the 20 letters
 * of `protein_letters` plus O and U), listed alphabetically.
 */
export const monoisotopic_protein_weights = {
  "A": 89.047678, "C": 121.019749, "D": 133.037508, "E": 147.053158,
  "F": 165.078979, "G": 75.032028, "H": 155.069477, "I": 131.094629,
  "K": 146.105528, "L": 131.094629, "M": 149.051049, "N": 132.053492,
  "O": 255.158292, "P": 115.063329, "Q": 146.069142, "R": 174.111676,
  "S": 105.042593, "T": 119.058243, "U": 168.964203, "V": 117.078979,
  "W": 204.089878, "Y": 181.073893
};
|
|
276
|
+
|
|
277
|
+
/**
 * Expansion table for protein letters: each key maps to a string of the
 * residues it can stand for (B -> N or D, J -> I or L, Z -> Q or E, X -> any of
 * the 20 letters in `protein_letters`). The "*", "." and "-" entries hold
 * backslash-escaped text.
 * NOTE(review): the escaped values look intended for building regular
 * expressions - confirm against the callers.
 */
export const extended_protein_values = {
  "A": "A", "B": "ND", "C": "C", "D": "D", "E": "E", "F": "F",
  "G": "G", "H": "H", "I": "I", "J": "IL", "K": "K", "L": "L",
  "M": "M", "N": "N", "O": "O", "P": "P", "Q": "Q", "R": "R",
  "S": "S", "T": "T", "U": "U", "V": "V", "W": "W",
  "X": "ACDEFGHIKLMNPQRSTVWY",
  // # TODO - Include U and O in the possible values of X?
  // # This could alter the extended_protein_weight_ranges ...
  // # by MP: Won't do this, because they are so rare.
  "Y": "Y", "Z": "QE",
  "*": "\\*\\.",
  ".": "\\.\\.",
  "-": "\\-"
};
|
|
311
|
+
|
|
312
|
+
// protein_weight_ranges = _make_ranges(protein_weights)
|
|
313
|
+
|
|
314
|
+
// extended_protein_weight_ranges, avg_extended_protein_weights = \
|
|
315
|
+
// _make_ambiguous_ranges(extended_protein_values,
|
|
316
|
+
// protein_weights)
|
|
317
|
+
|
|
318
|
+
// # For Center of Mass Calculation.
|
|
319
|
+
// # Taken from http://www.chem.qmul.ac.uk/iupac/AtWt/ & PyMol
|
|
320
|
+
/**
 * Atomic weight per element symbol, plus a separate "D" entry listed right
 * after "H". Entries keep their original order, which follows the periodic
 * table from H up to Mt.
 */
export const atom_weights = {
  "H": 1.00794, "D": 2.0141, "He": 4.002602,
  "Li": 6.941, "Be": 9.012182, "B": 10.811, "C": 12.0107,
  "N": 14.0067, "O": 15.9994, "F": 18.9984032, "Ne": 20.1797,
  "Na": 22.98977, "Mg": 24.305, "Al": 26.981538, "Si": 28.0855,
  "P": 30.973761, "S": 32.065, "Cl": 35.453, "Ar": 39.948,
  "K": 39.0983, "Ca": 40.078, "Sc": 44.95591, "Ti": 47.867, "V": 50.9415, "Cr": 51.9961,
  "Mn": 54.938049, "Fe": 55.845, "Co": 58.9332, "Ni": 58.6934, "Cu": 63.546, "Zn": 65.39,
  "Ga": 69.723, "Ge": 72.64, "As": 74.9216, "Se": 78.96, "Br": 79.904, "Kr": 83.8,
  "Rb": 85.4678, "Sr": 87.62, "Y": 88.90585, "Zr": 91.224, "Nb": 92.90638, "Mo": 95.94,
  "Tc": 98.0, "Ru": 101.07, "Rh": 102.9055, "Pd": 106.42, "Ag": 107.8682, "Cd": 112.411,
  "In": 114.818, "Sn": 118.71, "Sb": 121.76, "Te": 127.6, "I": 126.90447, "Xe": 131.293,
  "Cs": 132.90545, "Ba": 137.327,
  "La": 138.9055, "Ce": 140.116, "Pr": 140.90765, "Nd": 144.24, "Pm": 145.0,
  "Sm": 150.36, "Eu": 151.964, "Gd": 157.25, "Tb": 158.92534, "Dy": 162.5,
  "Ho": 164.93032, "Er": 167.259, "Tm": 168.93421, "Yb": 173.04, "Lu": 174.967,
  "Hf": 178.49, "Ta": 180.9479, "W": 183.84, "Re": 186.207, "Os": 190.23, "Ir": 192.217,
  "Pt": 195.078, "Au": 196.96655, "Hg": 200.59, "Tl": 204.3833, "Pb": 207.2,
  "Bi": 208.98038, "Po": 208.98, "At": 209.99, "Rn": 222.02,
  "Fr": 223.02, "Ra": 226.03,
  "Ac": 227.03, "Th": 232.0381, "Pa": 231.03588, "U": 238.02891, "Np": 237.05,
  "Pu": 244.06, "Am": 243.06, "Cm": 247.07, "Bk": 247.07, "Cf": 251.08,
  "Es": 252.08, "Fm": 257.1, "Md": 258.1, "No": 259.1, "Lr": 262.11,
  "Rf": 261.11, "Db": 262.11, "Sg": 266.12, "Bh": 264.12, "Hs": 269.13, "Mt": 268.14
};
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import calculateTm from "./calculateNebTm";
|
|
2
|
+
|
|
3
|
+
/**
 * Calculates the annealing temperature (Ta) of a primer pair from the melting
 * temperature (Tm) that `calculateTm` returns for each primer.
 *
 * The lower of the two Tm values drives the result:
 * - polymerase "Q5": Tm_lower + 1, capped at 72
 * - anything else:   Tm_lower - 3
 *
 * This function does not throw. Every failure (wrong number of primers, a
 * primer whose Tm could not be computed, ...) is reported as a string starting
 * with "Error calculating annealing temperature: ".
 *
 * @param {string[]} sequences - exactly two primer sequences
 * @param {number} primerConc - primer concentration, forwarded to the Tm calculation
 * @param {Object} [options]
 * @param {number} [options.monovalentCationConc] - forwarded to the Tm calculation
 * @param {string} [options.polymerase] - "Q5" selects the Q5 rule described above
 * @returns {number|string} the annealing temperature, or an error message
 */
export default function calculateNebTa(
  sequences,
  primerConc,
  { monovalentCationConc, polymerase } = {}
) {
  try {
    if (sequences.length !== 2) {
      throw new Error(
        `${sequences.length} sequences received when 2 primers were expected`
      );
    }
    const meltingTemperatures = sequences.map(seq => {
      const tm = calculateTm(seq, primerConc, { monovalentCationConc });
      // calculateTm reports failures as a message string (and can yield NaN for
      // unusable concentrations). Such values must not reach the arithmetic
      // below, where they would turn into NaN or a string with "1" appended.
      if (!Number.isFinite(tm)) {
        throw new Error(`Could not calculate Tm for primer ${seq}: ${tm}`);
      }
      return tm;
    });
    const lowerMeltingTemp = Math.min(...meltingTemperatures);
    if (polymerase === "Q5") {
      // Ta = Tm_lower+1°C is standard for Q5
      // "Annealing temperature for experiments with this enzyme should typically not exceed 72°C"
      return Math.min(lowerMeltingTemp + 1, 72);
    }
    return lowerMeltingTemp - 3;
  } catch (err) {
    return `Error calculating annealing temperature: ${err}`;
  }
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import assert from "assert";
|
|
2
|
+
import calculateTa from "./calculateNebTa";
|
|
3
|
+
|
|
4
|
+
// Exercises calculateTa (./calculateNebTa) with and without the Q5 polymerase rule.
describe("calculate Ta", () => {
  // primer concentration of 500 nM, expressed in M
  const primerConc = 0.0000005;
  // a primer pair whose lower Tm is high enough to hit the 72 degree Q5 cap
  const highTmPrimers = [
    "CACACCAGGTCTCAGATATACATATGACAGACAAACCGGCCAAAGG",
    "CACACCAGGTCTCACTCCTTCTTAAATCATCGGGTCAGCACGTAGG"
  ];

  it("should return the annealing temperature of two primers based on Q5 protocol", () => {
    // 50 mM KCl in Q5 protocol
    const options = { monovalentCationConc: 0.05, polymerase: "Q5" };
    const cases = [
      [["AGCGGATAACAATTTCACACAGGA", "GTAAAACGACGGCCAGT"], 63.54033701264342],
      [["AGCGGATAAGGGCAATTTCAC", "GTAAAACGACGGCCA"], 59.95638912652805],
      // three primers are rejected with an error message
      [
        ["AGCGGATAAGGGCAATTTCAC", "GTAAAACGACGGCCA", "AGCGGATAACAATTTCAC"],
        "Error calculating annealing temperature: Error: 3 sequences received when 2 primers were expected"
      ],
      // "Annealing temperature for experiments with this enzyme should typically not exceed 72°C"
      [highTmPrimers, 72]
    ];
    cases.forEach(([primers, expected]) => {
      assert.equal(calculateTa(primers, primerConc, options), expected);
    });
  });

  it("should return the annealing temperature of two primers without a specified polymerase", () => {
    // 50 mM KCl
    const options = { monovalentCationConc: 0.05 };
    const annealingTemp = calculateTa(highTmPrimers, primerConc, options);
    assert.equal(annealingTemp, 74.49383180968016);
  });
});
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import getComplementSequenceString from "./getComplementSequenceString";
|
|
2
|
+
import calculatePercentGc from "./calculatePercentGC";
|
|
3
|
+
|
|
4
|
+
// sources of formulas:
|
|
5
|
+
// - https://tmcalculator.neb.com/#!/help
|
|
6
|
+
// - Tm calculation: SantaLucia (1998) PNAS 95:1460-5
|
|
7
|
+
// - salt correction (for monovalent cations): Owczarzy (2004) Biochem 43:3537-54
|
|
8
|
+
|
|
9
|
+
// primer concentration & monovalent cation concentration in M
|
|
10
|
+
// R = universal gas constant with units of cal/(K*mol)
const GAS_CONSTANT = 1.987;
// offsets to convert Tm between kelvin and celsius
const KELVIN_TO_CELSIUS = -273.15;
const CELSIUS_TO_KELVIN = 273.15;
// converts enthalpy from kilocal/mol to cal/mol
const KILOCAL_TO_CAL = 1000;

// Nearest-neighbor enthalpy per dimer duplex ("dimer/complement"), in kilocal/mol,
// plus the helix-initiation adjustments applied once per terminal base.
const NEAREST_NEIGHBOR_ENTHALPY = Object.freeze({
  "AA/TT": -7.9,
  "AT/TA": -7.2,
  "TA/AT": -7.2,
  "CA/GT": -8.5,
  "GT/CA": -8.4,
  "CT/GA": -7.8,
  "GA/CT": -8.2,
  "CG/GC": -10.6,
  "GC/CG": -9.8,
  "GG/CC": -8.0,
  "TT/AA": -7.9,
  "TG/AC": -8.5,
  "AC/TG": -8.4,
  "AG/TC": -7.8,
  "TC/AG": -8.2,
  "CC/GG": -8.0,
  initiationWithTerminalGC: 0.1,
  initiationWithTerminalAT: 2.3
});

// Nearest-neighbor entropy per dimer duplex, in the same units as GAS_CONSTANT,
// plus the helix-initiation adjustments applied once per terminal base.
const NEAREST_NEIGHBOR_ENTROPY = Object.freeze({
  "AA/TT": -22.2,
  "AT/TA": -20.4,
  "TA/AT": -21.3,
  "CA/GT": -22.7,
  "GT/CA": -22.4,
  "CT/GA": -21.0,
  "GA/CT": -22.2,
  "CG/GC": -27.2,
  "GC/CG": -24.4,
  "GG/CC": -19.9,
  "TT/AA": -22.2,
  "TG/AC": -22.7,
  "AC/TG": -22.4,
  "AG/TC": -21.0,
  "TC/AG": -22.2,
  "CC/GG": -19.9,
  initiationWithTerminalGC: -2.8,
  initiationWithTerminalAT: 4.1
});

/**
 * Calculates the melting temperature (Tm, in °C) of a primer with the
 * nearest-neighbor method, optionally corrected for the monovalent cation
 * concentration.
 *
 * This function does not throw. Every failure is reported as a string starting
 * with "Error calculating Tm for sequence <sequence>: ". That covers degenerate
 * (non A/T/G/C) bases, an empty sequence and a primer concentration that is not
 * a positive finite number.
 *
 * @param {string} sequence - primer sequence; only A, T, G, C (any case) allowed
 * @param {number} primerConc - primer concentration in M
 * @param {Object} [options]
 * @param {number} [options.monovalentCationConc] - monovalent cation
 *   concentration in M; when falsy the uncorrected Tm (1 M assumption) is returned
 * @returns {number|string} Tm in °C, or an error message
 */
export default function calculateNebTm(
  sequence,
  primerConc,
  { monovalentCationConc } = {}
) {
  try {
    if (/[^atgc]/i.test(sequence)) {
      throw new Error(
        `Degenerate bases prohibited in Tm calculation of sequence ${sequence}`
      );
    }
    // Without any base the formula below degenerates to 0 / x - 273.15.
    if (sequence.length === 0) {
      throw new Error("Sequence must contain at least one base");
    }
    // Math.log() of a missing, zero or negative concentration gives NaN or
    // -Infinity, which previously produced a silent NaN / -273.15 result.
    const numericPrimerConc = Number(primerConc);
    if (!Number.isFinite(numericPrimerConc) || numericPrimerConc <= 0) {
      throw new Error(
        `Primer concentration must be a positive number, received ${primerConc}`
      );
    }

    const bases = sequence.toUpperCase();
    const lastIndex = bases.length - 1;

    // adjustments for helix initiation: one term per terminal base
    // (a single-base sequence has only one terminal base)
    const terminalBases =
      lastIndex === 0 ? [bases[0]] : [bases[0], bases[lastIndex]];
    let hi = 0;
    let si = 0;
    for (const base of terminalBases) {
      const initiationKey =
        base === "G" || base === "C"
          ? "initiationWithTerminalGC"
          : "initiationWithTerminalAT";
      hi += NEAREST_NEIGHBOR_ENTHALPY[initiationKey];
      si += NEAREST_NEIGHBOR_ENTROPY[initiationKey];
    }

    // enthalpy and entropy summed over every overlapping dimer, left to right
    let h = 0;
    let s = 0;
    for (let i = 0; i < lastIndex; i++) {
      const dimer = bases[i] + bases[i + 1];
      const complement = getComplementSequenceString(dimer).toUpperCase();
      const dimerDuplex = `${dimer}/${complement}`;
      const dimerEnthalpy = NEAREST_NEIGHBOR_ENTHALPY[dimerDuplex];
      const dimerEntropy = NEAREST_NEIGHBOR_ENTROPY[dimerDuplex];
      if (dimerEnthalpy === undefined || dimerEntropy === undefined) {
        throw new Error(
          `Could not find value for ${dimerDuplex} of sequence ${sequence}`
        );
      }
      h += dimerEnthalpy;
      s += dimerEntropy;
    }

    // this calculated Tm assumes 1 M monovalent cation concentration
    const deltaH = h + hi;
    const deltaS = s + si;
    const numerator = deltaH * KILOCAL_TO_CAL;
    const denominator = deltaS + GAS_CONSTANT * Math.log(primerConc);
    const meltingTemp = numerator / denominator + KELVIN_TO_CELSIUS;
    if (!monovalentCationConc) {
      return meltingTemp;
    }

    // adjusting Tm for actual monovalent cation concentration
    const lnOfMonoConc = Math.log(monovalentCationConc);
    const gcContent = calculatePercentGc(sequence) / 100;
    const part = 4.29 * gcContent - 3.95;
    // NOTE(review): Math.pow(9.4, -6) is 9.4^-6 (about 1.45e-6), not 9.4e-6.
    // The salt correction cited for this function is usually written with a
    // 9.40 x 10^-6 coefficient, so this looks like a transcription slip.
    // It is kept unchanged because the expected values in the existing tests
    // depend on it - confirm against the reference calculator before changing.
    const saltCorrection =
      part * Math.pow(10, -5) * lnOfMonoConc +
      Math.pow(9.4, -6) * Math.pow(lnOfMonoConc, 2);
    return (
      1 / (1 / (meltingTemp + CELSIUS_TO_KELVIN) + saltCorrection) +
      KELVIN_TO_CELSIUS
    );
  } catch (err) {
    return `Error calculating Tm for sequence ${sequence}: ${err}`;
  }
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import assert from "assert";
|
|
2
|
+
import calculateTm from "./calculateNebTm";
|
|
3
|
+
|
|
4
|
+
// Exercises calculateTm (./calculateNebTm) for plain and degenerate sequences.
describe("calculate Tm based on SantaLucia 1998 & Owczarzy 2004", () => {
  it("should return the melting temperature of a given sequence, if no degenerate bases are present", () => {
    // primer concentration in Q5 protocol is 500 nM
    const primerConc = 0.0000005;
    // 50 mM KCl in Q5 protocol
    const options = { monovalentCationConc: 0.05 };
    const cases = [
      ["AGCGGATAACAATTTCACACAGGA", 65.8994505801345],
      ["AGCGGATAACAATTTCAC", 56.11037835109477],
      // lower-case bases give the same result
      ["AGCGGATAACAATTTcac", 56.11037835109477],
      // degenerate bases are reported as an error message
      [
        "AGCGGNNN",
        "Error calculating Tm for sequence AGCGGNNN: Error: Degenerate bases prohibited in Tm calculation of sequence AGCGGNNN"
      ],
      [
        "AGCGGnnn",
        "Error calculating Tm for sequence AGCGGnnn: Error: Degenerate bases prohibited in Tm calculation of sequence AGCGGnnn"
      ]
    ];
    cases.forEach(([sequence, expected]) => {
      assert.equal(calculateTm(sequence, primerConc, options), expected);
    });
  });
});
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import calculatePercentGC from "./calculatePercentGC";
|
|
2
|
+
import assert from "assert";
|
|
3
|
+
|
|
4
|
+
// Exercises calculatePercentGC (./calculatePercentGC) on short sequences.
describe("calculatePercentGC", () => {
  it("should return the percent GC of a given sequence string", () => {
    assert.equal(calculatePercentGC("gact"), 50);
    // 2 of 3 bases: compare the integer part only
    const twoThirds = calculatePercentGC("gac");
    assert.equal(Math.floor(twoThirds), 66);
    const exactCases = [
      ["a", 0],
      ["", 0],
      ["ggg", 100],
      ["GGG", 100],
      ["ccc", 100]
    ];
    exactCases.forEach(([sequence, expected]) => {
      assert.equal(calculatePercentGC(sequence), expected);
    });
  });
});
|