@teselagen/sequence-utils 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +2 -2
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
/* eslint-disable eqeqeq */
|
|
2
|
+
/**
|
|
3
|
+
* DNA melting temperature calculator.
|
|
4
|
+
* @author Nick Elsbree
|
|
5
|
+
* @author Zinovii Dmytriv (original author)
|
|
6
|
+
*/
|
|
7
|
+
const calcTmMethods = {
  TABLE_BRESLAUER: "breslauer",
  TABLE_SUGIMOTO: "sugimoto",
  TABLE_UNIFIED: "unified",

  A: -10.8, // Helix initiation for deltaS
  R: 1.987, // Gas constant (cal/(K*mol)).
  C: 0.5e-6, // Oligo concentration. 0.5uM is typical for PCR.
  Na: 50e-3, // Monovalent salt concentration. 50mM is typical for PCR.

  /**
   * Calculates the melting temperature of a DNA sequence from
   * nearest-neighbor (dinucleotide) deltaH/deltaS tables.
   *
   * The sequence is matched case-insensitively: it is lower-cased before the
   * neighbor pairs are counted, so "ATGC" and "atgc" give the same result.
   *
   * @param {String} sequence The DNA sequence to use.
   * @param {String} [type] One of TABLE_BRESLAUER, TABLE_SUGIMOTO or
   *   TABLE_UNIFIED. Defaults to TABLE_BRESLAUER when undefined.
   * @param {Number} [A] Helix initiation for deltaS. Defaults to -10.8.
   * @param {Number} [R] The gas constant, in cal/(K*mol). Defaults to 1.987.
   * @param {Number} [C] The oligo concentration. Defaults to 0.5e-6M.
   * @param {Number} [Na] The monovalent salt concentration. Defaults to 50e-3M.
   *   Any falsy value passed for A, R, C or Na (including 0) is replaced by
   *   its default.
   * @return {String|Number} Temperature in Celsius formatted with two
   *   decimals (a string, from toFixed), or the number 0 when the sequence is
   *   empty or the computed temperature is negative.
   * @throws {Error} "Invalid table type!" when type is defined but is not one
   *   of the three known tables.
   */
  calculateTemperature: function(sequence, type, A, R, C, Na) {
    const tableType =
      typeof type === "undefined" ? this.TABLE_BRESLAUER : type;

    // An unknown table type yields no table; reject it before touching the
    // sequence, exactly as the explicit type check used to.
    const deltaHTable = this.getDeltaHTable(tableType);
    const deltaSTable = this.getDeltaSTable(tableType);
    if (!deltaHTable || !deltaSTable) {
      throw new Error("Invalid table type!");
    }

    const helixInitiation = A || this.A;
    const gasConstant = R || this.R;
    const oligoConcentration = C || this.C;
    const saltConcentration = Na || this.Na;

    if (sequence.length === 0) {
      return 0;
    }

    // The pairs below are lowercase, so normalise the input first; without
    // this an upper-case sequence matches nothing and the result collapses to 0.
    const normalizedSequence = sequence.toLowerCase();

    // Order must match the rows of the deltaH/deltaS tables:
    // aa, at, ac, ag, tt, ta, tc, tg, cc, ca, ct, cg, gg, ga, gt, gc
    const neighborPairs = [
      "aa", "at", "ac", "ag",
      "tt", "ta", "tc", "tg",
      "cc", "ca", "ct", "cg",
      "gg", "ga", "gt", "gc"
    ];

    let sumDeltaH = 0.0;
    let sumDeltaS = 0.0;

    neighborPairs.forEach((pair, i) => {
      // A pair of identical bases can overlap itself ("aaa" holds "aa" twice),
      // so those are counted with the overlapping counter.
      const count =
        pair[0] === pair[1]
          ? this.calculateReps(normalizedSequence, pair)
          : this.calculateNumberOfOccurrences(normalizedSequence, pair);
      sumDeltaH = sumDeltaH + count * deltaHTable[i];
      sumDeltaS = sumDeltaS + count * deltaSTable[i];
    });

    const temperature =
      (-1000.0 * sumDeltaH) /
        (helixInitiation +
          -sumDeltaS +
          gasConstant * Math.log(oligoConcentration / 4.0)) -
      273.15 +
      16.6 * Math.LOG10E * Math.log(saltConcentration);

    // If temperature is negative then return 0.
    if (temperature < 0) {
      return 0;
    }

    return temperature.toFixed(2);
  },

  /**
   * @private
   * Function to return deltaH table for given algorithm.
   * @param {String} type Algorithm to get table for.
   * @return {Number[]|null} A fresh 16-entry deltaH table ordered
   *   aa, at, ac, ag, tt, ta, tc, tg, cc, ca, ct, cg, gg, ga, gt, gc,
   *   or null when the type is not recognised.
   */
  getDeltaHTable: function(type) {
    const tables = {
      [this.TABLE_BRESLAUER]: [
        9.1, 8.6, 6.5, 7.8,
        9.1, 6.0, 5.6, 5.8,
        11.0, 5.8, 7.8, 11.9,
        11.0, 5.6, 6.5, 11.1
      ],
      [this.TABLE_SUGIMOTO]: [
        8.0, 5.6, 6.5, 7.8,
        8.0, 5.6, 5.6, 5.8,
        10.9, 8.2, 6.6, 11.8,
        10.9, 6.6, 9.4, 11.9
      ],
      [this.TABLE_UNIFIED]: [
        7.9, 7.2, 8.4, 7.8,
        7.9, 7.2, 8.2, 8.5,
        8.0, 8.5, 7.8, 10.6,
        8.0, 8.2, 8.4, 9.8
      ]
    };
    return Object.prototype.hasOwnProperty.call(tables, type)
      ? tables[type]
      : null;
  },

  /**
   * @private
   * Function to return deltaS table for given algorithm.
   * @param {String} type Algorithm to get table for.
   * @return {Number[]|null} A fresh 16-entry deltaS table ordered
   *   aa, at, ac, ag, tt, ta, tc, tg, cc, ca, ct, cg, gg, ga, gt, gc,
   *   or null when the type is not recognised.
   */
  getDeltaSTable: function(type) {
    const tables = {
      [this.TABLE_BRESLAUER]: [
        24.0, 23.9, 17.3, 20.8,
        24.0, 16.9, 13.5, 12.9,
        26.6, 12.9, 20.8, 27.8,
        26.6, 13.5, 17.3, 26.7
      ],
      [this.TABLE_SUGIMOTO]: [
        21.9, 15.2, 17.3, 20.8,
        21.9, 15.2, 13.5, 12.9,
        28.4, 25.5, 23.5, 29.0,
        28.4, 16.4, 25.5, 29.0
      ],
      [this.TABLE_UNIFIED]: [
        22.2, 20.4, 22.4, 21.0,
        22.2, 21.3, 22.2, 22.7,
        19.9, 22.7, 21.0, 27.2,
        19.9, 22.2, 22.4, 24.4
      ]
    };
    return Object.prototype.hasOwnProperty.call(tables, type)
      ? tables[type]
      : null;
  },

  /**
   * @private
   * Finds number of occurrences of target in sequence.
   * Will find repeating (overlapping) sequences, meaning that
   * calculateReps("aaa", "aa") returns 2 rather than 1.
   * The comparison is case-sensitive.
   * @param {String} sequence The string to search through.
   * @param {String} target The string to search for.
   * @return {Int} Number of occurrences of target in sequence, with repeats.
   */
  calculateReps: function(sequence, target) {
    const sequenceLength = sequence.length;

    if (sequenceLength === 0) {
      return 0;
    }

    let numFound = 0;
    let seqOffset = 0; // Search offset for finding multiple matches.

    // eslint-disable-next-line no-constant-condition
    while (true) {
      const foundSeq = sequence.indexOf(target, seqOffset);

      if (foundSeq === -1) {
        break;
      }

      // Advance by one (not by target.length) so overlapping hits are counted.
      seqOffset = foundSeq + 1;
      numFound++;

      // Stops the loop for an empty target, which indexOf always "finds".
      if (seqOffset > sequenceLength) {
        break;
      }
    }

    return numFound;
  },

  /**
   * @private
   * Counts number of occurrences of target in sequence, without repeating
   * (non-overlapping, via String.prototype.split). Case-sensitive.
   * @param {String} sequence The string to search through.
   * @param {String} target The string to search for.
   * @return {Int} Number of occurrences of target in sequence.
   */
  calculateNumberOfOccurrences: function(sequence, target) {
    if (sequence.length === 0) {
      return 0;
    }

    return sequence.split(target).length - 1;
  }
};

export default calcTmMethods.calculateTemperature.bind(calcTmMethods);
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import shortid from "shortid";
|
|
2
|
+
import {flatMap, cloneDeep} from "lodash";
|
|
3
|
+
import {normalizePositionByRangeLength, getRangeLength} from "@teselagen/range-utils";
|
|
4
|
+
import getCutsitesFromSequence from "./getCutsitesFromSequence";
|
|
5
|
+
|
|
6
|
+
/**
 * Computes the fragments that result from cutting a sequence at the given
 * cutsites.
 *
 * Cutsites are ordered by topSnipPosition (on a copy; the caller's array is
 * left untouched). For a non-circular sequence with at least one cutsite a
 * synthetic "Sequence_Terminus" cutsite at position 0 is appended so that the
 * fragments touching the linear ends are produced as well.
 *
 * @param {Object} options
 * @param {Object[]} options.cutsites Cutsites; each is read for
 *   topSnipPosition, bottomSnipPosition, topSnipBeforeBottom, name and
 *   restrictionEnzyme.name.
 * @param {Number} options.sequenceLength Length of the digested sequence.
 * @param {Boolean} options.circular Whether the sequence is circular.
 * @param {Boolean} [options.includeOverAndUnderHangs] When truthy, fragment
 *   start/end are widened to the outer snip of each cutsite.
 * @param {Boolean} [options.computePartialDigest] When truthy (and not
 *   disabled), each cutsite is additionally paired with every cutsite other
 *   than its immediate successor and the first sorted cutsite.
 * @param {Boolean} [options.computeDigestDisabled] When truthy, the regular
 *   neighbor-to-neighbor pairs are skipped.
 * @param {Boolean} [options.computePartialDigestDisabled] When truthy, the
 *   partial-digest pairs are skipped.
 * @param {Function} [options.selectionLayerUpdate] Called with
 *   {start, end, name} by a fragment's onFragmentSelect.
 * @param {Function} [options.updateSelectedFragment] Called with the fragment
 *   id by a fragment's onFragmentSelect.
 * @returns {{computePartialDigestDisabled, computeDigestDisabled,
 *   fragments: Object[], overlappingEnzymes: Object[]}} fragments holds the
 *   fragments with a truthy size; fragments whose size is falsy are reported
 *   in overlappingEnzymes instead.
 */
function computeDigestFragments({
  cutsites,
  sequenceLength,
  circular,
  //optional:
  includeOverAndUnderHangs,
  computePartialDigest,
  computeDigestDisabled,
  computePartialDigestDisabled,
  selectionLayerUpdate,
  updateSelectedFragment
}) {
  const fragments = [];
  const overlappingEnzymes = [];
  const pairs = [];

  // Sort a copy: Array.prototype.sort works in place, and the terminus pushed
  // below must not leak into the array the caller handed in.
  const sortedCutsites = [...cutsites].sort((a, b) => {
    return a.topSnipPosition - b.topSnipPosition;
  });
  if (!circular && cutsites.length) {
    sortedCutsites.push({
      id: "seqTerm_" + shortid(),
      start: 0,
      end: 0,
      overhangBps: "",
      topSnipPosition: 0,
      bottomSnipPosition: 0,
      upstreamTopSnip: 0,
      upstreamBottomSnip: 0,
      upstreamTopBeforeBottom: false,
      topSnipBeforeBottom: false,
      recognitionSiteRange: {
        start: 0,
        end: 0
      },
      forward: true,
      name: "Sequence_Terminus",
      restrictionEnzyme: {
        name: "Sequence_Terminus"
      }
    });
  }

  sortedCutsites.forEach((cutsite1, index) => {
    if (computePartialDigest && !computePartialDigestDisabled) {
      sortedCutsites.forEach((cs, index2) => {
        if (index2 === index + 1 || index2 === 0) {
          return;
        }
        pairs.push([cutsite1, sortedCutsites[index2]]);
      });
    }
    if (!computeDigestDisabled) {
      // Pair each cutsite with its successor, wrapping the last one around to
      // the first.
      pairs.push([
        cutsite1,
        sortedCutsites[index + 1]
          ? sortedCutsites[index + 1]
          : sortedCutsites[0]
      ]);
    }
  });

  pairs.forEach(pair => {
    let [cut1, cut2] = pair;

    let start = normalizePositionByRangeLength(
      cut1.topSnipPosition,
      sequenceLength
    );
    let end = normalizePositionByRangeLength(
      cut2.topSnipPosition - 1,
      sequenceLength
    );
    let size = getRangeLength({ start, end }, sequenceLength);
    let overlapsSelf;
    if (includeOverAndUnderHangs) {
      const oldSize = size;
      start = normalizePositionByRangeLength(
        cut1.topSnipBeforeBottom
          ? cut1.topSnipPosition
          : cut1.bottomSnipPosition,
        sequenceLength
      );
      end = normalizePositionByRangeLength(
        cut2.topSnipBeforeBottom
          ? cut2.bottomSnipPosition - 1
          : cut2.topSnipPosition - 1,
        sequenceLength
      );
      size = getRangeLength({ start, end }, sequenceLength);
      if (oldSize > size) {
        //we've got a part that wraps on itself
        overlapsSelf = true;
        size += sequenceLength;
      }
    }

    let isFormedFromLinearEnd;
    // The shared terminus object is cloned before renaming so the same
    // synthetic cutsite can serve as "start" in one pair and "end" in another.
    if (cut1.name === "Sequence_Terminus") {
      cut1 = cloneDeep(cut1);
      isFormedFromLinearEnd = true;
      cut1.name = "Linear_Sequence_Start";
      cut1.restrictionEnzyme.name = "Linear_Sequence_Start";
    } else if (cut2.name === "Sequence_Terminus") {
      cut2 = cloneDeep(cut2);
      isFormedFromLinearEnd = true;
      cut2.name = "Linear_Sequence_End";
      cut2.restrictionEnzyme.name = "Linear_Sequence_End";
    }

    const id = start + "-" + end + "-" + size + "-";
    const name = `${cut1.restrictionEnzyme.name} -- ${cut2.restrictionEnzyme.name} ${size} bps`;

    const fragment = {
      isFormedFromLinearEnd,
      madeFromOneCutsite: cut1 === cut2,
      start,
      end,
      size,
      overlapsSelf,
      id,
      name,
      cut1,
      cut2,
      onFragmentSelect:
        selectionLayerUpdate && updateSelectedFragment
          ? () => {
              selectionLayerUpdate({
                start,
                end,
                name
              });

              updateSelectedFragment(id);
            }
          : undefined
    };

    // Fragments without a usable size go to overlappingEnzymes only; the
    // previous code computed this split with filter() but dropped its result.
    if (!fragment.size) {
      overlappingEnzymes.push(fragment);
    } else {
      fragments.push(fragment);
    }
  });

  return {
    computePartialDigestDisabled,
    computeDigestDisabled,
    fragments,
    overlappingEnzymes
  };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Convenience wrapper: looks up the cutsites of the given enzymes in a
 * sequence and hands them to computeDigestFragments.
 *
 * @param {Object} options
 * @param {String} options.sequence Sequence to digest; its length is passed
 *   on as sequenceLength.
 * @param {Boolean} options.circular Forwarded to both the cutsite lookup and
 *   the fragment computation.
 * @param {Object[]} options.enzymes Enzymes forwarded to
 *   getCutsitesFromSequence.
 * @param {Boolean} [options.includeOverAndUnderHangs] Forwarded unchanged.
 * @returns {Object} Whatever computeDigestFragments returns.
 */
function getDigestFragsForSeqAndEnzymes(options) {
  const { sequence, circular, enzymes, includeOverAndUnderHangs } = options;

  // The lookup result is flattened with flatMap into one list of cutsites.
  const cutsites = flatMap(
    getCutsitesFromSequence(sequence, circular, enzymes)
  );
  const sequenceLength = sequence.length;

  return computeDigestFragments({
    includeOverAndUnderHangs,
    cutsites,
    sequenceLength,
    circular
  });
}
|
|
177
|
+
|
|
178
|
+
export {computeDigestFragments};
|
|
179
|
+
export {getDigestFragsForSeqAndEnzymes};
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import {getDigestFragsForSeqAndEnzymes} from "./computeDigestFragments.js";
|
|
2
|
+
import aliasedEnzymesByName from "./aliasedEnzymesByName.js";
|
|
3
|
+
|
|
4
|
+
describe("computeDigestFragments", () => {
  // 49 bp sequence containing exactly one BamHI recognition site (ggatcc).
  const seqWithOneBamHISite =
    "ggggatccggggggggggggggggggggggggggggggggggggggggg";
  // Sequence without any BamHI recognition site.
  const seqWithoutBamHISite = "ggggggggggggggggggggggggggggggggggggggggggggg";

  // Runs a BamHI-only digest of the given sequence.
  const digestWithBamHI = (sequence, circular, includeOverAndUnderHangs) =>
    getDigestFragsForSeqAndEnzymes({
      sequence,
      circular,
      enzymes: [aliasedEnzymesByName.bamhi],
      includeOverAndUnderHangs
    });

  it("it should correctly generate fragments for bamhi cutting once in a circular sequence with includeOverAndUnderHangs=true", () => {
    const { fragments } = digestWithBamHI(seqWithOneBamHISite, true, true);
    expect(fragments).toHaveLength(1);
    const [fragment] = fragments;
    expect(fragment.overlapsSelf).toEqual(true);
    expect(fragment.start).toEqual(3);
    expect(fragment.end).toEqual(6);
    // 53 rather than 4: a self-overlapping fragment has the sequence length added to its size.
    expect(fragment.size).toEqual(53);
    expect(fragment.madeFromOneCutsite).toEqual(true);
  });

  it("it should correctly generate fragments for bamhi cutting once in a circular sequence", () => {
    const { fragments } = digestWithBamHI(seqWithOneBamHISite, true);
    expect(fragments).toHaveLength(1);
    const [fragment] = fragments;
    expect(fragment.start).toEqual(3);
    expect(fragment.end).toEqual(2);
    expect(fragment.size).toEqual(49);
    expect(fragment.madeFromOneCutsite).toEqual(true);
  });

  it("it should correctly generate fragments for bamhi cutting once in a linear sequence", () => {
    const { fragments } = digestWithBamHI(seqWithOneBamHISite, false);
    expect(fragments).toHaveLength(2);
    const [toSequenceEnd, fromSequenceStart] = fragments;

    expect(toSequenceEnd.isFormedFromLinearEnd).toEqual(true);
    expect(toSequenceEnd.name).toEqual("BamHI -- Linear_Sequence_End 46 bps");
    expect(toSequenceEnd.start).toEqual(3);
    expect(toSequenceEnd.end).toEqual(48);
    expect(toSequenceEnd.size).toEqual(46);
    expect(toSequenceEnd.madeFromOneCutsite).toEqual(false);

    expect(fromSequenceStart.isFormedFromLinearEnd).toEqual(true);
    expect(fromSequenceStart.name).toEqual(
      "Linear_Sequence_Start -- BamHI 3 bps"
    );
    expect(fromSequenceStart.start).toEqual(0);
    expect(fromSequenceStart.end).toEqual(2);
    expect(fromSequenceStart.size).toEqual(3);
    expect(fromSequenceStart.madeFromOneCutsite).toEqual(false);
  });

  it("it should not generate any fragments for bamhi if it doesn't cut in a linear sequence", () => {
    const { fragments } = digestWithBamHI(seqWithoutBamHISite, false);
    expect(fragments).toHaveLength(0);
  });

  it("it should not generate any fragments for bamhi if it doesn't cut in a circular sequence", () => {
    const { fragments } = digestWithBamHI(seqWithoutBamHISite, true);
    expect(fragments).toHaveLength(0);
  });
});
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
 * Condenses a pairwise alignment into a one-letter-per-position overview
 * string for a minimap track.
 *
 * Letters emitted:
 *  - "W" for positions inside the aligned sequence's leading or trailing run
 *    of "-" (i.e. outside the aligned region),
 *  - "G" where both sequences hold the same non-gap character
 *    (compared case-insensitively),
 *  - "R" for a mismatch, for a gap in the aligned sequence (deletion), and
 *    once per run of gaps in the reference (insertion).
 *
 * A run of reference gaps collapses to a single "R", and the reference
 * position right after such a run is skipped, so the result can be shorter
 * than the inputs.
 *
 * @param {String} referenceSeq Reference sequence, with "-" for gaps.
 * @param {String} alignedSeq Aligned sequence, with "-" for gaps.
 * @returns {String} The condensed track.
 */
export default function condensePairwiseAlignmentDifferences(
  referenceSeq,
  alignedSeq
) {
  const GAP = "-";
  const reference = referenceSeq.toLowerCase();
  const aligned = alignedSeq.toLowerCase();

  // Bounds of the aligned region: [number of leading dashes, length without trailing dashes).
  const alignedRegionStart =
    alignedSeq.length - alignedSeq.replace(/^-+/g, "").length;
  const alignedRegionEnd = alignedSeq.replace(/-+$/g, "").length;

  const lastIndex = reference.length - 1;
  const track = [];

  for (let i = 0; i <= lastIndex; i++) {
    if (i < alignedRegionStart || i >= alignedRegionEnd) {
      track.push("W");
      continue;
    }

    const refBase = reference[i];
    const alignedBase = aligned[i];

    if (refBase !== GAP && alignedBase !== GAP) {
      // ACTG match or mismatch
      track.push(refBase === alignedBase ? "G" : "R");
      continue;
    }

    if (alignedBase === GAP) {
      // deletion
      track.push("R");
      continue;
    }

    if (refBase !== GAP) {
      console.error("should not reach this step!");
      continue;
    }

    // From here on the reference holds a gap: an insertion.
    const followsGap = reference[i - 1] === GAP;
    const precedesGap = reference[i + 1] === GAP;

    if (i === lastIndex) {
      if (followsGap) {
        // final "-" of a >1 insertion at the 3' end of the sequence
        track.splice(-2, 1);
      } else {
        // "-" of a one-bp insertion at the 3' end of the sequence
        track.pop();
        track.push("R");
      }
    } else if (!followsGap) {
      // first "-" of an insertion
      track.push("R");
      if (!precedesGap) {
        // one-bp insertion in the middle: also skip the next position
        i++;
      }
    } else if (!precedesGap) {
      // "-" at the end of an insertion: skip the next position
      i++;
    }
    // otherwise: a "-" inside an insertion, nothing to emit
  }

  return track.join("");
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import condensePairwiseAlignmentDifferences from "./condensePairwiseAlignmentDifferences.js";
|
|
2
|
+
describe("pairwise alignment differences", () => {
  // Each case: [test title, reference sequence, aligned sequence, expected condensed track]
  const cases = [
    [
      "it should ignore start and end where dashes are ",
      "cccccccGCTAG--Tccc",
      "-------GCTAGAAT---",
      "WWWWWWWGGGGGRWWW"
    ],
    ["1 insertion of >1 bp", "GCTAG--T", "GCTAGAAT", "GGGGGR"],
    ["should be case insensitive", "GctAG-T", "GCTAGAT", "GGGGGR"],
    [
      "insertion of one bp in the middle of the sequence",
      "GCTAG-T",
      "GCTAGAT",
      "GGGGGR"
    ],
    ["1 deletion", "GCTAGAAT", "GC--GAAT", "GGRRGGGG"],
    ["2 insertions", "G--AG--T", "GCTAGAAT", "GRGR"],
    ["insertion at the 3' end", "GCTAGA--", "GCTAGAAT", "GGGGGR"],
    ["insertion of one bp at the 3' end", "GCTAGAA-", "GCTAGAAT", "GGGGGGR"],
    ["insertion of three bp at the 3' end", "GCTAG---", "GCTAGAAT", "GGGGR"],
    ["insertion at the 5' end", "--TAGAAT", "GCTAGAAT", "RGGGGG"],
    ["insertion of one bp at the 5' end", "-TAGAAT", "CTAGAAT", "RGGGGG"],
    [
      "1 insertion & 1 deletion in the middle",
      "GCTAG--T",
      "GC--GAAT",
      "GGRRGR"
    ],
    [
      "1 insertion & 1 deletion that are adjacent in the middle",
      "GCTA--T",
      "GC--AAT",
      "GGRRR"
    ],
    [
      "1 insertion then 1 deletion that are adjacent at the 5' end",
      "--TAGAAT",
      "GC--GAAT",
      "RRGGGG"
    ],
    [
      "1 insertion then 1 deletion that are adjacent at the 3' end",
      "GCTAGA--",
      "GCTA--AT",
      "GGGGRR"
    ]
  ];

  cases.forEach(([title, referenceSeq, alignedSeq, expectedTrack]) => {
    it(title, () => {
      const result = condensePairwiseAlignmentDifferences(
        referenceSeq,
        alignedSeq
      );
      expect(result).toBe(expectedTrack);
    });
  });
});
|