@teselagen/sequence-utils 0.3.23 → 0.3.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/calculateNebTm.d.ts +3 -2
- package/calculateTm.d.ts +7 -1
- package/degenerateDnaToAminoAcidMap.d.ts +1 -1
- package/degenerateRnaToAminoAcidMap.d.ts +1 -1
- package/getAminoAcidDataForEachBaseOfDna.d.ts +13 -3
- package/{index.mjs → index.cjs} +2760 -6271
- package/index.d.ts +76 -81
- package/index.js +2729 -6240
- package/{index.umd.js → index.umd.cjs} +2639 -6150
- package/insertSequenceDataAtPosition.d.ts +1 -1
- package/package.json +1 -1
- package/src/addGapsToSeqReads.js +1 -1
- package/src/adjustAnnotationsToInsert.js +1 -1
- package/src/autoAnnotate.js +1 -1
- package/src/calculateNebTm.js +1 -2
- package/src/calculateNebTm.test.js +17 -7
- package/src/calculateTm.js +54 -17
- package/src/calculateTm.test.js +7 -1
- package/src/computeDigestFragments.js +1 -1
- package/src/cutSequenceByRestrictionEnzyme.js +1 -1
- package/src/degenerateDnaToAminoAcidMap.js +1 -1
- package/src/degenerateRnaToAminoAcidMap.js +1 -1
- package/src/deleteSequenceDataAtRange.test.js +1 -1
- package/src/diffUtils.js +1 -1
- package/src/diffUtils.test.js +1 -1
- package/src/featureTypesAndColors.js +1 -1
- package/src/filterSequenceString.js +1 -1
- package/src/findSequenceMatches.js +1 -1
- package/src/generateSequenceData.test.js +1 -1
- package/src/getAminoAcidDataForEachBaseOfDna.js +246 -115
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +55 -0
- package/src/getComplementSequenceString.js +1 -1
- package/src/getDigestFragmentsForRestrictionEnzymes.js +1 -1
- package/src/getReverseComplementSequenceAndAnnotations.js +1 -1
- package/src/getSequenceDataBetweenRange.js +1 -1
- package/src/getVirtualDigest.js +1 -1
- package/src/insertSequenceDataAtPositionOrRange.js +1 -1
- package/src/insertSequenceDataAtPositionOrRange.test.js +22 -0
- package/src/mapAnnotationsToRows.js +1 -1
- package/src/prepareCircularViewData.js +1 -1
- package/src/rotateSequenceDataToPosition.js +1 -1
- package/src/tidyUpAnnotation.js +1 -1
- package/src/tidyUpSequenceData.js +9 -2
|
@@ -1,24 +1,80 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
translateRange,
|
|
3
|
+
getSequenceWithinRange,
|
|
4
|
+
flipContainedRange,
|
|
5
|
+
isPositionWithinRange
|
|
6
|
+
} from "@teselagen/range-utils";
|
|
2
7
|
import revComp from "./getReverseComplementSequenceString";
|
|
3
8
|
import getAA from "./getAminoAcidFromSequenceTriplet";
|
|
4
9
|
|
|
5
10
|
//
|
|
6
11
|
import proteinAlphabet from "./proteinAlphabet";
|
|
7
12
|
|
|
8
|
-
// ac.throw([ac.string,ac.bool],arguments);
|
|
9
13
|
/**
|
|
10
14
|
* @private
|
|
11
|
-
* Gets
|
|
12
|
-
*
|
|
15
|
+
* Gets the next triplet of bases in the sequenceString
|
|
16
|
+
* @param {Number} index The index of the sequenceString to start at
|
|
13
17
|
* @param {String} sequenceString The dna sequenceString.
|
|
14
|
-
* @param {
|
|
15
|
-
* @
|
|
16
|
-
* @
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}]
|
|
18
|
+
* @param {Object[]} exonRange Array of ranges of the sequenceString that contains the positions of bases corresponding to exons.
|
|
19
|
+
* @return {Object} The triplet of bases, the number of bases read, and the positions of the codon bases in the sequenceString
|
|
20
|
+
* @property {String} triplet The triplet of bases
|
|
21
|
+
* @property {Number} basesRead The number of bases read
|
|
22
|
+
* @property {Number[]} codonPositions The positions of the codon bases in the sequenceString
|
|
20
23
|
*/
|
|
21
|
-
|
|
24
|
+
function getNextTriplet(index, sequenceString, exonRange) {
  const totalLength = sequenceString.length;
  // Positions of the bases that ended up in the triplet, expressed as
  // indices into sequenceString (so skipped intron bases are counted).
  const codonPositions = [];
  let triplet = "";
  let cursor = index;

  // Declared once, outside the loop, so no function is created per
  // iteration (the linter rejects functions created inside loops).
  // TODO: ask about ranges
  const isExonBase = position =>
    exonRange.some(range =>
      isPositionWithinRange(position, range, totalLength, true, false)
    );

  // Scan forward until three exon bases have been collected or the
  // sequence is exhausted; bases outside every exon range are skipped.
  while (cursor < totalLength && triplet.length !== 3) {
    if (isExonBase(cursor)) {
      // The base corresponds to an exon: it becomes part of the codon.
      triplet += sequenceString[cursor];
      codonPositions.push(cursor);
    }
    cursor++;
  }

  // basesRead counts every base stepped over, exon or not.
  return { triplet, basesRead: cursor - index, codonPositions };
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* @private
|
|
61
|
+
* Returns a series of derived properties from the arguments to getAminoAcidDataForEachBaseOfDna
|
|
62
|
+
* @param {String} originalSequenceString The dna sequenceString.
|
|
63
|
+
* @param {boolean} forward Whether the translation is in the forward direction.
|
|
64
|
+
* @param {Object} optionalSubrangeRange The range of the sequenceString to translate.
|
|
65
|
+
* @param {boolean} isProteinSequence Whether the sequenceString is a protein sequence.
|
|
66
|
+
* @return {Object} The derived properties
|
|
67
|
+
* @property {String} sequenceString
|
|
68
|
+
* - If !isProtein: The subsequence within originalSequenceString that will be translated, defined by translationRange. If
|
|
69
|
+
* !forward, this will be the reverse complement of the subsequence.
|
|
70
|
+
* - If isProtein: The originalSequenceString.
|
|
71
|
+
* @property {Object} translationRange The range of the originalSequenceString that we're translating (if !isProtein), or getting DNA-level
|
|
72
|
+
info for (if isProtein).
|
|
73
|
+
* @property {Number} originalSequenceStringLength The length of the full DNA sequence. If !isProtein it's the length of originalSequenceString
|
|
74
|
+
* @property {Number} sequenceStringLength The length of the DNA sequence that would give the translation.
|
|
75
|
+
* @property {Object[]} exonRange Array of ranges of the sequenceString that contains the positions of bases corresponding to exons.
|
|
76
|
+
*/
|
|
77
|
+
function getTranslatedSequenceProperties(
|
|
22
78
|
originalSequenceString,
|
|
23
79
|
forward,
|
|
24
80
|
optionalSubrangeRange,
|
|
@@ -27,137 +83,212 @@ export default function getAminoAcidDataForEachBaseOfDna(
|
|
|
27
83
|
const originalSequenceStringLength = isProteinSequence
|
|
28
84
|
? originalSequenceString.length * 3
|
|
29
85
|
: originalSequenceString.length;
|
|
86
|
+
|
|
30
87
|
let sequenceString = originalSequenceString;
|
|
31
|
-
|
|
88
|
+
const translationRange = { start: 0, end: originalSequenceStringLength - 1 };
|
|
89
|
+
|
|
32
90
|
if (optionalSubrangeRange) {
|
|
33
91
|
sequenceString = getSequenceWithinRange(
|
|
34
92
|
optionalSubrangeRange,
|
|
35
93
|
originalSequenceString
|
|
36
94
|
);
|
|
37
|
-
|
|
95
|
+
translationRange.start = optionalSubrangeRange.start;
|
|
96
|
+
translationRange.end = optionalSubrangeRange.end;
|
|
38
97
|
}
|
|
98
|
+
|
|
39
99
|
const sequenceStringLength = isProteinSequence
|
|
40
100
|
? sequenceString.length * 3
|
|
41
101
|
: sequenceString.length;
|
|
42
102
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
},
|
|
59
|
-
startOffset,
|
|
103
|
+
if (!isProteinSequence && !forward) {
|
|
104
|
+
sequenceString = revComp(sequenceString);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// TODO: what to do with protein if this is true?
|
|
108
|
+
const absoluteExonRange =
|
|
109
|
+
!isProteinSequence &&
|
|
110
|
+
optionalSubrangeRange &&
|
|
111
|
+
optionalSubrangeRange.locations
|
|
112
|
+
? optionalSubrangeRange.locations
|
|
113
|
+
: [translationRange];
|
|
114
|
+
const exonRange = absoluteExonRange.map(range => {
|
|
115
|
+
let outputRange = translateRange(
|
|
116
|
+
range,
|
|
117
|
+
-translationRange.start,
|
|
60
118
|
originalSequenceStringLength
|
|
61
119
|
);
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
aminoAcidIndex,
|
|
69
|
-
sequenceIndex: codonRange.start + i,
|
|
70
|
-
codonRange,
|
|
71
|
-
fullCodon: false
|
|
72
|
-
});
|
|
73
|
-
}
|
|
74
|
-
aminoAcidIndex--;
|
|
120
|
+
if (!forward) {
|
|
121
|
+
outputRange = flipContainedRange(
|
|
122
|
+
outputRange,
|
|
123
|
+
{ start: 0, end: sequenceStringLength - 1 },
|
|
124
|
+
sequenceStringLength
|
|
125
|
+
);
|
|
75
126
|
}
|
|
127
|
+
return outputRange;
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
return {
|
|
131
|
+
sequenceString,
|
|
132
|
+
translationRange,
|
|
133
|
+
sequenceStringLength,
|
|
134
|
+
originalSequenceStringLength,
|
|
135
|
+
exonRange
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
 * Converts a position within the CDS (where A in ATG is 0, and T in ATG is 1)
 * into the corresponding position in the main sequence.
 *
 * The position is shifted by translationRange.start with translateRange and,
 * for reverse translations, mirrored inside translationRange with
 * flipContainedRange.
 *
 * @param {Number} index The position within the CDS to convert
 * @param {boolean} forward Whether the translation is in the forward direction.
 * @param {Object} translationRange The range of the originalSequenceString that we're translating (if !isProtein), or getting DNA-level
 * info for (if isProtein).
 * @param {Number} mainSequenceLength The length of the full DNA sequence. If !isProtein it's the length of originalSequenceString
 * @return {Number} The position in the main sequence
 */
function positionInCdsToPositionInMainSequence(
  index,
  forward,
  translationRange,
  mainSequenceLength
) {
  // Treat the single position as a one-base range so the range helpers apply.
  const shifted = translateRange(
    { start: index, end: index },
    translationRange.start,
    mainSequenceLength
  );
  if (forward) {
    return shifted.start;
  }
  // Reverse translation: mirror the position within the translated range.
  return flipContainedRange(shifted, translationRange, mainSequenceLength)
    .start;
}
|
|
77
171
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
172
|
+
/**
 * @private
 * Gets aminoAcid data, including position in string and position in codon
 * from the sequenceString and the direction of the translation
 * @param {String} originalSequenceString The dna sequenceString.
 * @param {boolean} forward Should we find forward facing orfs or reverse facing orfs
 * @param {Object} optionalSubrangeRange Optional range of originalSequenceString to translate; when it carries
 * a `locations` array (DNA only) those ranges are used as the exons.
 * @param {boolean} isProteinSequence We're passing in a sequence of AA chars instead of DNA chars (slightly confusing but we'll still use the dna indexing for rendering in OVE)
 * @return {Object[]} One entry per base of the translated sequence. Entries for codon bases hold
 * `aminoAcid`, `positionInCodon`, `aminoAcidIndex`, `sequenceIndex`, `codonRange` and `fullCodon`;
 * entries for skipped (intron) bases hold `sequenceIndex` and `null` for every other field.
 * The array is reversed when `forward` is false.
 * @throws {Error} If the number of entries produced differs from the translated sequence length.
 */
export default function getAminoAcidDataForEachBaseOfDna(
  originalSequenceString,
  forward,
  optionalSubrangeRange,
  isProteinSequence
) {
  // Obtain derived properties, see getTranslatedSequenceProperties
  const {
    sequenceString,
    translationRange,
    sequenceStringLength,
    originalSequenceStringLength,
    exonRange
  } = getTranslatedSequenceProperties(
    originalSequenceString,
    forward,
    optionalSubrangeRange,
    isProteinSequence
  );

  const aminoAcidDataForEachBaseOfDNA = [];

  // Counts codons actually emitted. Deriving this from `index / 3` would
  // also count skipped intron bases, giving gaps or fractional values as
  // soon as an intron is present.
  let aminoAcidIndex = 0;

  // Iterate over the DNA sequence length in increments of 3
  for (let index = 0; index < sequenceStringLength; index += 3) {
    let aminoAcid;
    let codonPositionsInCDS;
    let basesRead;

    if (isProteinSequence) {
      // Each amino acid char stands for three consecutive DNA-level positions.
      codonPositionsInCDS = [0, 1, 2].map(i => index + i);
      basesRead = 3;
      aminoAcid = proteinAlphabet[sequenceString[index / 3].toUpperCase()];
    } else {
      // Get the triplet of DNA bases
      const {
        triplet,
        basesRead: _basesRead,
        codonPositions
      } = getNextTriplet(index, sequenceString, exonRange);
      basesRead = _basesRead;
      codonPositionsInCDS = codonPositions;
      // If the triplet is not full, use the gap ("xxx") amino acid instead
      aminoAcid = triplet.length === 3 ? getAA(triplet) : getAA("xxx");
    }

    const absoluteCodonPositions = codonPositionsInCDS.map(i =>
      positionInCdsToPositionInMainSequence(
        i,
        forward,
        translationRange,
        originalSequenceStringLength
      )
    );

    // What should the codon range be if it comprises intron bases?
    // For now it spans from the first to the last base of the codon.
    const lastCodonBase = codonPositionsInCDS.length - 1;
    const codonRange = forward
      ? {
          start: absoluteCodonPositions[0],
          end: absoluteCodonPositions[lastCodonBase]
        }
      : {
          start: absoluteCodonPositions[lastCodonBase],
          end: absoluteCodonPositions[0]
        };
    const fullCodon = codonPositionsInCDS.length === 3;

    // Iterate over the positions read
    let positionInCodon = 0;
    for (let i = 0; i < basesRead; i++) {
      const posInCds = i + index;
      if (codonPositionsInCDS.includes(posInCds)) {
        aminoAcidDataForEachBaseOfDNA.push({
          aminoAcid,
          positionInCodon,
          aminoAcidIndex,
          // absoluteCodonPositions has one entry per codon base, so it must
          // be indexed by the position within the codon. The offset `i`
          // within the bases read runs past it whenever intron bases were
          // skipped before this base.
          sequenceIndex: absoluteCodonPositions[positionInCodon],
          codonRange,
          fullCodon
        });
        positionInCodon++;
      } else {
        // TODO: what should we insert here?
        // Skipped (intron) base: only its position in the main sequence is set.
        aminoAcidDataForEachBaseOfDNA.push({
          aminoAcid: null,
          positionInCodon: null,
          aminoAcidIndex: null,
          sequenceIndex: positionInCdsToPositionInMainSequence(
            posInCds,
            forward,
            translationRange,
            originalSequenceStringLength
          ),
          codonRange: null,
          fullCodon: null
        });
      }
    }

    if (codonPositionsInCDS.length > 0) {
      aminoAcidIndex++;
    }
    // Move the index in case intron bases were read
    index += basesRead - codonPositionsInCDS.length;
  }

  // Every base of the translated sequence must have produced exactly one entry.
  if (sequenceStringLength !== aminoAcidDataForEachBaseOfDNA.length) {
    throw new Error("something went wrong!");
  }

  // Reverse the array if we're translating in the reverse direction
  if (!forward) {
    aminoAcidDataForEachBaseOfDNA.reverse();
  }
  return aminoAcidDataForEachBaseOfDNA;
}
|
|
@@ -5,6 +5,7 @@ import getAA from "./getAminoAcidFromSequenceTriplet";
|
|
|
5
5
|
import assert from "assert";
|
|
6
6
|
|
|
7
7
|
let aaData;
|
|
8
|
+
let aaData2;
|
|
8
9
|
describe("getAminoAcidDataForEachBaseOfDna tranlates a", () => {
|
|
9
10
|
//: It gets correct amino acid mapping and position in codon for each basepair in sequence
|
|
10
11
|
it("1 amino acid long sequence", () => {
|
|
@@ -419,4 +420,58 @@ describe("getAminoAcidDataForEachBaseOfDna tranlates a", () => {
|
|
|
419
420
|
}
|
|
420
421
|
]);
|
|
421
422
|
});
|
|
423
|
+
it("protein 1 amino acid long sequence", () => {
|
|
424
|
+
aaData = getAminoAcidDataForEachBaseOfDna("M", true, null, true);
|
|
425
|
+
aaData2 = getAminoAcidDataForEachBaseOfDna("atg", true, null, false);
|
|
426
|
+
assert.deepEqual(aaData, aaData2);
|
|
427
|
+
});
|
|
428
|
+
it("protein 1 amino acid long sequence in reverse direction", () => {
|
|
429
|
+
aaData = getAminoAcidDataForEachBaseOfDna("H", false, null, true);
|
|
430
|
+
aaData2 = getAminoAcidDataForEachBaseOfDna("atg", false, null, false);
|
|
431
|
+
assert.deepEqual(aaData, aaData2);
|
|
432
|
+
});
|
|
433
|
+
it("> 1 amino acid long sequence", () => {
|
|
434
|
+
aaData = getAminoAcidDataForEachBaseOfDna("MF", true, null, true);
|
|
435
|
+
aaData2 = getAminoAcidDataForEachBaseOfDna("atgttt", true, null, false);
|
|
436
|
+
assert.deepEqual(aaData, aaData2);
|
|
437
|
+
});
|
|
438
|
+
it("> 1 amino acid long sequence in reverse direction", () => {
|
|
439
|
+
aaData = getAminoAcidDataForEachBaseOfDna("KH", false, null, true);
|
|
440
|
+
aaData2 = getAminoAcidDataForEachBaseOfDna("atgttt", false, null, false);
|
|
441
|
+
assert.deepEqual(aaData, aaData2);
|
|
442
|
+
});
|
|
443
|
+
it.skip("protein 1 amino acid long sequence which is a subrange of a larger sequence", () => {
|
|
444
|
+
aaData = getAminoAcidDataForEachBaseOfDna(
|
|
445
|
+
"AMA",
|
|
446
|
+
true,
|
|
447
|
+
{ start: 1, end: 1 },
|
|
448
|
+
true
|
|
449
|
+
);
|
|
450
|
+
aaData2 = getAminoAcidDataForEachBaseOfDna(
|
|
451
|
+
"xxxatgxxx",
|
|
452
|
+
true,
|
|
453
|
+
{ start: 3, end: 5 },
|
|
454
|
+
false
|
|
455
|
+
);
|
|
456
|
+
// Unclear what the behavior should be here,
|
|
457
|
+
// for now it returns the same as the old code (ignores the start and end range)
|
|
458
|
+
assert.deepEqual(aaData, aaData2);
|
|
459
|
+
});
|
|
460
|
+
it.skip("protein 1 amino acid long sequence in reverse direaction which is a subrange of a larger sequence", () => {
|
|
461
|
+
aaData = getAminoAcidDataForEachBaseOfDna(
|
|
462
|
+
"AMA",
|
|
463
|
+
false,
|
|
464
|
+
{ start: 1, end: 1 },
|
|
465
|
+
true
|
|
466
|
+
);
|
|
467
|
+
aaData2 = getAminoAcidDataForEachBaseOfDna(
|
|
468
|
+
"xxxatgxxx",
|
|
469
|
+
false,
|
|
470
|
+
{ start: 3, end: 5 },
|
|
471
|
+
false
|
|
472
|
+
);
|
|
473
|
+
// Unclear what the behavior should be here,
|
|
474
|
+
// for now it returns the same as the old code (ignores the start and end range)
|
|
475
|
+
assert.deepEqual(aaData, aaData2);
|
|
476
|
+
});
|
|
422
477
|
});
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import getDigestFragmentsForCutsites from "./getDigestFragmentsForCutsites";
|
|
2
2
|
import cutSequenceByRestrictionEnzyme from "./cutSequenceByRestrictionEnzyme";
|
|
3
|
-
import { flatMap } from "lodash";
|
|
3
|
+
import { flatMap } from "lodash-es";
|
|
4
4
|
|
|
5
5
|
export default function getDigestFragmentsForRestrictionEnzymes(
|
|
6
6
|
sequence,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
2
2
|
import getReverseComplementAnnotation from "./getReverseComplementAnnotation";
|
|
3
3
|
import { annotationTypes } from "./annotationTypes";
|
|
4
|
-
import { map } from "lodash";
|
|
4
|
+
import { map } from "lodash-es";
|
|
5
5
|
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
6
6
|
|
|
7
7
|
import getSequenceDataBetweenRange from "./getSequenceDataBetweenRange";
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { flatMap, extend, forEach, startCase } from "lodash";
|
|
1
|
+
import { flatMap, extend, forEach, startCase } from "lodash-es";
|
|
2
2
|
import { getRangeLength } from "@teselagen/range-utils";
|
|
3
3
|
import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
|
|
4
4
|
import insertSequenceDataAtPosition from "./insertSequenceDataAtPosition";
|
package/src/getVirtualDigest.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { getRangeLength } from "@teselagen/range-utils";
|
|
2
|
-
import { map, cloneDeep } from "lodash";
|
|
2
|
+
import { map, cloneDeep } from "lodash-es";
|
|
3
3
|
import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
|
|
4
4
|
import rotateSequenceDataToPosition from "./rotateSequenceDataToPosition";
|
|
5
5
|
import { adjustRangeToDeletionOfAnotherRange } from "@teselagen/range-utils";
|
|
@@ -83,6 +83,28 @@ describe("insertSequenceData", () => {
|
|
|
83
83
|
postInsertSeq.sequence.should.equal("atgatagatagggagaaa");
|
|
84
84
|
postInsertSeq.proteinSequence.should.equal("MIDREK");
|
|
85
85
|
});
|
|
86
|
+
it("inserts protein seq into a dna seq correctly", () => {
|
|
87
|
+
const sequenceToInsert = {
|
|
88
|
+
isProtein: true,
|
|
89
|
+
sequence: "atagatagg",
|
|
90
|
+
proteinSequence: "IDR"
|
|
91
|
+
};
|
|
92
|
+
const sequenceToInsertInto = {
|
|
93
|
+
// 012345
|
|
94
|
+
isProtein: false,
|
|
95
|
+
sequence: "atgagagagaaa",
|
|
96
|
+
proteinSequence: "MREK"
|
|
97
|
+
};
|
|
98
|
+
const range = { start: 3, end: 5 };
|
|
99
|
+
const postInsertSeq = insertSequenceDataAtPositionOrRange(
|
|
100
|
+
sequenceToInsert,
|
|
101
|
+
sequenceToInsertInto,
|
|
102
|
+
range
|
|
103
|
+
);
|
|
104
|
+
postInsertSeq.sequence.should.equal("atgatagatagggagaaa");
|
|
105
|
+
postInsertSeq.isProtein.should.equal(false);
|
|
106
|
+
postInsertSeq.proteinSequence.should.equal("MIDREK");
|
|
107
|
+
});
|
|
86
108
|
it("inserts characters at correct range and computes the new size correctly", () => {
|
|
87
109
|
const sequenceToInsert = {
|
|
88
110
|
sequence: "rrrrrrr"
|
package/src/tidyUpAnnotation.js
CHANGED
|
@@ -2,12 +2,13 @@
|
|
|
2
2
|
import shortid from "shortid";
|
|
3
3
|
|
|
4
4
|
import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
|
|
5
|
-
import { cloneDeep, flatMap } from "lodash";
|
|
5
|
+
import { cloneDeep, flatMap } from "lodash-es";
|
|
6
6
|
import { annotationTypes } from "./annotationTypes";
|
|
7
7
|
import filterSequenceString from "./filterSequenceString";
|
|
8
8
|
import tidyUpAnnotation from "./tidyUpAnnotation";
|
|
9
9
|
import getDegenerateDnaStringFromAaString from "./getDegenerateDnaStringFromAAString";
|
|
10
10
|
import { getFeatureTypes } from "./featureTypesAndColors";
|
|
11
|
+
import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
|
|
11
12
|
|
|
12
13
|
export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
13
14
|
const {
|
|
@@ -16,6 +17,7 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
16
17
|
doNotRemoveInvalidChars,
|
|
17
18
|
additionalValidChars,
|
|
18
19
|
noTranslationData,
|
|
20
|
+
includeProteinSequence,
|
|
19
21
|
doNotProvideIdsForAnnotations,
|
|
20
22
|
noCdsTranslations,
|
|
21
23
|
convertAnnotationsFromAAIndices,
|
|
@@ -55,7 +57,8 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
55
57
|
if (!doNotRemoveInvalidChars) {
|
|
56
58
|
if (seqData.isProtein) {
|
|
57
59
|
const [newSeq] = filterSequenceString(seqData.proteinSequence, {
|
|
58
|
-
...(topLevelSeqData || seqData)
|
|
60
|
+
...(topLevelSeqData || seqData),
|
|
61
|
+
isProtein: true
|
|
59
62
|
});
|
|
60
63
|
seqData.proteinSequence = newSeq;
|
|
61
64
|
} else {
|
|
@@ -79,6 +82,10 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
79
82
|
null,
|
|
80
83
|
true
|
|
81
84
|
);
|
|
85
|
+
} else if (includeProteinSequence) {
|
|
86
|
+
seqData.proteinSequence = getAminoAcidStringFromSequenceString(
|
|
87
|
+
seqData.sequence
|
|
88
|
+
);
|
|
82
89
|
}
|
|
83
90
|
|
|
84
91
|
seqData.size = seqData.noSequence ? seqData.size : seqData.sequence.length;
|