@teselagen/sequence-utils 0.3.23 → 0.3.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/calculateNebTm.d.ts +3 -2
  2. package/calculateTm.d.ts +7 -1
  3. package/degenerateDnaToAminoAcidMap.d.ts +1 -1
  4. package/degenerateRnaToAminoAcidMap.d.ts +1 -1
  5. package/getAminoAcidDataForEachBaseOfDna.d.ts +13 -3
  6. package/{index.mjs → index.cjs} +2760 -6271
  7. package/index.d.ts +76 -81
  8. package/index.js +2729 -6240
  9. package/{index.umd.js → index.umd.cjs} +2639 -6150
  10. package/insertSequenceDataAtPosition.d.ts +1 -1
  11. package/package.json +1 -1
  12. package/src/addGapsToSeqReads.js +1 -1
  13. package/src/adjustAnnotationsToInsert.js +1 -1
  14. package/src/autoAnnotate.js +1 -1
  15. package/src/calculateNebTm.js +1 -2
  16. package/src/calculateNebTm.test.js +17 -7
  17. package/src/calculateTm.js +54 -17
  18. package/src/calculateTm.test.js +7 -1
  19. package/src/computeDigestFragments.js +1 -1
  20. package/src/cutSequenceByRestrictionEnzyme.js +1 -1
  21. package/src/degenerateDnaToAminoAcidMap.js +1 -1
  22. package/src/degenerateRnaToAminoAcidMap.js +1 -1
  23. package/src/deleteSequenceDataAtRange.test.js +1 -1
  24. package/src/diffUtils.js +1 -1
  25. package/src/diffUtils.test.js +1 -1
  26. package/src/featureTypesAndColors.js +1 -1
  27. package/src/filterSequenceString.js +1 -1
  28. package/src/findSequenceMatches.js +1 -1
  29. package/src/generateSequenceData.test.js +1 -1
  30. package/src/getAminoAcidDataForEachBaseOfDna.js +246 -115
  31. package/src/getAminoAcidDataForEachBaseOfDna.test.js +55 -0
  32. package/src/getComplementSequenceString.js +1 -1
  33. package/src/getDigestFragmentsForRestrictionEnzymes.js +1 -1
  34. package/src/getReverseComplementSequenceAndAnnotations.js +1 -1
  35. package/src/getSequenceDataBetweenRange.js +1 -1
  36. package/src/getVirtualDigest.js +1 -1
  37. package/src/insertSequenceDataAtPositionOrRange.js +1 -1
  38. package/src/insertSequenceDataAtPositionOrRange.test.js +22 -0
  39. package/src/mapAnnotationsToRows.js +1 -1
  40. package/src/prepareCircularViewData.js +1 -1
  41. package/src/rotateSequenceDataToPosition.js +1 -1
  42. package/src/tidyUpAnnotation.js +1 -1
  43. package/src/tidyUpSequenceData.js +9 -2
@@ -1,24 +1,80 @@
1
- import { translateRange, getSequenceWithinRange } from "@teselagen/range-utils";
1
+ import {
2
+ translateRange,
3
+ getSequenceWithinRange,
4
+ flipContainedRange,
5
+ isPositionWithinRange
6
+ } from "@teselagen/range-utils";
2
7
  import revComp from "./getReverseComplementSequenceString";
3
8
  import getAA from "./getAminoAcidFromSequenceTriplet";
4
9
 
5
10
  //
6
11
  import proteinAlphabet from "./proteinAlphabet";
7
12
 
8
- // ac.throw([ac.string,ac.bool],arguments);
9
13
  /**
10
14
  * @private
11
- * Gets aminoAcid data, including position in string and position in codon
12
- * from the sequenceString and the direction of the translation
15
+ * Gets the next triplet of bases in the sequenceString
16
+ * @param {Number} index The index of the sequenceString to start at
13
17
  * @param {String} sequenceString The dna sequenceString.
14
- * @param {boolean} forward Should we find forward facing orfs or reverse facing orfs
15
- * @param {boolean} isProteinSequence We're passing in a sequence of AA chars instead of DNA chars (slightly confusing but we'll still use the dna indexing for rendering in OVE)
16
- * @return [{
17
- aminoAcid:
18
- positionInCodon:
19
- }]
18
+ * @param {Object[]} exonRange Array of ranges of the sequenceString that contains the positions of bases corresponding to exons.
19
+ * @return {Object} The triplet of bases, the number of bases read, and the positions of the codon bases in the sequenceString
20
+ * @property {String} triplet The triplet of bases
21
+ * @property {Number} basesRead The number of bases read
22
+ * @property {Number[]} codonPositions The positions of the codon bases in the sequenceString
20
23
  */
21
- export default function getAminoAcidDataForEachBaseOfDna(
24
+ function getNextTriplet(index, sequenceString, exonRange) {
25
+ let triplet = "";
26
+ let internalIndex;
27
+ // Positions of codons relative to the coding sequence start
28
+ // including introns.
29
+ const codonPositions = [];
30
+
31
+ // A function to check if a base is within an exon, defined here
32
+ // to avoid function creation in the loop (linter error)
33
+ const isBaseInExon = baseIndex =>
34
+ exonRange.some(r =>
35
+ isPositionWithinRange(baseIndex, r, sequenceString.length, true, false)
36
+ );
37
+
38
+ for (
39
+ internalIndex = index;
40
+ internalIndex < sequenceString.length;
41
+ internalIndex++
42
+ ) {
43
+ // We have read three bases into the triplet (this has to be at the top of the loop)
44
+ if (triplet.length === 3) {
45
+ break;
46
+ }
47
+ // TODO: ask about ranges
48
+ // The base corresponds to an exon, so it contributes to the codon
49
+ if (isBaseInExon(internalIndex)) {
50
+ // We read a base from the sequenceString
51
+ triplet += sequenceString[internalIndex];
52
+ codonPositions.push(internalIndex);
53
+ }
54
+ }
55
+
56
+ return { triplet, basesRead: internalIndex - index, codonPositions };
57
+ }
58
+
59
+ /**
60
+ * @private
61
+ * Returns a series of derived properties from the arguments to getAminoAcidDataForEachBaseOfDna
62
+ * @param {String} originalSequenceString The dna sequenceString.
63
+ * @param {boolean} forward Whether the translation is in the forward direction.
64
+ * @param {Object} optionalSubrangeRange The range of the sequenceString to translate.
65
+ * @param {boolean} isProteinSequence Whether the sequenceString is a protein sequence.
66
+ * @return {Object} The derived properties
67
+ * @property {String} sequenceString
68
+ * - If !isProtein: The subsequence within originalSequenceString that will be translated, defined by translationRange. If
69
+ * !forward, this will be the reverse complement of the subsequence.
70
+ * - If isProtein: The originalSequenceString.
71
+ * @property {Object} translationRange The range of the originalSequenceString that we're translating (if !isProtein), or getting DNA-level
72
+ info for (if isProtein).
73
+ * @property {Number} originalSequenceStringLength The length of the full DNA sequence. If !isProtein it's the length of originalSequenceString; if isProtein it's three times that length (three bases per amino acid).
74
+ * @property {Number} sequenceStringLength The length of the DNA sequence that would give the translation.
75
+ * @property {Object[]} exonRange Array of ranges of the sequenceString that contains the positions of bases corresponding to exons.
76
+ */
77
+ function getTranslatedSequenceProperties(
22
78
  originalSequenceString,
23
79
  forward,
24
80
  optionalSubrangeRange,
@@ -27,137 +83,212 @@ export default function getAminoAcidDataForEachBaseOfDna(
27
83
  const originalSequenceStringLength = isProteinSequence
28
84
  ? originalSequenceString.length * 3
29
85
  : originalSequenceString.length;
86
+
30
87
  let sequenceString = originalSequenceString;
31
- let startOffset = 0;
88
+ const translationRange = { start: 0, end: originalSequenceStringLength - 1 };
89
+
32
90
  if (optionalSubrangeRange) {
33
91
  sequenceString = getSequenceWithinRange(
34
92
  optionalSubrangeRange,
35
93
  originalSequenceString
36
94
  );
37
- startOffset = optionalSubrangeRange.start;
95
+ translationRange.start = optionalSubrangeRange.start;
96
+ translationRange.end = optionalSubrangeRange.end;
38
97
  }
98
+
39
99
  const sequenceStringLength = isProteinSequence
40
100
  ? sequenceString.length * 3
41
101
  : sequenceString.length;
42
102
 
43
- // ac.throw([ac.string,ac.bool],arguments);
44
- const aminoAcidDataForEachBaseOfDNA = [];
45
- let codonRange;
46
- let revCompGapLength = 0;
47
- let aminoAcidIndex = 0;
48
- if (!forward) {
49
- //compute the start of the amino acid sequence, but only if translating in the reverse direction
50
- aminoAcidIndex = Math.floor((sequenceStringLength - 1) / 3);
51
- //because we're translating in the reverse direction, we need to
52
- //check to see if there are untranslated amino acids at the start of the sequenceString
53
- revCompGapLength = sequenceStringLength % 3;
54
- codonRange = translateRange(
55
- {
56
- start: 0,
57
- end: revCompGapLength - 1
58
- },
59
- startOffset,
103
+ if (!isProteinSequence && !forward) {
104
+ sequenceString = revComp(sequenceString);
105
+ }
106
+
107
+ // TODO: what to do with protein if this is true?
108
+ const absoluteExonRange =
109
+ !isProteinSequence &&
110
+ optionalSubrangeRange &&
111
+ optionalSubrangeRange.locations
112
+ ? optionalSubrangeRange.locations
113
+ : [translationRange];
114
+ const exonRange = absoluteExonRange.map(range => {
115
+ let outputRange = translateRange(
116
+ range,
117
+ -translationRange.start,
60
118
  originalSequenceStringLength
61
119
  );
62
-
63
- if (revCompGapLength > 0) {
64
- for (let i = 0; i < revCompGapLength; i++) {
65
- aminoAcidDataForEachBaseOfDNA.push({
66
- aminoAcid: getAA("xxx"), //fake xxx triplet returns the ambiguous X amino acid
67
- positionInCodon: revCompGapLength - i - 1,
68
- aminoAcidIndex,
69
- sequenceIndex: codonRange.start + i,
70
- codonRange,
71
- fullCodon: false
72
- });
73
- }
74
- aminoAcidIndex--;
120
+ if (!forward) {
121
+ outputRange = flipContainedRange(
122
+ outputRange,
123
+ { start: 0, end: sequenceStringLength - 1 },
124
+ sequenceStringLength
125
+ );
75
126
  }
127
+ return outputRange;
128
+ });
129
+
130
+ return {
131
+ sequenceString,
132
+ translationRange,
133
+ sequenceStringLength,
134
+ originalSequenceStringLength,
135
+ exonRange
136
+ };
137
+ }
138
+
139
+ /**
140
+ * Function to convert the position within the CDS (where A in ATG is 0, and T in ATG is 1)
141
+ * to the position in the main sequence
142
+ *
143
+ * @param {Number} index The index of the sequenceString to start at
144
+ * @param {boolean} forward Whether the translation is in the forward direction.
145
+ * @param {Object} translationRange The range of the originalSequenceString that we're translating (if !isProtein), or getting DNA-level
146
+ * info for (if isProtein).
147
+ * @param {Number} mainSequenceLength The length of the full DNA sequence. If !isProtein it's the length of originalSequenceString; if isProtein it's three times that length (three bases per amino acid).
148
+ * @return {Number} The position in the main sequence
149
+ *
150
+ */
151
+ function positionInCdsToPositionInMainSequence(
152
+ index,
153
+ forward,
154
+ translationRange,
155
+ mainSequenceLength
156
+ ) {
157
+ let outputRange = translateRange(
158
+ { start: index, end: index },
159
+ translationRange.start,
160
+ mainSequenceLength
161
+ );
162
+ if (!forward) {
163
+ outputRange = flipContainedRange(
164
+ outputRange,
165
+ translationRange,
166
+ mainSequenceLength
167
+ );
76
168
  }
169
+ return outputRange.start;
170
+ }
77
171
 
78
- //compute the bulk of the sequence
79
- for (
80
- let index = 2 + revCompGapLength;
81
- index < sequenceStringLength;
82
- index += 3
83
- ) {
172
+ /**
173
+ * @private
174
+ * Gets aminoAcid data, including position in string and position in codon
175
+ * from the sequenceString and the direction of the translation
176
+ * @param {String} sequenceString The dna sequenceString.
177
+ * @param {boolean} forward Should we find forward facing orfs or reverse facing orfs
178
+ * @param {boolean} isProteinSequence We're passing in a sequence of AA chars instead of DNA chars (slightly confusing but we'll still use the dna indexing for rendering in OVE)
179
+ * @return [{
180
+ aminoAcid:
181
+ positionInCodon:
182
+ }]
183
+ */
184
+ export default function getAminoAcidDataForEachBaseOfDna(
185
+ originalSequenceString,
186
+ forward,
187
+ optionalSubrangeRange,
188
+ isProteinSequence
189
+ ) {
190
+ // Obtain derived properties, see getTranslatedSequenceProperties
191
+ const {
192
+ sequenceString,
193
+ translationRange,
194
+ sequenceStringLength,
195
+ originalSequenceStringLength,
196
+ exonRange
197
+ } = getTranslatedSequenceProperties(
198
+ originalSequenceString,
199
+ forward,
200
+ optionalSubrangeRange,
201
+ isProteinSequence
202
+ );
203
+
204
+ const aminoAcidDataForEachBaseOfDNA = [];
205
+
206
+ // Iterate over the DNA sequence length in increments of 3
207
+ for (let index = 0; index < sequenceStringLength; index += 3) {
84
208
  let aminoAcid;
209
+ const aminoAcidIndex = index / 3;
210
+ let codonPositionsInCDS;
211
+ let basesRead;
212
+
85
213
  if (isProteinSequence) {
86
- aminoAcid =
87
- proteinAlphabet[sequenceString[(index - 2) / 3].toUpperCase()];
214
+ codonPositionsInCDS = [0, 1, 2].map(i => index + i);
215
+ basesRead = 3;
216
+ aminoAcid = proteinAlphabet[sequenceString[index / 3].toUpperCase()];
88
217
  } else {
89
- let triplet = sequenceString.slice(index - 2, index + 1);
90
- if (!forward) {
91
- //we reverse the triplet
92
- triplet = revComp(triplet);
93
- }
94
- aminoAcid = getAA(triplet);
218
+ // Get the triplet of DNA bases
219
+ const {
220
+ triplet,
221
+ basesRead: _basesRead,
222
+ codonPositions
223
+ } = getNextTriplet(index, sequenceString, exonRange);
224
+ basesRead = _basesRead;
225
+ codonPositionsInCDS = codonPositions;
226
+ // If the triplet is not full (fewer than 3 bases were read), use the placeholder amino acid that getAA returns for "xxx"
227
+ aminoAcid = triplet.length === 3 ? getAA(triplet) : getAA("xxx");
95
228
  }
96
- codonRange = translateRange(
97
- {
98
- start: index - 2,
99
- end: index
100
- },
101
- startOffset,
102
- originalSequenceStringLength
229
+
230
+ const absoluteCodonPositions = codonPositionsInCDS.map(i =>
231
+ positionInCdsToPositionInMainSequence(
232
+ i,
233
+ forward,
234
+ translationRange,
235
+ originalSequenceStringLength
236
+ )
103
237
  );
104
238
 
105
- aminoAcidDataForEachBaseOfDNA.push({
106
- aminoAcid, //gap amino acid
107
- positionInCodon: forward ? 0 : 2,
108
- aminoAcidIndex,
109
- sequenceIndex: codonRange.start,
110
- codonRange,
111
- fullCodon: true
112
- });
113
- aminoAcidDataForEachBaseOfDNA.push({
114
- aminoAcid, //gap amino acid
115
- positionInCodon: 1,
116
- aminoAcidIndex,
117
- sequenceIndex: codonRange.start + 1,
118
- codonRange,
119
- fullCodon: true
120
- });
121
- aminoAcidDataForEachBaseOfDNA.push({
122
- aminoAcid, //gap amino acid
123
- positionInCodon: forward ? 2 : 0,
124
- aminoAcidIndex,
125
- sequenceIndex: codonRange.start + 2,
126
- codonRange,
127
- fullCodon: true
128
- });
129
- if (forward) {
130
- aminoAcidIndex++;
131
- } else {
132
- aminoAcidIndex--;
133
- }
134
- }
239
+ // What should the codon range be if it comprises intron bases?
240
+ const codonRange = forward
241
+ ? {
242
+ start: absoluteCodonPositions[0],
243
+ end: absoluteCodonPositions[codonPositionsInCDS.length - 1]
244
+ }
245
+ : {
246
+ start: absoluteCodonPositions[codonPositionsInCDS.length - 1],
247
+ end: absoluteCodonPositions[0]
248
+ };
135
249
 
136
- //compute the end of the sequence
137
- //we'll never hit the following logic if translating in the reverse direction
138
- const lengthOfEndBpsNotCoveredByAminoAcids =
139
- sequenceStringLength - aminoAcidDataForEachBaseOfDNA.length;
140
- codonRange = translateRange(
141
- {
142
- start: sequenceStringLength - lengthOfEndBpsNotCoveredByAminoAcids,
143
- end: sequenceStringLength - 1
144
- },
145
- startOffset,
146
- originalSequenceStringLength
147
- );
148
- for (let j = 0; j < lengthOfEndBpsNotCoveredByAminoAcids; j++) {
149
- aminoAcidDataForEachBaseOfDNA.push({
150
- aminoAcid: getAA("xxx"), //fake xxx triplet returns the gap amino acid
151
- positionInCodon: j,
152
- aminoAcidIndex,
153
- sequenceIndex: codonRange.start + j,
154
- fullCodon: false,
155
- codonRange
156
- });
250
+ // Iterate over the positions read
251
+ let positionInCodon = 0;
252
+ for (let i = 0; i < basesRead; i++) {
253
+ const posInCds = i + index;
254
+ if (codonPositionsInCDS.includes(posInCds)) {
255
+ aminoAcidDataForEachBaseOfDNA.push({
256
+ aminoAcid,
257
+ positionInCodon,
258
+ aminoAcidIndex,
259
+ sequenceIndex: absoluteCodonPositions[i],
260
+ codonRange,
261
+ fullCodon: codonPositionsInCDS.length === 3
262
+ });
263
+ positionInCodon++;
264
+ } else {
265
+ // TODO: what should we insert here?
266
+ aminoAcidDataForEachBaseOfDNA.push({
267
+ aminoAcid: null,
268
+ positionInCodon: null,
269
+ aminoAcidIndex: null,
270
+ sequenceIndex: positionInCdsToPositionInMainSequence(
271
+ posInCds,
272
+ forward,
273
+ translationRange,
274
+ originalSequenceStringLength
275
+ ),
276
+ codonRange: null,
277
+ fullCodon: null
278
+ });
279
+ }
280
+ }
281
+ // Move the index in case intron bases were read
282
+ index += basesRead - codonPositionsInCDS.length;
157
283
  }
158
284
 
159
285
  if (sequenceStringLength !== aminoAcidDataForEachBaseOfDNA.length) {
160
286
  throw new Error("something went wrong!");
161
287
  }
288
+
289
+ // Reverse the array if we're translating in the reverse direction
290
+ if (!forward) {
291
+ aminoAcidDataForEachBaseOfDNA.reverse();
292
+ }
162
293
  return aminoAcidDataForEachBaseOfDNA;
163
294
  }
@@ -5,6 +5,7 @@ import getAA from "./getAminoAcidFromSequenceTriplet";
5
5
  import assert from "assert";
6
6
 
7
7
  let aaData;
8
+ let aaData2;
8
9
  describe("getAminoAcidDataForEachBaseOfDna tranlates a", () => {
9
10
  //: It gets correct amino acid mapping and position in codon for each basepair in sequence
10
11
  it("1 amino acid long sequence", () => {
@@ -419,4 +420,58 @@ describe("getAminoAcidDataForEachBaseOfDna tranlates a", () => {
419
420
  }
420
421
  ]);
421
422
  });
423
+ it("protein 1 amino acid long sequence", () => {
424
+ aaData = getAminoAcidDataForEachBaseOfDna("M", true, null, true);
425
+ aaData2 = getAminoAcidDataForEachBaseOfDna("atg", true, null, false);
426
+ assert.deepEqual(aaData, aaData2);
427
+ });
428
+ it("protein 1 amino acid long sequence in reverse direction", () => {
429
+ aaData = getAminoAcidDataForEachBaseOfDna("H", false, null, true);
430
+ aaData2 = getAminoAcidDataForEachBaseOfDna("atg", false, null, false);
431
+ assert.deepEqual(aaData, aaData2);
432
+ });
433
+ it("> 1 amino acid long sequence", () => {
434
+ aaData = getAminoAcidDataForEachBaseOfDna("MF", true, null, true);
435
+ aaData2 = getAminoAcidDataForEachBaseOfDna("atgttt", true, null, false);
436
+ assert.deepEqual(aaData, aaData2);
437
+ });
438
+ it("> 1 amino acid long sequence in reverse direction", () => {
439
+ aaData = getAminoAcidDataForEachBaseOfDna("KH", false, null, true);
440
+ aaData2 = getAminoAcidDataForEachBaseOfDna("atgttt", false, null, false);
441
+ assert.deepEqual(aaData, aaData2);
442
+ });
443
+ it.skip("protein 1 amino acid long sequence which is a subrange of a larger sequence", () => {
444
+ aaData = getAminoAcidDataForEachBaseOfDna(
445
+ "AMA",
446
+ true,
447
+ { start: 1, end: 1 },
448
+ true
449
+ );
450
+ aaData2 = getAminoAcidDataForEachBaseOfDna(
451
+ "xxxatgxxx",
452
+ true,
453
+ { start: 3, end: 5 },
454
+ false
455
+ );
456
+ // Unclear what the behavior should be here,
457
+ // for now it returns the same as the old code (ignores the start and end range)
458
+ assert.deepEqual(aaData, aaData2);
459
+ });
460
+ it.skip("protein 1 amino acid long sequence in reverse direaction which is a subrange of a larger sequence", () => {
461
+ aaData = getAminoAcidDataForEachBaseOfDna(
462
+ "AMA",
463
+ false,
464
+ { start: 1, end: 1 },
465
+ true
466
+ );
467
+ aaData2 = getAminoAcidDataForEachBaseOfDna(
468
+ "xxxatgxxx",
469
+ false,
470
+ { start: 3, end: 5 },
471
+ false
472
+ );
473
+ // Unclear what the behavior should be here,
474
+ // for now it returns the same as the old code (ignores the start and end range)
475
+ assert.deepEqual(aaData, aaData2);
476
+ });
422
477
  });
@@ -1,5 +1,5 @@
1
1
  import DNAComplementMap from "./DNAComplementMap";
2
- import { merge } from "lodash";
2
+ import { merge } from "lodash-es";
3
3
 
4
4
  export default function getComplementSequenceString(sequence, isRna) {
5
5
  if (typeof sequence !== "string") return "";
@@ -1,6 +1,6 @@
1
1
  import getDigestFragmentsForCutsites from "./getDigestFragmentsForCutsites";
2
2
  import cutSequenceByRestrictionEnzyme from "./cutSequenceByRestrictionEnzyme";
3
- import { flatMap } from "lodash";
3
+ import { flatMap } from "lodash-es";
4
4
 
5
5
  export default function getDigestFragmentsForRestrictionEnzymes(
6
6
  sequence,
@@ -1,7 +1,7 @@
1
1
  import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
2
2
  import getReverseComplementAnnotation from "./getReverseComplementAnnotation";
3
3
  import { annotationTypes } from "./annotationTypes";
4
- import { map } from "lodash";
4
+ import { map } from "lodash-es";
5
5
  import tidyUpSequenceData from "./tidyUpSequenceData";
6
6
 
7
7
  import getSequenceDataBetweenRange from "./getSequenceDataBetweenRange";
@@ -1,4 +1,4 @@
1
- import { flatMap, extend, forEach, startCase } from "lodash";
1
+ import { flatMap, extend, forEach, startCase } from "lodash-es";
2
2
  import { getRangeLength } from "@teselagen/range-utils";
3
3
  import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
4
4
  import insertSequenceDataAtPosition from "./insertSequenceDataAtPosition";
@@ -1,6 +1,6 @@
1
1
  //UNDER CONSTRUCTION
2
2
 
3
- import { get } from "lodash";
3
+ import { get } from "lodash-es";
4
4
 
5
5
  import {
6
6
  normalizePositionByRangeLength,
@@ -1,5 +1,5 @@
1
1
  import { getRangeLength } from "@teselagen/range-utils";
2
- import { map, cloneDeep } from "lodash";
2
+ import { map, cloneDeep } from "lodash-es";
3
3
  import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
4
4
  import rotateSequenceDataToPosition from "./rotateSequenceDataToPosition";
5
5
  import { adjustRangeToDeletionOfAnotherRange } from "@teselagen/range-utils";
@@ -83,6 +83,28 @@ describe("insertSequenceData", () => {
83
83
  postInsertSeq.sequence.should.equal("atgatagatagggagaaa");
84
84
  postInsertSeq.proteinSequence.should.equal("MIDREK");
85
85
  });
86
+ it("inserts protein seq into a dna seq correctly", () => {
87
+ const sequenceToInsert = {
88
+ isProtein: true,
89
+ sequence: "atagatagg",
90
+ proteinSequence: "IDR"
91
+ };
92
+ const sequenceToInsertInto = {
93
+ // 012345
94
+ isProtein: false,
95
+ sequence: "atgagagagaaa",
96
+ proteinSequence: "MREK"
97
+ };
98
+ const range = { start: 3, end: 5 };
99
+ const postInsertSeq = insertSequenceDataAtPositionOrRange(
100
+ sequenceToInsert,
101
+ sequenceToInsertInto,
102
+ range
103
+ );
104
+ postInsertSeq.sequence.should.equal("atgatagatagggagaaa");
105
+ postInsertSeq.isProtein.should.equal(false);
106
+ postInsertSeq.proteinSequence.should.equal("MIDREK");
107
+ });
86
108
  it("inserts characters at correct range and computes the new size correctly", () => {
87
109
  const sequenceToInsert = {
88
110
  sequence: "rrrrrrr"
@@ -1,4 +1,4 @@
1
- import { each, forEach, startsWith, filter } from "lodash";
1
+ import { each, forEach, startsWith, filter } from "lodash-es";
2
2
 
3
3
  import {
4
4
  getYOffsetForPotentiallyCircularRange,
@@ -1,4 +1,4 @@
1
- import { cloneDeep } from "lodash";
1
+ import { cloneDeep } from "lodash-es";
2
2
  import { getYOffsetsForPotentiallyCircularRanges } from "@teselagen/range-utils";
3
3
  import { annotationTypes } from "./annotationTypes";
4
4
 
@@ -1,4 +1,4 @@
1
- import { map } from "lodash";
1
+ import { map } from "lodash-es";
2
2
  import { adjustRangeToRotation } from "@teselagen/range-utils";
3
3
  import tidyUpSequenceData from "./tidyUpSequenceData";
4
4
  import { modifiableTypes } from "./annotationTypes";
@@ -1,4 +1,4 @@
1
- import { cloneDeep, get, some } from "lodash";
1
+ import { cloneDeep, get, some } from "lodash-es";
2
2
  import { getFeatureToColorMap, getFeatureTypes } from "./featureTypesAndColors";
3
3
  import shortid from "shortid";
4
4
 
@@ -2,12 +2,13 @@
2
2
  import shortid from "shortid";
3
3
 
4
4
  import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
5
- import { cloneDeep, flatMap } from "lodash";
5
+ import { cloneDeep, flatMap } from "lodash-es";
6
6
  import { annotationTypes } from "./annotationTypes";
7
7
  import filterSequenceString from "./filterSequenceString";
8
8
  import tidyUpAnnotation from "./tidyUpAnnotation";
9
9
  import getDegenerateDnaStringFromAaString from "./getDegenerateDnaStringFromAAString";
10
10
  import { getFeatureTypes } from "./featureTypesAndColors";
11
+ import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
11
12
 
12
13
  export default function tidyUpSequenceData(pSeqData, options = {}) {
13
14
  const {
@@ -16,6 +17,7 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
16
17
  doNotRemoveInvalidChars,
17
18
  additionalValidChars,
18
19
  noTranslationData,
20
+ includeProteinSequence,
19
21
  doNotProvideIdsForAnnotations,
20
22
  noCdsTranslations,
21
23
  convertAnnotationsFromAAIndices,
@@ -55,7 +57,8 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
55
57
  if (!doNotRemoveInvalidChars) {
56
58
  if (seqData.isProtein) {
57
59
  const [newSeq] = filterSequenceString(seqData.proteinSequence, {
58
- ...(topLevelSeqData || seqData)
60
+ ...(topLevelSeqData || seqData),
61
+ isProtein: true
59
62
  });
60
63
  seqData.proteinSequence = newSeq;
61
64
  } else {
@@ -79,6 +82,10 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
79
82
  null,
80
83
  true
81
84
  );
85
+ } else if (includeProteinSequence) {
86
+ seqData.proteinSequence = getAminoAcidStringFromSequenceString(
87
+ seqData.sequence
88
+ );
82
89
  }
83
90
 
84
91
  seqData.size = seqData.noSequence ? seqData.size : seqData.sequence.length;