@teselagen/sequence-utils 0.3.32 → 0.3.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,10 +1,10 @@
1
1
  {
2
2
  "name": "@teselagen/sequence-utils",
3
- "version": "0.3.32",
3
+ "version": "0.3.36",
4
4
  "type": "module",
5
5
  "dependencies": {
6
6
  "escape-string-regexp": "5.0.0",
7
- "jsondiffpatch": "0.4.1",
7
+ "jsondiffpatch": "0.7.3",
8
8
  "string-splice": "^1.3.0",
9
9
  "lodash-es": "^4.17.21",
10
10
  "shortid": "2.2.16",
@@ -108,6 +108,14 @@ declare const proteinAlphabet: {
108
108
  color: string;
109
109
  mass: number;
110
110
  };
111
+ O: {
112
+ value: string;
113
+ name: string;
114
+ threeLettersName: string;
115
+ colorByFamily: string;
116
+ color: string;
117
+ mass: number;
118
+ };
111
119
  M: {
112
120
  value: string;
113
121
  name: string;
@@ -0,0 +1,86 @@
1
+ import {
2
+ isValidSequence,
3
+ SANTA_LUCIA_NN,
4
+ SANTA_LUCIA_INIT
5
+ } from "./calculateSantaLuciaTm.js";
6
+
7
/**
 * Calculate the 3' end stability of a primer.
 *
 * Sums the SantaLucia (1998) nearest-neighbor enthalpy and entropy for the
 * four dinucleotide steps in the last five 3' bases, adds one terminal
 * initiation term for each end of that 5-mer, and converts the totals to a
 * free energy at 37 °C. The absolute value of that free energy is returned,
 * so bigger numbers mean a more stable 3' end.
 *
 * Reference values quoted by the Primer3 documentation:
 * - Most stable 5mer duplex: GCGCG = 6.86 kcal/mol (SantaLucia 1998)
 * - Most labile 5mer duplex: TATAT = 0.86 kcal/mol (SantaLucia 1998)
 *
 * The input is upper-cased and trimmed before validation. Dinucleotides that
 * contain "N" contribute nothing, and a terminal base that is not G or C
 * (including "N") takes the AT initiation term.
 *
 * This function never throws: every failure (invalid characters, fewer than
 * five bases, non-string input) is caught and reported as a string.
 *
 * @param {string} sequence - DNA sequence (5' to 3')
 * @returns {number|string} |ΔG| in kcal/mol for the last 5 bases, rounded to
 *   two decimals, or an "Error calculating end stability ..." message.
 */
export default function calculateEndStability(sequence) {
  // Kept outside the try so the error message can report the normalized
  // sequence when normalization succeeded, and the raw input when it did not.
  let normalized = sequence;
  try {
    normalized = sequence?.toUpperCase().trim();

    if (!isValidSequence(normalized)) {
      throw new Error("Invalid sequence: contains non-DNA characters");
    }

    if (normalized.length < 5) {
      throw new Error(
        "Sequence too short: minimum length is 5 bases for end stability calculation"
      );
    }

    const last5Bases = normalized.slice(-5);

    let deltaH = 0; // kcal/mol
    let deltaS = 0; // cal/K·mol

    // Nearest-neighbor contributions of the 4 overlapping dinucleotides.
    for (let i = 0; i < last5Bases.length - 1; i++) {
      const dinucleotide = last5Bases.substring(i, i + 2);
      if (dinucleotide.includes("N")) {
        continue;
      }
      const params = SANTA_LUCIA_NN[dinucleotide];
      if (params) {
        deltaH += params.dH;
        deltaS += params.dS;
      }
    }

    // One initiation term per end of the 5-mer, first base then last base.
    const terminalBases = [last5Bases[0], last5Bases[last5Bases.length - 1]];
    for (const base of terminalBases) {
      const init = terminalInitiation(base);
      deltaH += init.dH;
      deltaS += init.dS;
    }

    // deltaG = deltaH - T * deltaS at 37 °C; deltaS is in cal, hence / 1000.
    const T = 310.15; // 37 °C in Kelvin
    const deltaG = deltaH - (T * deltaS) / 1000; // kcal/mol

    return Math.round(Math.abs(deltaG) * 100) / 100;
  } catch (e) {
    return `Error calculating end stability for sequence ${normalized}. ${e}`;
  }
}

// Picks the GC initiation parameters for a terminal G/C and the AT ones otherwise.
function terminalInitiation(base) {
  return base === "G" || base === "C"
    ? SANTA_LUCIA_INIT.GC
    : SANTA_LUCIA_INIT.AT;
}
@@ -0,0 +1,21 @@
1
+ import assert from "assert";
2
+ import calculateEndStability from "./calculateEndStability";
3
+
4
// Each behaviour gets its own case so a failure names the scenario that broke,
// and strictEqual is used so a numeric score can never match a string by coercion.
describe("Calculate the stability of the primer ends.", () => {
  it("should return the end stability score of a given primer sequence", () => {
    assert.strictEqual(
      calculateEndStability("AGCGGATAACAATTTCACACAGGA"),
      3.89
    );
    assert.strictEqual(calculateEndStability("AGCGGATAACAATTTCAC"), 3.24);
  });

  it("should score lowercase input the same as uppercase input", () => {
    assert.strictEqual(calculateEndStability("AGCGGATAACAATTTcac"), 3.24);
    assert.strictEqual(calculateEndStability("ataataccgcgccacatagc"), 2.99);
  });

  it("should skip dinucleotides that contain N", () => {
    assert.strictEqual(calculateEndStability("AGCGGATAACAATACNNN"), 0.6);
    assert.strictEqual(calculateEndStability("AGCGGATAACAATACnnn"), 0.6);
  });

  it("should return an error message for non-DNA characters", () => {
    assert.strictEqual(
      calculateEndStability("AGCGGATAACAYZAKLPATAC"),
      "Error calculating end stability for sequence AGCGGATAACAYZAKLPATAC. Error: Invalid sequence: contains non-DNA characters"
    );
  });

  it("should return an error message for sequences shorter than 5 bases", () => {
    assert.strictEqual(
      calculateEndStability("AGCG"),
      "Error calculating end stability for sequence AGCG. Error: Sequence too short: minimum length is 5 bases for end stability calculation"
    );
  });
});
@@ -0,0 +1,177 @@
1
+ /**
2
+ * Primer3 Melting Temperature Calculator
3
+ *
4
+ * Implements the melting temperature calculation algorithm from Primer3
5
+ * based on the documentation at https://primer3.ut.ee/primer3web_help.htm
6
+ *
7
+ * Uses SantaLucia (1998) nearest-neighbor thermodynamics method with
8
+ * fixed Primer3 custom parameters:
9
+ * - Formula: SantaLucia (1998)
10
+ * - Salt correction: SantaLucia (1998)
11
+ * - Monovalent salt: 50.0 mM
12
+ * - Divalent salt: 1.5 mM
13
+ * - dNTP concentration: 0.6 mM
14
+ * - DNA concentration: 50.0 nM
15
+ *
16
+ * References:
17
+ * - SantaLucia JR (1998) "A unified view of polymer, dumbbell and
18
+ * oligonucleotide DNA nearest-neighbor thermodynamics",
19
+ * Proc Natl Acad Sci 95:1460-65
20
+ */
21
+
22
// Primer3 custom parameters (fixed).
// Frozen so no code path can alter the reaction conditions shared by every
// calculation in this module.
const PRIMER3_PARAMS = Object.freeze({
  saltMonovalent: 50.0, // monovalent cation concentration, mM
  saltDivalent: 1.5, // divalent cation concentration, mM
  dntpConc: 0.6, // dNTP concentration, mM
  dnaConc: 50.0, // DNA (oligo) concentration, nM
  R: 1.987 // gas constant, cal/(K·mol)
});
30
+
31
// SantaLucia (1998) nearest-neighbor parameters, keyed by the 5'->3'
// dinucleotide step. dH in kcal/mol, dS in cal/K·mol.
// The table and every entry are frozen: this is an exported lookup shared by
// several calculations, so an accidental write by one importer must not be
// able to change the results of another.
export const SANTA_LUCIA_NN = Object.freeze(
  Object.fromEntries(
    Object.entries({
      AA: { dH: -7.9, dS: -22.2 },
      TT: { dH: -7.9, dS: -22.2 },
      AT: { dH: -7.2, dS: -20.4 },
      TA: { dH: -7.2, dS: -21.3 },
      CA: { dH: -8.5, dS: -22.7 },
      TG: { dH: -8.5, dS: -22.7 },
      GT: { dH: -8.4, dS: -22.4 },
      AC: { dH: -8.4, dS: -22.4 },
      CT: { dH: -7.8, dS: -21.0 },
      AG: { dH: -7.8, dS: -21.0 },
      GA: { dH: -8.2, dS: -22.2 },
      TC: { dH: -8.2, dS: -22.2 },
      CG: { dH: -10.6, dS: -27.2 },
      GC: { dH: -9.8, dS: -24.4 },
      GG: { dH: -8.0, dS: -19.9 },
      CC: { dH: -8.0, dS: -19.9 }
    }).map(([step, params]) => [step, Object.freeze(params)])
  )
);
51
+
52
// Initiation parameters (SantaLucia 1998). dH in kcal/mol, dS in cal/K·mol.
// Frozen at both levels because this exported table is shared between the Tm
// and end-stability calculations.
export const SANTA_LUCIA_INIT = Object.freeze({
  GC: Object.freeze({ dH: 0.1, dS: -2.8 }), // initiation with terminal GC
  AT: Object.freeze({ dH: 2.3, dS: 4.1 }) // initiation with terminal AT
});
57
+
58
/**
 * Effective monovalent cation concentration for the fixed reaction setup.
 *
 * Starts from the monovalent salt concentration and, when divalent salt is
 * present, adds 120 * sqrt(divalent - dNTP), with the difference clamped at
 * zero so that an excess of dNTP cannot produce a negative radicand.
 * Formula from von Ahsen et al. (2001).
 *
 * @returns {number} - Effective monovalent concentration in mM
 */
function getEffectiveMonovalentConc() {
  const { saltMonovalent, saltDivalent, dntpConc } = PRIMER3_PARAMS;

  // No divalent salt: nothing to add.
  if (!(saltDivalent > 0)) {
    return saltMonovalent;
  }

  const unboundDivalent = Math.max(0, saltDivalent - dntpConc);
  return saltMonovalent + 120 * Math.sqrt(unboundDivalent);
}
79
+
80
/**
 * SantaLucia (1998) salt correction for entropy.
 *
 * Adds 0.368 * nnPairs * ln([effective monovalent, in M]) to the supplied
 * entropy. The effective concentration is obtained in mM and divided by
 * 1000 before taking the logarithm.
 *
 * @param {number} deltaS - Entropy in cal/K·mol
 * @param {number} nnPairs - Number of nearest-neighbor pairs
 * @returns {number} - Corrected entropy in cal/K·mol
 */
function applySaltCorrection(deltaS, nnPairs) {
  const molarMonovalent = getEffectiveMonovalentConc() / 1000; // mM -> M
  const pairScale = 0.368 * nnPairs;
  return deltaS + pairScale * Math.log(molarMonovalent);
}
92
+
93
/**
 * Validate a DNA sequence.
 *
 * Accepts only non-empty strings made up entirely of the upper-case letters
 * A, T, G, C and N. The check is case-sensitive and does not trim, so
 * callers are expected to normalize first. Non-string values are rejected
 * outright; without the type guard RegExp.test would stringify them, and an
 * input such as ["ATG"] would be reported as valid.
 *
 * @param {string} sequence - DNA sequence
 * @returns {boolean} - True if valid
 */
export function isValidSequence(sequence) {
  return typeof sequence === "string" && /^[ATGCN]+$/.test(sequence);
}
102
+
103
/**
 * Calculate melting temperature using the SantaLucia (1998) method.
 *
 * Steps, in order:
 *  1. Upper-case and trim the input, then validate it.
 *  2. Sum nearest-neighbor dH/dS over every overlapping dinucleotide,
 *     skipping any dinucleotide that contains "N".
 *  3. Add one initiation term for the first base and one for the last base
 *     (GC parameters for G/C, AT parameters for anything else).
 *  4. Salt-correct the entropy using sequence.length - 1 as the pair count.
 *     Skipped "N" dinucleotides are still counted here.
 *  5. Tm = dH / (dS + R * ln(C / 4)), converted from Kelvin to Celsius.
 *
 * This function never throws: every failure (invalid characters, fewer than
 * two bases, non-string input) is caught and reported as a string.
 *
 * @param {string} sequence - DNA sequence (5' to 3')
 * @returns {number|string} Melting temperature in Celsius, or an
 *   "Error calculating Tm ..." message.
 */
export default function calculateSantaLuciaTm(sequence) {
  // Kept outside the try so the error message can report the normalized
  // sequence when normalization succeeded, and the raw input when it did not.
  let normalized = sequence;
  try {
    normalized = sequence?.toUpperCase().trim();

    if (!isValidSequence(normalized)) {
      throw new Error("Invalid sequence: contains non-DNA characters");
    }

    if (normalized.length < 2) {
      throw new Error("Sequence too short: minimum length is 2 bases");
    }

    const nnPairs = normalized.length - 1;
    let deltaH = 0; // kcal/mol
    let deltaS = 0; // cal/K·mol

    // Nearest-neighbor contributions.
    for (let i = 0; i < nnPairs; i++) {
      const dinucleotide = normalized.substring(i, i + 2);
      if (dinucleotide.includes("N")) {
        continue;
      }
      const params = SANTA_LUCIA_NN[dinucleotide];
      if (params) {
        deltaH += params.dH;
        deltaS += params.dS;
      }
    }

    // Initiation terms: first base, then last base.
    const terminalBases = [normalized[0], normalized[normalized.length - 1]];
    for (const base of terminalBases) {
      const init = initiationParamsFor(base);
      deltaH += init.dH;
      deltaS += init.dS;
    }

    deltaS = applySaltCorrection(deltaS, nnPairs);

    // C is the DNA concentration in M (configured in nM).
    const C = PRIMER3_PARAMS.dnaConc * 1e-9;
    // deltaH is in kcal/mol while deltaS and R are in cal, hence * 1000.
    const tmKelvin =
      (deltaH * 1000) / (deltaS + PRIMER3_PARAMS.R * Math.log(C / 4));

    return tmKelvin - 273.15;
  } catch (e) {
    return `Error calculating Tm for sequence ${normalized}. ${e}`;
  }
}

// Picks the GC initiation parameters for a terminal G/C and the AT ones otherwise.
function initiationParamsFor(base) {
  return base === "G" || base === "C"
    ? SANTA_LUCIA_INIT.GC
    : SANTA_LUCIA_INIT.AT;
}
@@ -0,0 +1,39 @@
1
+ import assert from "assert";
2
+ import calculateSantaLuciaTm from "./calculateSantaLuciaTm";
3
+
4
describe("calculate Tm based on SantaLucia 1998", () => {
  // Tm is the result of a long floating-point sum, so compare within a tight
  // tolerance rather than bit-for-bit; also require a real number so an
  // error string can never satisfy the check through coercion.
  const assertTmClose = (sequence, expectedTm) => {
    const tm = calculateSantaLuciaTm(sequence);
    assert.strictEqual(typeof tm, "number");
    assert.ok(
      Math.abs(tm - expectedTm) < 1e-9,
      `Tm for ${sequence}: expected ${expectedTm}, got ${tm}`
    );
  };

  it("should return the melting temperature of a given sequence, if no degenerate bases are present", () => {
    assertTmClose("AGCGGATAACAATTTCACACAGGA", 60.805947394707346);
    assertTmClose("AGCGGATAACAATTTCAC", 50.301642635069356);
  });

  it("should treat lowercase input the same as uppercase input", () => {
    assertTmClose("AGCGGATAACAATTTcac", 50.301642635069356);
    assertTmClose("ataataccgcgccacatagc", 58.27798862992364);
  });

  it("should skip dinucleotides that contain N", () => {
    assertTmClose("AGCGGATAACAATACNNN", 40.92944342497407);
    assertTmClose("AGCGGATAACAATACnnn", 40.92944342497407);
  });

  it("should return an error message for non-DNA characters", () => {
    assert.strictEqual(
      calculateSantaLuciaTm("AGCGGATAACAYZAKLPATAC"),
      "Error calculating Tm for sequence AGCGGATAACAYZAKLPATAC. Error: Invalid sequence: contains non-DNA characters"
    );
  });

  it("should return an error message for sequences shorter than 2 bases", () => {
    assert.strictEqual(
      calculateSantaLuciaTm("A"),
      "Error calculating Tm for sequence A. Error: Sequence too short: minimum length is 2 bases"
    );
  });
});
package/src/diffUtils.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { cloneDeep, forEach } from "lodash-es";
2
- import { diff, patch, reverse } from "jsondiffpatch/dist/jsondiffpatch.umd";
2
+ import { diff, patch, reverse } from "jsondiffpatch";
3
3
 
4
4
  import tidyUpSequenceData from "./tidyUpSequenceData";
5
5
 
@@ -103,7 +103,7 @@ export function getAcceptedChars({
103
103
  isMixedRnaAndDna
104
104
  } = {}) {
105
105
  return isProtein
106
- ? `${extended_protein_letters.toLowerCase()}}`
106
+ ? `${extended_protein_letters.toLowerCase()}`
107
107
  : isOligo
108
108
  ? ambiguous_rna_letters.toLowerCase() + "t"
109
109
  : isRna
@@ -186,6 +186,9 @@ export default function getAminoAcidDataForEachBaseOfDna(
186
186
  optionalSubrangeRange,
187
187
  isProteinSequence
188
188
  ) {
189
+ if (!originalSequenceString) {
190
+ return [];
191
+ }
189
192
  // Obtain derived properties, see getTranslatedSequenceProperties
190
193
  const {
191
194
  sequenceString,
@@ -205,7 +208,7 @@ export default function getAminoAcidDataForEachBaseOfDna(
205
208
  // Iterate over the DNA sequence length in increments of 3
206
209
  for (let index = 0; index < sequenceStringLength; index += 3) {
207
210
  let aminoAcid;
208
- const aminoAcidIndex = index / 3;
211
+ const aminoAcidIndex = Math.floor(index / 3);
209
212
  let codonPositionsInCDS;
210
213
  let basesRead;
211
214
 
@@ -225,22 +225,14 @@ describe("getSequenceDataBetweenRange", () => {
225
225
  end: 3
226
226
  }
227
227
  );
228
- res.should.containSubset({
229
- sequence: "gc",
230
- features: [
231
- {
232
- start: 0,
233
- end: 1,
234
- locations: [
235
- {
236
- start: 0,
237
- end: 1
238
- }
239
- ],
240
- name: "happy"
241
- }
242
- ]
243
- });
228
+ res.features.should.containSubset([
229
+ {
230
+ start: 0,
231
+ end: 1,
232
+ name: "happy"
233
+ }
234
+ ]);
235
+ res.sequence.should.equal("gc");
244
236
  });
245
237
  it("feature with locations, non circular enclosing range", () => {
246
238
  const res = getSequenceDataBetweenRange(
@@ -283,7 +275,7 @@ describe("getSequenceDataBetweenRange", () => {
283
275
  ]
284
276
  });
285
277
  });
286
- it.only("feature with locations, non circular, non-fully enclosing range - it should trim the start/end correctly to match the location", () => {
278
+ it("feature with locations, non circular, non-fully enclosing range - it should trim the start/end correctly to match the location", () => {
287
279
  const res = getSequenceDataBetweenRange(
288
280
  {
289
281
  sequence: "gggatgcatgca",
package/src/index.js CHANGED
@@ -96,6 +96,8 @@ export { default as condensePairwiseAlignmentDifferences } from "./condensePairw
96
96
  export { default as addGapsToSeqReads } from "./addGapsToSeqReads";
97
97
  export { default as calculateNebTm } from "./calculateNebTm";
98
98
  export { default as calculateNebTa } from "./calculateNebTa";
99
+ export { default as calculateSantaLuciaTm } from "./calculateSantaLuciaTm";
100
+ export { default as calculateEndStability } from "./calculateEndStability";
99
101
  export { default as getDigestFragmentsForCutsites } from "./getDigestFragmentsForCutsites";
100
102
  export { default as getDigestFragmentsForRestrictionEnzymes } from "./getDigestFragmentsForRestrictionEnzymes";
101
103
  export { default as convertDnaCaretPositionOrRangeToAA } from "./convertDnaCaretPositionOrRangeToAA";
@@ -27,9 +27,10 @@ export default function insertSequenceDataAtPositionOrRange(
27
27
  ...options
28
28
  });
29
29
  const newSequenceData = cloneDeep(existingSequenceData);
30
- const insertLength = sequenceDataToInsert.proteinSequence
31
- ? sequenceDataToInsert.proteinSequence.length * 3
32
- : sequenceDataToInsert.sequence.length;
30
+ const insertLength =
31
+ sequenceDataToInsert.isProtein && sequenceDataToInsert.proteinSequence
32
+ ? sequenceDataToInsert.proteinSequence.length * 3
33
+ : sequenceDataToInsert.sequence.length;
33
34
  let caretPosition = caretPositionOrRange;
34
35
 
35
36
  const isInsertSameLengthAsSelection =
@@ -187,7 +188,7 @@ function adjustAnnotationsToDelete(annotationsToBeAdjusted, range, maxLength) {
187
188
  ...newRange,
188
189
  start: newLocations[0].start,
189
190
  end: newLocations[newLocations.length - 1].end,
190
- ...(newLocations.length > 1 && { locations: newLocations })
191
+ ...(newLocations.length > 0 && { locations: newLocations })
191
192
  };
192
193
  } else {
193
194
  return newRange;
@@ -112,6 +112,15 @@ const proteinAlphabet = {
112
112
  mass: 128.17228
113
113
  },
114
114
 
115
+ O: {
116
+ value: "O",
117
+ name: "Pyrrolysine",
118
+ threeLettersName: "Pyl",
119
+ colorByFamily: "#FFC0CB",
120
+ color: "hsl(264.7, 100%, 69%)",
121
+ mass: 255.313
122
+ },
123
+
115
124
  M: {
116
125
  value: "M",
117
126
  name: "Methionine",
@@ -1,6 +1,6 @@
1
1
  import proteinAlphabet from "./proteinAlphabet";
2
2
 
3
- const threeLetterSequenceStringToAminoAcidMap = {
3
+ const initThreeLetterSequenceStringToAminoAcidMap = {
4
4
  gct: proteinAlphabet.A,
5
5
  gcc: proteinAlphabet.A,
6
6
  gca: proteinAlphabet.A,
@@ -99,8 +99,80 @@ const threeLetterSequenceStringToAminoAcidMap = {
99
99
  taa: proteinAlphabet["*"],
100
100
  tag: proteinAlphabet["*"],
101
101
  tga: proteinAlphabet["*"],
102
+ uaa: proteinAlphabet["*"],
103
+ uag: proteinAlphabet["*"],
104
+ uga: proteinAlphabet["*"],
102
105
  "...": proteinAlphabet["."],
103
106
  "---": proteinAlphabet["-"]
104
107
  };
105
108
 
109
// IUPAC nucleotide codes (DNA/RNA) with U awareness.
// Each code maps to the array of concrete bases it can stand for; every code
// that can mean T also lists U. The table is written as compact strings and
// split into one-letter arrays.
const IUPAC = Object.fromEntries(
  Object.entries({
    A: "A",
    C: "C",
    G: "G",
    T: "T",
    U: "U",

    R: "AG",
    Y: "CTU",
    K: "GTU",
    M: "AC",
    S: "GC",
    W: "ATU",
    B: "CGTU",
    D: "AGTU",
    H: "ACTU",
    V: "ACG",
    N: "ACGTU",
    X: "ACGTU"
  }).map(([code, bases]) => [code, bases.split("")])
);
130
+
131
+
132
/**
 * Resolve a possibly ambiguous codon to a single amino acid.
 *
 * Every character is expanded through the IUPAC table (characters not in the
 * table stand for themselves), and all concrete combinations are looked up in
 * the base codon map. A lookup tries the lower-cased codon as-is, then with
 * every "u" written as "t", then with every "t" written as "u".
 *
 * @param {string} threeLetterCodon - Codon, any case, may use IUPAC codes
 * @returns {object|null} The amino acid entry shared by every expansion, or
 *   null when an expansion is unknown or two expansions disagree on `value`.
 */
function expandAndResolve(threeLetterCodon) {
  const basesPerPosition = threeLetterCodon
    .toUpperCase()
    .split("")
    .map((symbol) => IUPAC[symbol] || [symbol]);

  // Cartesian product of the per-position choices, built left to right.
  const concreteCodons = basesPerPosition.reduce(
    (prefixes, bases) =>
      prefixes.flatMap((prefix) => bases.map((base) => prefix + base)),
    [""]
  );

  const lookUp = (codon) => {
    const key = codon.toLowerCase();
    return (
      initThreeLetterSequenceStringToAminoAcidMap[key] ??
      initThreeLetterSequenceStringToAminoAcidMap[key.replace(/u/g, "t")] ??
      initThreeLetterSequenceStringToAminoAcidMap[key.replace(/t/g, "u")]
    );
  };

  let resolved = null;
  for (const codon of concreteCodons) {
    const aminoAcid = lookUp(codon);
    if (!aminoAcid) {
      return null;
    }
    if (!resolved) {
      resolved = aminoAcid;
      continue;
    }
    if (resolved.value !== aminoAcid.value) {
      return null;
    }
  }
  return resolved;
}
158
+
159
/**
 * Build the full codon -> amino acid lookup, including ambiguous codons.
 *
 * Starts from a copy of the base codon map and, for every 3-letter
 * combination of IUPAC codes that is not already present, adds the amino
 * acid that expandAndResolve reports for it (combinations it cannot resolve
 * are left out). Keys are lower-case.
 *
 * The base map is copied rather than extended in place so the table that
 * expandAndResolve reads from stays fixed while this function runs.
 *
 * @returns {object} Map from lower-case codon to amino acid entry
 */
function getCodonToAminoAcidMap() {
  const map = { ...initThreeLetterSequenceStringToAminoAcidMap };
  // Generate all IUPAC 3-mers.
  const codes = Object.keys(IUPAC);
  for (const first of codes) {
    for (const second of codes) {
      for (const third of codes) {
        const codon = first + second + third;
        const lowerCodon = codon.toLowerCase();
        if (map[lowerCodon]) {
          continue;
        }
        const aminoAcidObj = expandAndResolve(codon);
        if (aminoAcidObj) {
          map[lowerCodon] = aminoAcidObj;
        }
      }
    }
  }

  return map;
}
175
+
176
+ const threeLetterSequenceStringToAminoAcidMap = getCodonToAminoAcidMap();
177
+
106
178
  export default threeLetterSequenceStringToAminoAcidMap;
@@ -9,6 +9,7 @@ import tidyUpAnnotation from "./tidyUpAnnotation";
9
9
  import getDegenerateDnaStringFromAaString from "./getDegenerateDnaStringFromAAString";
10
10
  import { getFeatureTypes } from "./featureTypesAndColors";
11
11
  import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
12
+ import { expandOrContractRangeByLength } from "@teselagen/range-utils";
12
13
 
13
14
  export default function tidyUpSequenceData(pSeqData, options = {}) {
14
15
  const {
@@ -137,14 +138,21 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
137
138
  //filter off cds translations
138
139
  return [];
139
140
  }
140
- if (!translation.aminoAcids && !seqData.noSequence) {
141
- translation.aminoAcids = getAminoAcidDataForEachBaseOfDna(
141
+ const codonStart = translation?.notes?.codon_start?.[0] - 1 || 0;
142
+ const expandedRange = expandOrContractRangeByLength(
143
+ translation,
144
+ -codonStart,
145
+ true,
146
+ seqData.sequence.length
147
+ );
148
+ if (!expandedRange.aminoAcids && !seqData.noSequence) {
149
+ expandedRange.aminoAcids = getAminoAcidDataForEachBaseOfDna(
142
150
  seqData.sequence,
143
- translation.forward,
144
- translation
151
+ expandedRange.forward,
152
+ expandedRange
145
153
  );
146
154
  }
147
- return translation;
155
+ return expandedRange;
148
156
  });
149
157
  }
150
158
 
@@ -879,6 +879,30 @@ declare const threeLetterSequenceStringToAminoAcidMap: {
879
879
  color: string;
880
880
  mass: number;
881
881
  };
882
+ uaa: {
883
+ value: string;
884
+ name: string;
885
+ threeLettersName: string;
886
+ colorByFamily: string;
887
+ color: string;
888
+ mass: number;
889
+ };
890
+ uag: {
891
+ value: string;
892
+ name: string;
893
+ threeLettersName: string;
894
+ colorByFamily: string;
895
+ color: string;
896
+ mass: number;
897
+ };
898
+ uga: {
899
+ value: string;
900
+ name: string;
901
+ threeLettersName: string;
902
+ colorByFamily: string;
903
+ color: string;
904
+ mass: number;
905
+ };
882
906
  "...": {
883
907
  value: string;
884
908
  name: string;