@teselagen/sequence-utils 0.3.32 → 0.3.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/calculateEndStability.d.ts +16 -0
- package/calculateEndStability.test.d.ts +1 -0
- package/calculateSantaLuciaTm.d.ts +127 -0
- package/calculateSantaLuciaTm.test.d.ts +1 -0
- package/diffUtils.d.ts +3 -3
- package/index.cjs +1408 -3904
- package/index.d.ts +2 -0
- package/index.js +1408 -3904
- package/index.umd.cjs +1408 -3904
- package/package.json +2 -2
- package/proteinAlphabet.d.ts +8 -0
- package/src/calculateEndStability.js +86 -0
- package/src/calculateEndStability.test.js +21 -0
- package/src/calculateSantaLuciaTm.js +177 -0
- package/src/calculateSantaLuciaTm.test.js +39 -0
- package/src/diffUtils.js +1 -1
- package/src/filterSequenceString.js +1 -1
- package/src/getAminoAcidDataForEachBaseOfDna.js +4 -1
- package/src/getSequenceDataBetweenRange.test.js +9 -17
- package/src/index.js +2 -0
- package/src/insertSequenceDataAtPositionOrRange.js +5 -4
- package/src/proteinAlphabet.js +9 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +73 -1
- package/src/tidyUpSequenceData.js +13 -5
- package/threeLetterSequenceStringToAminoAcidMap.d.ts +24 -0
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@teselagen/sequence-utils",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.36",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"dependencies": {
|
|
6
6
|
"escape-string-regexp": "5.0.0",
|
|
7
|
-
"jsondiffpatch": "0.
|
|
7
|
+
"jsondiffpatch": "0.7.3",
|
|
8
8
|
"string-splice": "^1.3.0",
|
|
9
9
|
"lodash-es": "^4.17.21",
|
|
10
10
|
"shortid": "2.2.16",
|
package/proteinAlphabet.d.ts
CHANGED
|
@@ -108,6 +108,14 @@ declare const proteinAlphabet: {
|
|
|
108
108
|
color: string;
|
|
109
109
|
mass: number;
|
|
110
110
|
};
|
|
111
|
+
O: {
|
|
112
|
+
value: string;
|
|
113
|
+
name: string;
|
|
114
|
+
threeLettersName: string;
|
|
115
|
+
colorByFamily: string;
|
|
116
|
+
color: string;
|
|
117
|
+
mass: number;
|
|
118
|
+
};
|
|
111
119
|
M: {
|
|
112
120
|
value: string;
|
|
113
121
|
name: string;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import {
|
|
2
|
+
isValidSequence,
|
|
3
|
+
SANTA_LUCIA_NN,
|
|
4
|
+
SANTA_LUCIA_INIT
|
|
5
|
+
} from "./calculateSantaLuciaTm.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Compute the 3' end stability of a primer.
 *
 * The score is the magnitude of delta G (kcal/mol, evaluated at 37 °C) for the
 * last five bases at the 3' end, built from the SantaLucia (1998)
 * nearest-neighbor terms of the four overlapping dinucleotides plus one
 * initiation term for each end of that 5-mer. Larger values mean a more
 * stable 3' end.
 *
 * Reference points quoted from the Primer3 documentation:
 * - Most stable 5mer duplex: GCGCG = 6.86 kcal/mol
 * - Most labile 5mer duplex: TATAT = 0.86 kcal/mol
 *
 * Handling of N:
 * - a dinucleotide containing N contributes nothing;
 * - a terminal base that is not G or C (this includes N) receives the
 *   A/T initiation term.
 * NOTE(review): the absolute value of delta G is returned, so the sign of a
 * positive delta G is not visible to the caller — confirm this is intended.
 *
 * The input is upper-cased and trimmed before validation. Failures are NOT
 * thrown to the caller: any error raised inside the function is caught and
 * reported through the return value.
 *
 * @param {string} sequence - DNA sequence (5' to 3'), letters A/T/G/C/N
 * @returns {number|string} - Score rounded to two decimals, or an
 *   "Error calculating end stability ..." message string when the input is
 *   invalid or shorter than 5 bases
 */
export default function calculateEndStability(sequence) {
  try {
    sequence = sequence?.toUpperCase().trim();

    if (!isValidSequence(sequence)) {
      throw new Error("Invalid sequence: contains non-DNA characters");
    }
    if (sequence.length < 5) {
      throw new Error(
        "Sequence too short: minimum length is 5 bases for end stability calculation"
      );
    }

    // Only the five 3'-terminal bases take part in the calculation.
    const tail = sequence.slice(-5);

    let enthalpy = 0; // kcal/mol
    let entropy = 0; // cal/K·mol
    const accumulate = (term) => {
      enthalpy += term.dH;
      entropy += term.dS;
    };

    // Nearest-neighbor terms for the overlapping dinucleotides of the 5-mer.
    for (let start = 0; start + 1 < tail.length; start++) {
      const pair = tail.substring(start, start + 2);
      if (pair.includes("N")) continue;
      const nnTerm = SANTA_LUCIA_NN[pair];
      if (nnTerm) accumulate(nnTerm);
    }

    // One initiation term per end: G/C ends use the GC entry, anything else AT.
    for (const terminal of [tail[0], tail[tail.length - 1]]) {
      const isGcEnd = terminal === "G" || terminal === "C";
      accumulate(isGcEnd ? SANTA_LUCIA_INIT.GC : SANTA_LUCIA_INIT.AT);
    }

    // deltaG = deltaH - T * deltaS at 37 °C; dS is in cal, hence the /1000.
    const kelvin37 = 310.15;
    const freeEnergy = enthalpy - (kelvin37 * entropy) / 1000; // kcal/mol

    return Math.round(Math.abs(freeEnergy) * 100) / 100;
  } catch (e) {
    return `Error calculating end stability for sequence ${sequence}. ${e}`;
  }
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import assert from "assert";
|
|
2
|
+
import calculateEndStability from "./calculateEndStability";
|
|
3
|
+
|
|
4
|
+
// Expected outputs pinned for calculateEndStability. Numeric results are the
// rounded scores; invalid inputs are reported as message strings rather than
// thrown, so those cases compare against the exact returned text.
describe("Calculate the stability of the primer ends.", () => {
  it("should return the end stability score of a given primer sequence", () => {
    const cases = [
      ["AGCGGATAACAATTTCACACAGGA", 3.89],
      ["AGCGGATAACAATTTCAC", 3.24],
      // lower-case input is normalised before the calculation
      ["AGCGGATAACAATTTcac", 3.24],
      ["ataataccgcgccacatagc", 2.99],
      // trailing N bases
      ["AGCGGATAACAATACNNN", 0.6],
      ["AGCGGATAACAATACnnn", 0.6],
      // non-DNA characters
      [
        "AGCGGATAACAYZAKLPATAC",
        "Error calculating end stability for sequence AGCGGATAACAYZAKLPATAC. Error: Invalid sequence: contains non-DNA characters"
      ],
      // fewer than five bases
      [
        "AGCG",
        "Error calculating end stability for sequence AGCG. Error: Sequence too short: minimum length is 5 bases for end stability calculation"
      ]
    ];

    // strictEqual instead of the legacy loose assert.equal, so a numeric
    // result can never be satisfied by a coercible string (or vice versa).
    for (const [input, expected] of cases) {
      assert.strictEqual(calculateEndStability(input), expected);
    }
  });
});
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Primer3 Melting Temperature Calculator
|
|
3
|
+
*
|
|
4
|
+
* Implements the melting temperature calculation algorithm from Primer3
|
|
5
|
+
* based on the documentation at https://primer3.ut.ee/primer3web_help.htm
|
|
6
|
+
*
|
|
7
|
+
* Uses SantaLucia (1998) nearest-neighbor thermodynamics method with
|
|
8
|
+
* fixed Primer3 custom parameters:
|
|
9
|
+
* - Formula: SantaLucia (1998)
|
|
10
|
+
* - Salt correction: SantaLucia (1998)
|
|
11
|
+
* - Monovalent salt: 50.0 mM
|
|
12
|
+
* - Divalent salt: 1.5 mM
|
|
13
|
+
* - dNTP concentration: 0.6 mM
|
|
14
|
+
* - DNA concentration: 50.0 nM
|
|
15
|
+
*
|
|
16
|
+
* References:
|
|
17
|
+
* - SantaLucia JR (1998) "A unified view of polymer, dumbbell and
|
|
18
|
+
* oligonucleotide DNA nearest-neighbor thermodynamics",
|
|
19
|
+
* Proc Natl Acad Sci 95:1460-65
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
// Fixed reaction conditions (the Primer3 custom parameter set used by this
// module) together with the gas constant needed by the Tm equation.
const PRIMER3_PARAMS = {
  saltMonovalent: 50, // monovalent cations, mM
  saltDivalent: 1.5, // divalent cations, mM
  dntpConc: 0.6, // dNTPs, mM
  dnaConc: 50, // DNA (oligo), nM
  R: 1.987 // gas constant, cal/K·mol
};

// SantaLucia (1998) nearest-neighbor table, keyed by dinucleotide.
// dH is in kcal/mol, dS in cal/K·mol. Reverse-complementary dinucleotides
// (AA/TT, CA/TG, GT/AC, CT/AG, GA/TC, GG/CC) carry identical values.
export const SANTA_LUCIA_NN = {
  AA: { dH: -7.9, dS: -22.2 },
  TT: { dH: -7.9, dS: -22.2 },

  AT: { dH: -7.2, dS: -20.4 },
  TA: { dH: -7.2, dS: -21.3 },

  CA: { dH: -8.5, dS: -22.7 },
  TG: { dH: -8.5, dS: -22.7 },

  GT: { dH: -8.4, dS: -22.4 },
  AC: { dH: -8.4, dS: -22.4 },

  CT: { dH: -7.8, dS: -21 },
  AG: { dH: -7.8, dS: -21 },

  GA: { dH: -8.2, dS: -22.2 },
  TC: { dH: -8.2, dS: -22.2 },

  CG: { dH: -10.6, dS: -27.2 },
  GC: { dH: -9.8, dS: -24.4 },

  GG: { dH: -8, dS: -19.9 },
  CC: { dH: -8, dS: -19.9 }
};

// SantaLucia (1998) duplex-initiation terms, chosen per terminal base pair.
// Same units as the nearest-neighbor table.
export const SANTA_LUCIA_INIT = {
  GC: { dH: 0.1, dS: -2.8 }, // terminal G·C
  AT: { dH: 2.3, dS: 4.1 } // terminal A·T
};
|
|
57
|
+
|
|
58
|
+
/**
 * Effective monovalent cation concentration for the fixed reaction
 * conditions in PRIMER3_PARAMS.
 *
 * When divalent cations are present, the portion not offset by dNTPs
 * (never below zero) is converted to a monovalent equivalent as
 * 120 * sqrt(divalent - dNTP) and added to the monovalent concentration
 * (formula attributed to von Ahsen et al., 2001).
 *
 * @returns {number} - Effective monovalent concentration in mM
 */
function getEffectiveMonovalentConc() {
  const { saltMonovalent, saltDivalent, dntpConc } = PRIMER3_PARAMS;

  // No divalent cations: nothing to convert.
  if (!(saltDivalent > 0)) {
    return saltMonovalent;
  }

  const unboundDivalent = Math.max(0, saltDivalent - dntpConc);
  return saltMonovalent + 120 * Math.sqrt(unboundDivalent);
}
|
|
79
|
+
|
|
80
|
+
/**
 * SantaLucia (1998) salt correction of the duplex entropy.
 *
 * Adds 0.368 * nnPairs * ln([effective monovalent]) to deltaS, where the
 * effective monovalent concentration (obtained in mM) is divided by 1000
 * before taking the logarithm.
 *
 * @param {number} deltaS - Entropy in cal/K·mol
 * @param {number} nnPairs - Number of nearest-neighbor pairs
 * @returns {number} - Corrected entropy in cal/K·mol
 */
function applySaltCorrection(deltaS, nnPairs) {
  const monovalentMolar = getEffectiveMonovalentConc() / 1000;
  const entropyShift = 0.368 * nnPairs * Math.log(monovalentMolar);
  return deltaS + entropyShift;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Check that a value is a non-empty DNA string made only of the upper-case
 * letters A, T, G, C and N.
 *
 * Callers are expected to upper-case the sequence first; lower-case letters
 * are rejected. Non-string values are rejected outright: RegExp.test would
 * otherwise coerce them to a string, letting e.g. ["ATG"] pass as valid.
 *
 * @param {string} sequence - DNA sequence
 * @returns {boolean} - True if valid
 */
export function isValidSequence(sequence) {
  return typeof sequence === "string" && /^[ATGCN]+$/.test(sequence);
}
|
|
102
|
+
|
|
103
|
+
/**
 * Melting temperature of an oligo by the SantaLucia (1998) nearest-neighbor
 * method under the fixed conditions in PRIMER3_PARAMS.
 *
 * Steps: sum the nearest-neighbor dH/dS over every adjacent base pair, add
 * one initiation term per end, salt-correct the entropy, then evaluate
 * Tm = deltaH / (deltaS + R * ln(C / 4)) and convert Kelvin to Celsius.
 *
 * Handling of N:
 * - a dinucleotide containing N contributes nothing to dH/dS, but still
 *   counts towards the pair count (length - 1) used by the salt correction;
 * - a terminal base that is not G or C (this includes N) receives the
 *   A/T initiation term.
 *
 * The input is upper-cased and trimmed before validation. Failures are NOT
 * thrown to the caller: any error raised inside the function is caught and
 * reported through the return value.
 *
 * @param {string} sequence - DNA sequence (5' to 3'), letters A/T/G/C/N
 * @returns {number|string} - Melting temperature in Celsius (unrounded), or
 *   an "Error calculating Tm ..." message string when the input is invalid
 *   or shorter than 2 bases
 */
export default function calculateSantaLuciaTm(sequence) {
  try {
    sequence = sequence?.toUpperCase().trim();

    if (!isValidSequence(sequence)) {
      throw new Error("Invalid sequence: contains non-DNA characters");
    }
    if (sequence.length < 2) {
      throw new Error("Sequence too short: minimum length is 2 bases");
    }

    let enthalpy = 0; // kcal/mol
    let entropy = 0; // cal/K·mol
    const accumulate = (term) => {
      enthalpy += term.dH;
      entropy += term.dS;
    };

    // Nearest-neighbor terms for every adjacent pair along the sequence.
    for (let start = 0; start + 1 < sequence.length; start++) {
      const pair = sequence.substring(start, start + 2);
      if (pair.includes("N")) continue;
      const nnTerm = SANTA_LUCIA_NN[pair];
      if (nnTerm) accumulate(nnTerm);
    }

    // One initiation term per end: G/C ends use the GC entry, anything else AT.
    for (const terminal of [sequence[0], sequence[sequence.length - 1]]) {
      const isGcEnd = terminal === "G" || terminal === "C";
      accumulate(isGcEnd ? SANTA_LUCIA_INIT.GC : SANTA_LUCIA_INIT.AT);
    }

    // Salt correction scales with the number of adjacent pairs.
    entropy = applySaltCorrection(entropy, sequence.length - 1);

    // Tm = deltaH / (deltaS + R * ln(C / 4)), with C in mol/L (given in nM)
    // and deltaH converted from kcal to cal.
    const molarDna = PRIMER3_PARAMS.dnaConc * 1e-9;
    const kelvinTm =
      (enthalpy * 1000) / (entropy + PRIMER3_PARAMS.R * Math.log(molarDna / 4));

    return kelvinTm - 273.15;
  } catch (e) {
    return `Error calculating Tm for sequence ${sequence}. ${e}`;
  }
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import assert from "assert";
|
|
2
|
+
import calculateSantaLuciaTm from "./calculateSantaLuciaTm";
|
|
3
|
+
|
|
4
|
+
// Expected outputs pinned for calculateSantaLuciaTm. Numeric results are the
// exact unrounded values the function returns; invalid inputs are reported as
// message strings rather than thrown, so those cases compare against the
// exact returned text.
describe("calculate Tm based on SantaLucia 1998", () => {
  it("should return the melting temperature of a given sequence, if no degenerate bases are present", () => {
    const cases = [
      ["AGCGGATAACAATTTCACACAGGA", 60.805947394707346],
      ["AGCGGATAACAATTTCAC", 50.301642635069356],
      // lower-case input is normalised before the calculation
      ["AGCGGATAACAATTTcac", 50.301642635069356],
      ["ataataccgcgccacatagc", 58.27798862992364],
      // trailing N bases
      ["AGCGGATAACAATACNNN", 40.92944342497407],
      ["AGCGGATAACAATACnnn", 40.92944342497407],
      // non-DNA characters
      [
        "AGCGGATAACAYZAKLPATAC",
        "Error calculating Tm for sequence AGCGGATAACAYZAKLPATAC. Error: Invalid sequence: contains non-DNA characters"
      ],
      // fewer than two bases
      [
        "A",
        "Error calculating Tm for sequence A. Error: Sequence too short: minimum length is 2 bases"
      ]
    ];

    // strictEqual instead of the legacy loose assert.equal, so a numeric
    // result can never be satisfied by a coercible string (or vice versa).
    for (const [input, expected] of cases) {
      assert.strictEqual(calculateSantaLuciaTm(input), expected);
    }
  });
});
|
package/src/diffUtils.js
CHANGED
|
@@ -103,7 +103,7 @@ export function getAcceptedChars({
|
|
|
103
103
|
isMixedRnaAndDna
|
|
104
104
|
} = {}) {
|
|
105
105
|
return isProtein
|
|
106
|
-
? `${extended_protein_letters.toLowerCase()}
|
|
106
|
+
? `${extended_protein_letters.toLowerCase()}`
|
|
107
107
|
: isOligo
|
|
108
108
|
? ambiguous_rna_letters.toLowerCase() + "t"
|
|
109
109
|
: isRna
|
|
@@ -186,6 +186,9 @@ export default function getAminoAcidDataForEachBaseOfDna(
|
|
|
186
186
|
optionalSubrangeRange,
|
|
187
187
|
isProteinSequence
|
|
188
188
|
) {
|
|
189
|
+
if (!originalSequenceString) {
|
|
190
|
+
return [];
|
|
191
|
+
}
|
|
189
192
|
// Obtain derived properties, see getTranslatedSequenceProperties
|
|
190
193
|
const {
|
|
191
194
|
sequenceString,
|
|
@@ -205,7 +208,7 @@ export default function getAminoAcidDataForEachBaseOfDna(
|
|
|
205
208
|
// Iterate over the DNA sequence length in increments of 3
|
|
206
209
|
for (let index = 0; index < sequenceStringLength; index += 3) {
|
|
207
210
|
let aminoAcid;
|
|
208
|
-
const aminoAcidIndex = index / 3;
|
|
211
|
+
const aminoAcidIndex = Math.floor(index / 3);
|
|
209
212
|
let codonPositionsInCDS;
|
|
210
213
|
let basesRead;
|
|
211
214
|
|
|
@@ -225,22 +225,14 @@ describe("getSequenceDataBetweenRange", () => {
|
|
|
225
225
|
end: 3
|
|
226
226
|
}
|
|
227
227
|
);
|
|
228
|
-
res.should.containSubset(
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
start: 0,
|
|
237
|
-
end: 1
|
|
238
|
-
}
|
|
239
|
-
],
|
|
240
|
-
name: "happy"
|
|
241
|
-
}
|
|
242
|
-
]
|
|
243
|
-
});
|
|
228
|
+
res.features.should.containSubset([
|
|
229
|
+
{
|
|
230
|
+
start: 0,
|
|
231
|
+
end: 1,
|
|
232
|
+
name: "happy"
|
|
233
|
+
}
|
|
234
|
+
]);
|
|
235
|
+
res.sequence.should.equal("gc");
|
|
244
236
|
});
|
|
245
237
|
it("feature with locations, non circular enclosing range", () => {
|
|
246
238
|
const res = getSequenceDataBetweenRange(
|
|
@@ -283,7 +275,7 @@ describe("getSequenceDataBetweenRange", () => {
|
|
|
283
275
|
]
|
|
284
276
|
});
|
|
285
277
|
});
|
|
286
|
-
it
|
|
278
|
+
it("feature with locations, non circular, non-fully enclosing range - it should trim the start/end correctly to match the location", () => {
|
|
287
279
|
const res = getSequenceDataBetweenRange(
|
|
288
280
|
{
|
|
289
281
|
sequence: "gggatgcatgca",
|
package/src/index.js
CHANGED
|
@@ -96,6 +96,8 @@ export { default as condensePairwiseAlignmentDifferences } from "./condensePairw
|
|
|
96
96
|
export { default as addGapsToSeqReads } from "./addGapsToSeqReads";
|
|
97
97
|
export { default as calculateNebTm } from "./calculateNebTm";
|
|
98
98
|
export { default as calculateNebTa } from "./calculateNebTa";
|
|
99
|
+
export { default as calculateSantaLuciaTm } from "./calculateSantaLuciaTm";
|
|
100
|
+
export { default as calculateEndStability } from "./calculateEndStability";
|
|
99
101
|
export { default as getDigestFragmentsForCutsites } from "./getDigestFragmentsForCutsites";
|
|
100
102
|
export { default as getDigestFragmentsForRestrictionEnzymes } from "./getDigestFragmentsForRestrictionEnzymes";
|
|
101
103
|
export { default as convertDnaCaretPositionOrRangeToAA } from "./convertDnaCaretPositionOrRangeToAA";
|
|
@@ -27,9 +27,10 @@ export default function insertSequenceDataAtPositionOrRange(
|
|
|
27
27
|
...options
|
|
28
28
|
});
|
|
29
29
|
const newSequenceData = cloneDeep(existingSequenceData);
|
|
30
|
-
const insertLength =
|
|
31
|
-
|
|
32
|
-
|
|
30
|
+
const insertLength =
|
|
31
|
+
sequenceDataToInsert.isProtein && sequenceDataToInsert.proteinSequence
|
|
32
|
+
? sequenceDataToInsert.proteinSequence.length * 3
|
|
33
|
+
: sequenceDataToInsert.sequence.length;
|
|
33
34
|
let caretPosition = caretPositionOrRange;
|
|
34
35
|
|
|
35
36
|
const isInsertSameLengthAsSelection =
|
|
@@ -187,7 +188,7 @@ function adjustAnnotationsToDelete(annotationsToBeAdjusted, range, maxLength) {
|
|
|
187
188
|
...newRange,
|
|
188
189
|
start: newLocations[0].start,
|
|
189
190
|
end: newLocations[newLocations.length - 1].end,
|
|
190
|
-
...(newLocations.length >
|
|
191
|
+
...(newLocations.length > 0 && { locations: newLocations })
|
|
191
192
|
};
|
|
192
193
|
} else {
|
|
193
194
|
return newRange;
|
package/src/proteinAlphabet.js
CHANGED
|
@@ -112,6 +112,15 @@ const proteinAlphabet = {
|
|
|
112
112
|
mass: 128.17228
|
|
113
113
|
},
|
|
114
114
|
|
|
115
|
+
O: {
|
|
116
|
+
value: "O",
|
|
117
|
+
name: "Pyrrolysine",
|
|
118
|
+
threeLettersName: "Pyl",
|
|
119
|
+
colorByFamily: "#FFC0CB",
|
|
120
|
+
color: "hsl(264.7, 100%, 69%)",
|
|
121
|
+
mass: 255.313
|
|
122
|
+
},
|
|
123
|
+
|
|
115
124
|
M: {
|
|
116
125
|
value: "M",
|
|
117
126
|
name: "Methionine",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import proteinAlphabet from "./proteinAlphabet";
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const initThreeLetterSequenceStringToAminoAcidMap = {
|
|
4
4
|
gct: proteinAlphabet.A,
|
|
5
5
|
gcc: proteinAlphabet.A,
|
|
6
6
|
gca: proteinAlphabet.A,
|
|
@@ -99,8 +99,80 @@ const threeLetterSequenceStringToAminoAcidMap = {
|
|
|
99
99
|
taa: proteinAlphabet["*"],
|
|
100
100
|
tag: proteinAlphabet["*"],
|
|
101
101
|
tga: proteinAlphabet["*"],
|
|
102
|
+
uaa: proteinAlphabet["*"],
|
|
103
|
+
uag: proteinAlphabet["*"],
|
|
104
|
+
uga: proteinAlphabet["*"],
|
|
102
105
|
"...": proteinAlphabet["."],
|
|
103
106
|
"---": proteinAlphabet["-"]
|
|
104
107
|
};
|
|
105
108
|
|
|
109
|
+
// Nucleotide symbol -> concrete bases it may stand for (DNA and RNA).
// Every symbol that covers T also covers U. X is treated like N.
// Key order is significant: Object.keys(IUPAC) is iterated elsewhere.
const IUPAC = {
  // unambiguous bases
  A: ["A"],
  C: ["C"],
  G: ["G"],
  T: ["T"],
  U: ["U"],

  // two-base ambiguity (T implies U as well)
  R: ["A", "G"],
  Y: ["C", "T", "U"],
  K: ["G", "T", "U"],
  M: ["A", "C"],
  S: ["G", "C"],
  W: ["A", "T", "U"],
  // three-base ambiguity
  B: ["C", "G", "T", "U"],
  D: ["A", "G", "T", "U"],
  H: ["A", "C", "T", "U"],
  V: ["A", "C", "G"],
  // any base
  N: ["A", "C", "G", "T", "U"],
  X: ["A", "C", "G", "T", "U"]
};
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
/**
 * Resolve a (possibly ambiguous) three-letter codon to a single amino acid.
 *
 * Each symbol is expanded through the IUPAC table (symbols not in the table
 * stand for themselves), every concrete codon is looked up in
 * initThreeLetterSequenceStringToAminoAcidMap — as written, then with u->t,
 * then with t->u — and the codon resolves only when all concrete codons are
 * found and agree on the same amino acid `value`.
 *
 * @param {string} threeLetterCodon - codon, any letter case
 * @returns {object|null} - the amino-acid entry of the first concrete codon,
 *   or null when any concrete codon is unknown or the candidates disagree
 */
function expandAndResolve(threeLetterCodon) {
  const lookUp = (codon) =>
    initThreeLetterSequenceStringToAminoAcidMap[codon] ??
    initThreeLetterSequenceStringToAminoAcidMap[codon.replace(/u/g, "t")] ??
    initThreeLetterSequenceStringToAminoAcidMap[codon.replace(/t/g, "u")];

  // Cartesian product of the per-position base choices, prefix-major order.
  const concreteCodons = threeLetterCodon
    .toUpperCase()
    .split("")
    .map((symbol) => IUPAC[symbol] || [symbol])
    .reduce(
      (prefixes, bases) =>
        prefixes.flatMap((prefix) => bases.map((base) => prefix + base)),
      [""]
    );

  let resolved = null;
  for (const concrete of concreteCodons) {
    const candidate = lookUp(concrete.toLowerCase());
    if (!candidate) {
      return null; // at least one expansion is not a known codon
    }
    if (resolved === null) {
      resolved = candidate;
    } else if (resolved.value !== candidate.value) {
      return null; // expansions translate to different amino acids
    }
  }
  return resolved;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Extend the base codon table with every three-symbol combination of IUPAC
 * codes that resolves to exactly one amino acid.
 *
 * The base table (initThreeLetterSequenceStringToAminoAcidMap) is extended in
 * place and returned — no copy is made. Keys are lower-case; combinations
 * already present are left untouched and unresolvable ones are not added.
 *
 * @returns {object} - the extended codon -> amino-acid table
 */
function getCodonToAminoAcidMap() {
  const codonTable = initThreeLetterSequenceStringToAminoAcidMap;
  const symbols = Object.keys(IUPAC);

  symbols.forEach((first) => {
    symbols.forEach((second) => {
      symbols.forEach((third) => {
        const triplet = first + second + third;
        const key = triplet.toLowerCase();
        if (codonTable[key]) {
          return; // already defined
        }
        const aminoAcid = expandAndResolve(triplet);
        if (aminoAcid) {
          codonTable[key] = aminoAcid;
        }
      });
    });
  });

  return codonTable;
}

const threeLetterSequenceStringToAminoAcidMap = getCodonToAminoAcidMap();
|
|
177
|
+
|
|
106
178
|
export default threeLetterSequenceStringToAminoAcidMap;
|
|
@@ -9,6 +9,7 @@ import tidyUpAnnotation from "./tidyUpAnnotation";
|
|
|
9
9
|
import getDegenerateDnaStringFromAaString from "./getDegenerateDnaStringFromAAString";
|
|
10
10
|
import { getFeatureTypes } from "./featureTypesAndColors";
|
|
11
11
|
import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
|
|
12
|
+
import { expandOrContractRangeByLength } from "@teselagen/range-utils";
|
|
12
13
|
|
|
13
14
|
export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
14
15
|
const {
|
|
@@ -137,14 +138,21 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
137
138
|
//filter off cds translations
|
|
138
139
|
return [];
|
|
139
140
|
}
|
|
140
|
-
|
|
141
|
-
|
|
141
|
+
const codonStart = translation?.notes?.codon_start?.[0] - 1 || 0;
|
|
142
|
+
const expandedRange = expandOrContractRangeByLength(
|
|
143
|
+
translation,
|
|
144
|
+
-codonStart,
|
|
145
|
+
true,
|
|
146
|
+
seqData.sequence.length
|
|
147
|
+
);
|
|
148
|
+
if (!expandedRange.aminoAcids && !seqData.noSequence) {
|
|
149
|
+
expandedRange.aminoAcids = getAminoAcidDataForEachBaseOfDna(
|
|
142
150
|
seqData.sequence,
|
|
143
|
-
|
|
144
|
-
|
|
151
|
+
expandedRange.forward,
|
|
152
|
+
expandedRange
|
|
145
153
|
);
|
|
146
154
|
}
|
|
147
|
-
return
|
|
155
|
+
return expandedRange;
|
|
148
156
|
});
|
|
149
157
|
}
|
|
150
158
|
|
|
@@ -879,6 +879,30 @@ declare const threeLetterSequenceStringToAminoAcidMap: {
|
|
|
879
879
|
color: string;
|
|
880
880
|
mass: number;
|
|
881
881
|
};
|
|
882
|
+
uaa: {
|
|
883
|
+
value: string;
|
|
884
|
+
name: string;
|
|
885
|
+
threeLettersName: string;
|
|
886
|
+
colorByFamily: string;
|
|
887
|
+
color: string;
|
|
888
|
+
mass: number;
|
|
889
|
+
};
|
|
890
|
+
uag: {
|
|
891
|
+
value: string;
|
|
892
|
+
name: string;
|
|
893
|
+
threeLettersName: string;
|
|
894
|
+
colorByFamily: string;
|
|
895
|
+
color: string;
|
|
896
|
+
mass: number;
|
|
897
|
+
};
|
|
898
|
+
uga: {
|
|
899
|
+
value: string;
|
|
900
|
+
name: string;
|
|
901
|
+
threeLettersName: string;
|
|
902
|
+
colorByFamily: string;
|
|
903
|
+
color: string;
|
|
904
|
+
mass: number;
|
|
905
|
+
};
|
|
882
906
|
"...": {
|
|
883
907
|
value: string;
|
|
884
908
|
name: string;
|