@teselagen/sequence-utils 0.3.32-beta.2 → 0.3.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/diffUtils.d.ts +3 -3
- package/findOrfsInPlasmid.d.ts +1 -1
- package/getOrfsFromSequence.d.ts +1 -1
- package/index.cjs +1557 -3929
- package/index.js +1557 -3929
- package/index.umd.cjs +1557 -3929
- package/package.json +5 -9
- package/proteinAlphabet.d.ts +8 -0
- package/src/computeDigestFragments.js +2 -2
- package/src/cutSequenceByRestrictionEnzyme.js +2 -2
- package/src/diffUtils.js +1 -1
- package/src/filterSequenceString.js +1 -1
- package/src/generateAnnotations.js +2 -2
- package/src/getAminoAcidDataForEachBaseOfDna.js +4 -1
- package/src/getOrfsFromSequence.js +2 -2
- package/src/getSequenceDataBetweenRange.test.js +9 -17
- package/src/insertSequenceDataAtPositionOrRange.js +5 -4
- package/src/proteinAlphabet.js +9 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +73 -1
- package/src/tidyUpAnnotation.js +3 -3
- package/src/tidyUpSequenceData.js +2 -2
- package/threeLetterSequenceStringToAminoAcidMap.d.ts +24 -0
package/package.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@teselagen/sequence-utils",
|
|
3
|
-
"version": "0.3.32-beta.2",
|
|
3
|
+
"version": "0.3.35",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"dependencies": {
|
|
6
|
-
"@teselagen/range-utils": "0.3.14-beta.1",
|
|
7
6
|
"escape-string-regexp": "5.0.0",
|
|
8
|
-
"jsondiffpatch": "0.
|
|
7
|
+
"jsondiffpatch": "0.7.3",
|
|
8
|
+
"string-splice": "^1.3.0",
|
|
9
9
|
"lodash-es": "^4.17.21",
|
|
10
|
-
"
|
|
11
|
-
"
|
|
10
|
+
"shortid": "2.2.16",
|
|
11
|
+
"@teselagen/range-utils": "0.3.13"
|
|
12
12
|
},
|
|
13
13
|
"exports": {
|
|
14
14
|
".": {
|
|
@@ -16,9 +16,5 @@
|
|
|
16
16
|
"require": "./index.cjs"
|
|
17
17
|
}
|
|
18
18
|
},
|
|
19
|
-
"volta": {
|
|
20
|
-
"node": "18.18.0",
|
|
21
|
-
"yarn": "1.22.22"
|
|
22
|
-
},
|
|
23
19
|
"license": "MIT"
|
|
24
20
|
}
|
package/proteinAlphabet.d.ts
CHANGED
|
@@ -108,6 +108,14 @@ declare const proteinAlphabet: {
|
|
|
108
108
|
color: string;
|
|
109
109
|
mass: number;
|
|
110
110
|
};
|
|
111
|
+
O: {
|
|
112
|
+
value: string;
|
|
113
|
+
name: string;
|
|
114
|
+
threeLettersName: string;
|
|
115
|
+
colorByFamily: string;
|
|
116
|
+
color: string;
|
|
117
|
+
mass: number;
|
|
118
|
+
};
|
|
111
119
|
M: {
|
|
112
120
|
value: string;
|
|
113
121
|
name: string;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import shortid from "shortid";
|
|
2
2
|
import { flatMap, cloneDeep } from "lodash-es";
|
|
3
3
|
import {
|
|
4
4
|
normalizePositionByRangeLength,
|
|
@@ -27,7 +27,7 @@ function computeDigestFragments({
|
|
|
27
27
|
});
|
|
28
28
|
if (!circular && cutsites.length) {
|
|
29
29
|
sortedCutsites.push({
|
|
30
|
-
id: "seqTerm_" +
|
|
30
|
+
id: "seqTerm_" + shortid(),
|
|
31
31
|
start: 0,
|
|
32
32
|
end: 0,
|
|
33
33
|
overhangBps: "",
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { assign } from "lodash-es";
|
|
2
|
-
import
|
|
2
|
+
import shortid from "shortid";
|
|
3
3
|
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
4
4
|
|
|
5
5
|
import {
|
|
@@ -268,7 +268,7 @@ function cutSequence(
|
|
|
268
268
|
const overhangBps = getSequenceWithinRange(cutRange, originalSequence);
|
|
269
269
|
|
|
270
270
|
restrictionCutSite = {
|
|
271
|
-
id:
|
|
271
|
+
id: shortid(),
|
|
272
272
|
start,
|
|
273
273
|
end,
|
|
274
274
|
topSnipPosition,
|
package/src/diffUtils.js
CHANGED
|
@@ -103,7 +103,7 @@ export function getAcceptedChars({
|
|
|
103
103
|
isMixedRnaAndDna
|
|
104
104
|
} = {}) {
|
|
105
105
|
return isProtein
|
|
106
|
-
? `${extended_protein_letters.toLowerCase()}
|
|
106
|
+
? `${extended_protein_letters.toLowerCase()}`
|
|
107
107
|
: isOligo
|
|
108
108
|
? ambiguous_rna_letters.toLowerCase() + "t"
|
|
109
109
|
: isRna
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { generateRandomRange } from "@teselagen/range-utils";
|
|
2
|
-
import
|
|
2
|
+
import shortid from "shortid";
|
|
3
3
|
|
|
4
4
|
function generateAnnotations(
|
|
5
5
|
numberOfAnnotationsToGenerate,
|
|
@@ -21,7 +21,7 @@ function generateAnnotation(start, end, maxLength) {
|
|
|
21
21
|
...range,
|
|
22
22
|
name: getRandomInt(0, 100000).toString(),
|
|
23
23
|
type: "misc_feature",
|
|
24
|
-
id:
|
|
24
|
+
id: shortid(),
|
|
25
25
|
forward: Math.random() > 0.5,
|
|
26
26
|
notes: {}
|
|
27
27
|
};
|
|
@@ -186,6 +186,9 @@ export default function getAminoAcidDataForEachBaseOfDna(
|
|
|
186
186
|
optionalSubrangeRange,
|
|
187
187
|
isProteinSequence
|
|
188
188
|
) {
|
|
189
|
+
if (!originalSequenceString) {
|
|
190
|
+
return [];
|
|
191
|
+
}
|
|
189
192
|
// Obtain derived properties, see getTranslatedSequenceProperties
|
|
190
193
|
const {
|
|
191
194
|
sequenceString,
|
|
@@ -205,7 +208,7 @@ export default function getAminoAcidDataForEachBaseOfDna(
|
|
|
205
208
|
// Iterate over the DNA sequence length in increments of 3
|
|
206
209
|
for (let index = 0; index < sequenceStringLength; index += 3) {
|
|
207
210
|
let aminoAcid;
|
|
208
|
-
const aminoAcidIndex = index / 3;
|
|
211
|
+
const aminoAcidIndex = Math.floor(index / 3);
|
|
209
212
|
let codonPositionsInCDS;
|
|
210
213
|
let basesRead;
|
|
211
214
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import shortid from "shortid";
|
|
2
2
|
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
3
3
|
|
|
4
4
|
/**
|
|
@@ -61,7 +61,7 @@ export default function getOrfsFromSequence(options) {
|
|
|
61
61
|
forward: forward,
|
|
62
62
|
annotationTypePlural: "orfs",
|
|
63
63
|
isOrf: true,
|
|
64
|
-
id:
|
|
64
|
+
id: shortid()
|
|
65
65
|
});
|
|
66
66
|
}
|
|
67
67
|
}
|
|
@@ -225,22 +225,14 @@ describe("getSequenceDataBetweenRange", () => {
|
|
|
225
225
|
end: 3
|
|
226
226
|
}
|
|
227
227
|
);
|
|
228
|
-
res.should.containSubset(
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
start: 0,
|
|
237
|
-
end: 1
|
|
238
|
-
}
|
|
239
|
-
],
|
|
240
|
-
name: "happy"
|
|
241
|
-
}
|
|
242
|
-
]
|
|
243
|
-
});
|
|
228
|
+
res.features.should.containSubset([
|
|
229
|
+
{
|
|
230
|
+
start: 0,
|
|
231
|
+
end: 1,
|
|
232
|
+
name: "happy"
|
|
233
|
+
}
|
|
234
|
+
]);
|
|
235
|
+
res.sequence.should.equal("gc");
|
|
244
236
|
});
|
|
245
237
|
it("feature with locations, non circular enclosing range", () => {
|
|
246
238
|
const res = getSequenceDataBetweenRange(
|
|
@@ -283,7 +275,7 @@ describe("getSequenceDataBetweenRange", () => {
|
|
|
283
275
|
]
|
|
284
276
|
});
|
|
285
277
|
});
|
|
286
|
-
it
|
|
278
|
+
it("feature with locations, non circular, non-fully enclosing range - it should trim the start/end correctly to match the location", () => {
|
|
287
279
|
const res = getSequenceDataBetweenRange(
|
|
288
280
|
{
|
|
289
281
|
sequence: "gggatgcatgca",
|
|
@@ -27,9 +27,10 @@ export default function insertSequenceDataAtPositionOrRange(
|
|
|
27
27
|
...options
|
|
28
28
|
});
|
|
29
29
|
const newSequenceData = cloneDeep(existingSequenceData);
|
|
30
|
-
const insertLength =
|
|
31
|
-
|
|
32
|
-
|
|
30
|
+
const insertLength =
|
|
31
|
+
sequenceDataToInsert.isProtein && sequenceDataToInsert.proteinSequence
|
|
32
|
+
? sequenceDataToInsert.proteinSequence.length * 3
|
|
33
|
+
: sequenceDataToInsert.sequence.length;
|
|
33
34
|
let caretPosition = caretPositionOrRange;
|
|
34
35
|
|
|
35
36
|
const isInsertSameLengthAsSelection =
|
|
@@ -187,7 +188,7 @@ function adjustAnnotationsToDelete(annotationsToBeAdjusted, range, maxLength) {
|
|
|
187
188
|
...newRange,
|
|
188
189
|
start: newLocations[0].start,
|
|
189
190
|
end: newLocations[newLocations.length - 1].end,
|
|
190
|
-
...(newLocations.length >
|
|
191
|
+
...(newLocations.length > 0 && { locations: newLocations })
|
|
191
192
|
};
|
|
192
193
|
} else {
|
|
193
194
|
return newRange;
|
package/src/proteinAlphabet.js
CHANGED
|
@@ -112,6 +112,15 @@ const proteinAlphabet = {
|
|
|
112
112
|
mass: 128.17228
|
|
113
113
|
},
|
|
114
114
|
|
|
115
|
+
O: {
|
|
116
|
+
value: "O",
|
|
117
|
+
name: "Pyrrolysine",
|
|
118
|
+
threeLettersName: "Pyl",
|
|
119
|
+
colorByFamily: "#FFC0CB",
|
|
120
|
+
color: "hsl(264.7, 100%, 69%)",
|
|
121
|
+
mass: 255.313
|
|
122
|
+
},
|
|
123
|
+
|
|
115
124
|
M: {
|
|
116
125
|
value: "M",
|
|
117
126
|
name: "Methionine",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import proteinAlphabet from "./proteinAlphabet";
|
|
2
2
|
|
|
3
|
-
const threeLetterSequenceStringToAminoAcidMap = {
|
|
3
|
+
const initThreeLetterSequenceStringToAminoAcidMap = {
|
|
4
4
|
gct: proteinAlphabet.A,
|
|
5
5
|
gcc: proteinAlphabet.A,
|
|
6
6
|
gca: proteinAlphabet.A,
|
|
@@ -99,8 +99,80 @@ const threeLetterSequenceStringToAminoAcidMap = {
|
|
|
99
99
|
taa: proteinAlphabet["*"],
|
|
100
100
|
tag: proteinAlphabet["*"],
|
|
101
101
|
tga: proteinAlphabet["*"],
|
|
102
|
+
uaa: proteinAlphabet["*"],
|
|
103
|
+
uag: proteinAlphabet["*"],
|
|
104
|
+
uga: proteinAlphabet["*"],
|
|
102
105
|
"...": proteinAlphabet["."],
|
|
103
106
|
"---": proteinAlphabet["-"]
|
|
104
107
|
};
|
|
105
108
|
|
|
109
|
+
// IUPAC nucleotide codes (DNA/RNA) with U awareness
|
|
110
|
+
const IUPAC = {
|
|
111
|
+
A: ["A"],
|
|
112
|
+
C: ["C"],
|
|
113
|
+
G: ["G"],
|
|
114
|
+
T: ["T"],
|
|
115
|
+
U: ["U"],
|
|
116
|
+
|
|
117
|
+
R: ["A", "G"],
|
|
118
|
+
Y: ["C", "T", "U"],
|
|
119
|
+
K: ["G", "T", "U"],
|
|
120
|
+
M: ["A", "C"],
|
|
121
|
+
S: ["G", "C"],
|
|
122
|
+
W: ["A", "T", "U"],
|
|
123
|
+
B: ["C", "G", "T", "U"],
|
|
124
|
+
D: ["A", "G", "T", "U"],
|
|
125
|
+
H: ["A", "C", "T", "U"],
|
|
126
|
+
V: ["A", "C", "G"],
|
|
127
|
+
N: ["A", "C", "G", "T", "U"],
|
|
128
|
+
X: ["A", "C", "G", "T", "U"]
|
|
129
|
+
};
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
function expandAndResolve(threeLetterCodon) {
|
|
133
|
+
const chars = threeLetterCodon.toUpperCase().split("");
|
|
134
|
+
const picks = chars.map((c) => IUPAC[c] || [c]);
|
|
135
|
+
|
|
136
|
+
let allPossibleThreeLetterCodons = [""];
|
|
137
|
+
for (const set of picks) {
|
|
138
|
+
const next = [];
|
|
139
|
+
for (const prefix of allPossibleThreeLetterCodons) for (const b of set) next.push(prefix + b);
|
|
140
|
+
allPossibleThreeLetterCodons = next;
|
|
141
|
+
}
|
|
142
|
+
let foundAminoAcid = null;
|
|
143
|
+
for (const codon of allPossibleThreeLetterCodons) {
|
|
144
|
+
const lowerCodon = codon.toLowerCase();
|
|
145
|
+
const aminoAcidObj = initThreeLetterSequenceStringToAminoAcidMap[lowerCodon] ?? initThreeLetterSequenceStringToAminoAcidMap[lowerCodon.replace(/u/g, "t")] ?? initThreeLetterSequenceStringToAminoAcidMap[lowerCodon.replace(/t/g, "u")];
|
|
146
|
+
if (aminoAcidObj) {
|
|
147
|
+
if (!foundAminoAcid) {
|
|
148
|
+
foundAminoAcid = aminoAcidObj;
|
|
149
|
+
} else if (foundAminoAcid.value !== aminoAcidObj.value ) {
|
|
150
|
+
return null
|
|
151
|
+
}
|
|
152
|
+
} else {
|
|
153
|
+
return null;
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
return foundAminoAcid;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
function getCodonToAminoAcidMap() {
|
|
160
|
+
const map = initThreeLetterSequenceStringToAminoAcidMap;
|
|
161
|
+
// generate all IUPAC 3-mers
|
|
162
|
+
const codes = Object.keys(IUPAC);
|
|
163
|
+
for (const a of codes)
|
|
164
|
+
for (const b of codes)
|
|
165
|
+
for (const c of codes) {
|
|
166
|
+
const codon = a + b + c;
|
|
167
|
+
const lowerCodon = codon.toLowerCase();
|
|
168
|
+
if (map[lowerCodon]) continue;
|
|
169
|
+
const aminoAcidObj = expandAndResolve(codon);
|
|
170
|
+
if (aminoAcidObj) map[lowerCodon] = aminoAcidObj;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
return map;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
const threeLetterSequenceStringToAminoAcidMap = getCodonToAminoAcidMap();
|
|
177
|
+
|
|
106
178
|
export default threeLetterSequenceStringToAminoAcidMap;
|
package/src/tidyUpAnnotation.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { cloneDeep, get, some } from "lodash-es";
|
|
2
2
|
import { getFeatureToColorMap, getFeatureTypes } from "./featureTypesAndColors";
|
|
3
|
-
import
|
|
3
|
+
import shortid from "shortid";
|
|
4
4
|
|
|
5
5
|
export default function tidyUpAnnotation(
|
|
6
6
|
_annotation,
|
|
@@ -34,10 +34,10 @@ export default function tidyUpAnnotation(
|
|
|
34
34
|
annotation.name = "Untitled annotation";
|
|
35
35
|
}
|
|
36
36
|
if (provideNewIdsForAnnotations) {
|
|
37
|
-
annotation.id =
|
|
37
|
+
annotation.id = shortid();
|
|
38
38
|
}
|
|
39
39
|
if (!annotation.id && annotation.id !== 0 && !doNotProvideIdsForAnnotations) {
|
|
40
|
-
annotation.id =
|
|
40
|
+
annotation.id = shortid();
|
|
41
41
|
messages.push(
|
|
42
42
|
"Unable to detect valid ID for annotation, setting ID to " + annotation.id
|
|
43
43
|
);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// tnrtodo: figure out where to insert this validation exactly..
|
|
2
|
-
import
|
|
2
|
+
import shortid from "shortid";
|
|
3
3
|
|
|
4
4
|
import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
|
|
5
5
|
import { cloneDeep, flatMap } from "lodash-es";
|
|
@@ -155,7 +155,7 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
155
155
|
if (item.id || item.id === 0) {
|
|
156
156
|
itemId = item.id;
|
|
157
157
|
} else {
|
|
158
|
-
itemId =
|
|
158
|
+
itemId = shortid();
|
|
159
159
|
if (!doNotProvideIdsForAnnotations) {
|
|
160
160
|
item.id = itemId; //assign the newly created id to the item
|
|
161
161
|
}
|
|
@@ -879,6 +879,30 @@ declare const threeLetterSequenceStringToAminoAcidMap: {
|
|
|
879
879
|
color: string;
|
|
880
880
|
mass: number;
|
|
881
881
|
};
|
|
882
|
+
uaa: {
|
|
883
|
+
value: string;
|
|
884
|
+
name: string;
|
|
885
|
+
threeLettersName: string;
|
|
886
|
+
colorByFamily: string;
|
|
887
|
+
color: string;
|
|
888
|
+
mass: number;
|
|
889
|
+
};
|
|
890
|
+
uag: {
|
|
891
|
+
value: string;
|
|
892
|
+
name: string;
|
|
893
|
+
threeLettersName: string;
|
|
894
|
+
colorByFamily: string;
|
|
895
|
+
color: string;
|
|
896
|
+
mass: number;
|
|
897
|
+
};
|
|
898
|
+
uga: {
|
|
899
|
+
value: string;
|
|
900
|
+
name: string;
|
|
901
|
+
threeLettersName: string;
|
|
902
|
+
colorByFamily: string;
|
|
903
|
+
color: string;
|
|
904
|
+
mass: number;
|
|
905
|
+
};
|
|
882
906
|
"...": {
|
|
883
907
|
value: string;
|
|
884
908
|
name: string;
|