@teselagen/sequence-utils 0.3.37 → 0.3.38-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DNAComplementMap.d.ts +1 -1
- package/addGapsToSeqReads.d.ts +16 -3
- package/adjustAnnotationsToInsert.d.ts +2 -1
- package/adjustBpsToReplaceOrInsert.d.ts +2 -1
- package/aliasedEnzymesByName.d.ts +37 -1
- package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
- package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
- package/annotateSingleSeq.d.ts +5 -4
- package/annotationTypes.d.ts +2 -2
- package/autoAnnotate.d.ts +17 -8
- package/bioData.d.ts +10 -58
- package/calculateEndStability.d.ts +1 -1
- package/calculateNebTa.d.ts +6 -1
- package/calculateNebTm.d.ts +6 -4
- package/calculatePercentGC.d.ts +1 -1
- package/calculateSantaLuciaTm.d.ts +28 -114
- package/calculateTm.d.ts +13 -1
- package/computeDigestFragments.d.ts +30 -24
- package/condensePairwiseAlignmentDifferences.d.ts +1 -1
- package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
- package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
- package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
- package/defaultEnzymesByName.d.ts +2 -1
- package/degenerateDnaToAminoAcidMap.d.ts +1 -1
- package/degenerateRnaToAminoAcidMap.d.ts +1 -1
- package/deleteSequenceDataAtRange.d.ts +2 -1
- package/diffUtils.d.ts +9 -7
- package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
- package/featureTypesAndColors.d.ts +19 -6
- package/filterSequenceString.d.ts +14 -10
- package/findApproxMatches.d.ts +7 -1
- package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
- package/findOrfsInPlasmid.d.ts +2 -11
- package/findSequenceMatches.d.ts +11 -1
- package/generateAnnotations.d.ts +2 -1
- package/generateSequenceData.d.ts +8 -13
- package/getAllInsertionsInSeqReads.d.ts +11 -1
- package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
- package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
- package/getAminoAcidStringFromSequenceString.d.ts +3 -1
- package/getCodonRangeForAASliver.d.ts +3 -4
- package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getComplementSequenceAndAnnotations.d.ts +5 -1
- package/getComplementSequenceString.d.ts +1 -1
- package/getCutsiteType.d.ts +2 -1
- package/getCutsitesFromSequence.d.ts +2 -1
- package/getDegenerateDnaStringFromAAString.d.ts +1 -1
- package/getDegenerateRnaStringFromAAString.d.ts +1 -1
- package/getDigestFragmentsForCutsites.d.ts +4 -1
- package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
- package/getInsertBetweenVals.d.ts +2 -1
- package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
- package/getOrfsFromSequence.d.ts +17 -11
- package/getOverlapBetweenTwoSequences.d.ts +2 -1
- package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
- package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAnnotation.d.ts +11 -1
- package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
- package/getReverseComplementSequenceString.d.ts +1 -1
- package/getReverseSequenceString.d.ts +1 -1
- package/getSequenceDataBetweenRange.d.ts +9 -1
- package/getVirtualDigest.d.ts +11 -10
- package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
- package/index.cjs +732 -483
- package/index.d.ts +8 -5
- package/index.js +732 -483
- package/index.umd.cjs +732 -483
- package/insertGapsIntoRefSeq.d.ts +2 -1
- package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
- package/isEnzymeType2S.d.ts +2 -1
- package/mapAnnotationsToRows.d.ts +9 -1
- package/package.json +9 -6
- package/prepareCircularViewData.d.ts +2 -1
- package/prepareRowData.d.ts +7 -3
- package/proteinAlphabet.d.ts +1 -1
- package/rotateBpsToPosition.d.ts +1 -1
- package/rotateSequenceDataToPosition.d.ts +3 -1
- package/shiftAnnotationsByLen.d.ts +4 -3
- package/src/DNAComplementMap.ts +32 -0
- package/src/addGapsToSeqReads.ts +436 -0
- package/src/adjustAnnotationsToInsert.ts +20 -0
- package/src/adjustBpsToReplaceOrInsert.ts +73 -0
- package/src/aliasedEnzymesByName.ts +7366 -0
- package/src/aminoAcidToDegenerateDnaMap.ts +32 -0
- package/src/aminoAcidToDegenerateRnaMap.ts +32 -0
- package/src/annotateSingleSeq.ts +37 -0
- package/src/annotationTypes.ts +23 -0
- package/src/autoAnnotate.test.js +0 -1
- package/src/autoAnnotate.ts +290 -0
- package/src/bioData.ts +65 -0
- package/src/calculateEndStability.ts +91 -0
- package/src/calculateNebTa.ts +46 -0
- package/src/calculateNebTm.ts +132 -0
- package/src/calculatePercentGC.ts +3 -0
- package/src/calculateSantaLuciaTm.ts +184 -0
- package/src/calculateTm.ts +242 -0
- package/src/computeDigestFragments.ts +238 -0
- package/src/condensePairwiseAlignmentDifferences.ts +85 -0
- package/src/convertAACaretPositionOrRangeToDna.ts +28 -0
- package/src/convertDnaCaretPositionOrRangeToAA.ts +28 -0
- package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
- package/src/defaultEnzymesByName.ts +280 -0
- package/src/degenerateDnaToAminoAcidMap.ts +5 -0
- package/src/degenerateRnaToAminoAcidMap.ts +5 -0
- package/src/deleteSequenceDataAtRange.ts +13 -0
- package/src/diffUtils.ts +80 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
- package/src/featureTypesAndColors.ts +167 -0
- package/src/filterSequenceString.ts +153 -0
- package/src/findApproxMatches.ts +58 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.ts +43 -0
- package/src/findOrfsInPlasmid.js +6 -1
- package/src/findOrfsInPlasmid.ts +31 -0
- package/src/findSequenceMatches.ts +154 -0
- package/src/generateAnnotations.ts +39 -0
- package/src/generateSequenceData.ts +212 -0
- package/src/getAllInsertionsInSeqReads.ts +100 -0
- package/src/getAminoAcidDataForEachBaseOfDna.ts +305 -0
- package/src/getAminoAcidFromSequenceTriplet.ts +27 -0
- package/src/getAminoAcidStringFromSequenceString.ts +36 -0
- package/src/getCodonRangeForAASliver.ts +73 -0
- package/src/getComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getComplementSequenceAndAnnotations.ts +25 -0
- package/src/getComplementSequenceString.ts +23 -0
- package/src/getCutsiteType.ts +18 -0
- package/src/getCutsitesFromSequence.ts +22 -0
- package/src/getDegenerateDnaStringFromAAString.ts +15 -0
- package/src/getDegenerateRnaStringFromAAString.ts +15 -0
- package/src/getDigestFragmentsForCutsites.ts +126 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
- package/src/getInsertBetweenVals.ts +31 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +40 -0
- package/src/getMassOfAaString.ts +29 -0
- package/src/getOrfsFromSequence.ts +132 -0
- package/src/getOverlapBetweenTwoSequences.ts +30 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.ts +149 -0
- package/src/getReverseAminoAcidStringFromSequenceString.ts +22 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getReverseComplementAnnotation.ts +33 -0
- package/src/getReverseComplementSequenceAndAnnotations.ts +46 -0
- package/src/getReverseComplementSequenceString.ts +18 -0
- package/src/getReverseSequenceString.ts +12 -0
- package/src/getSequenceDataBetweenRange.ts +154 -0
- package/src/getVirtualDigest.ts +139 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.ts +39 -0
- package/src/index.test.ts +43 -0
- package/src/index.ts +111 -0
- package/src/insertGapsIntoRefSeq.ts +43 -0
- package/src/insertSequenceDataAtPosition.ts +2 -0
- package/src/insertSequenceDataAtPositionOrRange.ts +328 -0
- package/src/isEnzymeType2S.ts +5 -0
- package/src/mapAnnotationsToRows.ts +256 -0
- package/src/prepareCircularViewData.ts +24 -0
- package/src/prepareRowData.ts +61 -0
- package/src/prepareRowData_output1.json +1 -0
- package/src/proteinAlphabet.ts +271 -0
- package/src/rotateBpsToPosition.ts +12 -0
- package/src/rotateSequenceDataToPosition.ts +54 -0
- package/src/shiftAnnotationsByLen.ts +24 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.ts +198 -0
- package/src/tidyUpAnnotation.ts +205 -0
- package/src/tidyUpSequenceData.ts +213 -0
- package/src/types.ts +109 -0
- package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
- package/tidyUpAnnotation.d.ts +13 -11
- package/tidyUpSequenceData.d.ts +15 -1
- package/types.d.ts +105 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
 * Lookup table from a one-letter amino-acid code (lowercase key) to a
 * three-letter degenerate DNA codon written with lowercase IUPAC ambiguity
 * letters. Gap ("-"), "." and stop ("*") symbols have entries as well.
 *
 * Several entries are deliberately broader than the exact codon set so that a
 * single three-letter pattern can be used per residue (see inline notes).
 */
const aminoAcidToDegenerateDnaMap: Record<string, string> = {
  "-": "---",
  ".": "...",
  "*": "trr",
  a: "gcn",
  b: "ray", // D or N => aay + gay = ray
  c: "tgy",
  d: "gay",
  e: "gar",
  f: "tty",
  g: "ggn",
  h: "cay",
  i: "ath",
  j: "htn", // L or I: ytn + ath => htn
  k: "aar",
  l: "ytn", // YTR, CTN => Y=CT, N=AGCT
  m: "atg",
  n: "aay",
  o: "tag", // Pyrrolysine, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2933860/
  p: "ccn",
  q: "car",
  r: "mgn", // CGN, MGR => M=AC, N=AGCT
  s: "wsn", // TCN, AGY => W=AT, S=CG, N=AGCT
  t: "acn",
  u: "tga", // Selenocysteine, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2933860/
  v: "gtn",
  w: "tgg",
  x: "nnn", // unknown amino acid
  y: "tay",
  z: "sar" // E or Q => gar + car = sar
};
export default aminoAcidToDegenerateDnaMap;
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
 * Lookup table from a one-letter amino-acid code (lowercase key) to a
 * three-letter degenerate RNA codon written with lowercase IUPAC ambiguity
 * letters. It mirrors the DNA table with every thymine ("t") written as
 * uracil ("u").
 */
const aminoAcidToDegenerateRnaMap = {
  "-": "---",
  ".": "...",
  // Stop codons (uaa / uag / uga). This entry previously read "trr", i.e. it
  // still carried the DNA base "t" while every other entry used "u".
  "*": "urr",
  a: "gcn",
  b: "ray", // D or N => aay + gay = ray
  c: "ugy",
  d: "gay",
  e: "gar",
  f: "uuy",
  g: "ggn",
  h: "cay",
  i: "auh",
  j: "hun", // L or I: yun + auh => hun
  k: "aar",
  l: "yun",
  m: "aug",
  n: "aay",
  o: "uag", // Pyrrolysine
  p: "ccn",
  q: "car",
  r: "mgn",
  s: "wsn",
  t: "acn",
  u: "uga", // Selenocysteine
  v: "gun",
  w: "ugg",
  x: "nnn", // unknown amino acid
  y: "uay",
  z: "sar" // E or Q => gar + car = sar
};
export default aminoAcidToDegenerateRnaMap;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { autoAnnotate } from "./autoAnnotate";
|
|
2
|
+
import { SequenceData } from "./types";
|
|
3
|
+
|
|
4
|
+
/**
 * Searches `fullSeq` for occurrences of `searchSeq` by delegating to
 * `autoAnnotate` with a single target sequence and a single query.
 *
 * Ids fall back to the placeholders "fullSeqId" / "searchSeqId" when the
 * inputs carry none. Existing `features` of `fullSeq` are passed along as the
 * target's current annotations, and name comparison is disabled.
 *
 * @returns `{ matches }` — whatever `autoAnnotate` reported for the target
 *   sequence, or an empty array when it reported nothing.
 */
function annotateSingleSeq({
  fullSeq,
  searchSeq
}: {
  fullSeq: SequenceData;
  searchSeq: SequenceData;
}) {
  const targetId = fullSeq.id || "fullSeqId";
  const queryId = searchSeq.id || "searchSeqId";

  const found = autoAnnotate({
    seqsToAnnotateById: {
      [targetId]: {
        sequence: fullSeq.sequence,
        circular: fullSeq.circular,
        annotations: fullSeq.features || []
      }
    },
    annotationsToCheckById: {
      [queryId]: {
        ...searchSeq,
        id: queryId
      }
    },
    compareName: false
  });

  // Nothing reported for the target sequence.
  if (!found || !found[targetId]) {
    return { matches: [] };
  }
  return {
    matches: found[targetId]
  };
}
export default annotateSingleSeq;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Names of the annotation collections handled by this package, in the order
 * listed here.
 */
export const annotationTypes = [
  "features",
  "warnings",
  "assemblyPieces",
  "lineageAnnotations",
  "parts",
  "cutsites",
  "orfs",
  "translations",
  "primers",
  "guides"
];

/**
 * The entries of `annotationTypes` other than "cutsites" and "orfs".
 * Note that the ordering differs from `annotationTypes` ("warnings" comes
 * after "lineageAnnotations" here).
 * NOTE(review): presumably the types a user can edit directly — confirm
 * against callers.
 */
export const modifiableTypes = [
  "features",
  "assemblyPieces",
  "lineageAnnotations",
  "warnings",
  "parts",
  "translations",
  "primers",
  "guides"
];
|
package/src/autoAnnotate.test.js
CHANGED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
/* Copyright (C) 2018 TeselaGen Biotechnology, Inc. */
|
|
2
|
+
import { forEach, omitBy } from "lodash-es";
|
|
3
|
+
import { ambiguous_dna_values } from "./bioData";
|
|
4
|
+
import aminoAcidToDegenerateDnaMap from "./aminoAcidToDegenerateDnaMap";
|
|
5
|
+
import {
|
|
6
|
+
normalizePositionByRangeLength,
|
|
7
|
+
reversePositionInRange
|
|
8
|
+
} from "@teselagen/range-utils";
|
|
9
|
+
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
10
|
+
import { Annotation } from "./types";
|
|
11
|
+
|
|
12
|
+
/**
 * Finds every occurrence of each query in `annotationsToCheckById` within each
 * sequence in `seqsToAnnotateById`, on both strands, and returns the hits that
 * are not already present as annotations on that sequence.
 *
 * Each query's `sequence` is compiled as a case-insensitive regular
 * expression. Circular sequences are searched on a doubled copy so that
 * matches spanning the origin are found; only matches starting inside the
 * first copy are kept. Sequences with an empty `sequence` string are skipped.
 *
 * Note from the original author: seqsToAnnotateById must not be empty.
 *
 * @param seqsToAnnotateById - target sequences keyed by id, each with its
 *   existing annotations.
 * @param annotationsToCheckById - queries keyed by id; `sequence` is used as
 *   the regex source and `id` is copied onto each produced annotation.
 * @param compareName - when truthy, an existing annotation only counts as a
 *   duplicate if its name also matches (see `getStartEndStr`).
 * @param warnIfMoreThan - when truthy, query ids that produced more than this
 *   many new annotations on a sequence are listed under the
 *   `"__more_than_warnings"` key of the result, grouped by sequence id.
 * @returns new annotations keyed by sequence id (sorted by `start`), plus the
 *   optional `"__more_than_warnings"` entry. Sequences without new hits are
 *   omitted.
 */
function autoAnnotate({
  seqsToAnnotateById,
  annotationsToCheckById,
  compareName,
  warnIfMoreThan
}: {
  seqsToAnnotateById: Record<
    string,
    { sequence: string; annotations: Annotation[]; circular?: boolean }
  >;
  annotationsToCheckById: Record<string, { sequence: string; id: string }>;
  compareName?: boolean;
  warnIfMoreThan?: number;
}) {
  const annotationsToAddBySeqId: Record<string, Annotation[]> = {};

  forEach(annotationsToCheckById, ann => {
    // One global regex per query, reused for every sequence and both strands.
    const reg = new RegExp(ann.sequence, "gi");
    forEach(
      omitBy(seqsToAnnotateById, s => !s.sequence.length),
      ({ circular, sequence }, id) => {
        function getMatches({
          seqToMatchAgainst,
          isReverse,
          seqLen
        }: {
          seqToMatchAgainst: string;
          isReverse?: boolean;
          seqLen: number;
        }) {
          // `reg` is shared and stateful (global flag). A previous scan that
          // bailed out early (match in the second copy of a circular
          // sequence) leaves lastIndex pointing past that match, which would
          // make this scan skip everything before it. Always start at 0.
          reg.lastIndex = 0;
          let match;
          let lastMatch: { start: number; end: number } | undefined;
          try {
            while ((match = reg.exec(seqToMatchAgainst))) {
              const { index: matchStart, 0: matchSeq } = match;
              // Matches starting in the duplicated half are repeats.
              if (matchStart >= seqLen) return;
              const matchEnd = matchStart + matchSeq.length;
              if (lastMatch) {
                // Skip a match that lies entirely inside the previous one.
                if (matchStart > lastMatch.start && matchEnd <= lastMatch.end) {
                  reg.lastIndex = match.index + 1;
                  continue;
                }
              }
              lastMatch = {
                start: matchStart,
                end: matchEnd
              };
              const range = {
                start: matchStart,
                end: normalizePositionByRangeLength(matchEnd - 1, seqLen, false)
              };
              if (!annotationsToAddBySeqId[id])
                annotationsToAddBySeqId[id] = [];
              annotationsToAddBySeqId[id].push({
                // Reverse-strand hits are found on the reverse complement, so
                // map their coordinates back onto the forward sequence.
                ...(isReverse
                  ? {
                      start: reversePositionInRange(range.end, seqLen, false),
                      end: reversePositionInRange(range.start, seqLen, false)
                    }
                  : range),
                strand: isReverse ? -1 : 1,
                id: ann.id
              });

              // Advance by one so overlapping matches are also found.
              reg.lastIndex = match.index + 1;
            }
          } catch (error) {
            console.error(`error:`, error);
          }
        }
        const seqLen = sequence.length;

        const revSeq = getReverseComplementSequenceString(sequence);
        getMatches({
          seqLen,
          seqToMatchAgainst: circular ? sequence + sequence : sequence
        });
        getMatches({
          seqLen,
          isReverse: true,
          seqToMatchAgainst: circular ? revSeq + revSeq : revSeq
        });
      }
    );
  });

  //loop through all patterns and get all matches

  const toReturn: Record<string, Annotation[] | Record<string, string[]>> = {};

  forEach(annotationsToAddBySeqId, (anns, id) => {
    const origSeq = seqsToAnnotateById[id];
    // Index the annotations already on the sequence by position/strand(/name).
    const alreadyExistingAnnsByStartEnd: Record<string, Annotation> = {};
    forEach(origSeq.annotations, ann => {
      alreadyExistingAnnsByStartEnd[
        getStartEndStr(
          { ...ann, strand: typeof ann.strand === "string" ? 1 : ann.strand },
          { compareName }
        )
      ] = ann;
    });
    const warningCounter: Record<string, number> = {};
    const toAdd = anns
      .filter(ann => {
        const alreadyExistingAnn =
          alreadyExistingAnnsByStartEnd[
            getStartEndStr(
              {
                ...ann,
                strand: typeof ann.strand === "string" ? 1 : ann.strand
              },
              { compareName }
            )
          ];
        if (alreadyExistingAnn) return false;
        if (warnIfMoreThan && ann.id !== undefined) {
          warningCounter[String(ann.id)] =
            (warningCounter[String(ann.id)] || 0) + 1;
        }
        return true;
      })
      .sort((a, b) => a.start - b.start);
    if (toAdd.length) {
      toReturn[id] = toAdd;
    }
    warnIfMoreThan &&
      forEach(warningCounter, (num, annId) => {
        if (num > warnIfMoreThan) {
          const warnings =
            (toReturn["__more_than_warnings"] as Record<string, string[]>) ||
            {};
          warnings[id] = warnings[id] || [];
          warnings[id].push(annId);
          toReturn["__more_than_warnings"] = warnings;
        }
      });
  });
  return toReturn;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Builds the lookup key used to decide whether two annotations occupy the
 * same place: `"<start>-<end>-<rev|for>-<name>"`.
 *
 * An annotation counts as reverse when `strand` is -1 or `forward` is
 * explicitly `false`. The name segment is left empty unless `compareName` is
 * truthy.
 */
function getStartEndStr(
  {
    start,
    end,
    name,
    strand,
    forward
  }: {
    start: number;
    end: number;
    name?: string;
    strand?: number;
    forward?: boolean;
  },
  { compareName }: { compareName: boolean | undefined }
) {
  let direction = "for";
  if (strand === -1 || forward === false) {
    direction = "rev";
  }
  const nameSegment = compareName ? name : "";
  return `${start}-${end}-${direction}-${nameSegment}`;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Translates an ApE-style search pattern into a JavaScript regular-expression
 * source string, processing the input one character at a time:
 *
 * - a letter whose `ambiguous_dna_values` entry has more than one base becomes
 *   `.` (four bases) or a character class such as `[AG]`;
 * - `#` becomes a lazy run of "anything except the previous literal character"
 *   (or `.*?` when there is no previous literal);
 * - `<` wraps everything built so far in an optional group that alternates
 *   over its suffixes, longest first;
 * - `>` makes everything that follows an optional group that alternates over
 *   its prefixes, longest first;
 * - any other character is copied through unchanged.
 *
 * @param regString - the ApE-like pattern (defaults to "").
 * @returns the regex source string.
 * @throws Error when `#`, `<` or `>` appears after a `>`, or when `<` appears
 *   more than once.
 */
function convertApELikeRegexToRegex(regString = "") {
  let newstr = "";
  // Pieces seen after ">" so far (grows one piece at a time).
  let rightOfCaretHolder = "";
  // "|"-joined prefixes of rightOfCaretHolder, longest first.
  let afterRightCaretHolder = "";
  // Snapshot of newstr taken at the moment ">" was encountered.
  let beforeRightCaret = "";
  // Last character that was copied through literally (used by "#").
  let prevBp: string | undefined;
  let hitLeftCaret: boolean | undefined;
  let hitRightCaret: boolean | undefined;

  // NOTE(review): String.replace with a string pattern removes only the first
  // "(" and the first ")"; later parentheses are passed through — confirm
  // that this is intended.
  // eslint-disable-next-line no-unused-vars
  for (const bp of regString.replace("(", "").replace(")", "")) {
    /* eslint-disable no-loop-func*/
    /* eslint-disable no-inner-declarations*/
    // Once ">" has been seen, record each emitted piece as a new, longer
    // alternative in the trailing optional group.
    function maybeHandleRightCaret(justAdded: string) {
      if (hitRightCaret) {
        rightOfCaretHolder += justAdded;
        afterRightCaretHolder = `${rightOfCaretHolder}${
          afterRightCaretHolder.length ? "|" : ""
        }${afterRightCaretHolder}`;
      }
    }
    /* eslint-enable no-loop-func*/
    /* eslint-enable no-inner-declarations*/
    const ambigVal = ambiguous_dna_values[bp.toUpperCase()];
    if (ambigVal && ambigVal.length > 1) {
      let valToUse;
      if (ambigVal.length === 4) {
        // Any base.
        valToUse = ".";
      } else {
        valToUse = `[${ambigVal}]`;
      }
      newstr += valToUse;
      maybeHandleRightCaret(valToUse);
      continue;
    }
    if (bp === "#") {
      if (hitRightCaret) throw new Error("Error converting regex");
      const valToUse = prevBp ? `[^${prevBp}]*?` : `.*?`;
      newstr += valToUse;
      maybeHandleRightCaret(valToUse);
      continue;
    }
    if (bp === "<") {
      if (hitRightCaret) throw new Error("Error converting to regex");
      if (hitLeftCaret) throw new Error("Error converting to regex");
      let holder = "";
      let stringToAdd = "";
      let isGroupClosed = true;
      let closingBraceHit;
      // Split what has been built so far into units: a bracketed class stays
      // together with whatever follows it until the next base letter or "[".
      const groups = [];
      for (let index = 0; index < newstr.length; index++) {
        const char = newstr[index];
        const nextChar = newstr[index + 1];
        if (char === "[") {
          isGroupClosed = false;
        } else if (char === "]" || closingBraceHit) {
          closingBraceHit = true;
          if (ambiguous_dna_values[nextChar] || nextChar === "[") {
            isGroupClosed = true;
            closingBraceHit = false;
          }
        }
        holder += char;
        if (isGroupClosed) {
          groups.push(holder);
          holder = "";
        }
      }
      // Walk the units from last to first, building ever-longer suffixes and
      // prepending each as an alternative (so the longest ends up first).
      let concattedEls = "";
      groups.reverse();
      groups.forEach(g => {
        concattedEls = g + concattedEls;
        stringToAdd = `${concattedEls}${
          stringToAdd.length ? "|" : ""
        }${stringToAdd}`;
      });
      newstr = `(${stringToAdd})?`;
      hitLeftCaret = true;
      continue;
    }
    if (bp === ">") {
      if (hitRightCaret) throw new Error("Error converting regex");
      hitRightCaret = true;
      beforeRightCaret = newstr;
      continue;
    }
    // Plain character: copy through and remember it for a following "#".
    newstr += bp;
    maybeHandleRightCaret(bp);
    prevBp = bp;
  }
  if (hitRightCaret) {
    // Replace everything after ">" with the optional prefix alternation.
    newstr = `${beforeRightCaret}(${afterRightCaretHolder})?`;
  }
  return newstr;
}
|
|
271
|
+
/**
 * Converts a protein sequence into a degenerate DNA string by replacing each
 * residue with its entry in `aminoAcidToDegenerateDnaMap` (lookup is
 * case-insensitive). Characters without an entry are copied through as-is.
 */
function convertProteinSeqToDNAIupac(sequence: string) {
  return Array.from(
    sequence,
    residue => aminoAcidToDegenerateDnaMap[residue.toLowerCase()] || residue
  ).join("");
}
|
|
285
|
+
|
|
286
|
+
export {
|
|
287
|
+
convertProteinSeqToDNAIupac,
|
|
288
|
+
convertApELikeRegexToRegex,
|
|
289
|
+
autoAnnotate
|
|
290
|
+
};
|
package/src/bioData.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
//Adapted from biopython. Check the BIOPYTHON_LICENSE for licensing info
|
|
2
|
+
|
|
3
|
+
// Alphabets, each given as a string of the allowed uppercase letters.

/** The 20 standard amino-acid one-letter codes. */
export const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
/** `protein_letters` plus U and X. */
export const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
/** `protein_letters` plus B, X, Z, J, U, O and the "*" symbol. */
export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
/** G, A, T, C plus the ambiguity letters R Y W S M K H B V D N. */
export const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
export const unambiguous_dna_letters = "GATC";
/** Same as `ambiguous_dna_letters` with U in place of T. */
export const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
export const unambiguous_rna_letters = "GAUC";
export const extended_dna_letters = "GATCBDSW";
|
|
11
|
+
|
|
12
|
+
/**
 * Maps each uppercase DNA letter to the string of concrete bases it can stand
 * for. Unambiguous letters (and ".") map to themselves; X and N both map to
 * all four bases.
 */
export const ambiguous_dna_values: Record<string, string> = {
  ".": ".",
  A: "A",
  C: "C",
  G: "G",
  T: "T",
  M: "AC",
  R: "AG",
  W: "AT",
  S: "CG",
  Y: "CT",
  K: "GT",
  V: "ACG",
  H: "ACT",
  D: "AGT",
  B: "CGT",
  X: "GATC",
  N: "GATC"
};
|
|
31
|
+
|
|
32
|
+
/**
 * Maps each uppercase protein letter to the string of residues it can stand
 * for. Most letters map to themselves; B, J, Z and X expand to several
 * residues. The "*", "." and "-" entries hold backslash-escaped characters.
 * NOTE(review): the escaping suggests these values are embedded in regular
 * expressions — confirm against callers.
 */
export const extended_protein_values: Record<string, string> = {
  A: "A",
  B: "ND",
  C: "C",
  D: "D",
  E: "E",
  F: "F",
  G: "G",
  H: "H",
  I: "I",
  J: "IL",
  K: "K",
  L: "L",
  M: "M",
  N: "N",
  O: "O",
  P: "P",
  Q: "Q",
  R: "R",
  S: "S",
  T: "T",
  U: "U",
  V: "V",
  W: "W",
  X: "ACDEFGHIKLMNPQRSTVWY",
  // TODO - Include U and O in the possible values of X?
  // This could alter the extended_protein_weight_ranges ...
  // by MP: Won't do this, because they are so rare.
  Y: "Y",
  Z: "QE",
  "*": "\\*\\.",
  ".": "\\.",
  "-": "\\-"
};
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import {
|
|
2
|
+
isValidSequence,
|
|
3
|
+
SANTA_LUCIA_NN,
|
|
4
|
+
SANTA_LUCIA_INIT,
|
|
5
|
+
SantaLuciaParams
|
|
6
|
+
} from "./calculateSantaLuciaTm";
|
|
7
|
+
|
|
8
|
+
/**
 * Calculate End Stability (3' end stability) of a primer.
 *
 * The value is the magnitude of delta G (kcal/mol) at 37°C for the last five
 * 3' bases, computed from nearest-neighbor parameters (`SANTA_LUCIA_NN`) plus
 * one initiation term (`SANTA_LUCIA_INIT`) for each end of the 5-mer.
 * Bigger numbers mean more stable 3' ends. Dinucleotides containing "N" are
 * ignored.
 *
 * According to Primer3 documentation:
 * - Most stable 5mer duplex: GCGCG = 6.86 kcal/mol (SantaLucia 1998)
 * - Most labile 5mer duplex: TATAT = 0.86 kcal/mol (SantaLucia 1998)
 *
 * The input is upper-cased and trimmed before use.
 *
 * @param sequence - DNA sequence (5' to 3')
 * @returns the end stability rounded to two decimals, or — when the sequence
 *   is invalid, shorter than five bases, or anything else fails — a string
 *   beginning with "Error calculating end stability". Errors are reported
 *   through the return value; nothing is thrown to the caller.
 */
export default function calculateEndStability(
  sequence: string
): number | string {
  // Initiation parameters for a terminal base: G/C ends vs. everything else.
  const terminalInit = (base: string) =>
    base === "G" || base === "C"
      ? SANTA_LUCIA_INIT["GC"]
      : SANTA_LUCIA_INIT["AT"];

  try {
    const seq = sequence?.toUpperCase().trim();

    if (!isValidSequence(seq)) {
      throw new Error("Invalid sequence: contains non-DNA characters");
    }
    if (seq.length < 5) {
      throw new Error(
        "Sequence too short: minimum length is 5 bases for end stability calculation"
      );
    }

    const tail = seq.slice(-5);
    const nnTable = SANTA_LUCIA_NN as Record<string, SantaLuciaParams>;

    let deltaH = 0; // kcal/mol
    let deltaS = 0; // cal/K·mol

    // Nearest-neighbor contributions of the four overlapping dinucleotides.
    for (let i = 0; i + 1 < tail.length; i++) {
      const pair = tail.slice(i, i + 2);
      const params = pair.includes("N") ? undefined : nnTable[pair];
      if (!params) continue;
      deltaH += params.dH;
      deltaS += params.dS;
    }

    // Initiation terms: first base of the 5-mer, then its last base.
    for (const base of [tail[0], tail[tail.length - 1]]) {
      const init = terminalInit(base);
      deltaH += init.dH;
      deltaS += init.dS;
    }

    // deltaG = deltaH - T * deltaS, at 37°C; deltaS is in cal so divide by
    // 1000 to stay in kcal/mol.
    const kelvin = 310.15;
    const deltaG = deltaH - (kelvin * deltaS) / 1000;

    return Math.round(Math.abs(deltaG) * 100) / 100;
  } catch (e) {
    return `Error calculating end stability for sequence ${sequence}. ${e}`;
  }
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import calculateTm from "./calculateNebTm";
|
|
2
|
+
|
|
3
|
+
/** Optional settings for `calculateNebTa`. */
interface NebTaOptions {
  /** Forwarded unchanged to the Tm calculation. */
  monovalentCationConc?: number;
  /** Polymerase name; only "Q5" selects a different annealing rule. */
  polymerase?: string;
}

/**
 * Calculates an annealing temperature for a primer pair from the lower of the
 * two primers' melting temperatures.
 *
 * - polymerase "Q5": lower Tm + 1, capped at 72.
 * - anything else:   lower Tm - 3.
 *
 * @param sequences - exactly two primer sequences.
 * @param primerConc - primer concentration, forwarded to the Tm calculation.
 * @returns the annealing temperature, or a string beginning with
 *   "Error calculating annealing temperature" when the input is not exactly
 *   two primers, a Tm could not be computed as a number, or anything else
 *   fails. Errors are reported through the return value, not thrown.
 */
export default function calculateNebTa(
  sequences: string[],
  primerConc: number,
  { monovalentCationConc, polymerase }: NebTaOptions = {}
): number | string {
  // Tm of one primer; a non-numeric result is turned into an exception.
  const tmOrThrow = (seq: string): number => {
    const tm = calculateTm(seq, { monovalentCationConc, primerConc });
    if (typeof tm !== "number") {
      throw new Error(`Invalid Tm calculated for ${seq}: ${tm}`);
    }
    return tm;
  };

  try {
    const primerCount = sequences.length;
    if (primerCount !== 2) {
      throw new Error(
        `${primerCount} sequences received when 2 primers were expected`
      );
    }

    const [lowerTm] = sequences
      .map(seq => tmOrThrow(seq))
      .sort((a, b) => a - b);

    if (polymerase !== "Q5") {
      return lowerTm - 3;
    }
    // Ta = Tm_lower+1°C is standard for Q5.
    // "Annealing temperature for experiments with this enzyme should
    // typically not exceed 72°C"
    return Math.min(lowerTm + 1, 72);
  } catch (err) {
    return `Error calculating annealing temperature: ${err}`;
  }
}
|