@teselagen/sequence-utils 0.3.41 → 0.3.42-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DNAComplementMap.d.ts +1 -1
- package/README.md +2 -8
- package/addGapsToSeqReads.d.ts +16 -3
- package/adjustAnnotationsToInsert.d.ts +2 -1
- package/adjustBpsToReplaceOrInsert.d.ts +2 -1
- package/aliasedEnzymesByName.d.ts +37 -1
- package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
- package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
- package/annotateSingleSeq.d.ts +5 -4
- package/annotationTypes.d.ts +2 -2
- package/autoAnnotate.d.ts +17 -8
- package/bioData.d.ts +10 -58
- package/calculateEndStability.d.ts +1 -1
- package/calculateNebTa.d.ts +6 -1
- package/calculateNebTm.d.ts +6 -4
- package/calculatePercentGC.d.ts +1 -1
- package/calculateSantaLuciaTm.d.ts +28 -114
- package/calculateTm.d.ts +13 -1
- package/computeDigestFragments.d.ts +30 -24
- package/condensePairwiseAlignmentDifferences.d.ts +1 -1
- package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
- package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
- package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
- package/defaultEnzymesByName.d.ts +2 -1
- package/degenerateDnaToAminoAcidMap.d.ts +1 -1
- package/degenerateRnaToAminoAcidMap.d.ts +1 -1
- package/deleteSequenceDataAtRange.d.ts +2 -1
- package/diffUtils.d.ts +9 -7
- package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
- package/featureTypesAndColors.d.ts +19 -6
- package/filterSequenceString.d.ts +14 -10
- package/findApproxMatches.d.ts +7 -1
- package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
- package/findOrfsInPlasmid.d.ts +2 -11
- package/findSequenceMatches.d.ts +11 -1
- package/generateAnnotations.d.ts +2 -1
- package/generateSequenceData.d.ts +8 -13
- package/getAllInsertionsInSeqReads.d.ts +11 -1
- package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
- package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
- package/getAminoAcidStringFromSequenceString.d.ts +3 -1
- package/getCodonRangeForAASliver.d.ts +3 -4
- package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getComplementSequenceAndAnnotations.d.ts +5 -1
- package/getComplementSequenceString.d.ts +1 -1
- package/getCutsiteType.d.ts +2 -1
- package/getCutsitesFromSequence.d.ts +2 -1
- package/getDegenerateDnaStringFromAAString.d.ts +1 -1
- package/getDegenerateRnaStringFromAAString.d.ts +1 -1
- package/getDigestFragmentsForCutsites.d.ts +4 -1
- package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
- package/getInsertBetweenVals.d.ts +2 -1
- package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
- package/getOrfsFromSequence.d.ts +17 -11
- package/getOverlapBetweenTwoSequences.d.ts +2 -1
- package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
- package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAnnotation.d.ts +11 -1
- package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
- package/getReverseComplementSequenceString.d.ts +1 -1
- package/getReverseSequenceString.d.ts +1 -1
- package/getSequenceDataBetweenRange.d.ts +9 -1
- package/getVirtualDigest.d.ts +11 -10
- package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
- package/index.cjs +762 -495
- package/index.d.ts +9 -5
- package/index.js +763 -496
- package/index.umd.cjs +762 -495
- package/insertGapsIntoRefSeq.d.ts +2 -1
- package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
- package/isEnzymeType2S.d.ts +2 -1
- package/mapAnnotationsToRows.d.ts +9 -1
- package/package.json +9 -6
- package/prepareCircularViewData.d.ts +2 -1
- package/prepareRowData.d.ts +7 -3
- package/proteinAlphabet.d.ts +1 -1
- package/rotateBpsToPosition.d.ts +1 -1
- package/rotateSequenceDataToPosition.d.ts +3 -1
- package/shiftAnnotationsByLen.d.ts +4 -3
- package/src/{addGapsToSeqReads.js → addGapsToSeqReads.ts} +33 -14
- package/src/{adjustAnnotationsToInsert.js → adjustAnnotationsToInsert.ts} +6 -5
- package/src/{adjustBpsToReplaceOrInsert.js → adjustBpsToReplaceOrInsert.ts} +31 -8
- package/src/{aliasedEnzymesByName.js → aliasedEnzymesByName.ts} +4 -1
- package/src/{aminoAcidToDegenerateDnaMap.js → aminoAcidToDegenerateDnaMap.ts} +1 -1
- package/src/{annotateSingleSeq.js → annotateSingleSeq.ts} +11 -3
- package/src/autoAnnotate.test.js +0 -1
- package/src/{autoAnnotate.js → autoAnnotate.ts} +69 -24
- package/src/{bioData.js → bioData.ts} +2 -2
- package/src/{calculateEndStability.js → calculateEndStability.ts} +21 -16
- package/src/{calculateNebTa.js → calculateNebTa.ts} +20 -8
- package/src/{calculateNebTm.js → calculateNebTm.ts} +15 -9
- package/src/{calculatePercentGC.js → calculatePercentGC.ts} +1 -1
- package/src/{calculateSantaLuciaTm.js → calculateSantaLuciaTm.ts} +29 -22
- package/src/{calculateTm.js → calculateTm.ts} +50 -59
- package/src/{computeDigestFragments.js → computeDigestFragments.ts} +92 -36
- package/src/{condensePairwiseAlignmentDifferences.js → condensePairwiseAlignmentDifferences.ts} +4 -4
- package/src/{convertAACaretPositionOrRangeToDna.js → convertAACaretPositionOrRangeToDna.ts} +8 -4
- package/src/{convertDnaCaretPositionOrRangeToAA.js → convertDnaCaretPositionOrRangeToAA.ts} +8 -4
- package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
- package/src/{defaultEnzymesByName.js → defaultEnzymesByName.ts} +2 -1
- package/src/deleteSequenceDataAtRange.ts +13 -0
- package/src/diffUtils.ts +80 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
- package/src/{featureTypesAndColors.js → featureTypesAndColors.ts} +29 -14
- package/src/{filterSequenceString.js → filterSequenceString.ts} +51 -21
- package/src/{findApproxMatches.js → findApproxMatches.ts} +14 -6
- package/src/{findNearestRangeOfSequenceOverlapToPosition.js → findNearestRangeOfSequenceOverlapToPosition.ts} +13 -9
- package/src/{findOrfsInPlasmid.js → findOrfsInPlasmid.ts} +8 -7
- package/src/{findSequenceMatches.js → findSequenceMatches.ts} +31 -13
- package/src/{generateAnnotations.js → generateAnnotations.ts} +14 -9
- package/src/{generateSequenceData.js → generateSequenceData.ts} +19 -13
- package/src/{getAllInsertionsInSeqReads.js → getAllInsertionsInSeqReads.ts} +19 -2
- package/src/{getAminoAcidDataForEachBaseOfDna.js → getAminoAcidDataForEachBaseOfDna.ts} +36 -30
- package/src/{getAminoAcidFromSequenceTriplet.js → getAminoAcidFromSequenceTriplet.ts} +9 -4
- package/src/{getAminoAcidStringFromSequenceString.js → getAminoAcidStringFromSequenceString.ts} +14 -7
- package/src/{getCodonRangeForAASliver.js → getCodonRangeForAASliver.ts} +16 -6
- package/src/{getComplementAminoAcidStringFromSequenceString.js → getComplementAminoAcidStringFromSequenceString.ts} +5 -3
- package/src/{getComplementSequenceAndAnnotations.js → getComplementSequenceAndAnnotations.ts} +8 -6
- package/src/{getComplementSequenceString.js → getComplementSequenceString.ts} +5 -2
- package/src/getCutsiteType.ts +18 -0
- package/src/getCutsitesFromSequence.ts +22 -0
- package/src/getDegenerateDnaStringFromAAString.ts +15 -0
- package/src/getDegenerateRnaStringFromAAString.ts +15 -0
- package/src/{getDigestFragmentsForCutsites.js → getDigestFragmentsForCutsites.ts} +32 -14
- package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
- package/src/{getInsertBetweenVals.js → getInsertBetweenVals.ts} +8 -5
- package/src/{getLeftAndRightOfSequenceInRangeGivenPosition.js → getLeftAndRightOfSequenceInRangeGivenPosition.ts} +11 -10
- package/src/{getMassOfAaString.js → getMassOfAaString.ts} +4 -2
- package/src/{getOrfsFromSequence.js → getOrfsFromSequence.ts} +27 -7
- package/src/{getOverlapBetweenTwoSequences.js → getOverlapBetweenTwoSequences.ts} +4 -4
- package/src/{getPossiblePartsFromSequenceAndEnzymes.js → getPossiblePartsFromSequenceAndEnzymes.ts} +52 -25
- package/src/{getReverseAminoAcidStringFromSequenceString.js → getReverseAminoAcidStringFromSequenceString.ts} +4 -2
- package/src/{getReverseComplementAminoAcidStringFromSequenceString.js → getReverseComplementAminoAcidStringFromSequenceString.ts} +2 -2
- package/src/{getReverseComplementAnnotation.js → getReverseComplementAnnotation.ts} +4 -2
- package/src/getReverseComplementSequenceAndAnnotations.ts +45 -0
- package/src/{getReverseComplementSequenceString.js → getReverseComplementSequenceString.ts} +4 -4
- package/src/{getReverseSequenceString.js → getReverseSequenceString.ts} +1 -1
- package/src/getSequenceDataBetweenRange.test.js +6 -3
- package/src/{getSequenceDataBetweenRange.js → getSequenceDataBetweenRange.ts} +44 -29
- package/src/{getVirtualDigest.js → getVirtualDigest.ts} +20 -9
- package/src/{guessIfSequenceIsDnaAndNotProtein.js → guessIfSequenceIsDnaAndNotProtein.ts} +11 -5
- package/src/{index.test.js → index.test.ts} +9 -5
- package/src/{index.js → index.ts} +1 -0
- package/src/{insertGapsIntoRefSeq.js → insertGapsIntoRefSeq.ts} +7 -2
- package/src/{insertSequenceDataAtPositionOrRange.js → insertSequenceDataAtPositionOrRange.ts} +130 -56
- package/src/isEnzymeType2S.ts +5 -0
- package/src/mapAnnotationsToRows.ts +256 -0
- package/src/prepareCircularViewData.ts +24 -0
- package/src/{prepareRowData.js → prepareRowData.ts} +27 -8
- package/src/prepareRowData_output1.json +1 -0
- package/src/rotateBpsToPosition.ts +12 -0
- package/src/{rotateSequenceDataToPosition.js → rotateSequenceDataToPosition.ts} +11 -8
- package/src/{shiftAnnotationsByLen.js → shiftAnnotationsByLen.ts} +12 -5
- package/src/{threeLetterSequenceStringToAminoAcidMap.js → threeLetterSequenceStringToAminoAcidMap.ts} +29 -9
- package/src/{tidyUpAnnotation.js → tidyUpAnnotation.ts} +40 -18
- package/src/{tidyUpSequenceData.js → tidyUpSequenceData.ts} +83 -39
- package/src/types.ts +98 -0
- package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
- package/tidyUpAnnotation.d.ts +13 -11
- package/tidyUpSequenceData.d.ts +18 -1
- package/types.d.ts +96 -0
- package/addGapsToSeqReads.test.d.ts +0 -1
- package/adjustBpsToReplaceOrInsert.test.d.ts +0 -1
- package/aminoAcidToDnaRna.test.d.ts +0 -1
- package/annotateSingleSeq.test.d.ts +0 -1
- package/autoAnnotate.test.d.ts +0 -1
- package/calculateEndStability.test.d.ts +0 -1
- package/calculateNebTa.test.d.ts +0 -1
- package/calculateNebTm.test.d.ts +0 -1
- package/calculatePercentGC.test.d.ts +0 -1
- package/calculateSantaLuciaTm.test.d.ts +0 -1
- package/calculateTm.test.d.ts +0 -1
- package/computeDigestFragments.test.d.ts +0 -1
- package/condensePairwiseAlignmentDifferences.test.d.ts +0 -1
- package/convertAACaretPositionOrRangeToDna.test.d.ts +0 -1
- package/convertDnaCaretPositionOrRangeToAA.test.d.ts +0 -1
- package/cutSequenceByRestrictionEnzyme.test.d.ts +0 -1
- package/deleteSequenceDataAtRange.test.d.ts +0 -1
- package/diffUtils.test.d.ts +0 -1
- package/doesEnzymeChopOutsideOfRecognitionSite.test.d.ts +0 -1
- package/featureTypesAndColors.test.d.ts +0 -1
- package/filterSequenceString.test.d.ts +0 -1
- package/findApproxMatches.test.d.ts +0 -1
- package/findNearestRangeOfSequenceOverlapToPosition.test.d.ts +0 -1
- package/findSequenceMatches.test.d.ts +0 -1
- package/generateSequenceData.test.d.ts +0 -1
- package/getAllInsertionsInSeqReads.test.d.ts +0 -1
- package/getAminoAcidDataForEachBaseOfDna.test.d.ts +0 -1
- package/getAminoAcidStringFromSequenceString.test.d.ts +0 -1
- package/getComplementSequenceString.test.d.ts +0 -1
- package/getDigestFragmentsForRestrictionEnzymes.test.d.ts +0 -1
- package/getInsertBetweenVals.test.d.ts +0 -1
- package/getLeftAndRightOfSequenceInRangeGivenPosition.test.d.ts +0 -1
- package/getMassofAaString.test.d.ts +0 -1
- package/getOrfsFromSequence.test.d.ts +0 -1
- package/getOverlapBetweenTwoSequences.test.d.ts +0 -1
- package/getPossiblePartsFromSequenceAndEnzymes.test.d.ts +0 -1
- package/getReverseAminoAcidStringFromSequenceString.test.d.ts +0 -1
- package/getReverseComplementAnnotation.test.d.ts +0 -1
- package/getReverseComplementSequenceAndAnnotations.test.d.ts +0 -1
- package/getReverseComplementSequenceString.test.d.ts +0 -1
- package/getReverseSequenceString.test.d.ts +0 -1
- package/getSequenceDataBetweenRange.test.d.ts +0 -1
- package/getVirtualDigest.test.d.ts +0 -1
- package/guessIfSequenceIsDnaAndNotProtein.test.d.ts +0 -1
- package/index.test.d.ts +0 -1
- package/insertGapsIntoRefSeq.test.d.ts +0 -1
- package/insertSequenceDataAtPosition.test.d.ts +0 -1
- package/insertSequenceDataAtPositionOrRange.test.d.ts +0 -1
- package/mapAnnotationsToRows.test.d.ts +0 -1
- package/prepareCircularViewData.test.d.ts +0 -1
- package/prepareRowData.test.d.ts +0 -1
- package/rotateBpsToPosition.test.d.ts +0 -1
- package/rotateSequenceDataToPosition.test.d.ts +0 -1
- package/src/cutSequenceByRestrictionEnzyme.js +0 -301
- package/src/deleteSequenceDataAtRange.js +0 -5
- package/src/diffUtils.js +0 -63
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +0 -10
- package/src/getCutsiteType.js +0 -10
- package/src/getCutsitesFromSequence.js +0 -17
- package/src/getDegenerateDnaStringFromAAString.js +0 -8
- package/src/getDegenerateRnaStringFromAAString.js +0 -8
- package/src/getDigestFragmentsForRestrictionEnzymes.js +0 -27
- package/src/getReverseComplementSequenceAndAnnotations.js +0 -40
- package/src/isEnzymeType2S.js +0 -3
- package/src/mapAnnotationsToRows.js +0 -174
- package/src/prepareCircularViewData.js +0 -17
- package/src/rotateBpsToPosition.js +0 -9
- package/tidyUpSequenceData.test.d.ts +0 -1
- /package/src/{DNAComplementMap.js → DNAComplementMap.ts} +0 -0
- /package/src/{aminoAcidToDegenerateRnaMap.js → aminoAcidToDegenerateRnaMap.ts} +0 -0
- /package/src/{annotationTypes.js → annotationTypes.ts} +0 -0
- /package/src/{degenerateDnaToAminoAcidMap.js → degenerateDnaToAminoAcidMap.ts} +0 -0
- /package/src/{degenerateRnaToAminoAcidMap.js → degenerateRnaToAminoAcidMap.ts} +0 -0
- /package/src/{insertSequenceDataAtPosition.js → insertSequenceDataAtPosition.ts} +0 -0
- /package/src/{proteinAlphabet.js → proteinAlphabet.ts} +0 -0
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
import { map } from "lodash-es";
|
|
2
2
|
import { adjustRangeToRotation } from "@teselagen/range-utils";
|
|
3
|
-
import tidyUpSequenceData
|
|
3
|
+
import tidyUpSequenceData, {
|
|
4
|
+
TidyUpSequenceDataOptions
|
|
5
|
+
} from "./tidyUpSequenceData";
|
|
4
6
|
import { modifiableTypes } from "./annotationTypes";
|
|
5
7
|
import rotateBpsToPosition from "./rotateBpsToPosition";
|
|
8
|
+
import { SequenceData, Annotation } from "./types";
|
|
6
9
|
|
|
7
10
|
export default function rotateSequenceDataToPosition(
|
|
8
|
-
sequenceData,
|
|
9
|
-
caretPosition,
|
|
10
|
-
options
|
|
11
|
+
sequenceData: SequenceData,
|
|
12
|
+
caretPosition: number,
|
|
13
|
+
options: TidyUpSequenceDataOptions = {}
|
|
11
14
|
) {
|
|
12
15
|
const newSequenceData = tidyUpSequenceData(sequenceData, {
|
|
13
16
|
doNotRemoveInvalidChars: true,
|
|
@@ -25,7 +28,7 @@ export default function rotateSequenceDataToPosition(
|
|
|
25
28
|
//update the annotations:
|
|
26
29
|
//handle the delete if necessary
|
|
27
30
|
newSequenceData[annotationType] = adjustAnnotationsToRotation(
|
|
28
|
-
newSequenceData[annotationType],
|
|
31
|
+
newSequenceData[annotationType] as Annotation[],
|
|
29
32
|
caretPosition,
|
|
30
33
|
newSequenceData.sequence.length
|
|
31
34
|
);
|
|
@@ -34,9 +37,9 @@ export default function rotateSequenceDataToPosition(
|
|
|
34
37
|
}
|
|
35
38
|
|
|
36
39
|
function adjustAnnotationsToRotation(
|
|
37
|
-
annotationsToBeAdjusted,
|
|
38
|
-
positionToRotateTo,
|
|
39
|
-
maxLength
|
|
40
|
+
annotationsToBeAdjusted: Annotation[],
|
|
41
|
+
positionToRotateTo: number,
|
|
42
|
+
maxLength: number
|
|
40
43
|
) {
|
|
41
44
|
return map(annotationsToBeAdjusted, annotation => {
|
|
42
45
|
return {
|
|
@@ -1,17 +1,24 @@
|
|
|
1
1
|
import { modifiableTypes } from "./annotationTypes";
|
|
2
2
|
import adjustAnnotationsToInsert from "./adjustAnnotationsToInsert";
|
|
3
|
+
import { SequenceData } from "./types";
|
|
3
4
|
|
|
4
5
|
export default function shiftAnnotationsByLen({
|
|
5
6
|
seqData,
|
|
6
7
|
caretPosition,
|
|
7
8
|
insertLength
|
|
9
|
+
}: {
|
|
10
|
+
seqData: SequenceData;
|
|
11
|
+
caretPosition: number;
|
|
12
|
+
insertLength: number;
|
|
8
13
|
}) {
|
|
9
14
|
modifiableTypes.forEach(annotationType => {
|
|
10
15
|
const existingAnnotations = seqData[annotationType];
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
+
if (existingAnnotations) {
|
|
17
|
+
seqData[annotationType] = adjustAnnotationsToInsert(
|
|
18
|
+
existingAnnotations,
|
|
19
|
+
caretPosition,
|
|
20
|
+
insertLength
|
|
21
|
+
);
|
|
22
|
+
}
|
|
16
23
|
});
|
|
17
24
|
}
|
|
@@ -1,6 +1,19 @@
|
|
|
1
1
|
import proteinAlphabet from "./proteinAlphabet";
|
|
2
2
|
|
|
3
|
-
const initThreeLetterSequenceStringToAminoAcidMap
|
|
3
|
+
const initThreeLetterSequenceStringToAminoAcidMap: Record<
|
|
4
|
+
string,
|
|
5
|
+
{
|
|
6
|
+
value: string;
|
|
7
|
+
name: string;
|
|
8
|
+
threeLettersName: string;
|
|
9
|
+
hydrophobicity?: number;
|
|
10
|
+
colorByFamily: string;
|
|
11
|
+
color: string;
|
|
12
|
+
mass: number;
|
|
13
|
+
isAmbiguous?: boolean;
|
|
14
|
+
aliases?: string;
|
|
15
|
+
}
|
|
16
|
+
> = {
|
|
4
17
|
gct: proteinAlphabet.A,
|
|
5
18
|
gcc: proteinAlphabet.A,
|
|
6
19
|
gca: proteinAlphabet.A,
|
|
@@ -107,7 +120,7 @@ const initThreeLetterSequenceStringToAminoAcidMap = {
|
|
|
107
120
|
};
|
|
108
121
|
|
|
109
122
|
// IUPAC nucleotide codes (DNA/RNA) with U awareness
|
|
110
|
-
const IUPAC = {
|
|
123
|
+
const IUPAC: Record<string, string[]> = {
|
|
111
124
|
A: ["A"],
|
|
112
125
|
C: ["C"],
|
|
113
126
|
G: ["G"],
|
|
@@ -128,26 +141,33 @@ const IUPAC = {
|
|
|
128
141
|
X: ["A", "C", "G", "T", "U"]
|
|
129
142
|
};
|
|
130
143
|
|
|
131
|
-
|
|
132
|
-
function expandAndResolve(threeLetterCodon) {
|
|
144
|
+
function expandAndResolve(threeLetterCodon: string) {
|
|
133
145
|
const chars = threeLetterCodon.toUpperCase().split("");
|
|
134
|
-
const picks = chars.map(
|
|
146
|
+
const picks = chars.map(c => IUPAC[c] || [c]);
|
|
135
147
|
|
|
136
148
|
let allPossibleThreeLetterCodons = [""];
|
|
137
149
|
for (const set of picks) {
|
|
138
150
|
const next = [];
|
|
139
|
-
for (const prefix of allPossibleThreeLetterCodons)
|
|
151
|
+
for (const prefix of allPossibleThreeLetterCodons)
|
|
152
|
+
for (const b of set) next.push(prefix + b);
|
|
140
153
|
allPossibleThreeLetterCodons = next;
|
|
141
154
|
}
|
|
142
155
|
let foundAminoAcid = null;
|
|
143
156
|
for (const codon of allPossibleThreeLetterCodons) {
|
|
144
157
|
const lowerCodon = codon.toLowerCase();
|
|
145
|
-
const aminoAcidObj =
|
|
158
|
+
const aminoAcidObj =
|
|
159
|
+
initThreeLetterSequenceStringToAminoAcidMap[lowerCodon] ??
|
|
160
|
+
initThreeLetterSequenceStringToAminoAcidMap[
|
|
161
|
+
lowerCodon.replace(/u/g, "t")
|
|
162
|
+
] ??
|
|
163
|
+
initThreeLetterSequenceStringToAminoAcidMap[
|
|
164
|
+
lowerCodon.replace(/t/g, "u")
|
|
165
|
+
];
|
|
146
166
|
if (aminoAcidObj) {
|
|
147
167
|
if (!foundAminoAcid) {
|
|
148
168
|
foundAminoAcid = aminoAcidObj;
|
|
149
|
-
} else if (foundAminoAcid.value !== aminoAcidObj.value
|
|
150
|
-
return null
|
|
169
|
+
} else if (foundAminoAcid.value !== aminoAcidObj.value) {
|
|
170
|
+
return null;
|
|
151
171
|
}
|
|
152
172
|
} else {
|
|
153
173
|
return null;
|
|
@@ -1,9 +1,22 @@
|
|
|
1
1
|
import { cloneDeep, get, some } from "lodash-es";
|
|
2
2
|
import { getFeatureToColorMap, getFeatureTypes } from "./featureTypesAndColors";
|
|
3
3
|
import shortid from "shortid";
|
|
4
|
+
import { Annotation, SequenceData } from "./types";
|
|
5
|
+
|
|
6
|
+
export interface TidyUpAnnotationOptions {
|
|
7
|
+
sequenceData?: Partial<SequenceData>;
|
|
8
|
+
convertAnnotationsFromAAIndices?: boolean;
|
|
9
|
+
annotationType?: string;
|
|
10
|
+
provideNewIdsForAnnotations?: boolean;
|
|
11
|
+
doNotProvideIdsForAnnotations?: boolean;
|
|
12
|
+
messages?: string[];
|
|
13
|
+
mutative?: boolean;
|
|
14
|
+
allowNonStandardGenbankTypes?: boolean;
|
|
15
|
+
featureTypes?: string[];
|
|
16
|
+
}
|
|
4
17
|
|
|
5
18
|
export default function tidyUpAnnotation(
|
|
6
|
-
_annotation,
|
|
19
|
+
_annotation: Annotation,
|
|
7
20
|
{
|
|
8
21
|
sequenceData = {},
|
|
9
22
|
convertAnnotationsFromAAIndices,
|
|
@@ -14,7 +27,7 @@ export default function tidyUpAnnotation(
|
|
|
14
27
|
mutative,
|
|
15
28
|
allowNonStandardGenbankTypes,
|
|
16
29
|
featureTypes
|
|
17
|
-
}
|
|
30
|
+
}: TidyUpAnnotationOptions
|
|
18
31
|
) {
|
|
19
32
|
const { size, circular, isProtein } = sequenceData;
|
|
20
33
|
if (!_annotation || typeof _annotation !== "object") {
|
|
@@ -70,7 +83,7 @@ export default function tidyUpAnnotation(
|
|
|
70
83
|
if (
|
|
71
84
|
isProtein ||
|
|
72
85
|
annotation.forward === true ||
|
|
73
|
-
annotation.forward === "true" ||
|
|
86
|
+
(annotation.forward as unknown) === "true" ||
|
|
74
87
|
annotation.strand === 1 ||
|
|
75
88
|
annotation.strand === "1" ||
|
|
76
89
|
annotation.strand === "+"
|
|
@@ -85,7 +98,9 @@ export default function tidyUpAnnotation(
|
|
|
85
98
|
!annotation.type ||
|
|
86
99
|
typeof annotation.type !== "string" ||
|
|
87
100
|
!some(featureTypes || getFeatureTypes(), featureType => {
|
|
88
|
-
if (
|
|
101
|
+
if (
|
|
102
|
+
featureType.toLowerCase() === (annotation.type as string).toLowerCase()
|
|
103
|
+
) {
|
|
89
104
|
annotation.type = featureType; //this makes sure the annotation.type is being set to the exact value of the accepted featureType
|
|
90
105
|
return true;
|
|
91
106
|
}
|
|
@@ -123,7 +138,10 @@ export default function tidyUpAnnotation(
|
|
|
123
138
|
}
|
|
124
139
|
|
|
125
140
|
if (!annotation.color) {
|
|
126
|
-
annotation.color =
|
|
141
|
+
annotation.color =
|
|
142
|
+
getFeatureToColorMap()[
|
|
143
|
+
annotation.type as keyof ReturnType<typeof getFeatureToColorMap>
|
|
144
|
+
];
|
|
127
145
|
}
|
|
128
146
|
return annotation;
|
|
129
147
|
}
|
|
@@ -136,19 +154,23 @@ function coerceLocation({
|
|
|
136
154
|
messages,
|
|
137
155
|
circular,
|
|
138
156
|
name
|
|
157
|
+
}: {
|
|
158
|
+
location: Annotation;
|
|
159
|
+
convertAnnotationsFromAAIndices?: boolean;
|
|
160
|
+
size?: number;
|
|
161
|
+
isProtein?: boolean;
|
|
162
|
+
messages: string[];
|
|
163
|
+
circular?: boolean;
|
|
164
|
+
name?: string;
|
|
139
165
|
}) {
|
|
140
|
-
location.start = parseInt(location.start, 10);
|
|
141
|
-
location.end = parseInt(location.end, 10);
|
|
166
|
+
location.start = parseInt(String(location.start), 10);
|
|
167
|
+
location.end = parseInt(String(location.end), 10);
|
|
142
168
|
|
|
143
169
|
if (convertAnnotationsFromAAIndices) {
|
|
144
170
|
location.start = location.start * 3;
|
|
145
171
|
location.end = location.end * 3 + 2;
|
|
146
172
|
}
|
|
147
|
-
if (
|
|
148
|
-
location.start < 0 ||
|
|
149
|
-
!(location.start <= size - 1) ||
|
|
150
|
-
location.start > size - 1
|
|
151
|
-
) {
|
|
173
|
+
if (size !== undefined && (location.start < 0 || location.start > size - 1)) {
|
|
152
174
|
messages.push(
|
|
153
175
|
"Invalid annotation start: " +
|
|
154
176
|
location.start +
|
|
@@ -159,11 +181,7 @@ function coerceLocation({
|
|
|
159
181
|
); //setting it to 0 internally, but users will see it as 1
|
|
160
182
|
location.start = Math.max(0, size - (isProtein ? 3 : 1));
|
|
161
183
|
}
|
|
162
|
-
if (
|
|
163
|
-
location.end < 0 ||
|
|
164
|
-
!(location.end <= size - 1) ||
|
|
165
|
-
location.end > size - 1
|
|
166
|
-
) {
|
|
184
|
+
if (size !== undefined && (location.end < 0 || location.end > size - 1)) {
|
|
167
185
|
messages.push(
|
|
168
186
|
"Invalid annotation end: " +
|
|
169
187
|
location.end +
|
|
@@ -174,7 +192,11 @@ function coerceLocation({
|
|
|
174
192
|
); //setting it to 0 internally, but users will see it as 1
|
|
175
193
|
location.end = Math.max(0, size - 1);
|
|
176
194
|
}
|
|
177
|
-
if (
|
|
195
|
+
if (
|
|
196
|
+
size !== undefined &&
|
|
197
|
+
location.start > location.end &&
|
|
198
|
+
circular === false
|
|
199
|
+
) {
|
|
178
200
|
messages.push(
|
|
179
201
|
"Invalid circular annotation detected for " + name + ". end set to 1"
|
|
180
202
|
); //setting it to 0 internally, but users will see it as 1
|
|
@@ -4,14 +4,37 @@ import shortid from "shortid";
|
|
|
4
4
|
import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
|
|
5
5
|
import { cloneDeep, flatMap } from "lodash-es";
|
|
6
6
|
import { annotationTypes } from "./annotationTypes";
|
|
7
|
-
import filterSequenceString
|
|
7
|
+
import filterSequenceString, {
|
|
8
|
+
FilterSequenceStringOptions
|
|
9
|
+
} from "./filterSequenceString";
|
|
8
10
|
import tidyUpAnnotation from "./tidyUpAnnotation";
|
|
9
11
|
import getDegenerateDnaStringFromAaString from "./getDegenerateDnaStringFromAAString";
|
|
10
12
|
import { getFeatureTypes } from "./featureTypesAndColors";
|
|
11
13
|
import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
|
|
12
14
|
import { expandOrContractRangeByLength } from "@teselagen/range-utils";
|
|
13
15
|
|
|
14
|
-
|
|
16
|
+
import { SequenceData, Annotation } from "./types";
|
|
17
|
+
|
|
18
|
+
export interface TidyUpSequenceDataOptions {
|
|
19
|
+
annotationsAsObjects?: boolean;
|
|
20
|
+
logMessages?: boolean;
|
|
21
|
+
doNotRemoveInvalidChars?: boolean;
|
|
22
|
+
additionalValidChars?: string;
|
|
23
|
+
noTranslationData?: boolean;
|
|
24
|
+
getAcceptedInsertChars?: (options: FilterSequenceStringOptions) => string;
|
|
25
|
+
includeProteinSequence?: boolean;
|
|
26
|
+
doNotProvideIdsForAnnotations?: boolean;
|
|
27
|
+
noCdsTranslations?: boolean;
|
|
28
|
+
convertAnnotationsFromAAIndices?: boolean;
|
|
29
|
+
topLevelSeqData?: Partial<SequenceData>;
|
|
30
|
+
allowNonStandardGenbankTypes?: boolean;
|
|
31
|
+
isMixedRnaAndDna?: boolean;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export default function tidyUpSequenceData(
|
|
35
|
+
pSeqData: Partial<SequenceData>,
|
|
36
|
+
options: TidyUpSequenceDataOptions = {}
|
|
37
|
+
) {
|
|
15
38
|
const {
|
|
16
39
|
annotationsAsObjects,
|
|
17
40
|
logMessages,
|
|
@@ -22,14 +45,16 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
22
45
|
doNotProvideIdsForAnnotations,
|
|
23
46
|
noCdsTranslations,
|
|
24
47
|
convertAnnotationsFromAAIndices,
|
|
48
|
+
isMixedRnaAndDna,
|
|
49
|
+
getAcceptedInsertChars,
|
|
25
50
|
topLevelSeqData
|
|
26
51
|
} = options;
|
|
27
|
-
let seqData = cloneDeep(pSeqData); //sequence is usually immutable, so we clone it and return it
|
|
52
|
+
let seqData = cloneDeep(pSeqData) as SequenceData; //sequence is usually immutable, so we clone it and return it
|
|
28
53
|
const response = {
|
|
29
54
|
messages: []
|
|
30
55
|
};
|
|
31
56
|
if (!seqData) {
|
|
32
|
-
seqData = {};
|
|
57
|
+
seqData = { sequence: "" } as SequenceData;
|
|
33
58
|
}
|
|
34
59
|
if (!seqData.sequence) {
|
|
35
60
|
seqData.sequence = "";
|
|
@@ -58,14 +83,19 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
58
83
|
if (!doNotRemoveInvalidChars) {
|
|
59
84
|
if (seqData.isProtein) {
|
|
60
85
|
const [newSeq] = filterSequenceString(seqData.proteinSequence, {
|
|
86
|
+
isMixedRnaAndDna,
|
|
87
|
+
additionalValidChars,
|
|
61
88
|
...(topLevelSeqData || seqData),
|
|
62
|
-
isProtein: true
|
|
89
|
+
isProtein: true,
|
|
90
|
+
getAcceptedInsertChars
|
|
63
91
|
});
|
|
64
92
|
seqData.proteinSequence = newSeq;
|
|
65
93
|
} else {
|
|
66
94
|
const [newSeq] = filterSequenceString(seqData.sequence, {
|
|
67
95
|
additionalValidChars,
|
|
68
|
-
|
|
96
|
+
isMixedRnaAndDna,
|
|
97
|
+
...(topLevelSeqData || seqData),
|
|
98
|
+
getAcceptedInsertChars
|
|
69
99
|
});
|
|
70
100
|
seqData.sequence = newSeq;
|
|
71
101
|
}
|
|
@@ -94,10 +124,10 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
94
124
|
? seqData.proteinSize
|
|
95
125
|
: seqData.proteinSequence.length;
|
|
96
126
|
if (
|
|
97
|
-
seqData.circular === "false" ||
|
|
127
|
+
seqData.circular === ("false" as unknown) ||
|
|
98
128
|
/* eslint-disable eqeqeq*/
|
|
99
129
|
|
|
100
|
-
seqData.circular == -1 ||
|
|
130
|
+
seqData.circular == (-1 as unknown) ||
|
|
101
131
|
/* eslint-enable eqeqeq*/
|
|
102
132
|
seqData.circular === false ||
|
|
103
133
|
(!seqData.circular && seqData.sequenceTypeCode !== "CIRCULAR_DNA")
|
|
@@ -110,26 +140,31 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
110
140
|
|
|
111
141
|
annotationTypes.forEach(annotationType => {
|
|
112
142
|
if (!Array.isArray(seqData[annotationType])) {
|
|
113
|
-
if (
|
|
114
|
-
seqData[annotationType]
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
143
|
+
if (
|
|
144
|
+
seqData[annotationType] &&
|
|
145
|
+
typeof seqData[annotationType] === "object"
|
|
146
|
+
) {
|
|
147
|
+
seqData[annotationType] = Object.keys(
|
|
148
|
+
seqData[annotationType] as object
|
|
149
|
+
).map(key => {
|
|
150
|
+
return (seqData[annotationType] as Record<string, unknown>)[key];
|
|
151
|
+
});
|
|
119
152
|
} else {
|
|
120
153
|
seqData[annotationType] = [];
|
|
121
154
|
}
|
|
122
155
|
}
|
|
123
|
-
seqData[annotationType] = seqData[annotationType].filter(
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
156
|
+
seqData[annotationType] = (seqData[annotationType] as Annotation[]).filter(
|
|
157
|
+
annotation => {
|
|
158
|
+
return tidyUpAnnotation(annotation, {
|
|
159
|
+
...options,
|
|
160
|
+
featureTypes,
|
|
161
|
+
sequenceData: seqData,
|
|
162
|
+
convertAnnotationsFromAAIndices,
|
|
163
|
+
mutative: true,
|
|
164
|
+
annotationType
|
|
165
|
+
});
|
|
166
|
+
}
|
|
167
|
+
);
|
|
133
168
|
});
|
|
134
169
|
|
|
135
170
|
if (!noTranslationData) {
|
|
@@ -138,7 +173,12 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
138
173
|
//filter off cds translations
|
|
139
174
|
return [];
|
|
140
175
|
}
|
|
141
|
-
const codonStart =
|
|
176
|
+
const codonStart =
|
|
177
|
+
(
|
|
178
|
+
(translation?.notes as Record<string, unknown>)?.[
|
|
179
|
+
"codon_start"
|
|
180
|
+
] as number[]
|
|
181
|
+
)?.[0] - 1 || 0;
|
|
142
182
|
const expandedRange = expandOrContractRangeByLength(
|
|
143
183
|
translation,
|
|
144
184
|
-codonStart,
|
|
@@ -148,8 +188,9 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
148
188
|
if (!expandedRange.aminoAcids && !seqData.noSequence) {
|
|
149
189
|
expandedRange.aminoAcids = getAminoAcidDataForEachBaseOfDna(
|
|
150
190
|
seqData.sequence,
|
|
151
|
-
expandedRange.forward,
|
|
152
|
-
expandedRange
|
|
191
|
+
expandedRange.forward || false,
|
|
192
|
+
expandedRange,
|
|
193
|
+
false
|
|
153
194
|
);
|
|
154
195
|
}
|
|
155
196
|
return expandedRange;
|
|
@@ -158,19 +199,22 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
158
199
|
|
|
159
200
|
if (annotationsAsObjects) {
|
|
160
201
|
annotationTypes.forEach(name => {
|
|
161
|
-
seqData[name] = seqData[name].reduce(
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
202
|
+
seqData[name] = (seqData[name] as Annotation[]).reduce(
|
|
203
|
+
(acc: Record<string, Annotation>, item: Annotation) => {
|
|
204
|
+
let itemId;
|
|
205
|
+
if (item.id || item.id === 0) {
|
|
206
|
+
itemId = item.id;
|
|
207
|
+
} else {
|
|
208
|
+
itemId = shortid();
|
|
209
|
+
if (!doNotProvideIdsForAnnotations) {
|
|
210
|
+
item.id = itemId; //assign the newly created id to the item
|
|
211
|
+
}
|
|
169
212
|
}
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
213
|
+
acc[itemId] = item;
|
|
214
|
+
return acc;
|
|
215
|
+
},
|
|
216
|
+
{}
|
|
217
|
+
);
|
|
174
218
|
});
|
|
175
219
|
}
|
|
176
220
|
if (logMessages && response.messages.length > 0) {
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import type { Range } from "@teselagen/range-utils";
|
|
2
|
+
|
|
3
|
+
export interface Annotation extends Range {
|
|
4
|
+
id?: string | number;
|
|
5
|
+
name?: string;
|
|
6
|
+
forward?: boolean;
|
|
7
|
+
strand?: number | string;
|
|
8
|
+
type?: string;
|
|
9
|
+
color?: string;
|
|
10
|
+
notes?: Record<string, unknown> | string;
|
|
11
|
+
annotationTypePlural?: string;
|
|
12
|
+
translationType?: string;
|
|
13
|
+
[key: string]: unknown;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
export interface SequenceData {
|
|
17
|
+
id?: string;
|
|
18
|
+
sequence: string;
|
|
19
|
+
proteinSequence?: string;
|
|
20
|
+
circular?: boolean;
|
|
21
|
+
isProtein?: boolean;
|
|
22
|
+
isRna?: boolean;
|
|
23
|
+
size?: number;
|
|
24
|
+
proteinSize?: number;
|
|
25
|
+
name?: string;
|
|
26
|
+
description?: string;
|
|
27
|
+
features?: Annotation[];
|
|
28
|
+
parts?: Annotation[];
|
|
29
|
+
translations?: Annotation[];
|
|
30
|
+
primers?: Annotation[];
|
|
31
|
+
cutsites?: Annotation[];
|
|
32
|
+
orfs?: Annotation[];
|
|
33
|
+
guides?: Annotation[];
|
|
34
|
+
noSequence?: boolean;
|
|
35
|
+
sequenceTypeCode?: string;
|
|
36
|
+
aminoAcidDataForEachBaseOfDNA?: unknown[];
|
|
37
|
+
chromatogramData?: ChromatogramData;
|
|
38
|
+
[key: string]: unknown;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export interface ChromatogramData {
|
|
42
|
+
baseTraces?: unknown[];
|
|
43
|
+
baseCalls?: unknown[];
|
|
44
|
+
qualNums?: unknown[];
|
|
45
|
+
basePos?: unknown[];
|
|
46
|
+
[key: string]: unknown;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
export interface RestrictionEnzyme {
|
|
50
|
+
name: string;
|
|
51
|
+
site: string;
|
|
52
|
+
forwardRegex: string;
|
|
53
|
+
reverseRegex: string;
|
|
54
|
+
topSnipOffset?: number;
|
|
55
|
+
bottomSnipOffset?: number;
|
|
56
|
+
cutType?: number; // 0 or 1
|
|
57
|
+
usForward?: number; // upstream forward
|
|
58
|
+
usReverse?: number;
|
|
59
|
+
[key: string]: unknown;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
export interface CutSite extends Annotation {
|
|
63
|
+
topSnipPosition: number | null;
|
|
64
|
+
bottomSnipPosition: number | null;
|
|
65
|
+
overhangSize: number;
|
|
66
|
+
overhangBps?: string;
|
|
67
|
+
restrictionEnzyme: RestrictionEnzyme;
|
|
68
|
+
upstreamTopSnip?: number | null;
|
|
69
|
+
upstreamBottomSnip?: number | null;
|
|
70
|
+
topSnipBeforeBottom?: boolean;
|
|
71
|
+
upstreamTopBeforeBottom?: boolean;
|
|
72
|
+
cutType?: number;
|
|
73
|
+
cutsTwice?: boolean;
|
|
74
|
+
recognitionSiteRange?: Range;
|
|
75
|
+
isOverhangIncludedInFragmentSize?: boolean;
|
|
76
|
+
[key: string]: unknown;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
export interface DigestFragment extends Range {
|
|
80
|
+
isFormedFromLinearEnd?: boolean;
|
|
81
|
+
madeFromOneCutsite?: boolean;
|
|
82
|
+
size: number;
|
|
83
|
+
cut1: CutSite;
|
|
84
|
+
cut2: CutSite;
|
|
85
|
+
id: string;
|
|
86
|
+
name: string;
|
|
87
|
+
onFragmentSelect?: () => void;
|
|
88
|
+
[key: string]: unknown;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
export interface AminoAcidData {
|
|
92
|
+
fullCodon: boolean | null;
|
|
93
|
+
aminoAcid: { value: string } | null;
|
|
94
|
+
aminoAcidIndex: number | null;
|
|
95
|
+
positionInCodon?: number | null;
|
|
96
|
+
sequenceIndex?: number | null;
|
|
97
|
+
codonRange?: { start: number; end: number } | null;
|
|
98
|
+
}
|