@teselagen/sequence-utils 0.3.37 → 0.3.38-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DNAComplementMap.d.ts +1 -1
- package/addGapsToSeqReads.d.ts +16 -3
- package/adjustAnnotationsToInsert.d.ts +2 -1
- package/adjustBpsToReplaceOrInsert.d.ts +2 -1
- package/aliasedEnzymesByName.d.ts +37 -1
- package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
- package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
- package/annotateSingleSeq.d.ts +5 -4
- package/annotationTypes.d.ts +2 -2
- package/autoAnnotate.d.ts +17 -8
- package/bioData.d.ts +10 -58
- package/calculateEndStability.d.ts +1 -1
- package/calculateNebTa.d.ts +6 -1
- package/calculateNebTm.d.ts +6 -4
- package/calculatePercentGC.d.ts +1 -1
- package/calculateSantaLuciaTm.d.ts +28 -114
- package/calculateTm.d.ts +13 -1
- package/computeDigestFragments.d.ts +30 -24
- package/condensePairwiseAlignmentDifferences.d.ts +1 -1
- package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
- package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
- package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
- package/defaultEnzymesByName.d.ts +2 -1
- package/degenerateDnaToAminoAcidMap.d.ts +1 -1
- package/degenerateRnaToAminoAcidMap.d.ts +1 -1
- package/deleteSequenceDataAtRange.d.ts +2 -1
- package/diffUtils.d.ts +9 -7
- package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
- package/featureTypesAndColors.d.ts +19 -6
- package/filterSequenceString.d.ts +14 -10
- package/findApproxMatches.d.ts +7 -1
- package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
- package/findOrfsInPlasmid.d.ts +2 -11
- package/findSequenceMatches.d.ts +11 -1
- package/generateAnnotations.d.ts +2 -1
- package/generateSequenceData.d.ts +8 -13
- package/getAllInsertionsInSeqReads.d.ts +11 -1
- package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
- package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
- package/getAminoAcidStringFromSequenceString.d.ts +3 -1
- package/getCodonRangeForAASliver.d.ts +3 -4
- package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getComplementSequenceAndAnnotations.d.ts +5 -1
- package/getComplementSequenceString.d.ts +1 -1
- package/getCutsiteType.d.ts +2 -1
- package/getCutsitesFromSequence.d.ts +2 -1
- package/getDegenerateDnaStringFromAAString.d.ts +1 -1
- package/getDegenerateRnaStringFromAAString.d.ts +1 -1
- package/getDigestFragmentsForCutsites.d.ts +4 -1
- package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
- package/getInsertBetweenVals.d.ts +2 -1
- package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
- package/getOrfsFromSequence.d.ts +17 -11
- package/getOverlapBetweenTwoSequences.d.ts +2 -1
- package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
- package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAnnotation.d.ts +11 -1
- package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
- package/getReverseComplementSequenceString.d.ts +1 -1
- package/getReverseSequenceString.d.ts +1 -1
- package/getSequenceDataBetweenRange.d.ts +9 -1
- package/getVirtualDigest.d.ts +11 -10
- package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
- package/index.cjs +732 -483
- package/index.d.ts +8 -5
- package/index.js +732 -483
- package/index.umd.cjs +732 -483
- package/insertGapsIntoRefSeq.d.ts +2 -1
- package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
- package/isEnzymeType2S.d.ts +2 -1
- package/mapAnnotationsToRows.d.ts +9 -1
- package/package.json +9 -6
- package/prepareCircularViewData.d.ts +2 -1
- package/prepareRowData.d.ts +7 -3
- package/proteinAlphabet.d.ts +1 -1
- package/rotateBpsToPosition.d.ts +1 -1
- package/rotateSequenceDataToPosition.d.ts +3 -1
- package/shiftAnnotationsByLen.d.ts +4 -3
- package/src/DNAComplementMap.ts +32 -0
- package/src/addGapsToSeqReads.ts +436 -0
- package/src/adjustAnnotationsToInsert.ts +20 -0
- package/src/adjustBpsToReplaceOrInsert.ts +73 -0
- package/src/aliasedEnzymesByName.ts +7366 -0
- package/src/aminoAcidToDegenerateDnaMap.ts +32 -0
- package/src/aminoAcidToDegenerateRnaMap.ts +32 -0
- package/src/annotateSingleSeq.ts +37 -0
- package/src/annotationTypes.ts +23 -0
- package/src/autoAnnotate.test.js +0 -1
- package/src/autoAnnotate.ts +290 -0
- package/src/bioData.ts +65 -0
- package/src/calculateEndStability.ts +91 -0
- package/src/calculateNebTa.ts +46 -0
- package/src/calculateNebTm.ts +132 -0
- package/src/calculatePercentGC.ts +3 -0
- package/src/calculateSantaLuciaTm.ts +184 -0
- package/src/calculateTm.ts +242 -0
- package/src/computeDigestFragments.ts +238 -0
- package/src/condensePairwiseAlignmentDifferences.ts +85 -0
- package/src/convertAACaretPositionOrRangeToDna.ts +28 -0
- package/src/convertDnaCaretPositionOrRangeToAA.ts +28 -0
- package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
- package/src/defaultEnzymesByName.ts +280 -0
- package/src/degenerateDnaToAminoAcidMap.ts +5 -0
- package/src/degenerateRnaToAminoAcidMap.ts +5 -0
- package/src/deleteSequenceDataAtRange.ts +13 -0
- package/src/diffUtils.ts +80 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
- package/src/featureTypesAndColors.ts +167 -0
- package/src/filterSequenceString.ts +153 -0
- package/src/findApproxMatches.ts +58 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.ts +43 -0
- package/src/findOrfsInPlasmid.js +6 -1
- package/src/findOrfsInPlasmid.ts +31 -0
- package/src/findSequenceMatches.ts +154 -0
- package/src/generateAnnotations.ts +39 -0
- package/src/generateSequenceData.ts +212 -0
- package/src/getAllInsertionsInSeqReads.ts +100 -0
- package/src/getAminoAcidDataForEachBaseOfDna.ts +305 -0
- package/src/getAminoAcidFromSequenceTriplet.ts +27 -0
- package/src/getAminoAcidStringFromSequenceString.ts +36 -0
- package/src/getCodonRangeForAASliver.ts +73 -0
- package/src/getComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getComplementSequenceAndAnnotations.ts +25 -0
- package/src/getComplementSequenceString.ts +23 -0
- package/src/getCutsiteType.ts +18 -0
- package/src/getCutsitesFromSequence.ts +22 -0
- package/src/getDegenerateDnaStringFromAAString.ts +15 -0
- package/src/getDegenerateRnaStringFromAAString.ts +15 -0
- package/src/getDigestFragmentsForCutsites.ts +126 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
- package/src/getInsertBetweenVals.ts +31 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +40 -0
- package/src/getMassOfAaString.ts +29 -0
- package/src/getOrfsFromSequence.ts +132 -0
- package/src/getOverlapBetweenTwoSequences.ts +30 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.ts +149 -0
- package/src/getReverseAminoAcidStringFromSequenceString.ts +22 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getReverseComplementAnnotation.ts +33 -0
- package/src/getReverseComplementSequenceAndAnnotations.ts +46 -0
- package/src/getReverseComplementSequenceString.ts +18 -0
- package/src/getReverseSequenceString.ts +12 -0
- package/src/getSequenceDataBetweenRange.ts +154 -0
- package/src/getVirtualDigest.ts +139 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.ts +39 -0
- package/src/index.test.ts +43 -0
- package/src/index.ts +111 -0
- package/src/insertGapsIntoRefSeq.ts +43 -0
- package/src/insertSequenceDataAtPosition.ts +2 -0
- package/src/insertSequenceDataAtPositionOrRange.ts +328 -0
- package/src/isEnzymeType2S.ts +5 -0
- package/src/mapAnnotationsToRows.ts +256 -0
- package/src/prepareCircularViewData.ts +24 -0
- package/src/prepareRowData.ts +61 -0
- package/src/prepareRowData_output1.json +1 -0
- package/src/proteinAlphabet.ts +271 -0
- package/src/rotateBpsToPosition.ts +12 -0
- package/src/rotateSequenceDataToPosition.ts +54 -0
- package/src/shiftAnnotationsByLen.ts +24 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.ts +198 -0
- package/src/tidyUpAnnotation.ts +205 -0
- package/src/tidyUpSequenceData.ts +213 -0
- package/src/types.ts +109 -0
- package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
- package/tidyUpAnnotation.d.ts +13 -11
- package/tidyUpSequenceData.d.ts +15 -1
- package/types.d.ts +105 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import {
|
|
2
|
+
normalizePositionByRangeLength,
|
|
3
|
+
getRangeLength
|
|
4
|
+
} from "@teselagen/range-utils";
|
|
5
|
+
import { CutSite, DigestFragment } from "./types";
|
|
6
|
+
|
|
7
|
+
/**
 * Builds the digest fragments delimited by the given cut sites.
 *
 * Cut sites are ordered by `topSnipPosition` (a missing position counts as 0).
 * For a linear sequence two pseudo cut sites of type "START_OR_END_OF_SEQ" are
 * added at position 0 and at `sequenceLength`, so the leading and trailing
 * pieces are reported too. For a circular sequence the last cut site is paired
 * with the first one.
 *
 * @param sequenceLength length of the digested sequence
 * @param circular whether the sequence is circular
 * @param cutsites cut sites to digest with; the array is not modified
 * @param opts `computePartialDigests` additionally pairs every cut site with
 *   the non-adjacent cut sites (except the first one in sort order)
 * @returns one fragment per cut-site pair, skipping fragments whose size is 0;
 *   an empty array when there are no cut sites
 */
export default function getDigestFragmentsForCutsites(
  sequenceLength: number,
  circular: boolean,
  cutsites: CutSite[],
  opts: { computePartialDigests?: boolean } = {}
): DigestFragment[] {
  if (!cutsites.length) return [];

  // Pseudo cut site marking one end of a linear sequence.
  const makeSequenceBoundary = (position: number, name: string): CutSite => ({
    start: position,
    end: position,
    topSnipPosition: position,
    bottomSnipPosition: position,
    overhangSize: 0,
    type: "START_OR_END_OF_SEQ",
    name,
    restrictionEnzyme: {
      name,
      site: "",
      forwardRegex: "",
      reverseRegex: ""
    }
  });

  // Array.prototype.sort works in place, so sort a copy to leave the caller's
  // array in its original order.
  const byTopSnip = [...cutsites].sort(
    (a, b) => (a.topSnipPosition || 0) - (b.topSnipPosition || 0)
  );
  const sortedCutsites = circular
    ? byTopSnip
    : [
        makeSequenceBoundary(0, "START_OF_SEQ"),
        ...byTopSnip,
        makeSequenceBoundary(sequenceLength, "END_OF_SEQ")
      ];

  const pairs: [CutSite, CutSite][] = [];
  sortedCutsites.forEach((cutsite1, index) => {
    const nextCutsite = sortedCutsites[index + 1];
    if (!circular && !nextCutsite) {
      // linear sequence: the END_OF_SEQ marker never starts a fragment
      return;
    }
    if (opts.computePartialDigests) {
      sortedCutsites.forEach((cutsite2, index2) => {
        // the adjacent pair is pushed below; the first cut site is never
        // used as a partial-digest end point
        if (index2 === index + 1 || index2 === 0) {
          return;
        }
        pairs.push([cutsite1, cutsite2]);
      });
    }
    // wrap around to the first cut site when there is no next one (circular)
    pairs.push([cutsite1, nextCutsite || sortedCutsites[0]]);
  });

  const fragments = pairs.map(([cut1, cut2]): DigestFragment => {
    const start = normalizePositionByRangeLength(
      cut1.topSnipPosition || 0,
      sequenceLength
    );
    // the fragment ends on the base just before the second top-strand snip
    const end = normalizePositionByRangeLength(
      (cut2.topSnipPosition || 0) - 1,
      sequenceLength
    );
    const size = getRangeLength({ start, end }, sequenceLength);

    return {
      cut1: {
        ...cut1,
        isOverhangIncludedInFragmentSize:
          cut1.type !== "START_OR_END_OF_SEQ" &&
          cut1.overhangSize > 0 &&
          cut1.topSnipBeforeBottom
      },
      cut2: {
        ...cut2,
        isOverhangIncludedInFragmentSize:
          cut2.type !== "START_OR_END_OF_SEQ" &&
          cut2.overhangSize > 0 &&
          !cut2.topSnipBeforeBottom
      },
      start,
      end,
      size,
      id: `${start}-${end}-${size}-`,
      name: `${cut1.restrictionEnzyme.name} -- ${cut2.restrictionEnzyme.name} ${size} bps`
    };
  });

  // Drop fragments without a size. The previous code ran this filter but
  // discarded its result, so such fragments were returned anyway.
  return fragments.filter(fragment => Boolean(fragment.size));
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { computeDigestFragments } from "./computeDigestFragments";
|
|
2
|
+
import getCutsitesFromSequence from "./getCutsitesFromSequence";
|
|
3
|
+
import { CutSite, RestrictionEnzyme } from "./types";
|
|
4
|
+
import { flatMap, uniqBy } from "lodash-es";
|
|
5
|
+
|
|
6
|
+
/**
 * Cuts `sequence` with one or more restriction enzymes and returns the
 * resulting digest fragments.
 *
 * The cut sites found by `getCutsitesFromSequence` are handed to
 * `computeDigestFragments` together with every entry of `options`. Fragments
 * sharing the same start, end and size are reported once. For a circular
 * sequence with partial digests enabled, the first fragment spanning the whole
 * sequence is removed.
 *
 * @param sequence the sequence to digest
 * @param circular whether the sequence is circular
 * @param contextEnzymes a single enzyme or a list of enzymes
 * @param options forwarded to `computeDigestFragments`;
 *   `computePartialDigests` is accepted as an alias of `computePartialDigest`
 * @returns fragments ordered by start, larger fragments first on equal start
 */
export default function getDigestFragmentsForRestrictionEnzymes(
  sequence: string,
  circular: boolean,
  contextEnzymes: RestrictionEnzyme[] | RestrictionEnzyme,
  options?: {
    computePartialDigest?: boolean;
    computePartialDigests?: boolean; // alias
    computeDigestDisabled?: boolean;
    computePartialDigestDisabled?: boolean;
    includeOverAndUnderHangs?: boolean;
  }
) {
  const enzymeList = Array.isArray(contextEnzymes)
    ? contextEnzymes
    : [contextEnzymes];
  const wantsPartialDigest =
    options?.computePartialDigest || options?.computePartialDigests;

  const cutsitesByName = getCutsitesFromSequence(sequence, circular, enzymeList);
  const { fragments: allFragments } = computeDigestFragments({
    cutsites: flatMap(cutsitesByName) as CutSite[],
    sequenceLength: sequence.length,
    circular,
    ...options,
    computePartialDigest: wantsPartialDigest
  });

  const uniqueFragments = uniqBy(
    allFragments,
    ({ start, end, size }) => `${start}-${end}-${size}`
  );

  if (circular && wantsPartialDigest) {
    // drop the (first) fragment that covers the whole sequence
    const fullLengthIndex = uniqueFragments.findIndex(
      ({ size }) => size === sequence.length
    );
    if (fullLengthIndex !== -1) {
      uniqueFragments.splice(fullLengthIndex, 1);
    }
  }

  return uniqueFragments.sort((a, b) => {
    const startDelta = a.start - b.start;
    return startDelta || b.size - a.size;
  });
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import {
|
|
2
|
+
normalizePositionByRangeLength1Based,
|
|
3
|
+
Range
|
|
4
|
+
} from "@teselagen/range-utils";
|
|
5
|
+
|
|
6
|
+
/**
 * Computes the pair of positions an insertion would sit between.
 *
 * - With a selection (`selectionLayer.start > -1`) the pair is derived from
 *   `selectionLayer.start` and `selectionLayer.end + 2`.
 * - Otherwise, with a caret (`caretPosition > -1`) it is derived from
 *   `caretPosition` and `caretPosition + 1`.
 * - With neither, `[sequenceLength, 1]` is returned.
 *
 * In the first two cases both values go through
 * `normalizePositionByRangeLength1Based`.
 *
 * @param caretPosition caret position, or a value <= -1 when there is no caret
 * @param selectionLayer current selection; `start <= -1` means no selection
 * @param sequenceLength length of the sequence
 */
export default function getInsertBetweenVals(
  caretPosition: number,
  selectionLayer: Range,
  sequenceLength: number
): [number, number] {
  const hasSelection = selectionLayer.start > -1;
  const hasCaret = caretPosition > -1;

  if (!hasSelection && !hasCaret) {
    return [sequenceLength, 1];
  }

  // the selection takes precedence over the caret
  const before = hasSelection ? selectionLayer.start : caretPosition;
  const after = hasSelection ? selectionLayer.end + 2 : caretPosition + 1;

  return [
    normalizePositionByRangeLength1Based(before, sequenceLength),
    normalizePositionByRangeLength1Based(after, sequenceLength)
  ];
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import {
|
|
2
|
+
isPositionWithinRange,
|
|
3
|
+
getSequenceWithinRange,
|
|
4
|
+
normalizePositionByRangeLength,
|
|
5
|
+
isPositionCloserToRangeStartThanRangeEnd,
|
|
6
|
+
Range
|
|
7
|
+
} from "@teselagen/range-utils";
|
|
8
|
+
|
|
9
|
+
/**
 * Splits the part of `sequence` covered by `range` at `position`.
 *
 * When `position` lies within `range`, `leftHandSide` is the sequence from
 * `range.start` up to the base before `position` and `rightHandSide` is the
 * sequence from `position` to `range.end`. When it lies outside, the whole
 * range goes to `rightHandSide` if the position is closer to the range start,
 * otherwise to `leftHandSide`; the other side is the empty string.
 *
 * @param range the range to split
 * @param position the split position
 * @param sequence the full sequence the range refers to
 */
export default function getLeftAndRightOfSequenceInRangeGivenPosition(
  range: Range,
  position: number,
  sequence: string
): { leftHandSide: string; rightHandSide: string } {
  const sequenceLength = sequence.length;

  if (isPositionWithinRange(position, range, sequenceLength)) {
    const leftRange = {
      start: range.start,
      end: normalizePositionByRangeLength(position - 1, sequenceLength)
    };
    const leftHandSide = getSequenceWithinRange(leftRange, sequence) as string;
    const rightHandSide = getSequenceWithinRange(
      { start: position, end: range.end },
      sequence
    ) as string;
    return { leftHandSide, rightHandSide };
  }

  const closerToStart = isPositionCloserToRangeStartThanRangeEnd(
    position,
    range,
    sequenceLength
  );
  const wholeRange = getSequenceWithinRange(range, sequence) as string;
  return closerToStart
    ? { leftHandSide: "", rightHandSide: wholeRange }
    : { leftHandSide: wholeRange, rightHandSide: "" };
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import proteinAlphabet from "./proteinAlphabet";
|
|
2
|
+
|
|
3
|
+
/**
 * Sums the masses of the amino acids in a string.
 *
 * Each character is looked up in `proteinAlphabet` and its `mass` is added up.
 * The sum is optionally divided by three, then 18.0153 is added once for any
 * non-empty string, and the result is rounded.
 *
 * @param aaString a string of amino acid characters; every character must be
 *   a key of `proteinAlphabet`
 * @param numsAfterDecimal number of digits to keep after the decimal point
 * @param divideByThree divide the summed residue mass by three; useful when
 *   nucleotides were converted to amino acids in a way that makes each amino
 *   acid appear three times
 * @returns the rounded mass, or 0 for an empty string
 */
export default function getMassOfAaString(
  aaString: string,
  numsAfterDecimal = 2,
  divideByThree = false
) {
  const massTable = proteinAlphabet as Record<string, { mass: number }>;

  const residueMass = aaString
    .split("")
    .reduce((total, residue) => total + massTable[residue].mass, 0);
  const scaledMass = divideByThree ? residueMass / 3 : residueMass;
  // constant added once per non-empty string, after the optional division
  const totalMass = aaString.length > 0 ? scaledMass + 18.0153 : scaledMass;

  const factor = 10 ** numsAfterDecimal;
  return Math.round(totalMass * factor) / factor;
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import shortid from "shortid";
|
|
2
|
+
import { Annotation } from "./types";
|
|
3
|
+
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
4
|
+
|
|
5
|
+
/** Options accepted by `getOrfsFromSequence`. */
interface GetOrfsOptions {
  /** The sequence to scan for open reading frames. */
  sequence: string;
  /** Minimum matched length (start codon through stop codon) for an ORF to be kept. */
  minimumOrfSize: number;
  /** `true` scans the sequence as given, `false` scans its reverse complement. */
  forward: boolean;
  /** When set, the sequence is doubled before scanning so ORFs may wrap past its end. */
  circular?: boolean;
  /** When set, GTG and CTG (or GUG/CUG) count as start codons in addition to ATG/AUG. */
  useAdditionalOrfStartCodons?: boolean;
}
|
|
12
|
+
|
|
13
|
+
/** An open reading frame as returned by `getOrfsFromSequence`. */
export interface Orf extends Annotation {
  /** Positions of start codons of shorter ORFs that end at the same stop codon. */
  internalStartCodonIndices: number[];
  /** Start index of the match on the scanned strand, modulo 3. */
  frame: number;
  // annotationTypePlural: string; // Annotation might already have this or it's extra
  /** Set to `true` on every ORF produced by `getOrfsFromSequence`. */
  isOrf: boolean;
  /** Internal marker for ORFs that were merged into a longer one. */
  remove?: boolean;
  /** Number of bases matched, from the start codon through the stop codon. */
  length: number;
}
|
|
21
|
+
|
|
22
|
+
/**
 * @private
 * Finds open reading frames (ORFs) on one strand of a sequence.
 *
 * A candidate ORF is a start codon followed by whole codons up to the first
 * in-frame stop codon (TAG, TAA or TGA; U is accepted in place of T). ORFs
 * that end at the same stop codon are merged: only the longest one is
 * returned, and the start positions of the shorter ones are recorded in its
 * `internalStartCodonIndices`.
 *
 * options.sequence - The sequence to scan.
 * options.minimumOrfSize - Minimum matched length of an ORF to return.
 * options.forward - `true` scans the sequence as given; `false` scans its
 *   reverse complement and reports coordinates mapped back onto the given
 *   sequence.
 * options.circular - Scan the doubled sequence so ORFs may wrap past the end;
 *   only ORFs starting within the original length are kept.
 * options.useAdditionalOrfStartCodons - Also accept GTG and CTG as start codons.
 * return - The list of ORFs found.
 */
export default function getOrfsFromSequence(options: GetOrfsOptions): Orf[] {
  const { minimumOrfSize, forward, circular, useAdditionalOrfStartCodons } =
    options;
  const originalSequenceLength = options.sequence.length;

  // reverse-strand ORFs are found by scanning the reverse complement
  let sequence = forward
    ? options.sequence
    : getReverseComplementSequenceString(options.sequence);
  if (circular) {
    // scan twice the sequence, ORFs starting in the second copy are dropped below
    sequence += sequence;
  }

  // The whole ORF sits inside a lookahead so overlapping ORFs are all found.
  const re = useAdditionalOrfStartCodons
    ? /(?=((?:A[TU]G|G[TU]G|C[TU]G)(?:.{3})*?(?:[TU]AG|[TU]AA|[TU]GA)))/gi
    : /(?=((?:A[TU]G)(?:.{3})*?(?:[TU]AG|[TU]AA|[TU]GA)))/gi;

  const orfRanges: Orf[] = [];
  let m: RegExpExecArray | null;
  while ((m = re.exec(sequence)) !== null) {
    // lookahead matches are zero-length: advance manually to avoid looping forever
    if (m.index === re.lastIndex) {
      re.lastIndex++;
    }
    const orfLength = m[1].length;
    const start = m.index;
    // keep ORFs of sufficient size that begin within the original sequence
    if (orfLength >= minimumOrfSize && start < originalSequenceLength) {
      let end = start + orfLength - 1;
      if (end >= originalSequenceLength) {
        // the ORF wraps around the end of a circular sequence
        end -= originalSequenceLength;
      }
      orfRanges.push({
        start,
        end,
        length: orfLength,
        internalStartCodonIndices: [],
        frame: start % 3,
        forward,
        isOrf: true,
        id: shortid(),
        type: "orf",
        name: "ORF"
      } as Orf);
    }
  }

  // Merge ORFs sharing a stop codon. All comparisons use coordinates of the
  // scanned strand; reverse-strand ORFs are mapped back only afterwards.
  // Mapping them while merging left a longer ORF that replaced an
  // already-mapped one in scanned-strand coordinates.
  const orfEnds: Record<number, number> = {};
  orfRanges.forEach((orf, index) => {
    const indexOfAlreadyExistingOrf = orfEnds[orf.end];
    if (typeof indexOfAlreadyExistingOrf === "undefined") {
      orfEnds[orf.end] = index;
      return;
    }

    let internalOrf = orf;
    let containingOrf = orfRanges[indexOfAlreadyExistingOrf];
    if (containingOrf.length < internalOrf.length) {
      // the later ORF is the longer one (possible for wrap-around ORFs)
      internalOrf = containingOrf;
      containingOrf = orf;
      orfEnds[orf.end] = index;
    }

    const internalStartCodonIndex = forward
      ? internalOrf.start
      : originalSequenceLength - internalOrf.start - 1;
    containingOrf.internalStartCodonIndices = [
      ...containingOrf.internalStartCodonIndices,
      ...internalOrf.internalStartCodonIndices,
      internalStartCodonIndex
    ];
    // flag the shorter, duplicated ORF for removal
    internalOrf.remove = true;
  });

  const nonDuplicatedOrfRanges = orfRanges.filter(orf => !orf.remove);

  if (!forward) {
    // map reverse-strand coordinates back onto the given sequence
    nonDuplicatedOrfRanges.forEach(orf => {
      const scannedEnd = orf.end;
      orf.end = originalSequenceLength - orf.start - 1;
      orf.start = originalSequenceLength - scannedEnd - 1;
    });
  }

  return nonDuplicatedOrfRanges;
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { modulatePositionByRange, Range } from "@teselagen/range-utils";
|
|
2
|
+
|
|
3
|
+
/**
 * Locates one sequence inside another, comparing case-insensitively.
 *
 * The searched sequence is concatenated with itself before searching, so a
 * match may run past its end and continue at its beginning.
 *
 * @param sequenceToFind the sequence to look for
 * @param sequenceToSearchIn the sequence to look in
 * @returns `null` if no match exists, otherwise a range whose `start` is the
 *   index of the first match and whose `end` is the index of the last matched
 *   base, wrapped into `0 .. sequenceToSearchIn.length - 1`
 */
export default function getOverlapBetweenTwoSequences(
  sequenceToFind: string,
  sequenceToSearchIn: string
): Range | null {
  const haystack = sequenceToSearchIn.toLowerCase();
  const needle = sequenceToFind.toLowerCase();

  const start = `${haystack}${haystack}`.indexOf(needle);
  if (start === -1) {
    return null;
  }

  const lastMatchedIndex = start + needle.length - 1;
  const searchedRange = { start: 0, end: haystack.length - 1 };
  return {
    start,
    end: modulatePositionByRange(lastMatchedIndex, searchedRange)
  };
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import getComplementSequenceString from "./getComplementSequenceString";
|
|
2
|
+
import { normalizePositionByRangeLength } from "@teselagen/range-utils";
|
|
3
|
+
import cutSequenceByRestrictionEnzyme from "./cutSequenceByRestrictionEnzyme";
|
|
4
|
+
import { CutSite, RestrictionEnzyme, SequenceData } from "./types";
|
|
5
|
+
|
|
6
|
+
/** A candidate part bounded by two restriction enzyme cut sites. */
export interface PartBetweenEnzymes {
  /** 0-based start of the part. */
  start: number;
  /** `start + 1`. */
  start1Based: number;
  /** 0-based end of the part. */
  end: number;
  /** `end + 1`. */
  end1Based: number;
  /** Cut site the part starts at. */
  firstCut: CutSite;
  /** Bottom snip offset minus top snip offset of the first cut's enzyme. */
  firstCutOffset: number;
  /** Overhang bases of the first cut, or "" when it has none. */
  firstCutOverhang: string;
  /** The first cut's overhang when `firstCutOffset > 0`, otherwise "". */
  firstCutOverhangTop: string;
  /** Complement of the first cut's overhang when `firstCutOffset < 0`, otherwise "". */
  firstCutOverhangBottom: string;
  /** Cut site the part ends at. */
  secondCut: CutSite;
  /** Bottom snip offset minus top snip offset of the second cut's enzyme. */
  secondCutOffset: number;
  /** Overhang bases of the second cut, or "" when it has none. */
  secondCutOverhang: string;
  /** The second cut's overhang when `secondCutOffset < 0`, otherwise "". */
  secondCutOverhangTop: string;
  /** Complement of the second cut's overhang when `secondCutOffset > 0`, otherwise "". */
  secondCutOverhangBottom: string;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Lists the parts that can be obtained from a sequence by cutting it with the
 * given restriction enzyme(s).
 *
 * - No cut site: no parts.
 * - One cut site: a single part running from that cut site back to itself.
 * - Otherwise: for every pair of cut sites, the part from the first to the
 *   second and the part from the second to the first. Parts whose start lies
 *   after their end are only included for circular sequences.
 *
 * @param seqData sequence data; `sequence` and `circular` are read
 * @param restrictionEnzymes a single enzyme or a list of enzymes
 */
export default function getPossiblePartsFromSequenceAndEnzymes(
  seqData: SequenceData,
  restrictionEnzymes: RestrictionEnzyme | RestrictionEnzyme[]
): PartBetweenEnzymes[] {
  const enzymeList = Array.isArray(restrictionEnzymes)
    ? restrictionEnzymes
    : [restrictionEnzymes];

  const bps = seqData.sequence;
  const seqLen = bps.length;
  const circular = seqData.circular || false;

  const cutsites = enzymeList.reduce<CutSite[]>(
    (found, enzyme) =>
      found.concat(cutSequenceByRestrictionEnzyme(bps, circular, enzyme)),
    []
  );

  if (cutsites.length < 1) {
    return [];
  }
  if (cutsites.length === 1) {
    const [onlyCut] = cutsites;
    return [getPartBetweenEnzymesWithInclusiveOverhangs(onlyCut, onlyCut, seqLen)];
  }

  const parts: PartBetweenEnzymes[] = [];
  for (const [cut1, cut2] of pairwise(cutsites)) {
    const candidates = [
      getPartBetweenEnzymesWithInclusiveOverhangs(cut1, cut2, seqLen),
      getPartBetweenEnzymesWithInclusiveOverhangs(cut2, cut1, seqLen)
    ];
    for (const candidate of candidates) {
      // origin-spanning parts only make sense on a circular sequence
      if (circular || !(candidate.start > candidate.end)) {
        parts.push(candidate);
      }
    }
  }
  return parts;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Describes the part running from `cut1` to `cut2`, overhangs included.
 *
 * The part starts at whichever snip of `cut1` comes first (top snip when
 * `topSnipBeforeBottom`, bottom snip otherwise) and ends one base before
 * whichever snip of `cut2` comes last, normalized against `seqLen`.
 * Missing snip positions count as 0 and missing overhang bases as "".
 */
function getPartBetweenEnzymesWithInclusiveOverhangs(
  cut1: CutSite,
  cut2: CutSite,
  seqLen: number
): PartBetweenEnzymes {
  //the offset is always counting with 0 being at the top snip position
  const firstCutOffset = getEnzymeRelativeOffset(cut1.restrictionEnzyme);
  const secondCutOffset = getEnzymeRelativeOffset(cut2.restrictionEnzyme);

  const leadingSnipOfCut1 = cut1.topSnipBeforeBottom
    ? cut1.topSnipPosition
    : cut1.bottomSnipPosition;
  const start = leadingSnipOfCut1 || 0;

  const trailingSnipOfCut2 = cut2.topSnipBeforeBottom
    ? cut2.bottomSnipPosition || 0
    : cut2.topSnipPosition || 0;
  const end = normalizePositionByRangeLength(trailingSnipOfCut2 - 1, seqLen);

  const firstCutOverhang = cut1.overhangBps || "";
  const secondCutOverhang = cut2.overhangBps || "";

  return {
    start,
    start1Based: start + 1,
    end,
    end1Based: end + 1,
    firstCut: cut1,
    firstCutOffset,
    firstCutOverhang,
    firstCutOverhangTop: firstCutOffset > 0 ? firstCutOverhang : "",
    firstCutOverhangBottom:
      firstCutOffset < 0 ? getComplementSequenceString(firstCutOverhang) : "",
    secondCut: cut2,
    secondCutOffset,
    secondCutOverhang,
    secondCutOverhangTop: secondCutOffset < 0 ? secondCutOverhang : "",
    secondCutOverhangBottom:
      secondCutOffset > 0 ? getComplementSequenceString(secondCutOverhang) : ""
  };
}
|
|
133
|
+
|
|
134
|
+
/**
 * Returns the enzyme's bottom snip offset relative to its top snip offset,
 * i.e. counted with 0 at the top snip position. Missing offsets count as 0.
 */
function getEnzymeRelativeOffset(enzyme: RestrictionEnzyme): number {
  const { topSnipOffset, bottomSnipOffset } = enzyme;
  const bottom = bottomSnipOffset || 0;
  const top = topSnipOffset || 0;
  return bottom - top;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Returns every unordered pair of elements of `list`.
 *
 * Pairs keep list order: all pairs starting with the first element come
 * first, then all pairs starting with the second one, and so on. Within a
 * pair the element that appears earlier in `list` comes first.
 *
 * Implemented iteratively so that long lists neither recurse once per element
 * nor re-copy the accumulated result at every level.
 *
 * @param list the elements to pair up
 * @returns the pairs; empty when `list` has fewer than two elements
 */
function pairwise<T>(list: T[]): T[][] {
  const pairs: T[][] = [];
  for (let i = 0; i < list.length - 1; i++) {
    for (let j = i + 1; j < list.length; j++) {
      pairs.push([list[i], list[j]]);
    }
  }
  return pairs;
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
|
|
2
|
+
|
|
3
|
+
/**
 * Builds an amino acid string from the per-base amino acid data that
 * `getAminoAcidDataForEachBaseOfDna(sequenceString, false, null, false)`
 * returns.
 *
 * Only entries belonging to a full codon contribute; each amino acid value is
 * placed at its `aminoAcidIndex` and the values are joined without separator.
 *
 * @param sequenceString the nucleotide sequence to translate
 */
export default function getReverseAminoAcidStringFromSequenceString(
  sequenceString: string
) {
  const perBaseData = getAminoAcidDataForEachBaseOfDna(
    sequenceString,
    false,
    null,
    false
  );

  const residuesByIndex: string[] = [];
  for (const entry of perBaseData) {
    if (!entry.fullCodon) {
      continue;
    }
    // all bases of a codon carry the same index, so this simply overwrites
    residuesByIndex[entry.aminoAcidIndex] = entry.aminoAcid.value;
  }
  return residuesByIndex.join("");
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
|
|
2
|
+
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
3
|
+
|
|
4
|
+
/**
 * Translates the reverse complement of a nucleotide sequence: the sequence is
 * reverse complemented first and the result is passed to
 * `getAminoAcidStringFromSequenceString`.
 *
 * @param sequenceString the nucleotide sequence
 */
export default function getReverseComplementAminoAcidStringFromSequenceString(
  sequenceString: string
): string {
  const reverseComplement = getReverseComplementSequenceString(sequenceString);
  return getAminoAcidStringFromSequenceString(reverseComplement);
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { Annotation } from "./types";
|
|
2
|
+
|
|
3
|
+
/**
 * Returns a copy of `annotation` as it looks after the whole sequence (or
 * subsequence) of length `sequenceLength` has been reverse complemented.
 *
 * Positions are mirrored, e.g. for a sequence of length 10:
 *
 *   0123456789
 *   -feature--   normal:              start = 1, end = 7
 *   --erutaef-   reverse complement:  start = 2 = 10 - (7 + 1)
 *                                     end   = 8 = 10 - (1 + 1)
 *
 * `forward` is negated. `strand` is negated when it is 1 or -1; for any other
 * value it is derived from the new `forward` flag (1 when forward, -1
 * otherwise) so the two never contradict each other. When present, each entry
 * of `locations` is mirrored the same way (only `start` and `end` are kept).
 *
 * @param annotation the annotation to convert; it is not modified
 * @param sequenceLength length of the sequence being reverse complemented
 */
export default function getReverseComplementAnnotation(
  annotation: Annotation,
  sequenceLength: number
) {
  const forward = !annotation.forward;

  // mirrors a 0-based inclusive range around the middle of the sequence
  const mirror = ({ start, end }: { start: number; end: number }) => ({
    start: sequenceLength - (end + 1),
    end: sequenceLength - (start + 1)
  });

  return Object.assign({}, annotation, {
    ...mirror(annotation),
    forward,
    strand:
      annotation.strand === 1
        ? -1
        : annotation.strand === -1
          ? 1
          : forward
            ? 1
            : -1,
    ...(annotation.locations && {
      locations: annotation.locations.map(location => mirror(location))
    })
  });
}
|