@teselagen/sequence-utils 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +2 -2
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import {normalizePositionByRangeLength, getRangeLength} from "@teselagen/range-utils";
|
|
2
|
+
|
|
3
|
+
/**
 * Builds the digest fragments produced by a set of cutsites.
 *
 * Cutsites are ordered by `topSnipPosition` and grouped into pairs; each pair
 * becomes one fragment that starts at the first cut's `topSnipPosition` and
 * ends on the position just before the second cut's `topSnipPosition`.
 * For a linear sequence two synthetic cutsites of type "START_OR_END_OF_SEQ"
 * are added at position 0 and at `sequenceLength`.
 *
 * @param {number} sequenceLength Length of the sequence being digested.
 * @param {boolean} circular Whether the sequence is circular.
 * @param {Array<Object>} cutsites Cutsites to cut at. `topSnipPosition`,
 *   `overhangSize`, `topSnipBeforeBottom` and `type` are read from each one.
 *   The array itself is left untouched (a copy is sorted).
 * @param {Object} [opts={}] Options.
 * @param {boolean} [opts.computePartialDigests] When truthy, each cutsite is
 *   additionally paired with the non-adjacent cutsites.
 * @returns {Array<Object>} Fragments shaped as
 *   `{ cut1, cut2, start, end, size, id }`. Fragments whose size is falsy are
 *   left out. Returns `[]` when there are no cutsites.
 */
export default function getDigestFragmentsForCutsites(
  sequenceLength,
  circular,
  cutsites,
  opts = {}
) {
  if (!cutsites.length) return [];

  // The default parameter only covers `undefined`; guard against `null` too.
  const computePartialDigests = Boolean(opts && opts.computePartialDigests);

  // Array.prototype.sort works in place, so sort a copy rather than
  // reordering the array owned by the caller.
  const cutsitesByTopSnip = [...cutsites].sort(
    (a, b) => a.topSnipPosition - b.topSnipPosition
  );

  // If linear, add 2 fake cutsites for the start and end of the seq.
  const sortedCutsites = circular
    ? cutsitesByTopSnip
    : [
        {
          topSnipPosition: 0,
          bottomSnipPosition: 0,
          overhangSize: 0,
          type: "START_OR_END_OF_SEQ",
          name: "START_OF_SEQ"
        },
        ...cutsitesByTopSnip,
        {
          topSnipPosition: sequenceLength,
          bottomSnipPosition: sequenceLength,
          overhangSize: 0,
          type: "START_OR_END_OF_SEQ",
          name: "END_OF_SEQ"
        }
      ];

  const pairs = [];
  sortedCutsites.forEach((cutsite1, index) => {
    const nextCutsite = sortedCutsites[index + 1];
    if (!circular && !nextCutsite) {
      // Don't push a pair if the sequence is linear and we've reached the
      // end of the cutsites array.
      return;
    }
    if (computePartialDigests) {
      sortedCutsites.forEach((cutsite2, index2) => {
        // The direct neighbour is pushed below; index 0 is skipped as well.
        if (index2 === index + 1 || index2 === 0) {
          return;
        }
        pairs.push([cutsite1, cutsite2]);
      });
    }
    // On a circular sequence the last cutsite wraps around to the first one.
    pairs.push([cutsite1, nextCutsite ? nextCutsite : sortedCutsites[0]]);
  });

  const fragments = pairs.map(([cut1, cut2]) => {
    const start = normalizePositionByRangeLength(
      cut1.topSnipPosition,
      sequenceLength
    );
    const end = normalizePositionByRangeLength(
      cut2.topSnipPosition - 1,
      sequenceLength
    );
    const fragmentRange = { start, end };
    const size = getRangeLength(fragmentRange, sequenceLength);
    const id = `${start}-${end}-${size}-`;

    return {
      // containsFive/ThreePrimeRecognitionSite cannot be determined until the
      // inclusion/exclusion of the overhangs is done, so it is not set here.
      cut1: {
        ...cut1,
        // Coerced so the flag is always a real boolean.
        isOverhangIncludedInFragmentSize: Boolean(
          cut1.type !== "START_OR_END_OF_SEQ" &&
            cut1.overhangSize > 0 &&
            cut1.topSnipBeforeBottom
        )
      },
      cut2: {
        ...cut2,
        isOverhangIncludedInFragmentSize:
          cut2.type !== "START_OR_END_OF_SEQ" &&
          cut2.overhangSize > 0 &&
          !cut2.topSnipBeforeBottom
      },
      ...fragmentRange,
      size,
      id
    };
  });

  // Array.prototype.filter returns a new array; its result has to be the
  // return value for the empty-fragment filter to have any effect.
  return fragments.filter(fragment => Boolean(fragment.size));
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import getDigestFragmentsForCutsites from "./getDigestFragmentsForCutsites";
|
|
2
|
+
import cutSequenceByRestrictionEnzyme from "./cutSequenceByRestrictionEnzyme";
|
|
3
|
+
import {flatMap} from "lodash";
|
|
4
|
+
|
|
5
|
+
/**
 * Cuts a sequence with one or more restriction enzymes and returns the
 * resulting digest fragments.
 *
 * @param {string} sequence Sequence to digest; its `length` is forwarded.
 * @param {boolean} circular Whether the sequence is circular.
 * @param {Object|Array<Object>} restrictionEnzymeOrEnzymes A single enzyme or
 *   an array of enzymes.
 * @param {Object} [opts] Passed through unchanged to
 *   `getDigestFragmentsForCutsites`.
 * @returns {Array<Object>} Whatever `getDigestFragmentsForCutsites` returns.
 */
export default function getDigestFragmentsForRestrictionEnzymes(
  sequence,
  circular,
  restrictionEnzymeOrEnzymes,
  opts
) {
  // Accept both a lone enzyme and a list of enzymes.
  let enzymes = restrictionEnzymeOrEnzymes;
  if (!Array.isArray(enzymes)) {
    enzymes = [enzymes];
  }

  // Collect the cutsites of every enzyme into one flat list.
  const cutWithEnzyme = enzyme =>
    cutSequenceByRestrictionEnzyme(sequence, circular, enzyme);
  const allCutsites = flatMap(enzymes, cutWithEnzyme);

  return getDigestFragmentsForCutsites(
    sequence.length,
    circular,
    allCutsites,
    opts
  );
}
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
/* eslint-disable no-unused-expressions */
|
|
2
|
+
import chai from "chai";
|
|
3
|
+
import getDigestFragmentsForRestrictionEnzymes from "./getDigestFragmentsForRestrictionEnzymes.js";
|
|
4
|
+
import enzymeList from "./aliasedEnzymesByName";
|
|
5
|
+
|
|
6
|
+
chai.should();
|
|
7
|
+
describe("getDigestFragmentsForRestrictionEnzymes", () => {
  // Reference definition of the bamhi enzyme used by several cases below:
  // "bamhi": {
  //     "name": "bamhi",
  //     "site": "ggatcc",
  //     "forwardRegex": "g{2}atc{2}",
  //     "reverseRegex": "g{2}atc{2}",
  //     "topSnipOffset": 1,
  //     "bottomSnipOffset": 5
  // },
  // ATGATCAGA
  // 012345678
  it("returns 0 digestFragments for a linear seq with no cutsites", () => {
    const digestFragments = getDigestFragmentsForRestrictionEnzymes(
      "rrrrrrrrr",
      false,
      enzymeList["bsai"]
    );
    digestFragments.should.be.an("array");
    digestFragments.length.should.equal(0);
  });

  it("returns 0 digestFragments for a circular seq with no cutsites", () => {
    const digestFragments = getDigestFragmentsForRestrictionEnzymes(
      "rrrrrrrrrrrr",
      // Must be circular, otherwise this just repeats the linear case above.
      true,
      enzymeList["bsai"]
    );
    digestFragments.should.be.an("array");
    digestFragments.length.should.equal(0);
  });

  it("gets digestFragments for a single type 2s cutsite cutting in a linear sequence", () => {
    const digestFragments = getDigestFragmentsForRestrictionEnzymes(
      "tagagtagagtagaGGTCTCgtagagtagagtagag",
      false,
      enzymeList["bsai"]
    );
    digestFragments.should.be.an("array");
    digestFragments.length.should.equal(2);
    digestFragments[0].start.should.equal(0);
    digestFragments[0].end.should.equal(20);

    digestFragments[0].cut1.overhangSize.should.equal(0);
    digestFragments[0].cut1.isOverhangIncludedInFragmentSize.should.equal(
      false
    );
    digestFragments[0].cut2.overhangSize.should.equal(4);
    digestFragments[0].cut2.isOverhangIncludedInFragmentSize.should.equal(
      false
    );

    digestFragments[1].start.should.equal(21);
    digestFragments[1].end.should.equal(35);
    digestFragments[1].cut1.overhangSize.should.equal(4);
    digestFragments[1].cut1.isOverhangIncludedInFragmentSize.should.equal(true);
    digestFragments[1].cut2.overhangSize.should.equal(0);
    digestFragments[1].cut2.isOverhangIncludedInFragmentSize.should.equal(
      false
    );
  });

  it("gets digestFragments for a single cutsite cutting in a circular sequence", () => {
    const digestFragments = getDigestFragmentsForRestrictionEnzymes(
      "GGATCC",
      true,
      enzymeList["bamhi"]
    );
    //  v            <- top strand snip
    // G G A T C C
    // C C T A G G
    //          ^    <- bottom strand snip
    digestFragments.should.be.an("array");
    digestFragments.length.should.equal(1);
    digestFragments[0].start.should.equal(1);
    digestFragments[0].end.should.equal(0);
    // containsFive/ThreePrimeRecognitionSite cannot be determined until the
    // inclusion/exclusion of the overhangs is done, so it is not asserted.
    digestFragments[0].cut1.overhangSize.should.equal(4);
    digestFragments[0].cut1.isOverhangIncludedInFragmentSize.should.equal(true);
    digestFragments[0].cut2.overhangSize.should.equal(4);
    digestFragments[0].cut2.isOverhangIncludedInFragmentSize.should.equal(
      false
    );
  });

  it("gets digestFragments for a single cutsite cutting in a linear sequence", () => {
    const digestFragments = getDigestFragmentsForRestrictionEnzymes(
      "ggatcc",
      false,
      enzymeList["bamhi"]
    );
    digestFragments.should.be.an("array");
    digestFragments.length.should.equal(2);
    digestFragments[0].cut1.overhangSize.should.equal(0);
    digestFragments[0].cut1.isOverhangIncludedInFragmentSize.should.equal(
      false
    );
    digestFragments[0].cut2.overhangSize.should.equal(4);
    digestFragments[0].cut2.isOverhangIncludedInFragmentSize.should.equal(
      false
    );
    digestFragments[0].start.should.equal(0);
    digestFragments[0].end.should.equal(0);
    digestFragments[1].cut1.overhangSize.should.equal(4);
    digestFragments[1].cut1.isOverhangIncludedInFragmentSize.should.equal(true);
    digestFragments[1].cut2.overhangSize.should.equal(0);
    digestFragments[1].cut2.isOverhangIncludedInFragmentSize.should.equal(
      false
    );
    digestFragments[1].start.should.equal(1);
    digestFragments[1].end.should.equal(5);
  });

  it("gets digestFragments for multiple cutsites cutting in a linear sequence", () => {
    const digestFragments = getDigestFragmentsForRestrictionEnzymes(
      "ggatcctttttttggatcc",
      false,
      enzymeList["bamhi"]
    );
    digestFragments.should.be.an("array");
    digestFragments.length.should.equal(3);
    digestFragments[0].cut1.type.should.equal("START_OR_END_OF_SEQ");
    digestFragments[2].cut2.type.should.equal("START_OR_END_OF_SEQ");
    digestFragments[0].start.should.equal(0);
    digestFragments[0].end.should.equal(0);
    digestFragments[1].start.should.equal(1);
    digestFragments[1].end.should.equal(13);
    digestFragments[2].start.should.equal(14);
    digestFragments[2].end.should.equal(18);
  });

  it("gets digestFragments for multiple cutsites cutting in a circular sequence", () => {
    const digestFragments = getDigestFragmentsForRestrictionEnzymes(
      "ggatcctttttttggatcc",
      true,
      enzymeList["bamhi"]
    );
    digestFragments.should.be.an("array");
    digestFragments.length.should.equal(2);
    digestFragments[0].start.should.equal(1);
    digestFragments[0].end.should.equal(13);
    digestFragments[0].size.should.equal(13);
    digestFragments[1].start.should.equal(14);
    digestFragments[1].end.should.equal(0);
    digestFragments[1].size.should.equal(6);
  });

  it("gets digestFragments for multiple type 2s cutsites cutting in a circular sequence", () => {
    const digestFragments = getDigestFragmentsForRestrictionEnzymes(
      "GGTCTCttttttttttttGGTCTCttttttttttttttt",
      true,
      enzymeList["bsai"]
    );
    digestFragments.should.be.an("array");
    digestFragments.length.should.equal(2);
    digestFragments[0].cut1.overhangSize.should.equal(4);
    digestFragments[0].cut1.isOverhangIncludedInFragmentSize.should.equal(true);
    digestFragments[0].cut2.overhangSize.should.equal(4);
    digestFragments[0].cut2.isOverhangIncludedInFragmentSize.should.equal(
      false
    );
    digestFragments[0].start.should.equal(7);
    digestFragments[0].end.should.equal(24);
    digestFragments[0].size.should.equal(18);

    digestFragments[1].cut1.overhangSize.should.equal(4);
    digestFragments[1].cut1.isOverhangIncludedInFragmentSize.should.equal(true);
    digestFragments[1].cut2.overhangSize.should.equal(4);
    digestFragments[1].cut2.isOverhangIncludedInFragmentSize.should.equal(
      false
    );
    digestFragments[1].start.should.equal(25);
    digestFragments[1].end.should.equal(6);
    digestFragments[1].size.should.equal(21);
  });

  it("gets digestFragments for multiple cutsites cutting in a circular sequence when computePartialDigests=true", () => {
    const digestFragments = getDigestFragmentsForRestrictionEnzymes(
      "ggatcctttttttggatcc",
      true,
      enzymeList["bamhi"],
      { computePartialDigests: true }
    );

    digestFragments.should.be.an("array");
    digestFragments.length.should.equal(3);
    digestFragments[0].start.should.equal(1);
    digestFragments[0].end.should.equal(13);
    digestFragments[0].size.should.equal(13);

    digestFragments[1].start.should.equal(14);
    digestFragments[1].end.should.equal(13);
    digestFragments[1].size.should.equal(19);

    digestFragments[2].start.should.equal(14);
    digestFragments[2].end.should.equal(0);
    digestFragments[2].size.should.equal(6);
  });

  // TODO (tnrtodo): this test should be enabled and checked for correctness
  // it("gets digestFragments for multiple cutsites cutting in a linear sequence when computePartialDigests=true", function() {
  //   const digestFragments = getDigestFragmentsForRestrictionEnzymes(
  //     "ggatcctttttttggatcc",
  //     false,
  //     enzymeList["bamhi"],
  //     { computePartialDigests: true }
  //   );
  //   digestFragments.should.be.an("array");
  //   digestFragments.length.should.equal(9);
  //   digestFragments[0].start.should.equal(1);
  //   digestFragments[0].end.should.equal(13);
  //   digestFragments[0].size.should.equal(13);
  //
  //   digestFragments[1].start.should.equal(14);
  //   digestFragments[1].end.should.equal(13);
  //   digestFragments[1].size.should.equal(19);
  //
  //   digestFragments[2].start.should.equal(14);
  //   digestFragments[2].end.should.equal(0);
  //   digestFragments[2].size.should.equal(6);
  // });
});
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import {normalizePositionByRangeLength1Based} from "@teselagen/range-utils";
|
|
2
|
+
|
|
3
|
+
/**
 * Works out the pair of positions an insert would sit between, based on the
 * current selection or caret.
 *
 * Precedence: a selection (`selectionLayer.start > -1`) wins over the caret
 * (`caretPosition > -1`); with neither, `[sequenceLength, 1]` is returned.
 * Positions are passed through `normalizePositionByRangeLength1Based`.
 *
 * @param {number} caretPosition Caret position, or a value <= -1 when unset.
 * @param {Object} selectionLayer Object whose `start`/`end` describe the
 *   selection; `start` is not greater than -1 when there is no selection.
 * @param {number} sequenceLength Length of the sequence.
 * @returns {Array<number>} Two-element array `[before, after]`.
 */
export default function getInsertBetweenVals(
  caretPosition,
  selectionLayer,
  sequenceLength
) {
  const normalize = position =>
    normalizePositionByRangeLength1Based(position, sequenceLength);

  const hasSelection = selectionLayer.start > -1;
  if (hasSelection) {
    const before = normalize(selectionLayer.start);
    const after = normalize(selectionLayer.end + 2);
    return [before, after];
  }

  const hasCaret = caretPosition > -1;
  if (hasCaret) {
    const before = normalize(caretPosition);
    const after = normalize(caretPosition + 1);
    return [before, after];
  }

  // Neither a selection nor a caret.
  return [sequenceLength, 1];
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import assert from "assert";
|
|
2
|
+
import getInsertBetweenVals from "./getInsertBetweenVals";
|
|
3
|
+
describe("getInsertBetweenVals", () => {
  it("should get 1 based insert between position X and position X2 based on either a 0-based caretPosition or a 0-based selectionLayer", () => {
    // 0123   <- 0-based positions
    // atgc
    // 1234   <- 1-based positions
    const sequenceLength = 4;
    const cases = [
      { caretPosition: 1, selectionLayer: {}, expected: [1, 2] },
      { caretPosition: 0, selectionLayer: {}, expected: [4, 1] },
      { caretPosition: -1, selectionLayer: { start: 1, end: 1 }, expected: [1, 3] },
      { caretPosition: -1, selectionLayer: { start: 0, end: 1 }, expected: [4, 3] },
      { caretPosition: -1, selectionLayer: { start: 3, end: 1 }, expected: [3, 3] },
      { caretPosition: -1, selectionLayer: { start: 3, end: 2 }, expected: [3, 4] }
    ];

    cases.forEach(({ caretPosition, selectionLayer, expected }) => {
      const insertBetween = getInsertBetweenVals(
        caretPosition,
        selectionLayer,
        sequenceLength
      );
      assert(insertBetween[0] === expected[0]);
      assert(insertBetween[1] === expected[1]);
    });
  });
});
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import {
|
|
2
|
+
isPositionWithinRange,
|
|
3
|
+
getSequenceWithinRange,
|
|
4
|
+
normalizePositionByRangeLength,
|
|
5
|
+
isPositionCloserToRangeStartThanRangeEnd,
|
|
6
|
+
} from "@teselagen/range-utils";
|
|
7
|
+
|
|
8
|
+
/**
 * Splits the part of `sequence` covered by `range` into the piece to the left
 * of `position` and the piece to the right of it.
 *
 * - When `isPositionWithinRange(position, range)` holds, the left side runs
 *   from `range.start` to `position - 1` (normalized by the sequence length)
 *   and the right side from `position` to `range.end`.
 * - Otherwise the whole range goes to one side: the right side when the
 *   position is closer to the range start than to the range end, else the
 *   left side. The other side stays "".
 *
 * @param {Object} range Range with `start` and `end`.
 * @param {number} position Position to split at.
 * @param {string} sequence Full sequence the range refers to.
 * @returns {{leftHandSide: string, rightHandSide: string}}
 */
export default function getLeftAndRightOfSequenceInRangeGivenPosition(
  range,
  position,
  sequence
) {
  if (!isPositionWithinRange(position, range)) {
    const closerToStart = isPositionCloserToRangeStartThanRangeEnd(
      position,
      range,
      sequence.length
    );
    const wholeRange = getSequenceWithinRange(range, sequence);
    return closerToStart
      ? { leftHandSide: "", rightHandSide: wholeRange }
      : { leftHandSide: wholeRange, rightHandSide: "" };
  }

  const leftRange = {
    start: range.start,
    end: normalizePositionByRangeLength(position - 1, sequence.length)
  };
  const leftHandSide = getSequenceWithinRange(leftRange, sequence);

  const rightRange = { start: position, end: range.end };
  const rightHandSide = getSequenceWithinRange(rightRange, sequence);

  return { leftHandSide, rightHandSide };
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
import chai from "chai";
|
|
4
|
+
import cutSequenceByRestrictionEnzyme from "./cutSequenceByRestrictionEnzyme.js";
|
|
5
|
+
import enzymeList from "./aliasedEnzymesByName";
|
|
6
|
+
import getLeftAndRightOfSequenceInRangeGivenPosition from "./getLeftAndRightOfSequenceInRangeGivenPosition";
|
|
7
|
+
|
|
8
|
+
chai.should();
|
|
9
|
+
describe("getLeftAndRightOfSequenceInRangeGivenPosition", () => {
  it("gets the left and right of the range correctly given a position inside the range", () => {
    const sequence = "aaaaaaaaaattttttttttgggggggggg";
    const result = getLeftAndRightOfSequenceInRangeGivenPosition(
      { start: 9, end: 20 },
      10,
      sequence
    );
    result.leftHandSide.should.equal("a");
    result.rightHandSide.should.equal("ttttttttttg");
  });

  // The two "outside the range" cases carry distinct titles so a failure
  // report identifies which side of the range was being exercised.
  it("gets the left and right of the range correctly given a position outside the range, before its start", () => {
    const sequence = "aaaaaaaaaattttttttttgggggggggg";
    const result = getLeftAndRightOfSequenceInRangeGivenPosition(
      { start: 9, end: 20 },
      6,
      sequence
    );
    result.leftHandSide.should.equal("");
    result.rightHandSide.should.equal("attttttttttg");
  });

  it("gets the left and right of the range correctly given a position outside the range, after its end", () => {
    const sequence = "aaaaaaaaaattttttttttgggggggggg";
    const result = getLeftAndRightOfSequenceInRangeGivenPosition(
      { start: 9, end: 20 },
      24,
      sequence
    );
    result.leftHandSide.should.equal("attttttttttg");
    result.rightHandSide.should.equal("");
  });

  // Reference definition of the bamhi enzyme used below:
  // "bamhi": {
  //     "name": "bamhi",
  //     "site": "ggatcc",
  //     "forwardRegex": "g{2}atc{2}",
  //     "reverseRegex": "g{2}atc{2}",
  //     "topSnipOffset": 1,
  //     "bottomSnipOffset": 5
  // },
  it("cuts a single circular cutsite", () => {
    // The recognition site wraps around the origin of the circular sequence.
    const sequence = "ccrrrrggat";
    const cutsites = cutSequenceByRestrictionEnzyme(
      sequence,
      true,
      enzymeList["bamhi"]
    );
    const cutsite = cutsites[0];
    const result = getLeftAndRightOfSequenceInRangeGivenPosition(
      cutsite.recognitionSiteRange,
      cutsite.topSnipPosition,
      sequence
    );
    result.leftHandSide.should.equal("g");
    result.rightHandSide.should.equal("gatcc");
  });
});
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import proteinAlphabet from "./proteinAlphabet";
|
|
2
|
+
|
|
3
|
+
/**
 * Sums the masses of the amino acids in a string.
 *
 * Each character is looked up in `proteinAlphabet` and its `mass` is added to
 * the total; a character that has no entry there makes the lookup throw.
 *
 * @param {string} aaString A string of amino acid characters.
 * @param {number} [numsAfterDecimal=2] Number of digits to round to after the
 *   decimal point.
 * @param {boolean} [divideByThree=false] Divide the final mass by three. This
 *   is useful in situations where nucleotides are converted to amino acids in
 *   a way that makes each amino acid appear three times.
 * @returns {number} The rounded sum of the masses of all amino acids in the
 *   string.
 */
export default function getMassOfAaString(
  aaString,
  numsAfterDecimal = 2,
  divideByThree = false
) {
  let total = 0;
  for (const aminoAcid of aaString) {
    total += proteinAlphabet[aminoAcid].mass;
  }

  const mass = divideByThree ? total / 3 : total;

  // Round by scaling up, rounding to an integer, then scaling back down.
  const scale = 10 ** numsAfterDecimal;
  return Math.round(mass * scale) / scale;
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import getMassOfAaString from "./getMassOfAaString";
|
|
2
|
+
import assert from "assert";
|
|
3
|
+
|
|
4
|
+
describe("getMassOfAaString", () => {
  // Asserts the computed mass for every [aaString, expectedMass] entry, in order.
  const expectMasses = expectations => {
    expectations.forEach(([aaString, expectedMass]) => {
      assert.equal(getMassOfAaString(aaString), expectedMass);
    });
  };

  it("an empty string has a mass of 0", () => {
    expectMasses([["", 0]]);
  });

  it("A string with one amino acids returns the correct results", () => {
    expectMasses([
      ["T", 119.1],
      ["A", 89.1],
      ["F", 165.2]
    ]);
  });

  it("A string a long string of amino acids returns the correct results", () => {
    expectMasses([
      ["TAGATAFPFPFPA", 1510.6],
      ["TFPMAV", 754.8],
      ["TFPMAVTAGATAFPFPFPA", 2265.4]
    ]);
  });
});
|