@teselagen/sequence-utils 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +2 -2
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Converts an amino-acid caret position or an amino-acid range into the
 * corresponding DNA caret position or DNA range.
 *
 * @param {number|Object} rangeOrCaret - Either a caret position, or a range
 *   object with `start`/`end` and an optional `locations` array of sub-ranges.
 * @returns {number|Object} The converted caret position, or a new range object
 *   whose `start`, `end` and every entry of `locations` have been converted.
 *   When the input range has no (truthy) `locations`, the result carries
 *   `locations: undefined`.
 */
export default function convertAACaretPositionOrRangeToDna(rangeOrCaret) {
  const isRange = rangeOrCaret !== null && typeof rangeOrCaret === "object";
  if (!isRange) {
    // Anything that is not a non-null object is treated as a caret position.
    return convertAACaretPositionToDnaCaretPosition(rangeOrCaret);
  }

  const aaLocations = rangeOrCaret.locations;
  const dnaLocations = aaLocations
    ? aaLocations.map(location => convertAARangeToDnaRange(location))
    : undefined;

  return convertAARangeToDnaRange({
    ...rangeOrCaret,
    locations: dnaLocations
  });
}
|
|
13
|
+
|
|
14
|
+
/**
 * Maps an amino-acid caret position to a DNA caret position by scaling it
 * with the codon length (3 bases per amino acid).
 *
 * @param {number} caret - Caret position in amino-acid coordinates.
 * @returns {number} Caret position in DNA coordinates.
 */
function convertAACaretPositionToDnaCaretPosition(caret) {
  const basesPerCodon = 3;
  const dnaCaret = caret * basesPerCodon;
  return dnaCaret;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Converts a single amino-acid range into a DNA range.
 *
 * `start` becomes the first base of its codon (`start * 3`) and `end` becomes
 * the last base of its codon (`end * 3 + 2`). A bound that is not greater
 * than -1 (e.g. the -1 placeholder) is copied through unchanged.
 *
 * @param {Object} range - Object with `start` and `end` in amino-acid
 *   coordinates; any other properties are copied to the result.
 * @returns {Object} A new object with `start`/`end` in DNA coordinates.
 */
function convertAARangeToDnaRange(range) {
  const { start: aaStart, end: aaEnd } = range;
  const startIsSet = aaStart > -1;
  const endIsSet = aaEnd > -1;

  return {
    ...range,
    start: startIsSet ? aaStart * 3 : aaStart,
    end: endIsSet ? aaEnd * 3 + 2 : aaEnd
  };
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import convertAACaretPositionOrRangeToDna from "./convertAACaretPositionOrRangeToDna";
|
|
2
|
+
// Tests for the amino-acid -> DNA coordinate conversion.
// The titles previously described the opposite direction (DNA -> AA); they
// now state what is actually exercised here.
describe("convertAACaretPositionOrRangeToDna", () => {
  it(`should convert AA ranges and carets to dna ranges and carets`, () => {
    const res = convertAACaretPositionOrRangeToDna({
      start: 3,
      end: 3
    });
    // Amino acid 3 is encoded by DNA bases 9..11.
    expect(res.start).toEqual(9);
    expect(res.end).toEqual(11);
    // A caret position is simply scaled by the codon length.
    expect(convertAACaretPositionOrRangeToDna(3)).toEqual(9);
  });
  it(`should convert AA ranges and locations to dna ranges and locations`, () => {
    const res = convertAACaretPositionOrRangeToDna({
      start: 3,
      end: 9,
      locations: [
        {
          start: 3,
          end: 5
        },
        {
          start: 6,
          end: 9
        }
      ]
    });
    expect(res.start).toEqual(9);
    expect(res.end).toEqual(29);
    // Every sub-location is converted the same way as the outer range.
    expect(res.locations[0].start).toEqual(9);
    expect(res.locations[0].end).toEqual(17);
    expect(res.locations[1].start).toEqual(18);
    expect(res.locations[1].end).toEqual(29);
  });
});
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Converts a DNA caret position or a DNA range into the corresponding
 * amino-acid caret position or amino-acid range.
 *
 * @param {number|Object} rangeOrCaret - Either a caret position, or a range
 *   object with `start`/`end` and an optional `locations` array of sub-ranges.
 * @returns {number|Object} The converted caret position, or a new range object
 *   whose `start`, `end` and every entry of `locations` have been converted.
 *   When the input range has no (truthy) `locations`, the result carries
 *   `locations: undefined`.
 */
export default function convertDnaCaretPositionOrRangeToAA(rangeOrCaret) {
  const isRange = rangeOrCaret !== null && typeof rangeOrCaret === "object";
  if (!isRange) {
    // Anything that is not a non-null object is treated as a caret position.
    return convertDnaCaretPositionToAACaretPosition(rangeOrCaret);
  }

  const dnaLocations = rangeOrCaret.locations;
  const aaLocations = dnaLocations
    ? dnaLocations.map(location => convertDnaRangeToAARange(location))
    : undefined;

  return convertDnaRangeToAARange({
    ...rangeOrCaret,
    locations: aaLocations
  });
}
|
|
13
|
+
|
|
14
|
+
/**
 * Maps a DNA caret position to an amino-acid caret position by dividing by
 * the codon length (3) and rounding down.
 *
 * @param {number} caret - Caret position in DNA coordinates.
 * @returns {number} Caret position in amino-acid coordinates.
 */
function convertDnaCaretPositionToAACaretPosition(caret) {
  const basesPerCodon = 3;
  const codonsBeforeCaret = caret / basesPerCodon;
  return Math.floor(codonsBeforeCaret);
}
|
|
17
|
+
|
|
18
|
+
/**
 * Converts a single DNA range into an amino-acid range.
 *
 * `start` maps to the codon containing it (`floor(start / 3)`). `end` maps to
 * the last codon whose final base is at or before `end`
 * (`floor((end - 2) / 3)`), which is the exact inverse of the AA -> DNA
 * mapping `end * 3 + 2`. A bound that is not greater than -1 (e.g. the -1
 * placeholder) is copied through unchanged.
 *
 * Note: for `end` of 0 or 1 no codon is complete, so the computed end is -1.
 *
 * @param {Object} range - Object with `start` and `end` in DNA coordinates;
 *   any other properties are copied to the result.
 * @returns {Object} A new object with integer `start`/`end` in amino-acid
 *   coordinates.
 */
function convertDnaRangeToAARange(range) {
  const { start, end } = range;
  return {
    ...range,
    start: start > -1 ? Math.floor(start / 3) : start,
    // The division must happen inside Math.floor; flooring `end - 2` first
    // and dividing afterwards produced fractional indices for any end that
    // was not the last base of a codon.
    end: end > -1 ? Math.floor((end - 2) / 3) : end
  };
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
|
|
2
|
+
// Tests for the DNA -> amino-acid coordinate conversion.
describe("convertDnaCaretPositionOrRangeToAa", () => {
  it("should convert dna ranges and carets to AA ranges and carets", () => {
    const dnaRange = { start: 9, end: 11 };
    const { start, end } = convertDnaCaretPositionOrRangeToAa(dnaRange);
    expect(start).toEqual(3);
    expect(end).toEqual(3);

    // carets: 0 1 2 3 4
    // bases:   a t g c
    // DNA caret 3 sits right after the first complete codon (a t g), so the
    // amino-acid caret is 1.
    const aaCaret = convertDnaCaretPositionOrRangeToAa(3);
    expect(aaCaret).toEqual(1);
  });

  it("should convert dna ranges and locations to AA ranges and carets", () => {
    const firstLocation = { start: 9, end: 17 };
    const secondLocation = { start: 18, end: 29 };
    const converted = convertDnaCaretPositionOrRangeToAa({
      start: 9,
      end: 29,
      locations: [firstLocation, secondLocation]
    });

    expect(converted.start).toEqual(3);
    expect(converted.end).toEqual(9);

    // Each sub-location is converted the same way as the outer range.
    const [aaFirst, aaSecond] = converted.locations;
    expect(aaFirst.start).toEqual(3);
    expect(aaFirst.end).toEqual(5);
    expect(aaSecond.start).toEqual(6);
    expect(aaSecond.end).toEqual(9);
  });
});
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
import {assign} from "lodash";
|
|
2
|
+
import shortid from "shortid";
|
|
3
|
+
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
getSequenceWithinRange,
|
|
7
|
+
normalizePositionByRangeLength,
|
|
8
|
+
reversePositionInRange,
|
|
9
|
+
} from "@teselagen/range-utils";
|
|
10
|
+
|
|
11
|
+
/**
 * Finds every cutsite of a restriction enzyme in a sequence, on both strands.
 *
 * The forward strand is scanned with the enzyme's forward pattern. Unless the
 * enzyme's forward and reverse patterns are identical, the reverse complement
 * of the sequence is scanned with the same forward pattern and each hit is
 * mirrored back into forward-strand coordinates.
 *
 * @param {string} pSequence - The sequence to cut.
 * @param {boolean} circular - Whether the sequence is circular.
 * @param {Object} restrictionEnzyme - Enzyme description; this function reads
 *   `forwardRegex` and `reverseRegex` and hands the whole object on to
 *   `cutSequence`.
 * @returns {Array<Object>} Forward cutsites followed by reverse cutsites
 *   (reverse ones have `forward: false`). If either pattern is empty, an empty
 *   array carrying an `error` string property is returned instead.
 */
export default function cutSequenceByRestrictionEnzyme(
  pSequence,
  circular,
  restrictionEnzyme
) {
  if (
    restrictionEnzyme.forwardRegex.length === 0 ||
    restrictionEnzyme.reverseRegex.length === 0
  ) {
    const returnArray = [];
    returnArray.error =
      "Cannot cut sequence. Enzyme restriction site must be at least 1 bp long.";
    return returnArray;
  }

  /**
   * Mirrors a cutsite found on the reverse-complement sequence back into
   * forward-strand coordinates and returns it as a new object.
   * Mirroring swaps the roles of start/end and of top/bottom snips.
   */
  function reverseAllPositionsOfCutsite(cutsite, rangeLength) {
    // Base positions and in-between (snip) positions mirror differently, hence
    // the different third argument.
    const mirrorBase = position =>
      reversePositionInRange(position, rangeLength, false);
    const mirrorSnip = position =>
      reversePositionInRange(position, rangeLength, true);
    // Upstream snips only exist for double cutters and are null otherwise.
    // This used to be gated on `cutsite.cutsTwice`, which cutSequence never
    // sets, so upstream snips of reverse-strand hits were never mirrored.
    const mirrorUpstreamSnip = position =>
      position === null || position === undefined
        ? position
        : mirrorSnip(position);

    return assign({}, cutsite, {
      start: mirrorBase(cutsite.end),
      end: mirrorBase(cutsite.start),
      overhangBps: getReverseComplementSequenceString(cutsite.overhangBps),
      topSnipPosition: mirrorSnip(cutsite.bottomSnipPosition),
      bottomSnipPosition: mirrorSnip(cutsite.topSnipPosition),
      upstreamTopSnip: mirrorUpstreamSnip(cutsite.upstreamBottomSnip),
      upstreamBottomSnip: mirrorUpstreamSnip(cutsite.upstreamTopSnip),
      upstreamTopBeforeBottom: !!cutsite.upstreamTopBeforeBottom,
      topSnipBeforeBottom: !!cutsite.topSnipBeforeBottom,
      recognitionSiteRange: {
        start: mirrorBase(cutsite.recognitionSiteRange.end),
        end: mirrorBase(cutsite.recognitionSiteRange.start)
      },
      forward: false
    });
  }

  const forwardRegExpPattern = new RegExp(restrictionEnzyme.forwardRegex, "ig");
  const sequence = pSequence;

  const cutsitesForward = cutSequence(
    forwardRegExpPattern,
    restrictionEnzyme,
    sequence,
    circular
  );

  let cutsitesReverse = [];
  // Identical forward/reverse patterns mean the forward scan already covers
  // the reverse strand, so the second scan is skipped.
  if (restrictionEnzyme.forwardRegex !== restrictionEnzyme.reverseRegex) {
    const revSequence = getReverseComplementSequenceString(sequence);
    cutsitesReverse = cutSequence(
      forwardRegExpPattern,
      restrictionEnzyme,
      revSequence,
      circular
    ).map(cutsite => reverseAllPositionsOfCutsite(cutsite, sequence.length));
  }
  return cutsitesForward.concat(cutsitesReverse);
}
|
|
102
|
+
|
|
103
|
+
/**
 * Scans one strand of a sequence for (possibly overlapping) matches of the
 * enzyme's recognition pattern and builds a cutsite object for each match
 * whose snips fit inside the sequence.
 *
 * @param {RegExp} forwardRegExpPattern - Pattern of the recognition site.
 * @param {Object} restrictionEnzyme - Enzyme description; reads `site`,
 *   `cutType`, `usForward`, `usReverse`, `topSnipOffset`, `bottomSnipOffset`
 *   and `name`.
 * @param {string} sequence - The strand to scan.
 * @param {boolean} circular - Whether the sequence is circular.
 * @returns {Array<Object>} Cutsites with `forward: true`, positions
 *   normalized to the length of the original (un-doubled) sequence.
 */
function cutSequence(
  forwardRegExpPattern,
  restrictionEnzyme,
  sequence,
  circular
) {
  const foundCutsites = [];
  const siteLength = restrictionEnzyme.site.length;
  const linearSequence = sequence;
  const linearLength = linearSequence.length;
  // A circular sequence is scanned as two copies back to back so that sites
  // spanning the origin are matched. Cutsites whose start lies in the second
  // copy are rejected further down, which de-duplicates the results.
  const scanned = circular ? linearSequence + linearSequence : linearSequence;
  const scannedLength = scanned.length;

  // `windowStart` is where the current search window begins inside `scanned`;
  // `hit` is the match index relative to that window.
  let windowStart = 0;
  let hit = scanned.search(forwardRegExpPattern);

  while (hit !== -1) {
    const siteStart = hit + windowStart;
    const siteEnd = siteStart + siteLength - 1;

    // start/end must enclose the recognition site and every snip; they begin
    // as the recognition site and are widened below.
    let start = siteStart;
    let end = siteEnd;
    let fitsWithinSequence = false;

    // Upstream snips only exist for double cutters (cutType 1), which cut on
    // both sides of the recognition site.
    let upstreamTopSnip = null;
    let upstreamBottomSnip = null;
    let upstreamTopBeforeBottom = false;
    if (restrictionEnzyme.cutType === 1) {
      const rawUpstreamTop = siteEnd - restrictionEnzyme.usForward;
      const rawUpstreamBottom = siteEnd - restrictionEnzyme.usReverse;
      if (rawUpstreamTop >= 0 && rawUpstreamBottom >= 0) {
        fitsWithinSequence = true;
        upstreamTopBeforeBottom = rawUpstreamTop < rawUpstreamBottom;
        const outermostUpstreamSnip = upstreamTopBeforeBottom
          ? rawUpstreamTop
          : rawUpstreamBottom;
        if (start > outermostUpstreamSnip) {
          start = outermostUpstreamSnip + 1;
        }
        upstreamTopSnip = normalizePositionByRangeLength(
          rawUpstreamTop,
          linearLength,
          true
        );
        upstreamBottomSnip = normalizePositionByRangeLength(
          rawUpstreamBottom,
          linearLength,
          true
        );
      }
    }

    // Downstream snips, relative to the start of the recognition site.
    let topSnipPosition = null;
    let bottomSnipPosition = null;
    let topSnipBeforeBottom = false;
    const rawTopSnip = siteStart + restrictionEnzyme.topSnipOffset;
    const rawBottomSnip = siteStart + restrictionEnzyme.bottomSnipOffset;
    if (rawBottomSnip <= scannedLength && rawTopSnip <= scannedLength) {
      fitsWithinSequence = true;
      topSnipBeforeBottom = rawTopSnip <= rawBottomSnip;
      const outermostDownstreamSnip = topSnipBeforeBottom
        ? rawBottomSnip
        : rawTopSnip;
      if (outermostDownstreamSnip > siteEnd) {
        end = outermostDownstreamSnip - 1;
      }
      topSnipPosition = normalizePositionByRangeLength(
        rawTopSnip,
        linearLength,
        true
      );
      bottomSnipPosition = normalizePositionByRangeLength(
        rawBottomSnip,
        linearLength,
        true
      );
    }

    // Keep only cutsites that lie inside the sequence and, for circular
    // input, that start within the first copy.
    const isUsable =
      fitsWithinSequence &&
      start >= 0 &&
      end >= 0 &&
      start < linearLength &&
      end < scannedLength;

    if (isUsable) {
      const normalizedStart = normalizePositionByRangeLength(
        start,
        linearLength,
        false
      );
      const normalizedEnd = normalizePositionByRangeLength(
        end,
        linearLength,
        false
      );
      const recognitionSiteRange = {
        start: normalizePositionByRangeLength(siteStart, linearLength, false),
        end: normalizePositionByRangeLength(siteEnd, linearLength, false)
      };

      // The overhang is the stretch between the two snips; it is empty
      // (range -1..-1) when top and bottom snip at the same spot.
      let cutRange = { start: -1, end: -1 };
      if (topSnipPosition !== bottomSnipPosition) {
        const firstSnip = topSnipBeforeBottom
          ? topSnipPosition
          : bottomSnipPosition;
        const secondSnip = topSnipBeforeBottom
          ? bottomSnipPosition
          : topSnipPosition;
        cutRange = {
          start: firstSnip,
          end: normalizePositionByRangeLength(secondSnip - 1, linearLength)
        };
      }
      const overhangBps = getSequenceWithinRange(cutRange, linearSequence);

      foundCutsites.push({
        id: shortid(),
        start: normalizedStart,
        end: normalizedEnd,
        topSnipPosition,
        bottomSnipPosition,
        topSnipBeforeBottom,
        overhangBps,
        overhangSize: overhangBps.length,
        upstreamTopBeforeBottom,
        upstreamTopSnip,
        annotationTypePlural: "cutsites",
        upstreamBottomSnip,
        recognitionSiteRange,
        forward: true,
        name: restrictionEnzyme.name,
        restrictionEnzyme
      });
    }

    // Resume one base after the start of this match so overlapping sites are
    // found too; windowStart keeps indices relative to the whole sequence.
    windowStart += hit + 1;
    hit = scanned.slice(windowStart).search(forwardRegExpPattern);
  }

  return foundCutsites;
}
|