@teselagen/sequence-utils 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +2 -2
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
import chai from "chai";
|
|
2
|
+
import chaiSubset from "chai-subset";
|
|
3
|
+
import getPossiblePartsFromSequenceAndEnzymes from "./getPossiblePartsFromSequenceAndEnzymes";
|
|
4
|
+
import enzymeList from "./aliasedEnzymesByName";
|
|
5
|
+
chai.should();
|
|
6
|
+
chai.use(chaiSubset);
|
|
7
|
+
describe("getPossiblePartsFromSequenceAndEnzymes", () => {
  // Upstream-snip fields that some of the expected cuts below also pin down.
  const noUpstreamSnips = {
    upstreamTopBeforeBottom: false,
    upstreamTopSnip: null,
    upstreamBottomSnip: null
  };

  // For reference, the bamhi entry looks like this:
  // "bamhi": {
  //   "name": "bamhi",
  //   "site": "ggatcc",
  //   "forwardRegex": "g{2}atc{2}",
  //   "reverseRegex": "g{2}atc{2}",
  //   "topSnipOffset": 1,
  //   "bottomSnipOffset": 5,
  //   "usForward": 0,
  //   "usReverse": 0
  // },
  it("cuts using a single palindromic enzyme", () => {
    // The two bamhi cut sites expected in the circular sequence below.
    const cutAt28 = {
      start: 28,
      end: 33,
      topSnipPosition: 29,
      bottomSnipPosition: 33,
      topSnipBeforeBottom: true,
      overhangBps: "gatc",
      forward: true
    };
    const cutAt72 = {
      start: 72,
      end: 77,
      topSnipPosition: 73,
      bottomSnipPosition: 77,
      topSnipBeforeBottom: true,
      overhangBps: "gatc",
      forward: true
    };
    const plasmid = {
      sequence:
        "tggttgtagtagttagttgatgttatagggatcctgtagtatttatgtagtagtatgatgtagagtagtagtggatcctattatatata",
      circular: true
    };

    const parts = getPossiblePartsFromSequenceAndEnzymes(plasmid, [
      enzymeList.bamhi
    ]);

    parts.should.be.an("array");
    parts.length.should.equal(2);

    const [firstPart, secondPart] = parts;
    firstPart.start.should.equal(29);
    firstPart.end.should.equal(76);
    firstPart.firstCutOffset.should.equal(4);
    firstPart.firstCutOverhang.should.equal("gatc");
    firstPart.firstCutOverhangTop.should.equal("gatc");
    firstPart.secondCutOffset.should.equal(4);
    firstPart.secondCutOverhang.should.equal("gatc");
    firstPart.secondCutOverhangTop.should.equal("");

    secondPart.start.should.equal(73);
    secondPart.end.should.equal(32);

    parts.should.containSubset([
      {
        start: 29,
        end: 76,
        start1Based: 30,
        end1Based: 77,
        firstCut: cutAt28,
        firstCutOffset: 4,
        firstCutOverhang: "gatc",
        firstCutOverhangTop: "gatc",
        firstCutOverhangBottom: "",
        secondCut: cutAt72,
        secondCutOffset: 4,
        secondCutOverhang: "gatc",
        secondCutOverhangTop: "",
        secondCutOverhangBottom: "ctag"
      },
      {
        // This part wraps around the origin of the circular sequence.
        start: 73,
        end: 32,
        start1Based: 74,
        end1Based: 33,
        firstCut: Object.assign({}, cutAt72, noUpstreamSnips),
        firstCutOffset: 4,
        firstCutOverhang: "gatc",
        firstCutOverhangTop: "gatc",
        firstCutOverhangBottom: "",
        secondCut: Object.assign({}, cutAt28, noUpstreamSnips),
        secondCutOffset: 4,
        secondCutOverhang: "gatc",
        secondCutOverhangTop: "",
        secondCutOverhangBottom: "ctag"
      }
    ]);
  });

  it("cuts using two golden gate enzymes", () => {
    // sapi cuts on the forward strand, bsai on the reverse strand.
    const sapiCut = Object.assign(
      {
        start: 10,
        end: 20,
        topSnipPosition: 18,
        bottomSnipPosition: 21,
        topSnipBeforeBottom: true,
        overhangBps: "gtt",
        forward: true
      },
      noUpstreamSnips
    );
    const bsaiCut = Object.assign(
      {
        start: 55,
        end: 65,
        topSnipPosition: 55,
        bottomSnipPosition: 59,
        topSnipBeforeBottom: true,
        overhangBps: "agta",
        forward: false
      },
      noUpstreamSnips
    );
    const plasmid = {
      //          sapi ->
      sequence:
        "tggttgtagtGCTCTTCagttagttgatgttatagggatcctgtagtatttatgtagtaGGAGACCtatgatgtagggtcatcagtagtagtggatcctattatatata",
      // accaacatcacgagaagtcaatcaactacaatatccctaggacatcataaatacatcatcctctggatactacatcCCAGAGtcatcatcacctaggataatatatat
      //                                                                  <- bsai
      circular: true
    };

    const parts = getPossiblePartsFromSequenceAndEnzymes(plasmid, [
      enzymeList.sapi,
      enzymeList.bsai
    ]);

    parts.length.should.equal(2);
    parts.should.containSubset([
      {
        start: 18,
        end: 58,
        start1Based: 19,
        end1Based: 59,
        firstCut: sapiCut,
        firstCutOffset: 3,
        firstCutOverhang: "gtt",
        firstCutOverhangTop: "gtt",
        firstCutOverhangBottom: "",
        secondCut: bsaiCut,
        secondCutOffset: 4,
        secondCutOverhang: "agta",
        secondCutOverhangTop: "",
        secondCutOverhangBottom: "tcat"
      },
      {
        // This part wraps around the origin of the circular sequence.
        start: 55,
        end: 20,
        start1Based: 56,
        end1Based: 21,
        firstCut: bsaiCut,
        firstCutOffset: 4,
        firstCutOverhang: "agta",
        firstCutOverhangTop: "agta",
        firstCutOverhangBottom: "",
        secondCut: sapiCut,
        secondCutOffset: 3,
        secondCutOverhang: "gtt",
        secondCutOverhangTop: "",
        secondCutOverhangBottom: "caa"
      }
    ]);
  });
});
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
|
|
2
|
+
|
|
3
|
+
/**
 * Builds an amino acid string from the per-base amino acid data of a
 * sequence, requesting that data with the `forward` flag set to `false`.
 *
 * @param {string} sequenceString - the nucleotide sequence to translate
 * @returns {string} one character per amino acid, ordered by amino acid
 *   index; bases that are not part of a full codon contribute nothing
 */
export default function getReverseAminoAcidStringFromSequenceString(
  sequenceString
) {
  const perBaseData = getAminoAcidDataForEachBaseOfDna(sequenceString, false);

  // Every base of a codon carries the same amino acid, so writing by
  // aminoAcidIndex collapses the three per-base entries into one slot.
  const residues = perBaseData.reduce((slots, baseData) => {
    if (baseData.fullCodon) {
      slots[baseData.aminoAcidIndex] = baseData.aminoAcid.value;
    }
    return slots;
  }, []);

  return residues.join("");
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import getReverseAminoAcidStringFromSequenceString from "./getReverseAminoAcidStringFromSequenceString";
|
|
2
|
+
import assert from "assert";
|
|
3
|
+
|
|
4
|
+
describe("getReverseAminoAcidStringFromSequenceString", () => {
  it("computes a aa string from dna", () => {
    // strictEqual(actual, expected): the legacy assert.equal compares with ==
    // and the original argument order made failure messages read backwards.
    assert.strictEqual(
      getReverseAminoAcidStringFromSequenceString("cat"),
      "M"
    );
    assert.strictEqual(
      getReverseAminoAcidStringFromSequenceString("atg"),
      "H"
    );
    assert.strictEqual(
      getReverseAminoAcidStringFromSequenceString("atgatg"),
      "HH"
    );
    // Fewer than three bases never form a full codon.
    assert.strictEqual(getReverseAminoAcidStringFromSequenceString("at"), "");
  });
});
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
|
|
2
|
+
import getAminoAcidStringFromSequenceString from './getAminoAcidStringFromSequenceString';
|
|
3
|
+
import getReverseComplementSequenceString from './getReverseComplementSequenceString';
|
|
4
|
+
|
|
5
|
+
/**
 * Translates the reverse complement of a sequence into an amino acid string.
 *
 * @param {string} sequenceString - the nucleotide sequence
 * @returns {string} the amino acid string of the reverse-complemented sequence
 */
export default function getReverseComplementAminoAcidStringFromSequenceString(
  sequenceString
) {
  const reverseComplement = getReverseComplementSequenceString(sequenceString);
  return getAminoAcidStringFromSequenceString(reverseComplement);
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Returns a copy of an annotation mirrored onto the reverse complement of
 * its sequence. The input annotation is not modified.
 *
 * Note this function assumes that the entire sequence (or subsequence) is
 * being reverse complemented. This is what is happening:
 *
 *   0123456789
 *   -feature--   normal
 *   --erutaef-   reverse complemented
 *
 *   sequence length = 10, feature start = 1, feature end = 7
 *   so, erutaef start = 2 = 10 - (7+1)
 *   and, erutaef end  = 8 = 10 - (1+1)
 *
 * @param {Object} annotation - object with 0-based inclusive `start`/`end`,
 *   and optionally `forward`, `strand` and `locations`
 * @param {number} sequenceLength - length of the sequence being flipped
 * @returns {Object} a new annotation with mirrored `start`/`end`, `forward`
 *   negated (a missing flag becomes `true`) and `strand` set to -1 when it
 *   was 1, otherwise to 1. When `locations` is a non-empty array, every
 *   location is mirrored the same way and the list order is reversed so it
 *   still runs from the annotation's new start to its new end.
 */
export default function getReverseComplementAnnotation(
  annotation,
  sequenceLength
) {
  const mirrored = {
    start: sequenceLength - (annotation.end + 1),
    end: sequenceLength - (annotation.start + 1),
    forward: !annotation.forward,
    strand: annotation.strand === 1 ? -1 : 1
  };

  // Sub-ranges live in the same coordinate space as start/end, so leaving
  // them untouched would make them disagree with the flipped annotation.
  if (Array.isArray(annotation.locations) && annotation.locations.length) {
    mirrored.locations = annotation.locations
      .map(location =>
        Object.assign({}, location, {
          start: sequenceLength - (location.end + 1),
          end: sequenceLength - (location.start + 1)
        })
      )
      .reverse();
  }

  return Object.assign({}, annotation, mirrored);
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import chai from "chai";
|
|
2
|
+
import chaiSubset from "chai-subset";
|
|
3
|
+
import getReverseComplementAnnotation from "./getReverseComplementAnnotation";
|
|
4
|
+
chai.should();
|
|
5
|
+
chai.use(chaiSubset);
|
|
6
|
+
describe("getReverseComplementAnnotation", () => {
  // Both cases flip an annotation on a sequence of ten bases.
  const SEQUENCE_LENGTH = 10;

  it("reverse complements an annotation ", () => {
    //0123456789
    //---abc----  //normal
    //----cba---  //reverse complemented
    const annotation = { start: 3, end: 5 };
    const expected = { start: 4, end: 6, forward: true, strand: 1 };

    const flipped = getReverseComplementAnnotation(annotation, SEQUENCE_LENGTH);

    flipped.should.deep.equal(expected);
  });

  it("reverse complements an annotation crossing origin", () => {
    //0123456789
    //cde-----ab  //normal
    //ab-----edc  //reverse complemented
    const annotation = { start: 8, end: 2, strand: 1 };
    const expected = { start: 7, end: 1, forward: true, strand: -1 };

    const flipped = getReverseComplementAnnotation(annotation, SEQUENCE_LENGTH);

    flipped.should.deep.equal(expected);
  });
});
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
2
|
+
import getReverseComplementAnnotation from "./getReverseComplementAnnotation";
|
|
3
|
+
import { annotationTypes } from "./annotationTypes";
|
|
4
|
+
import {map} from "lodash";
|
|
5
|
+
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
import getSequenceDataBetweenRange from "./getSequenceDataBetweenRange";
|
|
9
|
+
|
|
10
|
+
// ac.throw([ac.string,ac.bool],arguments);
|
|
11
|
+
/**
 * Reverse complements a sequence together with all of its annotations.
 *
 * @param {Object} pSeqObj - sequence data (a `sequence` string plus
 *   annotation collections keyed by the names in `annotationTypes`)
 * @param {Object} [options] - forwarded to `tidyUpSequenceData`;
 *   `options.range`, when given, is passed to `getSequenceDataBetweenRange`
 *   so only that stretch is reverse complemented
 * @returns {Object} tidied sequence data holding the reverse-complemented
 *   sequence and the mirrored annotations
 */
export default function getReverseComplementSequenceAndAnnoations(
  pSeqObj,
  options = {}
) {
  const rangedSeqObj = getSequenceDataBetweenRange(pSeqObj, options.range);
  const seqObj = tidyUpSequenceData(rangedSeqObj, options);

  const flippedSequence = getReverseComplementSequenceString(seqObj.sequence);

  // Mirror every annotation collection that is present on the sequence data.
  const flippedAnnotations = {};
  annotationTypes.forEach(type => {
    const annotations = seqObj[type];
    if (!annotations) {
      return;
    }
    flippedAnnotations[type] = map(annotations, annotation =>
      getReverseComplementAnnotation(annotation, seqObj.sequence.length)
    );
  });

  const newSeqObj = Object.assign(
    {},
    seqObj,
    { sequence: flippedSequence },
    flippedAnnotations
  );
  return tidyUpSequenceData(newSeqObj, options);
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import chai from "chai";
|
|
2
|
+
import chaiSubset from "chai-subset";
|
|
3
|
+
import getReverseComplementSequenceAndAnnotations from "./getReverseComplementSequenceAndAnnotations";
|
|
4
|
+
chai.should();
|
|
5
|
+
chai.use(chaiSubset);
|
|
6
|
+
describe("getReverseComplementSequenceAndAnnotations", () => {
  it("reverse complements an annotation ", () => {
    const newSeq = getReverseComplementSequenceAndAnnotations({
      sequence: "aaatttcccg",
      circular: true,
      features: [
        {
          start: 3,
          end: 5
        },
        {
          // wraps around the origin
          start: 8,
          end: 2
        }
      ]
    });
    newSeq.should.containSubset({
      sequence: "cgggaaattt",
      features: [
        {
          start: 4,
          end: 6,
          forward: true
        },
        {
          start: 7,
          end: 1,
          forward: true
        }
      ]
    });
  });

  it("handles a range option correctly and reverse complements a subset of the sequence ", () => {
    const newSeq = getReverseComplementSequenceAndAnnotations(
      {
        sequence: "aaatttcccgttt",
        circular: true,
        features: [
          {
            start: 3,
            end: 5
          }
        ]
      },
      { range: { start: 0, end: 9 } }
    );
    newSeq.should.containSubset({
      sequence: "cgggaaattt",
      features: [
        {
          start: 4,
          end: 6,
          forward: true
        }
      ]
    });
  });

  it("handles a range option correctly and reverse complements a subset of the sequence across the origin ", () => {
    const newSeq = getReverseComplementSequenceAndAnnotations(
      {
        sequence: "aaatttcccgttt",
        //         0123456789012
        //         rrr   rrrrrrr   (r = bases inside the range)
        //            fffff        (f = bases covered by the feature)
        circular: true,
        features: [
          {
            start: 3,
            end: 7
          }
        ]
      },
      { range: { start: 6, end: 2 } }
    );
    newSeq.should.containSubset({
      sequence: "tttaaacggg",
      features: [
        {
          start: 8,
          end: 9,
          forward: true
        }
      ]
    });
  });

  // This title used to duplicate the previous case's title, which made the
  // two indistinguishable in a test report.
  it("handles a range across the origin for a sequence without annotations", () => {
    const newSeq = getReverseComplementSequenceAndAnnotations(
      {
        sequence: "cccttt"
        //         012345
        //         rr  rr   (r = bases inside the range)
      },
      { range: { start: 4, end: 1 } }
    );
    newSeq.should.containSubset({
      sequence: "ggaa"
    });
  });
});
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import DNAComplementMap from "./DNAComplementMap";
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
// ac.throw([ac.string,ac.bool],arguments);
|
|
5
|
+
/**
 * Returns the reverse complement of a sequence string.
 *
 * Each character is looked up in `DNAComplementMap`; a character with no
 * (truthy) entry in the map is kept as-is rather than rejected.
 *
 * @param {string} sequence - the sequence to reverse complement
 * @returns {string} the complemented characters in reverse order
 */
export default function getReverseComplementSequenceString(sequence) {
  let complemented = "";
  let index = sequence.length;
  // Walk from the last character to the first.
  while (index-- > 0) {
    const base = sequence[index];
    complemented += DNAComplementMap[base] || base;
  }
  return complemented;
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import chai from "chai";
|
|
2
|
+
import chaiSubset from "chai-subset";
|
|
3
|
+
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
4
|
+
chai.should();
|
|
5
|
+
chai.use(chaiSubset);
|
|
6
|
+
// The suite and case titles were copied from the
// getReverseComplementSequenceAndAnnotations tests; they now name the
// function that is actually exercised here.
describe("getReverseComplementSequenceString", () => {
  it("reverse complements a sequence string containing both u and t bases", () => {
    const newSeq = getReverseComplementSequenceString("uuuucccttt");
    newSeq.should.eq("aaagggaaaa");
  });
});
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * Returns the characters of a sequence in reverse order, without
 * complementing them.
 *
 * @param {string} sequence - the sequence to reverse
 * @returns {string} the reversed sequence ("" for an empty input)
 */
export default function getReverseSequenceString(sequence) {
  let reverseSequenceString = "";
  // The original re-assigned a falsy character to itself before appending
  // it; that branch had no effect and has been removed.
  for (let i = sequence.length - 1; i >= 0; i--) {
    reverseSequenceString += sequence[i];
  }
  return reverseSequenceString;
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import chai from "chai";
|
|
2
|
+
import getReverseSequenceString from "./getReverseSequenceString";
|
|
3
|
+
chai.should();
|
|
4
|
+
// The suite and case titles were copied from another test file; they now
// name the function that is actually exercised here.
describe("getReverseSequenceString", () => {
  it("reverses a sequence string without complementing it", () => {
    const newSeq = getReverseSequenceString("uuuucccttt");
    newSeq.should.eq("tttcccuuuu");
  });
});
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import {flatMap, extend, forEach, startCase} from "lodash";
|
|
2
|
+
import {getRangeLength} from "@teselagen/range-utils";
|
|
3
|
+
import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
|
|
4
|
+
import insertSequenceDataAtPosition from "./insertSequenceDataAtPosition";
|
|
5
|
+
import {getSequenceWithinRange, getZeroedRangeOverlaps} from "@teselagen/range-utils";
|
|
6
|
+
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
7
|
+
import { annotationTypes } from "./annotationTypes";
|
|
8
|
+
|
|
9
|
+
/**
 * Extracts the part of a sequence (bases, protein sequence and annotations)
 * that lies within a range.
 *
 * @param {Object} seqData - the sequence data to cut from
 * @param {Object} [range] - `{ start, end }` in sequence coordinates, with an
 *   optional `overlapsSelf` flag; when falsy, `seqData` is returned untouched
 * @param {Object} [options] - forwarded to `tidyUpSequenceData`; also read:
 *   `exclude[type]` drops every annotation of that type, and
 *   `excludePartial[type]` drops annotations of that type that are only
 *   partly inside the range
 * @returns {Object} tidied sequence data for the range
 */
export default function getSequenceDataBetweenRange(
  seqData,
  range,
  options = {}
) {
  if (!range) return seqData;
  const { exclude = {}, excludePartial = {} } = options;
  const seqDataToUse = tidyUpSequenceData(seqData, options);
  annotationTypes.forEach(type => {
    delete seqDataToUse[`filtered${startCase(type)}`];
  });
  const seqDataToReturn = extend(
    {},
    seqDataToUse,
    {
      // The result keeps the circular flag only when the range spans the
      // whole sequence. The length is taken from the tidied data, the same
      // sequence the range is applied to below, so a raw input without a
      // `sequence` no longer throws here.
      circular:
        seqDataToUse.sequence.length ===
        getRangeLength(range, seqDataToUse.sequence.length)
          ? seqDataToUse.circular
          : false,
      sequence: getSequenceWithinRange(range, seqDataToUse.sequence),
      proteinSequence: getSequenceWithinRange(
        convertDnaCaretPositionOrRangeToAa(range),
        seqDataToUse.proteinSequence
      )
    },
    annotationTypes.reduce((acc, type) => {
      if (exclude[type]) {
        acc[type] = [];
        return acc; //return early cause we're not interested in these annotations
      }
      acc[type] = getAnnotationsBetweenRange(
        seqDataToUse[type],
        range,
        seqDataToUse.sequence.length,
        excludePartial[type]
      );
      return acc;
    }, {})
  );
  if (range.overlapsSelf) {
    // Insert the extracted bases into the full sequence at the range start,
    // then read one complete turn starting right after the range end.
    const extendedSeqData = insertSequenceDataAtPosition(
      { sequence: seqDataToReturn.sequence },
      seqDataToUse,
      range.start
    );

    const toRet = getSequenceDataBetweenRange(
      extendedSeqData,
      {
        start: range.end + 1,
        end: range.end
      },
      options
    );
    annotationTypes.forEach(type => {
      //we need to go through and adjust any anns where overlapsSelf=true to no longer overlap themselves if they match the range completely
      forEach(toRet[type], ann => {
        if (
          ann.overlapsSelf &&
          ann.start === 0 &&
          getRangeLength(ann, seqDataToUse.sequence.length) ===
            getRangeLength(range, seqDataToUse.sequence.length)
        ) {
          ann.overlapsSelf = false;
          ann.end = toRet.sequence.length - 1;
        }
      });
    });
    return tidyUpSequenceData(toRet, options);
  }

  return tidyUpSequenceData(seqDataToReturn, options);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Returns copies of the annotations (or annotation locations) that overlap
 * a range, with their coordinates taken from `getZeroedRangeOverlaps`.
 * The objects passed in are left unmodified.
 *
 * @param {Array<Object>} annotationsToBeAdjusted - annotations with
 *   `start`/`end` and optionally a `locations` array of sub-ranges
 * @param {Object} range - the `{ start, end }` range to keep
 * @param {number} maxLength - length of the sequence the ranges live on
 * @param {boolean} [shouldExcludePartial] - when truthy, an annotation that
 *   is only partly inside the range is dropped entirely
 * @returns {Array<Object>} one new annotation object per overlap
 */
function getAnnotationsBetweenRange(
  annotationsToBeAdjusted,
  range,
  maxLength,
  shouldExcludePartial
) {
  return flatMap(annotationsToBeAdjusted, annotation => {
    // Trim the locations on a shallow copy. The original wrote the trimmed
    // list back onto the caller's annotation, leaving it with zeroed
    // locations next to its untouched start/end.
    const annotationToUse =
      annotation.locations && annotation.locations.length
        ? extend({}, annotation, {
            locations: getAnnotationsBetweenRange(
              annotation.locations,
              range,
              maxLength,
              shouldExcludePartial
            )
          })
        : annotation;

    //get the overlap(s) of the annotation with the range; we get back 1 or more overlaps here
    const overlaps = getZeroedRangeOverlaps(
      annotationToUse,
      range,
      maxLength
    ).map(overlap => extend({}, annotationToUse, overlap));

    if (shouldExcludePartial) {
      if (overlaps.length > 1) return []; //the annotation has multiple overlaps and thus must be a partial copy so we exclude it completely
      if (
        overlaps[0] &&
        getRangeLength(overlaps[0], maxLength) !==
          getRangeLength(annotationToUse, maxLength)
      ) {
        //there is just 1 overlap, if it doesn't have the same length, it must be a partial copy so we need to exclude it
        return [];
      }
    }

    return overlaps;
  }).map(annotation => {
    // These are the fresh copies created above, so adjusting them in place
    // is safe: snap start/end to the surviving locations and drop a
    // locations list that has collapsed to a single entry.
    if (annotation.locations && annotation.locations.length) {
      annotation.start = annotation.locations[0].start;
      annotation.end =
        annotation.locations[annotation.locations.length - 1].end;

      if (annotation.locations.length === 1) delete annotation.locations;
    }
    return annotation;
  });
}
|