@teselagen/sequence-utils 0.1.22 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +2 -2
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
//tnr: half finished test.
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
import chai from "chai";
|
|
5
|
+
|
|
6
|
+
import {getRangeLength} from "@teselagen/range-utils";
|
|
7
|
+
import {cloneDeep} from "lodash";
|
|
8
|
+
import chaiSubset from "chai-subset";
|
|
9
|
+
|
|
10
|
+
import deleteSequenceDataAtRange from "./deleteSequenceDataAtRange";
|
|
11
|
+
|
|
12
|
+
chai.should();
|
|
13
|
+
chai.use(chaiSubset);
|
|
14
|
+
|
|
15
|
+
describe("deleteSequenceDataAtRange", () => {
  // 8 bp sequence shared by the length-only checks below.
  const EIGHT_BP = "atagatag";

  // Builds a feature starting at 0 with the given end and joined locations.
  const featureSpanning = (end, locations) => ({ start: 0, end, locations });

  it("Deletes everything if the range spans the whole sequence", () => {
    const seqData = { sequence: EIGHT_BP };
    const wholeRange = { start: 0, end: 7 };
    const result = deleteSequenceDataAtRange(seqData, wholeRange);
    const expectedLength = seqData.sequence.length - getRangeLength(wholeRange);
    result.sequence.length.should.equal(expectedLength);
  });

  it("Deletes everything if the range spans the whole sequence (circular selection)", () => {
    const seqData = { sequence: EIGHT_BP };
    // start > end: the selection wraps around the origin.
    const wrappingRange = { start: 4, end: 3 };
    const result = deleteSequenceDataAtRange(seqData, wrappingRange);
    const removed = getRangeLength(wrappingRange, seqData.sequence.length);
    result.sequence.length.should.equal(seqData.sequence.length - removed);
  });

  it("Delete characters at correct range", () => {
    const seqData = { sequence: EIGHT_BP };
    const innerRange = { start: 3, end: 5 };
    const result = deleteSequenceDataAtRange(seqData, innerRange);
    const expectedLength = seqData.sequence.length - getRangeLength(innerRange);
    result.sequence.length.should.equal(expectedLength);
  });

  it("does not mutate the original sequence", () => {
    const seqData = {
      sequence: EIGHT_BP,
      features: {
        "1": { start: 7, end: 7 }
      }
    };
    // Snapshot taken before the call so any in-place mutation is detectable.
    const snapshot = cloneDeep(seqData);
    const innerRange = { start: 3, end: 5 };
    const result = deleteSequenceDataAtRange(seqData, innerRange);
    seqData.should.deep.equal(snapshot);
    const expectedLength = seqData.sequence.length - getRangeLength(innerRange);
    result.sequence.length.should.equal(expectedLength);
  });

  it("Handles a non valid range by returning the original sequence", () => {
    const seqData = {
      sequence: "atgagagaga",
      features: [
        featureSpanning(9, [
          { start: 0, end: 2 },
          { start: 4, end: 9 }
        ])
      ]
    };
    const invalidRange = { start: -1, end: -1 };
    const result = deleteSequenceDataAtRange(seqData, invalidRange);
    const expected = {
      sequence: "atgagagaga",
      features: [
        featureSpanning(9, [
          { start: 0, end: 2 },
          { start: 4, end: 9 }
        ])
      ]
    };
    result.should.containSubset(expected);
    result.features.length.should.equal(1);
  });

  it("Delete characters and features (with joined locations) at correct range", () => {
    const seqData = {
      sequence: "atgagagaga",
      features: [
        featureSpanning(9, [
          { start: 0, end: 2 },
          { start: 3, end: 7 },
          { start: 9, end: 9 }
        ])
      ]
    };
    // Deletes exactly the middle location of the joined feature.
    const result = deleteSequenceDataAtRange(seqData, { start: 3, end: 7 });
    const expected = {
      sequence: "atgga",
      features: [
        featureSpanning(4, [
          { start: 0, end: 2 },
          { start: 4, end: 4 }
        ])
      ]
    };
    result.should.containSubset(expected);
    result.features.length.should.equal(1);
  });

  it("Moves annotations when delete occurs before annotation", () => {
    const seqData = {
      sequence: "atgagagaga",
      parts: [{ start: 5, end: 9 }]
    };
    // Single-base deletion upstream of the part shifts it left by one.
    const result = deleteSequenceDataAtRange(seqData, { start: 3, end: 3 });
    const expected = {
      sequence: "atggagaga",
      parts: [{ start: 4, end: 8 }]
    };
    result.should.containSubset(expected);
    result.parts.length.should.equal(1);
  });
});
|
package/src/diffUtils.js
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { cloneDeep, forEach } from 'lodash';
|
|
2
|
+
import {
|
|
3
|
+
diff,
|
|
4
|
+
patch,
|
|
5
|
+
reverse,
|
|
6
|
+
} from 'jsondiffpatch-rc';
|
|
7
|
+
|
|
8
|
+
import tidyUpSequenceData from './tidyUpSequenceData';
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
/**
 * Computes a jsondiffpatch delta between two sequence-data objects.
 *
 * Both inputs are first passed through `tidyUpSequenceData` (with
 * `annotationsAsObjects` and `noTranslationData`). The tidied copies then have
 * a fixed set of keys removed (plus any `ignoreKeys`), translations whose
 * `translationType` is set to something other than 'User Created' dropped,
 * and `aminoAcids` stripped from the remaining translations before diffing.
 *
 * NOTE(review): the deletions act on the objects returned by
 * `tidyUpSequenceData`; this presumes that helper returns copies rather than
 * the caller's objects - confirm.
 *
 * @param {Object} oldData - sequence data before the change
 * @param {Object} newData - sequence data after the change
 * @param {Object} [options]
 * @param {string[]} [options.ignoreKeys=[]] - extra top-level keys to exclude
 * @returns {*} whatever `diff` returns for the two normalized objects
 */
const getDiffFromSeqs = (oldData, newData, { ignoreKeys = [] } = {}) => {
  const normalize = (seqData) =>
    tidyUpSequenceData(seqData, {
      annotationsAsObjects: true,
      noTranslationData: true,
    });

  const before = normalize(oldData);
  const after = normalize(newData);

  const excludedKeys = [
    'cutsites',
    'orfs',
    'filteredFeatures',
    'size',
    'fromFileUpload',
    'description',
    'materiallyAvailable',
    ...ignoreKeys,
  ];

  // Removes everything that must not take part in the comparison.
  const stripNonDiffableData = (seqData) => {
    for (const key of excludedKeys) {
      delete seqData[key];
    }
    if (!seqData.translations) {
      return;
    }
    forEach(seqData.translations, (translation, id) => {
      const { translationType } = translation;
      const isDerived = translationType && translationType !== 'User Created';
      if (isDerived) {
        delete seqData.translations[id];
        return;
      }
      delete translation.aminoAcids;
    });
  };

  stripNonDiffableData(before);
  stripNonDiffableData(after);

  return diff(before, after);
};
|
|
51
|
+
/**
 * Applies a delta (as produced by `getDiffFromSeqs`) to sequence data.
 *
 * `oldData` is deep-cloned and passed through `tidyUpSequenceData` (with
 * `annotationsAsObjects`) before patching, so the caller's object is not
 * handed to `patch` directly.
 *
 * The delta itself is no longer modified: when `ignoreKeys` are given, they
 * are removed from a shallow copy, so the same delta can be reused (e.g. for
 * `reverseSeqDiff`) afterwards. An `undefined`/`null` delta combined with
 * `ignoreKeys` is passed through untouched instead of throwing.
 *
 * @param {Object} oldData - sequence data to patch
 * @param {Object} [diff] - delta to apply
 * @param {Object} [options]
 * @param {string[]} [options.ignoreKeys=[]] - top-level delta keys to skip
 * @returns {*} the result of `patch` on the tidied clone
 */
const patchSeqWithDiff = (oldData, diff, { ignoreKeys = [] } = {}) => {
  let delta = diff;
  const isObjectDelta =
    diff !== null && typeof diff === 'object' && !Array.isArray(diff);
  if (ignoreKeys.length > 0 && isObjectDelta) {
    // Shallow copy: only top-level keys are removed, nested deltas are shared.
    delta = { ...diff };
    ignoreKeys.forEach((k) => {
      delete delta[k];
    });
  }
  return patch(
    tidyUpSequenceData(cloneDeep(oldData), { annotationsAsObjects: true }),
    delta
  );
};
|
|
60
|
+
/**
 * Returns the inverse of a sequence delta by delegating to `reverse`.
 *
 * @param {Object} diff - delta to invert
 * @returns {*} whatever `reverse` returns for the delta
 */
const reverseSeqDiff = (diff) => reverse(diff);
|
|
63
|
+
|
|
64
|
+
export { getDiffFromSeqs, patchSeqWithDiff, reverseSeqDiff };
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import chai from "chai";
|
|
2
|
+
import assert from "assert";
|
|
3
|
+
import {map} from "lodash";
|
|
4
|
+
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
5
|
+
import {getDiffFromSeqs, patchSeqWithDiff, reverseSeqDiff} from "./diffUtils";
|
|
6
|
+
import chaiSubset from "chai-subset";
|
|
7
|
+
|
|
8
|
+
chai.should();
|
|
9
|
+
chai.use(chaiSubset);
|
|
10
|
+
|
|
11
|
+
describe("getDiffFromSeqs patchSeqWithDiff reverseSeqDiff", () => {
  it("ignores translation amino acids, cutsites, orfs, filteredFeatures, and works as expected", () => {
    const tidyOptions = { annotationsAsObjects: true };

    const originalSeq = tidyUpSequenceData(
      {
        sequence: "atagatagatagatagatagatagatagatagatagatagatagatagatagatag",
        translations: [{ id: 10, start: 10, end: 24 }],
        features: [],
        cutsites: [{ name: "fakeSite" }],
        orfs: [{ name: "fakeOrf", start: 10, end: 20 }],
        filteredFeatures: [{ name: "filteredFeat1" }]
      },
      tidyOptions
    );

    const alteredSeq = tidyUpSequenceData(
      {
        sequence: "agatagatagatagatagatagatagatagatagatagatagatagatagatag",
        translations: [
          { id: 10, start: 13, end: 24 },
          { id: "awgwtwt", start: 3, end: 20, translationType: "CDS Feature" }
        ],
        cutsites: [{ name: "fakeSite" }],
        orfs: [],
        features: [{ name: "I'm new!", start: 30, end: 35 }],
        filteredFeatures: [{ name: "filteredFeat1" }]
      },
      tidyOptions
    );

    const diff = getDiffFromSeqs(originalSeq, alteredSeq);

    // None of these markers may show up anywhere in the serialized delta.
    const forbidden = [
      ["aminoAcids", "Diffs should not include aminoAcids!"],
      ["cutsites", "Diffs should not include cutsites!"],
      ["translationType", "Diffs should not include non-user-created translations!"]
    ];
    for (const [marker, message] of forbidden) {
      assert(!JSON.stringify(diff, null, 4).includes(marker), message);
    }

    // Forward direction: original + diff should carry the newly added feature.
    const patchedForward = patchSeqWithDiff(originalSeq, diff);
    const newFeature = { name: "I'm new!", start: 30, end: 35 };
    map(patchedForward.features).should.containSubset([newFeature]);

    // Backward direction: altered + reversed diff should have no features again.
    const patchedBackward = patchSeqWithDiff(alteredSeq, reverseSeqDiff(diff));
    map(patchedBackward.features).length.should.equal(0);
  });
});
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
import chai from "chai";
|
|
4
|
+
import doesEnzymeChopOutsideOfRecognitionSite from "./doesEnzymeChopOutsideOfRecognitionSite.js";
|
|
5
|
+
import enzymeList from "./aliasedEnzymesByName";
|
|
6
|
+
|
|
7
|
+
chai.should();
|
|
8
|
+
|
|
9
|
+
describe("doesEnzymeChopOutsideOfRecognitionSite", () => {
  // Shape of an enzyme entry, as originally noted for bamhi (values not
  // re-checked against aliasedEnzymesByName here):
  //   {
  //     name: "bamhi",
  //     forwardRegex: "g{2}atc{2}",
  //     reverseRegex: "g{2}atc{2}",
  //     topSnipOffset: 1,
  //     bottomSnipOffset: 5
  //   }
  it("should return a result for every enzyme", () => {
    // Smoke test only: passes as long as no entry makes the helper throw.
    for (const enzymeName of Object.keys(enzymeList)) {
      doesEnzymeChopOutsideOfRecognitionSite(enzymeList[enzymeName]);
    }
  });

  it("knows which enzymes chop within the recognition site", () => {
    // [enzyme key, expected "chops outside of recognition site"]
    const expectations = [
      ["bamhi", false],
      ["xhoi", false],
      ["bsmbi", true],
      ["bsai", true]
    ];
    for (const [enzymeName, chopsOutside] of expectations) {
      doesEnzymeChopOutsideOfRecognitionSite(
        enzymeList[enzymeName]
      ).should.equal(chopsOutside);
    }
  });
});
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import {get, keyBy, filter} from "lodash";
|
|
2
|
+
|
|
3
|
+
// Built-in feature types with their default display colors.
// Every color is a "#RRGGBB" hex string (telomere previously lacked the
// leading "#", which made it differ from all other entries).
const genbankFeatureTypes = [
  { name: "-10_signal", color: "#4ECDC4" },
  { name: "-35_signal", color: "#F7FFF7" },
  { name: "3'clip", color: "#FF6B6B" },
  { name: "3'UTR", color: "#FFE66D" },
  { name: "5'clip", color: "#3E517A" },
  { name: "5'UTR", color: "#BBBBBB" },
  { name: "D-loop", color: "#F13C73" },
  { name: "assembly_gap", color: "#DE9151" },
  { name: "centromere", color: "#F34213" },
  { name: "Het", color: "#BC5D2E" },
  { name: "mobile_element", color: "#6DB1BF" },
  { name: "ncRNA", color: "#FFEAEC" },
  { name: "proprotein", color: "#F39A9D" },
  { name: "regulatory", color: "#3F6C51" },
  { name: "SecStr", color: "#7B4B94" },
  { name: "Site", color: "#7D82B8" },
  { name: "telomere", color: "#DE9151" },
  { name: "tmRNA", color: "#B7E3CC" },
  { name: "unsure", color: "#C4FFB2" },
  { name: "V_segment", color: "#D6F7A3" },
  { name: "allele", color: "#D86D6D" },
  { name: "attenuator", color: "#6B7F9C" },
  { name: "C_region", color: "#B5D89D" },
  { name: "CAAT_signal", color: "#E9CD98" },
  { name: "CDS", color: "#EF6500" },
  { name: "conserved", color: "#A3A5F0" },
  { name: "D_segment", color: "#C060F7" },
  { name: "default", color: "#CCCCCC" },
  { name: "enhancer", color: "#38F872" },
  { name: "exon", color: "#95F844" },
  { name: "gap", color: "#F7D43C" },
  { name: "GC_signal", color: "#861F1F" },
  { name: "gene", color: "#684E27" },
  { name: "iDNA", color: "#A59B41" },
  { name: "intron", color: "#52963E" },
  { name: "J_region", color: "#369283" },
  { name: "LTR", color: "#31748F" },
  { name: "m_rna", color: "#FFFF00" },
  { name: "mat_peptide", color: "#353E8F" },
  { name: "misc_binding", color: "#006FEF" },
  { name: "misc_difference", color: "#5A368A" },
  { name: "misc_feature", color: "#006FEF" },
  { name: "misc_marker", color: "#8DCEB1" },
  { name: "misc_part", color: "#006FEF" },
  { name: "misc_recomb", color: "#DD97B4" },
  { name: "misc_RNA", color: "#BD0101" },
  { name: "misc_signal", color: "#FF9A04" },
  { name: "misc_structure", color: "#B3FF00" },
  { name: "modified_base", color: "#00F7FF" },
  { name: "mRNA", color: "#FFD900" },
  { name: "N_region", color: "#AE00FF" },
  { name: "old_sequence", color: "#F0A7FF" },
  { name: "operator", color: "#63004D" },
  { name: "operon", color: "#000653" },
  { name: "oriT", color: "#580000" },
  { name: "plasmid", color: "#00635E" },
  { name: "polyA_signal", color: "#BBBBBB" },
  { name: "polyA_site", color: "#003328" },
  { name: "precursor_RNA", color: "#443200" },
  { name: "prim_transcript", color: "#665E4C" },
  { name: "primer_bind", color: "#53d969" },
  { name: "promoter", color: "#31B440" },
  { name: "protein_bind", color: "#2E2E2E" },
  { name: "protein_domain", color: "#4D4B4B" },
  { name: "protein", color: "#696969" },
  { name: "RBS", color: "#BDFFCB" },
  { name: "rep_origin", color: "#878787" },
  { name: "repeat_region", color: "#966363" },
  { name: "repeat_unit", color: "#A16D8D" },
  { name: "rRNA", color: "#9BF0FF" },
  { name: "s_mutation", color: "#70A2FF" },
  { name: "S_region", color: "#FF74A9" },
  { name: "satellite", color: "#164E64" },
  { name: "scRNA", color: "#A057FF" },
  { name: "sig_peptide", color: "#2FFF8D" },
  { name: "snoRNA", color: "#296B14" },
  { name: "snRNA", color: "#A16249" },
  { name: "source", color: "#0B17BD" },
  { name: "start", color: "#D6A336" },
  { name: "stem_loop", color: "#67069E" },
  { name: "stop", color: "#D44FC9" },
  { name: "STS", color: "#597FE7" },
  { name: "tag", color: "#E419DA" },
  { name: "TATA_signal", color: "#EB2B2B" },
  { name: "terminator", color: "#F51600" },
  { name: "transit_peptide", color: "#24D491" },
  { name: "transposon", color: "#B6E436" },
  { name: "tRNA", color: "#D1456F" },
  { name: "V_region", color: "#7B5EE7" },
  { name: "variation", color: "#2EE455" }
];
|
|
95
|
+
|
|
96
|
+
/**
 * Builds a name-keyed map of feature types: the built-in genbank types merged
 * with any overrides found in `tg_featureTypeOverrides` (read from `window`
 * first, then `global`).
 *
 * - Built-in entries are flagged `isGenbankStandardType: true`.
 * - An override whose name matches a built-in entry is layered on top of it
 *   and flagged `isOverridden: true`.
 * - An override with an unknown name is flagged `isCustomType: true`.
 *
 * @returns {Object} feature-type objects keyed by `name`
 */
const getMergedFeatureMap = () => {
  const standardTypes = genbankFeatureTypes.map(f => ({
    ...f,
    isGenbankStandardType: true
  }));
  const standardByName = keyBy(standardTypes, "name");

  const OVERRIDES_KEY = "tg_featureTypeOverrides";
  const configuredOverrides =
    (typeof window !== "undefined" && get(window, OVERRIDES_KEY)) ||
    (typeof global !== "undefined" && get(global, OVERRIDES_KEY)) ||
    [];

  const resolvedOverrides = configuredOverrides.map(override => {
    const standard = standardByName[override.name];
    const merged = { ...standard, ...override };
    if (standard) {
      merged.isOverridden = true;
    } else {
      merged.isCustomType = true;
    }
    return merged;
  });

  // Overrides win over the built-in entry of the same name.
  return {
    ...standardByName,
    ...keyBy(resolvedOverrides, "name")
  };
};
|
|
124
|
+
|
|
125
|
+
/**
 * Maps feature-type names to colors, based on `getMergedFeatureMap()`.
 *
 * @param {Object} [options]
 * @param {boolean} [options.includeHidden] - also include types flagged `isHidden`
 * @returns {Object} `{ [featureTypeName]: color }`
 */
const getFeatureToColorMap = ({ includeHidden } = {}) => {
  const shownTypes = filter(
    getMergedFeatureMap(),
    featureType => Boolean(includeHidden) || !featureType.isHidden
  );
  const colorByName = {};
  for (const featureType of shownTypes) {
    colorByName[featureType.name] = featureType.color;
  }
  return colorByName;
};
|
|
134
|
+
|
|
135
|
+
/**
 * Lists the names of all feature types from `getMergedFeatureMap()`.
 *
 * @param {Object} [options]
 * @param {boolean} [options.includeHidden] - also include types flagged `isHidden`
 * @returns {string[]} feature-type names
 */
const getFeatureTypes = ({ includeHidden } = {}) => {
  const shownTypes = filter(
    getMergedFeatureMap(),
    featureType => Boolean(includeHidden) || !featureType.isHidden
  );
  return shownTypes.map(({ name }) => name);
};
|
|
139
|
+
|
|
140
|
+
export {genbankFeatureTypes};
|
|
141
|
+
|
|
142
|
+
/**
 * Maps each built-in genbank feature-type name to its default color.
 * Reads only `genbankFeatureTypes`; overrides are not consulted.
 *
 * @returns {Object} `{ [featureTypeName]: color }`
 */
export function getGenbankFeatureToColorMap() {
  return genbankFeatureTypes.reduce((colorByName, featureType) => {
    colorByName[featureType.name] = featureType.color;
    return colorByName;
  }, {});
}
|
|
149
|
+
|
|
150
|
+
export {getFeatureToColorMap};
|
|
151
|
+
export {getFeatureTypes};
|
|
152
|
+
export {getMergedFeatureMap};
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import {
|
|
2
|
+
getFeatureTypes,
|
|
3
|
+
getFeatureToColorMap,
|
|
4
|
+
getMergedFeatureMap,
|
|
5
|
+
getGenbankFeatureToColorMap,
|
|
6
|
+
} from "./featureTypesAndColors";
|
|
7
|
+
|
|
8
|
+
describe("getFeatureToColorMap", () => {
  // Fresh override list per call so no test shares array or object
  // instances with another.
  const makeOverrides = () => [
    { name: "proprotein", isHidden: true },
    { name: "CDS", color: "blue" },
    { name: "someRandomFeature", color: "red", genbankEquivalentType: "RBS" }
  ];

  it("should pass back feature colors by default ", () => {
    const colorMap = getFeatureToColorMap();
    expect(colorMap.proprotein).toEqual("#F39A9D");
  });

  it("getFeatureTypes should not show hidden types by default ", () => {
    global.tg_featureTypeOverrides = makeOverrides();
    expect(getFeatureTypes().includes("proprotein")).toEqual(false);
    expect(getFeatureTypes().includes("CDS")).toEqual(true);
    const allTypes = getFeatureTypes({ includeHidden: true });
    expect(allTypes.includes("proprotein")).toEqual(true);
  });

  it("should allow overwriting of colors ", () => {
    global.tg_featureTypeOverrides = makeOverrides();
    const featMap = getFeatureToColorMap();
    // Overridden color wins, hidden type is absent, custom type is present.
    expect(featMap.CDS).toEqual("blue");
    expect(featMap.proprotein).toEqual(undefined);
    expect(featMap.someRandomFeature).toEqual("red");
    // The genbank-only map never contains custom types.
    const genbankOnly = getGenbankFeatureToColorMap();
    expect(genbankOnly.someRandomFeature).toEqual(undefined);
  });
});
|
|
39
|
+
describe("getMergedFeatureMap", () => {
  it("should maintain the genbankEquivalentType", () => {
    const overrides = [
      { name: "proprotein", isHidden: true },
      { name: "CDS", color: "blue" },
      { name: "someRandomFeature", color: "red", genbankEquivalentType: "RBS" }
    ];
    global.tg_featureTypeOverrides = overrides;

    const mergedFeatMap = getMergedFeatureMap();

    // Custom types keep the extra fields supplied in the override.
    const customType = mergedFeatMap.someRandomFeature;
    expect(customType.genbankEquivalentType).toEqual("RBS");
    // Overridden built-in types keep their standard-type flag.
    const overriddenType = mergedFeatMap.CDS;
    expect(overriddenType.isGenbankStandardType).toEqual(true);
  });
});
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
//
|
|
2
|
+
/**
 * Strips every character that is not an accepted amino-acid letter.
 *
 * Accepted letters (case-insensitive): x t g a l m f w k q e s p v i c y h r n d u.
 * With `options.includeStopCodon` set, "." and "*" are kept as well.
 *
 * @param {string} sequenceString - raw amino-acid string
 * @param {Object} [options]
 * @param {boolean} [options.includeStopCodon] - keep "." and "*" characters
 * @returns {string} the input with all other characters removed
 */
export default function filterAminoAcidSequenceString(
  sequenceString,
  options
) {
  const keepStopCodons = Boolean(options && options.includeStopCodon);
  const rejectedChars = keepStopCodons
    ? /[^xtgalmfwkqespvicyhrndu.*]/gi
    : /[^xtgalmfwkqespvicyhrndu]/gi;
  return sequenceString.replace(rejectedChars, "");
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import assert from "assert";
|
|
2
|
+
import filterAminoAcidSequenceString from "./filterAminoAcidSequenceString";
|
|
3
|
+
describe("filterAminoAcidSequenceString", () => {
  // Valid residues surrounded by letters, digits and punctuation to be removed.
  const NOISY_INPUT = 'bbb342"""xtgalmfwkqespvicyhrnd,,../';

  it("should filter only valid amino acids by default", () => {
    const actual = filterAminoAcidSequenceString(NOISY_INPUT);
    assert.equal(actual, "xtgalmfwkqespvicyhrnd");
  });

  it("should handle upper case letters", () => {
    const mixedCase = "xtgalmfWKQEspvicyhrnd";
    const actual = filterAminoAcidSequenceString(mixedCase);
    assert.equal(actual, "xtgalmfWKQEspvicyhrnd");
  });

  it("should handle the option to includeStopCodon by allowing periods", () => {
    const actual = filterAminoAcidSequenceString(NOISY_INPUT, {
      includeStopCodon: true
    });
    // The two trailing periods survive; the other punctuation does not.
    assert.equal(actual, "xtgalmfwkqespvicyhrnd..");
  });
});
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
// this is throwing a weird eslint error
|
|
2
|
+
|
|
3
|
+
//
|
|
4
|
+
/**
 * Removes every character that is not an accepted nucleotide letter.
 *
 * Accepted by default (case-insensitive): a t g c y r s w k m b v d h n u,
 * plus each character of `additionalValidChars`, taken literally.
 *
 * Only the characters that are special inside a regex character class
 * (`\`, `]`, `^`, `-`) are escaped. Escaping every character would turn
 * letters into regex classes (e.g. "d" -> `\d` would allow all digits).
 *
 * @param {string} sequenceString - raw sequence; falsy values are returned as-is
 * @param {string} [additionalValidChars=""] - extra characters to keep
 * @param {string} [charOverrides] - when truthy, replaces the whole allowed
 *   set; it is inserted verbatim into the character class, so the caller is
 *   responsible for any escaping
 * @returns {string} the filtered sequence
 */
export default function filterSequenceString(
  sequenceString,
  additionalValidChars = "",
  charOverrides
) {
  if (!sequenceString) {
    return sequenceString;
  }
  const escapedExtras = String(additionalValidChars || "").replace(
    /[\\\]^-]/g,
    "\\$&"
  );
  const allowedChars = charOverrides || `atgcyrswkmbvdhnu${escapedExtras}`;
  return sequenceString.replace(new RegExp(`[^${allowedChars}]`, "gi"), "");
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import filterSequenceString from "./filterSequenceString";
|
|
2
|
+
|
|
3
|
+
describe("filterSequenceString", () => {
  it("should filter out unwanted chars", () => {
    // "-" is not a valid base, so both dashes are removed.
    expect(filterSequenceString("tatag--a")).toBe("tataga");
  });
  // The two tests below used to share one title, which makes failures
  // ambiguous in the report; each now names the case it covers.
  it("should handle a single additional char", () => {
    expect(filterSequenceString("tatag--a", "-")).toBe("tatag--a");
  });
  it("should handle multiple additional chars", () => {
    expect(filterSequenceString("tatag--a", "f-q")).toBe("tatag--a");
  });
});
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import {normalizeRange} from "@teselagen/range-utils";
|
|
2
|
+
/**
 * Finds the occurrence of `overlapSequence` in `sequenceToSearch` whose start
 * is closest to `positionStart` and returns it as a normalized range.
 *
 * For circular sequences (`isLinear` falsy) the sequence is searched doubled,
 * so occurrences spanning the origin are found, and distance is measured
 * around the origin as well. When two occurrences are equally close, the one
 * with the larger start index wins.
 *
 * Note: `overlapSequence` is used as a case-insensitive regular-expression
 * pattern, not as an escaped literal.
 *
 * @param {string} sequenceToSearch - sequence to search in
 * @param {string} overlapSequence - sequence (pattern) to look for
 * @param {number} [positionStart=0] - position the match should be close to
 * @param {boolean} [isLinear] - truthy to disable wrap-around matching
 * @returns {Object|null} `normalizeRange({ start, end }, sequenceLength)` for
 *   the nearest occurrence, or null when `overlapSequence` is empty, longer
 *   than the sequence, or does not occur
 */
function findNearestRangeOfSequenceOverlapToPosition(
  sequenceToSearch,
  overlapSequence,
  positionStart,
  isLinear
) {
  if (!positionStart) positionStart = 0;
  // An empty pattern matches at every position without consuming input,
  // which previously kept the search loop from ever terminating.
  if (!overlapSequence || sequenceToSearch.length < overlapSequence.length) {
    return null;
  }
  const sequenceLength = sequenceToSearch.length;
  const haystack = isLinear
    ? sequenceToSearch
    : sequenceToSearch + sequenceToSearch;
  const regex = new RegExp(overlapSequence, "ig");
  let result;
  let index;
  let distance = Infinity;
  while ((result = regex.exec(haystack))) {
    // Resume one past this match's start so overlapping occurrences are also
    // considered and the loop always makes progress.
    regex.lastIndex = result.index + 1;
    // Starts at or beyond the sequence length are duplicates of matches
    // already seen in the first copy.
    if (result.index >= sequenceLength) break;
    let newDistance = Math.abs(result.index - positionStart);
    if (!isLinear) {
      // also measure the distance going around the origin
      newDistance = Math.min(
        newDistance,
        Math.abs(newDistance - sequenceLength)
      );
    }
    if (newDistance > distance) {
      // Linear: distances only grow from here on, so stop early.
      // Circular: distances can shrink again near the end, so keep scanning.
      if (isLinear) break;
      continue;
    }
    index = result.index;
    distance = newDistance;
  }
  if (index === undefined) {
    // no occurrence found
    return null;
  }
  //index is the closest range start
  return normalizeRange(
    {
      start: index,
      end: index + overlapSequence.length - 1
    },
    sequenceLength
  );
}
|
|
39
|
+
export default findNearestRangeOfSequenceOverlapToPosition;
|