@teselagen/sequence-utils 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +2 -2
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import assert from "assert";
|
|
2
|
+
import findNearestRangeOfSequenceOverlapToPosition from "./findNearestRangeOfSequenceOverlapToPosition";
|
|
3
|
+
// Tests for findNearestRangeOfSequenceOverlapToPosition(sequence, overlap, position).
// The expected values below treat the returned range as 0-based with an inclusive
// `end` (a 10-base overlap spans start..start+9), and a range whose start is greater
// than its end is one that wraps around the origin of the sequence.
// assert.strictEqual is used instead of the legacy loose assert.equal so that a
// string such as "27" can never pass for the number 27.
describe("findNearestRangeOfSequenceOverlapToPosition", () => {
  it("should find the nearest overlap range to the given position", () => {
    const range = findNearestRangeOfSequenceOverlapToPosition(
      "gagagtagagatagagtagagatagagatagagagagagccagcagacgacgagcagcctacgtcatcatagagagagaag",
      "atagagagag",
      17
    );
    assert.strictEqual(range.start, 27);
    assert.strictEqual(range.end, 36);
  });

  it("should find the nearest overlap range to the given position at the end of the sequence", () => {
    // same sequence as above, but measured from position 0 the occurrence near
    // the end of the sequence is the one expected back
    const range = findNearestRangeOfSequenceOverlapToPosition(
      "gagagtagagatagagtagagatagagatagagagagagccagcagacgacgagcagcctacgtcatcatagagagagaag",
      "atagagagag",
      0
    );
    assert.strictEqual(range.start, 68);
    assert.strictEqual(range.end, 77);
  });

  it("should find the nearest overlap range even when that range overlaps the origin", () => {
    // the sequence ends in "atag" and begins with "agagag", so the overlap
    // starts near the end and finishes at index 5
    const range = findNearestRangeOfSequenceOverlapToPosition(
      "agagaggagagtagagatagagtagagatagagatagagagagagccagcagacgacgagcagcctacgtcatcatagagagagaagatag",
      "atagagagag",
      0
    );
    assert.strictEqual(range.start, 87);
    assert.strictEqual(range.end, 5);
  });
});
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import getOrfsFromSequence from "./getOrfsFromSequence.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Collects the ORFs reported by getOrfsFromSequence for both strand directions.
 *
 * getOrfsFromSequence is called twice with identical options except for
 * `forward` (true first, then false); the two results are concatenated.
 *
 * @param {string} sequence - forwarded unchanged as `sequence`
 * @param {boolean} circular - forwarded unchanged as `circular`
 * @param {number} minimumOrfSize - forwarded unchanged as `minimumOrfSize`
 * @param {boolean} useAdditionalOrfStartCodons - forwarded unchanged
 * @returns {Array} the `forward: true` results followed by the `forward: false` results
 */
export default function findOrfsInPlasmid(
  sequence,
  circular,
  minimumOrfSize,
  useAdditionalOrfStartCodons
) {
  // NOTE (tnr): the orfs should be pared down immediately after they are
  // returned from getOrfsFromSequence; that filtering is not done here yet.
  const optionsFor = forward => ({
    sequence,
    minimumOrfSize,
    forward,
    circular,
    useAdditionalOrfStartCodons
  });
  const [forwardOrfs, reverseOrfs] = [true, false].map(forward =>
    getOrfsFromSequence(optionsFor(forward))
  );
  return forwardOrfs.concat(reverseOrfs);
}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import {modulateRangeBySequenceLength, flipContainedRange} from "@teselagen/range-utils";
|
|
2
|
+
import {reduce, uniqBy} from "lodash";
|
|
3
|
+
import escapeStringRegexp from "escape-string-regexp";
|
|
4
|
+
import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
|
|
5
|
+
import {ambiguous_dna_values, extended_protein_values} from "./bioData";
|
|
6
|
+
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
7
|
+
|
|
8
|
+
/**
 * Finds the ranges at which `searchString` occurs in `sequence`.
 *
 * The given strand is always searched. When `options.searchReverseStrand` is
 * truthy the reverse complement of `sequence` is searched as well; each of
 * those hits is passed through flipContainedRange against the whole-sequence
 * range and tagged with `bottomStrand: true`, then appended after the
 * top-strand hits.
 *
 * @param {string} sequence - the sequence to search
 * @param {string} searchString - the text to look for
 * @param {Object} [options] - also forwarded to the per-strand search
 * @param {boolean} [options.searchReverseStrand] - also search the reverse complement
 * @returns {Array<{start: number, end: number, bottomStrand?: boolean}>}
 */
export default function findSequenceMatches(
  sequence,
  searchString,
  options = {}
) {
  const topStrandMatches = findSequenceMatchesTopStrand(
    sequence,
    searchString,
    options
  );
  if (!options.searchReverseStrand) {
    return topStrandMatches;
  }

  const sequenceLength = sequence.length;
  const bottomStrandMatches = findSequenceMatchesTopStrand(
    getReverseComplementSequenceString(sequence),
    searchString,
    options
  ).map(range => ({
    // hits were located on the reverse complement, so flip them back into the
    // coordinates of the original sequence
    ...flipContainedRange(
      range,
      { start: 0, end: sequenceLength - 1 },
      sequenceLength
    ),
    bottomStrand: true
  }));
  return topStrandMatches.concat(bottomStrandMatches);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Finds every (possibly overlapping) occurrence of `searchString` in `sequence`
 * as given, without looking at the reverse complement.
 *
 * The search string is regex-escaped and matched case-insensitively. With
 * `isAmbiguous` it is first expanded by convertAmbiguousStringToRegex, using the
 * protein table when either `isProteinSearch` or `isProteinSequence` is set.
 * With `isProteinSearch` the sequence is translated in three frames (skipping
 * 0, 1 and 2 leading characters) and hits are converted back to base
 * coordinates. With `isCircular` the sequence is searched doubled so hits that
 * span the origin are found.
 *
 * @param {string} sequence - the sequence to search
 * @param {string} searchString - the text to look for
 * @param {Object} [options]
 * @param {boolean} [options.isCircular]
 * @param {boolean} [options.isAmbiguous]
 * @param {boolean} [options.isProteinSequence]
 * @param {boolean} [options.isProteinSearch]
 * @returns {Array<{start: number, end: number}>} ranges with an inclusive `end`,
 *   de-duplicated on their start/end pair
 */
function findSequenceMatchesTopStrand(sequence, searchString, options = {}) {
  const {
    isCircular,
    isAmbiguous,
    isProteinSequence,
    isProteinSearch
  } = options;
  let searchStringToUse = escapeStringRegexp(searchString);
  if (isAmbiguous) {
    // protein table when either side of the search is protein, DNA table otherwise
    searchStringToUse = convertAmbiguousStringToRegex(
      searchStringToUse,
      Boolean(isProteinSearch || isProteinSequence)
    );
  }
  if (!searchStringToUse) return []; //short circuit if nothing is actually being searched for (eg searching for "%%"")

  // doubling a circular sequence lets a plain linear scan see origin-spanning hits
  const sequenceToUse = isCircular ? sequence + sequence : sequence;

  let sequencesToCheck = [{ seqToCheck: sequenceToUse, offset: 0 }];
  if (isProteinSearch) {
    // one translation per reading frame; offset is the number of leading
    // characters skipped before translating
    sequencesToCheck = [0, 1, 2].map(offset => ({
      seqToCheck: getAminoAcidStringFromSequenceString(
        sequenceToUse.slice(offset)
      ),
      offset
    }));
  }

  const ranges = [];
  sequencesToCheck.forEach(({ seqToCheck, offset }) => {
    // a fresh regex per frame: the "g" flag makes exec() stateful via lastIndex
    const reg = new RegExp(searchStringToUse, "ig");
    let match;
    /* eslint-disable no-cond-assign*/
    while ((match = reg.exec(seqToCheck)) !== null) {
      // Measure the text that actually matched rather than searchString.length:
      // in ambiguous mode characters without a table entry are dropped from the
      // pattern, so the original search string can be longer than the match.
      const range = {
        start: match.index,
        end: match.index + match[0].length - 1
      };
      if (isProteinSearch) {
        // amino-acid index -> base index: 3 bases per residue plus the frame shift
        range.start = range.start * 3 + offset;
        range.end = range.end * 3 + 2 + offset;
      }
      // positions are normalised against the ORIGINAL length, so hits found in
      // the doubled copy of a circular sequence collapse onto the real ones
      ranges.push(modulateRangeBySequenceLength(range, sequence.length));
      // step one character past the hit start so overlapping hits are also found
      reg.lastIndex = match.index + 1;
    }
    /* eslint-enable no-cond-assign*/
  });

  // the doubled circular search and the three frames can report the same range twice
  return uniqBy(ranges, e => e.start + "-" + e.end);
}
|
|
112
|
+
|
|
113
|
+
/**
 * Turns a search string containing ambiguity codes into regex source text.
 *
 * Each character is upper-cased and looked up in `extended_protein_values`
 * (when `isProtein` is truthy) or `ambiguous_dna_values` (otherwise), e.g.
 * N = A or T or C or G, R = A or G. A one-character expansion is appended
 * as-is; a longer expansion is wrapped in a `[...]` character class.
 * Characters with no entry in the table are left out of the result.
 *
 * @param {string} string - the search string to expand
 * @param {boolean} [isProtein] - use the protein table instead of the DNA table
 * @returns {string} regex source; empty when no character had a table entry
 */
function convertAmbiguousStringToRegex(string, isProtein) {
  const lookupTable = isProtein ? extended_protein_values : ambiguous_dna_values;
  return reduce(
    string,
    (pattern, char) => {
      const expansion = lookupTable[char.toUpperCase()];
      if (!expansion) return pattern;
      return pattern + (expansion.length === 1 ? expansion : `[${expansion}]`);
    },
    ""
  );
}
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import findSequenceMatches from "./findSequenceMatches";
|
|
2
|
+
|
|
3
|
+
describe("findSequenceMatches", () => {
|
|
4
|
+
it("ambiguous protein sequence with * as stop codon", () => {
|
|
5
|
+
expect(
|
|
6
|
+
findSequenceMatches("mmhlrl*", "Mxxlrl*", {
|
|
7
|
+
isAmbiguous: true,
|
|
8
|
+
isProteinSequence: true /* isProteinSearch: true */
|
|
9
|
+
})
|
|
10
|
+
).toEqual([
|
|
11
|
+
{
|
|
12
|
+
start: 0,
|
|
13
|
+
end: 6
|
|
14
|
+
}
|
|
15
|
+
]);
|
|
16
|
+
expect(
|
|
17
|
+
findSequenceMatches("mmhlrl*", "mx", {
|
|
18
|
+
isAmbiguous: true,
|
|
19
|
+
isProteinSequence: true /* isProteinSearch: true */
|
|
20
|
+
})
|
|
21
|
+
).toEqual([
|
|
22
|
+
{
|
|
23
|
+
start: 0,
|
|
24
|
+
end: 1
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
start: 1,
|
|
28
|
+
end: 2
|
|
29
|
+
}
|
|
30
|
+
]);
|
|
31
|
+
});
|
|
32
|
+
it("protein sequence with * as stop codon", () => {
|
|
33
|
+
expect(
|
|
34
|
+
findSequenceMatches("mmhlrl*", "mMh", {
|
|
35
|
+
isProteinSequence: true /* isProteinSearch: true */
|
|
36
|
+
})
|
|
37
|
+
).toEqual([
|
|
38
|
+
{
|
|
39
|
+
start: 0,
|
|
40
|
+
end: 2
|
|
41
|
+
}
|
|
42
|
+
]);
|
|
43
|
+
expect(
|
|
44
|
+
findSequenceMatches("mmhlrl*", "Mmhlrl*", {
|
|
45
|
+
isProteinSequence: true /* isProteinSearch: true */
|
|
46
|
+
})
|
|
47
|
+
).toEqual([
|
|
48
|
+
{
|
|
49
|
+
start: 0,
|
|
50
|
+
end: 6
|
|
51
|
+
}
|
|
52
|
+
]);
|
|
53
|
+
});
|
|
54
|
+
it("returns an empty array when nothing matches", () => {
  // the value under test goes in expect() and the expectation in the matcher,
  // so a failure labels "Expected" and "Received" the right way round
  expect(findSequenceMatches("atg", "xtag")).toEqual([]);
});
|
|
57
|
+
it("handles various weird characters", () => {
  // regex metacharacters and whitespace in the search string must neither
  // throw nor match; the value under test goes in expect() so failure output
  // is labelled correctly
  expect(findSequenceMatches("atg", " . xt ** ag $#@@!")).toEqual([]);
});
|
|
60
|
+
it("returns matches for non-circular, non-ambiguous, dna searches", () => {
  // the value under test goes in expect() and the expectation in the matcher,
  // so a failure labels "Expected" and "Received" the right way round
  expect(findSequenceMatches("atg", "t")).toEqual([
    {
      start: 1,
      end: 1
    }
  ]);
  // overlapping hits are each reported:
  //atgggaagg
  //012345678
  expect(findSequenceMatches("atgggaagg", "gg")).toEqual([
    {
      start: 2,
      end: 3
    },
    {
      start: 3,
      end: 4
    },
    {
      start: 7,
      end: 8
    }
  ]);
});
|
|
84
|
+
it("returns matches for circular, non-ambiguous, dna searches", () => {
|
|
85
|
+
const matches = findSequenceMatches("atg", "ga", { isCircular: true });
|
|
86
|
+
expect(matches).toEqual([
|
|
87
|
+
{
|
|
88
|
+
start: 2,
|
|
89
|
+
end: 0
|
|
90
|
+
}
|
|
91
|
+
]);
|
|
92
|
+
});
|
|
93
|
+
it("returns matches for circular, non-ambiguous, dna searches on bottom strand that cross origin", () => {
|
|
94
|
+
const matches = findSequenceMatches("atga", "ttc", {
|
|
95
|
+
isCircular: true,
|
|
96
|
+
searchReverseStrand: true
|
|
97
|
+
});
|
|
98
|
+
expect(matches).toEqual([
|
|
99
|
+
{
|
|
100
|
+
bottomStrand: true,
|
|
101
|
+
start: 2,
|
|
102
|
+
end: 0
|
|
103
|
+
}
|
|
104
|
+
]);
|
|
105
|
+
});
|
|
106
|
+
it("returns matches for a long circular, non-ambiguous, dna searches", () => {
|
|
107
|
+
const matches = findSequenceMatches(
|
|
108
|
+
"gacgtcttatgacaacttgacggctacatcattcactttttcttcacaaccggcacggaactcgctcgggctggccccggtgcattttttaaatacccgcgagaaatagagttgatcgtcaaaaccaacattgcgaccgacggtggcgataggcatccgggtggtgctcaaaagcagcttcgcctggctgatacgttggtcctcgcgccagcttaagacgctaatccctaactgctggcggaaaagatgtgacagacgcgacggcgacaagcaaacatgctgtgcgacgctggcgatatcaaaattgctgtctgccaggtgatcgctgatgtactgacaagcctcgcgtacccgattatccatcggtggatggagcgactcgttaatcgcttccatgcgccgcagtaacaattgctcaagcagatttatcgccagcagctccgaatagcgcccttccccttgcccggcgttaatgatttgcccaaacaggtcgctgaaatgcggctggtgcgcttcatccgggcgaaagaaccccgtattggcaaatattgacggccagttaagccattcatgccagtaggcgcgcggacgaaagtaaacccactggtgataccattcgcgagcctccggatgacgaccgtagtgatgaatctctcctggcgggaacagcaaaatatcacccggtcggcaaacaaattctcgtccctgatttttcaccaccccctgaccgcgaatggtgagattgagaatataacctttcattcccagcggtcggtcgataaaaaaatcgagataaccgttggcctcaatcggcgttaaacccgccaccagatgggcattaaacgagtatcccggcagcaggggatcattttgcgcttcagccatacttttcatactcccgccattcagagaagaaaccaattgtccatattgcatcagacattgccgtcactgcgtcttttactggctcttctcgctaaccaaaccggtaaccccgcttattaaaagcattctgtaacaaagcgggaccaaagccatgacaaaaacgcgtaacaaaagtgtctataatcacggcagaaaagtccacattgattatttgcacggcgtcacactttgctatgccatagcatttttatccataagattagcggattctacctgacgctttttatcgcaactctctactgtttctccatacccgtttttttgggaatttttaagaaggagatatacatatgagtaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgatgttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgcaacatacggaaaacttacccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactactttctcttatggtgttcaatgcttttcccgttatccggatcatatgaaacggcatgactttttcaagagtgccatgcccgaaggttatgtacaggaacgcactatatctttcaaagatgacgggaactacaagacgcgtgctgaagtcaagtttgaaggtgatacccttgttaatcgtatcgagttaaaaggtattgattttaaagaagatggaaacattctcggacacaaactcgaatacaactataactcacacaatgtatacatcacggcagacaaacaaaagaatggaatcaaagctaacttcaaaattcgccacaacattgaagatggatctgttcaactagcagaccattatcaacaaaatactccaattggcgatggccctgtccttttaccagacaaccattacctgtcgacacaatctgccctttcgaaagatcccaacgaaaagcgtgaccacatggtccttcttgagtttgtaactgctgctgggattacacatggcatggatgagctcggcggcggcggcagcaaggtctacggcaaggaacagtttttgcggatgcgccagag
catgttccccgatcgctaaatcgagtaaggatctccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatacctagggtacgggttttgctgcccgcaaacgggctgttctggtgttgctagtttgttatcagaatcgcagatccggcttcagccggtttgccggctgaaagcgctatttcttccagaattgccatgattttttccccacgggaggcgtcactggctcccgtgttgtcggcagctttgattcgataagcagcatcgcctgtttcaggctgtctatgtgtgactgttgagctgtaacaagttgtctcaggtgttcaatttcatgttctagttgctttgttttactggtttcacctgttctattaggtgttacatgctgttcatctgttacattgtcgatctgttcatggtgaacagctttgaatgcaccaaaaactcgtaaaagctctgatgtatctatcttttttacaccgttttcatctgtgcatatggacagttttccctttgatatgtaacggtgaacagttgttctacttttgtttgttagtcttgatgcttcactgatagatacaagagccataagaacctcagatccttccgtatttagccagtatgttctctagtgtggttcgttgtttttgcgtgagccatgagaacgaaccattgagatcatacttactttgcatgtcactcaaaaattttgcctcaaaactggtgagctgaatttttgcagttaaagcatcgtgtagtgtttttcttagtccgttatgtaggtaggaatctgatgtaatggttgttggtattttgtcaccattcatttttatctggttgttctcaagttcggttacgagatccatttgtctatctagttcaacttggaaaatcaacgtatcagtcgggcggcctcgcttatcaaccaccaatttcatattgctgtaagtgtttaaatctttacttattggtttcaaaacccattggttaagccttttaaactcatggtagttattttcaagcattaacatgaacttaaattcatcaaggctaatctctatatttgccttgtgagttttcttttgtgttagttcttttaataaccactcataaatcctcatagagtatttgttttcaaaagacttaacatgttccagattatattttatgaatttttttaactggaaaagataaggcaatatctcttcactaaaaactaattctaatttttcgcttgagaacttggcatagtttgtccactggaaaatctcaaagcctttaaccaaaggattcctgatttccacagttctcgtcatcagctctctggttgctttagctaatacaccataagcattttccctactgatgttcatcatctgagcgtattggttataagtgaacgataccgtccgttctttccttgtagggttttcaatcgtggggttgagtagtgccacacagcataaaattagcttggtttcatgctccgttaagtcatagcgactaatcgctagttcatttgctttgaaaacaactaattcagacatacatctcaattggtctaggtgattttaatcactataccaattgagatgggctagtcaatgataattactagtccttttcccgggtgatctgggtatctgtaaattctgctagacctttgctggaaaacttgtaaattctgctagaccctctgtaaattccgctagacctttgtgtgttttttttgtttatattcaagtggttataatttatagaataaagaaagaataaaaaaagataaaaagaatagatcccagccctgtgtataactcactactttagtcagttccgcagtattacaaaaggatgtcgcaaacgctgtttgctcctctacaaaacagaccttaaaaccctaaaggcttaa
gtagcaccctcgcaagctcgggcaaatcgctgaatattccttttgtctccgaccatcaggcacctgagtcgctgtctttttcgtgacattcagttcgctgcgctcacggctctggcagtgaatgggggtaaatggcactacaggcgccttttatggattcatgcaaggaaactacccataatacaagaaaagcccgtcacgggcttctcagggcgttttatggcgggtctgctatgtggtgctatctgactttttgctgttcagcagttcctgccctctgattttccagtctgaccacttcggattatcccgtgacaggtcattcagactggctaatgcacccagtaaggcagcggtatcatcaacaggcttacccgtcttactgtccctagtgcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcaacgtctcattttcgccagatatc",
|
|
109
|
+
"atgagacg",
|
|
110
|
+
{ isCircular: true, searchReverseStrand: true }
|
|
111
|
+
);
|
|
112
|
+
expect(matches).toEqual([
|
|
113
|
+
{
|
|
114
|
+
bottomStrand: true,
|
|
115
|
+
end: 5284,
|
|
116
|
+
start: 5277
|
|
117
|
+
}
|
|
118
|
+
]);
|
|
119
|
+
});
|
|
120
|
+
it("non-ambiguous, dna searches for nothing results in empty array", () => {
|
|
121
|
+
const matches = findSequenceMatches("atg", "*", {});
|
|
122
|
+
expect(matches).toEqual([]);
|
|
123
|
+
});
|
|
124
|
+
it("ambiguous, protein searches for nothing results in empty array", () => {
|
|
125
|
+
const matches = findSequenceMatches("atg", "*", {
|
|
126
|
+
isProteinSearch: true,
|
|
127
|
+
isAmbiguous: true
|
|
128
|
+
});
|
|
129
|
+
expect(matches).toEqual([]);
|
|
130
|
+
});
|
|
131
|
+
it("ambiguous, dna searches for nothing results in empty array", () => {
|
|
132
|
+
const matches = findSequenceMatches("atg", "*", { isAmbiguous: true });
|
|
133
|
+
expect(matches).toEqual([]);
|
|
134
|
+
});
|
|
135
|
+
// The title used to say "with *", but this case searches for the empty string
// (the "*" search is covered by the case before it), so the title now says so.
it("ambiguous, dna searches with an empty search string results in empty array", () => {
  const matches = findSequenceMatches("atg", "", { isAmbiguous: true });
  expect(matches).toEqual([]);
});
|
|
139
|
+
it(" AA with * as stop codon", () => {
|
|
140
|
+
expect(
|
|
141
|
+
findSequenceMatches("atgtaa", "M*", { isProteinSearch: true })
|
|
142
|
+
).toEqual([
|
|
143
|
+
{
|
|
144
|
+
start: 0,
|
|
145
|
+
end: 5
|
|
146
|
+
}
|
|
147
|
+
]);
|
|
148
|
+
});
|
|
149
|
+
// Distinct title: the previous case is also called " AA with * as stop codon"
// but expects a hit, whereas this one expects none for the search "M**".
it(" AA with consecutive * stop codons that are not in the sequence finds nothing", () => {
  expect(
    findSequenceMatches("atgtaaccc", "M**", { isProteinSearch: true })
  ).toEqual([]);
});
|
|
154
|
+
it("works with ambiguous AA", () => {
|
|
155
|
+
expect(
|
|
156
|
+
findSequenceMatches("atgatg", "MX", {
|
|
157
|
+
isProteinSearch: true,
|
|
158
|
+
isAmbiguous: true
|
|
159
|
+
})
|
|
160
|
+
).toEqual([
|
|
161
|
+
{
|
|
162
|
+
start: 0,
|
|
163
|
+
end: 5
|
|
164
|
+
}
|
|
165
|
+
]);
|
|
166
|
+
});
|
|
167
|
+
it("works with ambiguous AA with * in search string", () => {
|
|
168
|
+
expect(
|
|
169
|
+
findSequenceMatches("atgtaa", "M*", {
|
|
170
|
+
isProteinSearch: true,
|
|
171
|
+
isAmbiguous: true
|
|
172
|
+
})
|
|
173
|
+
).toEqual([
|
|
174
|
+
{
|
|
175
|
+
start: 0,
|
|
176
|
+
end: 5
|
|
177
|
+
}
|
|
178
|
+
]);
|
|
179
|
+
});
|
|
180
|
+
it("returns matches for non-circular, non-ambiguous, AA searches", () => {
|
|
181
|
+
expect(findSequenceMatches("atg", "M", { isProteinSearch: true })).toEqual([
|
|
182
|
+
{
|
|
183
|
+
start: 0,
|
|
184
|
+
end: 2
|
|
185
|
+
}
|
|
186
|
+
]);
|
|
187
|
+
expect(
|
|
188
|
+
findSequenceMatches("TTTATGAGT", "MS", { isProteinSearch: true })
|
|
189
|
+
).toEqual([
|
|
190
|
+
{
|
|
191
|
+
start: 3,
|
|
192
|
+
end: 8
|
|
193
|
+
}
|
|
194
|
+
]);
|
|
195
|
+
expect(
|
|
196
|
+
findSequenceMatches("TTATGAGT", "MS", { isProteinSearch: true })
|
|
197
|
+
).toEqual([
|
|
198
|
+
{
|
|
199
|
+
start: 2,
|
|
200
|
+
end: 7
|
|
201
|
+
}
|
|
202
|
+
]);
|
|
203
|
+
expect(
|
|
204
|
+
findSequenceMatches("TTTTATGAGT", "MS", { isProteinSearch: true })
|
|
205
|
+
).toEqual([
|
|
206
|
+
{
|
|
207
|
+
start: 4,
|
|
208
|
+
end: 9
|
|
209
|
+
}
|
|
210
|
+
]);
|
|
211
|
+
|
|
212
|
+
//  0   1   2
|
|
213
|
+
//  F   M   S
|
|
214
|
+
// 012 345 678
|
|
215
|
+
// TTT ATG AGT
|
|
216
|
+
});
|
|
217
|
+
it("returns matches for non-circular, ambiguous, dna searches", () => {
|
|
218
|
+
const matches = findSequenceMatches("atg", "m", { isAmbiguous: true });
|
|
219
|
+
expect(matches).toEqual([
|
|
220
|
+
{
|
|
221
|
+
start: 0,
|
|
222
|
+
end: 0
|
|
223
|
+
}
|
|
224
|
+
]);
|
|
225
|
+
expect(findSequenceMatches("atg", "n", { isAmbiguous: true })).toEqual([
|
|
226
|
+
{
|
|
227
|
+
start: 0,
|
|
228
|
+
end: 0
|
|
229
|
+
},
|
|
230
|
+
{
|
|
231
|
+
start: 1,
|
|
232
|
+
end: 1
|
|
233
|
+
},
|
|
234
|
+
{
|
|
235
|
+
start: 2,
|
|
236
|
+
end: 2
|
|
237
|
+
}
|
|
238
|
+
]);
|
|
239
|
+
expect(
|
|
240
|
+
findSequenceMatches("atgcctcc", "ccnnc", { isAmbiguous: true })
|
|
241
|
+
).toEqual([
|
|
242
|
+
{
|
|
243
|
+
start: 3,
|
|
244
|
+
end: 7
|
|
245
|
+
}
|
|
246
|
+
]);
|
|
247
|
+
});
|
|
248
|
+
it("returns matches for both strands for non-circular, ambiguous, dna searches", () => {
|
|
249
|
+
const matches = findSequenceMatches("atg", "m", {
|
|
250
|
+
isAmbiguous: true,
|
|
251
|
+
searchReverseStrand: true
|
|
252
|
+
});
|
|
253
|
+
expect(matches).toEqual([
|
|
254
|
+
{
|
|
255
|
+
start: 0,
|
|
256
|
+
end: 0
|
|
257
|
+
},
|
|
258
|
+
{ bottomStrand: true, end: 2, start: 2 },
|
|
259
|
+
{ bottomStrand: true, end: 1, start: 1 }
|
|
260
|
+
]);
|
|
261
|
+
expect(
|
|
262
|
+
findSequenceMatches("atg", "n", {
|
|
263
|
+
isAmbiguous: true,
|
|
264
|
+
searchReverseStrand: true
|
|
265
|
+
})
|
|
266
|
+
).toEqual([
|
|
267
|
+
{ end: 0, start: 0 },
|
|
268
|
+
{ end: 1, start: 1 },
|
|
269
|
+
{ end: 2, start: 2 },
|
|
270
|
+
{ bottomStrand: true, end: 2, start: 2 },
|
|
271
|
+
{ bottomStrand: true, end: 1, start: 1 },
|
|
272
|
+
{ bottomStrand: true, end: 0, start: 0 }
|
|
273
|
+
]);
|
|
274
|
+
expect(
|
|
275
|
+
findSequenceMatches("atgcctcc", "ccnnc", {
|
|
276
|
+
isAmbiguous: true,
|
|
277
|
+
searchReverseStrand: true
|
|
278
|
+
})
|
|
279
|
+
).toEqual([
|
|
280
|
+
{
|
|
281
|
+
start: 3,
|
|
282
|
+
end: 7
|
|
283
|
+
}
|
|
284
|
+
]);
|
|
285
|
+
});
|
|
286
|
+
});
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import {generateRandomRange} from "@teselagen/range-utils";
|
|
2
|
+
import shortid from "shortid";
|
|
3
|
+
|
|
4
|
+
/**
 * Builds an object of randomly generated annotations keyed by annotation id.
 *
 * generateAnnotation(start, end, maxLength) is called once per requested
 * annotation and each result is stored under its own `id`.
 *
 * @param {number} numberOfAnnotationsToGenerate - how many annotations to create
 * @param {number} start - forwarded to generateAnnotation
 * @param {number} end - forwarded to generateAnnotation
 * @param {number} maxLength - forwarded to generateAnnotation
 * @returns {Object} map of annotation id to annotation
 */
function generateAnnotations(
  numberOfAnnotationsToGenerate,
  start,
  end,
  maxLength
) {
  const annotationsById = {};
  let generatedCount = 0;
  while (generatedCount < numberOfAnnotationsToGenerate) {
    const annotation = generateAnnotation(start, end, maxLength);
    annotationsById[annotation.id] = annotation;
    generatedCount++;
  }
  return annotationsById;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Creates one random annotation.
 *
 * The range comes from generateRandomRange(start, end, maxLength); on top of
 * it the annotation gets a numeric-string `name` drawn from 0..99999, the fixed
 * `type` "misc_feature", a shortid-generated `id`, a coin-flip `forward` flag
 * and an empty `notes` object. These fields win over same-named range fields.
 *
 * @param {number} start - forwarded to generateRandomRange
 * @param {number} end - forwarded to generateRandomRange
 * @param {number} maxLength - forwarded to generateRandomRange
 * @returns {Object} the generated annotation
 */
function generateAnnotation(start, end, maxLength) {
  const randomRange = generateRandomRange(start, end, maxLength);
  const name = getRandomInt(0, 100000).toString();
  const id = shortid();
  const forward = Math.random() > 0.5;
  return {
    ...randomRange,
    name,
    type: "misc_feature",
    id,
    forward,
    notes: {}
  };
}
|
|
29
|
+
|
|
30
|
+
/**
 * Returns `min` plus a random whole-number offset below `max - min`.
 *
 * For integer arguments with min < max the result is an integer in
 * [min, max): `min` is included, `max` is not.
 *
 * @param {number} min - lower bound (inclusive)
 * @param {number} max - upper bound (exclusive)
 * @returns {number}
 */
function getRandomInt(min, max) {
  const span = max - min;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
|
|
33
|
+
|
|
34
|
+
export default generateAnnotations;
|