@teselagen/sequence-utils 0.1.22 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +2 -2
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import shortid from "shortid";
|
|
2
|
+
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
/**
 * Finds the open reading frames (ORFs) on one strand of a sequence.
 *
 * An ORF is a start codon, zero or more whole codons, and the first in-frame
 * stop codon (TAG, TAA or TGA). U is accepted wherever T is, and matching is
 * case-insensitive. ORFs that end on the same stop codon are collapsed into
 * the longest one; the start positions of the shorter ones are recorded in
 * its `internalStartCodonIndices`.
 *
 * @param {Object} options
 * @param {string} options.sequence The sequence to search.
 * @param {number} options.minimumOrfSize Minimum ORF length in bases, counting
 *   the start and stop codons.
 * @param {boolean} options.forward true to search the sequence as given, false
 *   to search its reverse complement. Reverse results are reported in
 *   forward-strand coordinates.
 * @param {boolean} [options.circular] When truthy, ORFs may run past the end
 *   of the sequence and continue from index 0 (so `end` can be < `start`).
 * @param {boolean} [options.useAdditionalOrfStartCodons] When truthy, GTG and
 *   CTG are accepted as start codons in addition to ATG.
 * @return {Object[]} One object per ORF with `start`, `end`, `length`,
 *   `internalStartCodonIndices`, `frame`, `forward`, `annotationTypePlural`,
 *   `isOrf` and `id`.
 */
export default function getOrfsFromSequence(options) {
  const {
    minimumOrfSize,
    forward,
    circular,
    useAdditionalOrfStartCodons
  } = options;
  const originalSequenceLength = options.sequence.length;

  // Reverse-strand ORFs are found by searching the reverse complement.
  let sequence = forward
    ? options.sequence
    : getReverseComplementSequenceString(options.sequence);
  if (circular) {
    // Search a doubled sequence so origin-spanning ORFs can be matched;
    // hits that *start* in the second copy are discarded below.
    sequence += sequence;
  }

  // Maps an index on the searched (reverse-complemented) strand back to the
  // corresponding index on the strand the caller passed in.
  const toForwardStrandIndex = index => originalSequenceLength - index - 1;

  // The whole ORF sits inside a lookahead so that overlapping/nested ORFs are
  // all reported: every match is zero-length and captured in group 1.
  const re = useAdditionalOrfStartCodons
    ? /(?=((?:A[TU]G|G[TU]G|C[TU]G)(?:.{3})*?(?:[TU]AG|[TU]AA|[TU]GA)))/gi
    : /(?=((?:A[TU]G)(?:.{3})*?(?:[TU]AG|[TU]AA|[TU]GA)))/gi;

  const orfRanges = [];
  for (let m = re.exec(sequence); m !== null; m = re.exec(sequence)) {
    // A zero-length match leaves lastIndex where it was; step past it or the
    // loop would never advance.
    if (m.index === re.lastIndex) {
      re.lastIndex++;
    }
    const orfLength = m[1].length;
    const start = m.index;
    const isLongEnough = orfLength >= minimumOrfSize;
    // Only relevant for circular input: ORFs beginning in the second copy are
    // duplicates of ones already found in the first.
    const startsInOriginal = start < originalSequenceLength;
    if (isLongEnough && startsInOriginal) {
      let end = start + orfLength - 1;
      if (end >= originalSequenceLength) {
        // wrap an origin-spanning end back into the original sequence
        end -= originalSequenceLength;
      }
      orfRanges.push({
        start,
        end,
        length: orfLength,
        internalStartCodonIndices: [],
        frame: start % 3,
        forward,
        annotationTypePlural: "orfs",
        isOrf: true,
        id: shortid()
      });
    }
  }

  // Collapse ORFs sharing a stop codon into the longest one. Coordinates stay
  // strand-local for this whole pass; flipping some ORFs early (as this code
  // used to) corrupted the result when a later, longer ORF displaced an
  // already-flipped one.
  const longestOrfIndexByEnd = {};
  orfRanges.forEach((orf, index) => {
    const existingIndex = longestOrfIndexByEnd[orf.end];
    if (typeof existingIndex === "undefined") {
      longestOrfIndexByEnd[orf.end] = index;
      return;
    }
    let internalOrf = orf;
    let containingOrf = orfRanges[existingIndex];
    if (containingOrf.length < internalOrf.length) {
      // Usually the longer ORF is seen first, but an origin-spanning ORF that
      // starts later in the sequence can be longer than an earlier one.
      internalOrf = containingOrf;
      containingOrf = orf;
      longestOrfIndexByEnd[orf.end] = index;
    }
    const internalStartCodonIndex = forward
      ? internalOrf.start
      : toForwardStrandIndex(internalOrf.start);
    containingOrf.internalStartCodonIndices = [
      ...containingOrf.internalStartCodonIndices,
      ...internalOrf.internalStartCodonIndices,
      internalStartCodonIndex
    ];
    // flag the shorter duplicate for removal
    internalOrf.remove = true;
  });

  const nonDuplicatedOrfRanges = orfRanges.filter(orf => !orf.remove);
  if (!forward) {
    // Report reverse-strand ORFs in forward-strand coordinates: the
    // strand-local end becomes the start and vice versa.
    nonDuplicatedOrfRanges.forEach(orf => {
      const strandLocalEnd = orf.end;
      orf.end = toForwardStrandIndex(orf.start);
      orf.start = toForwardStrandIndex(strandLocalEnd);
    });
  }
  return nonDuplicatedOrfRanges;
}
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
import {expect} from "chai";
|
|
4
|
+
|
|
5
|
+
import getOrfsFromSequence from "./getOrfsFromSequence.js";
|
|
6
|
+
// getOrfsFromSequence({ sequence, minimumOrfSize, forward, circular, useAdditionalOrfStartCodons })
|
|
7
|
+
describe("getOrfsFromSequence", () => {
  // Asserts that exactly one ORF was returned and that it has the expected
  // coordinates, strand, frame and internal start codons.
  const expectSingleOrf = (orfs, expected) => {
    expect(orfs).to.be.length(1);
    const orf = orfs[0];
    expect(orf).to.be.an("object");
    expect(orf.start).to.equal(expected.start);
    expect(orf.end).to.equal(expected.end);
    expect(orf.forward).to.equal(expected.forward);
    expect(orf.frame).to.equal(expected.frame);
    expect(orf.isOrf).to.equal(true);
    expect(orf.internalStartCodonIndices).to.deep.equal(
      expected.internalStartCodonIndices
    );
    expect(orf.id).to.be.a("string");
  };

  it("returns no orfs in reverse direction when none reach the minimum size in a long sequence", () => {
    const orfs = getOrfsFromSequence({
      sequence:
        "gattttaatcactataccaattgagatgggctagtcaatgataattactagtccttttcccgggtgatctgggtatctgtaaattctgctagacctttgctggaaaacttgtaaattctgctagaccctctgtaaattccgctagacctttgtgtgttttttttgtttatattcaagtggttataatttatagaataaagaaagaataaaaaaagataaaaagaatagatcccagccctgtgtataactcactactttagtcagttccgcagtattacaaaaggatgtcgcaaacgctgtttgctcctctacaaaacagaccttaaaaccctaaaggcttaagtagcaccctcgcaagctcgggcaaatcgctgaatattccttttgtctccgaccatcaggcacctgagtcgctgtctttttcgtgacattcagttcgctgcgctcacggctctggcagtgaatgggggtaaatggcactacaggcgccttttatggattcatgcaaggaaactacccataatacaagaaaagcccgtcacgggcttctcagggcgttttatggcgggtctgctatgtggtgctatctgactttttgctgttcagcagttcctgccctctgattttccagtctgaccacttcggattatcccgtgacaggtcattcagactggctaatgcacccagtaaggcagcggtatcatcaacaggcttacccgtcttactgtccctagtgcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcaacgtctcattttcgccagatatcgacgtcttatgacaacttgacggctacatcattcactttttcttcacaaccggcacggaactcgctcgggctggccccggtgcattttttaaatacccgcgagaaatagagttgatcgtcaaaaccaacattgcgaccgacggtggcgataggcatccgggtggtgctcaaaagcagcttcgcctggctgatacgttggtcctcgcgccagcttaagacgctaatccctaactgctggcggaaaagatgtgacagacgcgacggcgacaagcaaacatgctgtgcgacgctggcgatatcaaaattgctgtctgccaggtgatcgctgatgtactgacaagcctcgcgtacccgatt" +
        "atccatcggtggatggagcgactcgttaatcgcttccatgcgccgcagtaacaattgctcaagcagatttatcgccagcagctccgaatagcgcccttccccttgcccggcgttaatgatttgcccaaacaggtcgctgaaatgcggctggtgcgcttcatccgggcgaaagaaccccgtattggcaaatattgacggccagttaagccattcatgccagtaggcgcgcggacgaaagtaaacccactggtgataccattcgcgagcctccggatgacgaccgtagtgatgaatctctcctggcgggaacagcaaaatatcacccggtcggcaaacaaattctcgtccctgatttttcaccaccccctgaccgcgaatggtgagattgagaatataacctttcattcccagcggtcggtcgataaaaaaatcgagataaccgttggcctcaatcggcgttaaacccgccaccagatgggcattaaacgagtatcccggcagcaggggatcattttgcgcttcagccatacttttcatactcccgccattcagagaagaaaccaattgtccatattgcatcagacattgccgtcactgcgtcttttactggctcttctcgctaaccaaaccggtaaccccgcttattaaaagcattctgtaacaaagcgggaccaaagccatgacaaaaacgcgtaacaaaagtgtctataatcacggcagaaaagtccacattgattatttgcacggcgtcacactttgctatgccatagcatttttatccataagattagcggattctacctgacgctttttatcgcaactctctactgtttctccatacccgtttttttgggaatttttaagaaggagatatacatatgagtaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgatgttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgcaacatacggaaaacttacccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactactttctcttatggtgttcaatgcttttcccgttatccggatcatatgaaacggcatgactttttcaagagtgccatgcccgaaggttatgtacaggaacgcactatatctttcaaagatgacgggaactacaagacgcgtgctgaagtcaagtttgaaggtgatacccttgttaatcgtatcgagttaaaaggtattgattttaaagaagatggaaacat",
      minimumOrfSize: 3280,
      forward: false,
      circular: false
    });
    expect(orfs).to.be.length(0);
  });

  it("finds correct orfs in reverse direction in slightly more complex sequence", () => {
    // Forward strand:  ttarrrcatcat
    // Index:           012345678901
    // Read on the reverse strand this is atg atg ... taa: the outer start
    // codon ("cat") begins at index 11, the internal one at index 8, and the
    // stop codon ("tta") ends at index 0.
    const orfs = getOrfsFromSequence({
      sequence: "ttarrrcatcat",
      minimumOrfSize: 0,
      forward: false,
      circular: false
    });
    expectSingleOrf(orfs, {
      start: 0,
      end: 11,
      forward: false,
      frame: 0,
      internalStartCodonIndices: [8]
    });
  });

  it("finds correct orfs in reverse direction in simple sequence", () => {
    const orfs = getOrfsFromSequence({
      sequence: "ttacat",
      minimumOrfSize: 0,
      forward: false,
      circular: false
    });
    expectSingleOrf(orfs, {
      start: 0,
      end: 5,
      forward: false,
      frame: 0,
      internalStartCodonIndices: []
    });
  });

  it("finds correct orfs in slightly more complex sequence", () => {
    const orfs = getOrfsFromSequence({
      sequence: "atgatgffftaa",
      minimumOrfSize: 0,
      forward: true,
      circular: false
    });
    expectSingleOrf(orfs, {
      start: 0,
      end: 11,
      forward: true,
      frame: 0,
      internalStartCodonIndices: [3]
    });
  });

  it("finds correct orfs in simple sequence", () => {
    const orfs = getOrfsFromSequence({
      sequence: "atgtaa",
      minimumOrfSize: 0,
      forward: true,
      circular: false
    });
    expectSingleOrf(orfs, {
      start: 0,
      end: 5,
      forward: true,
      frame: 0,
      internalStartCodonIndices: []
    });
  });

  it("it will find additional orfs if useAdditionalOrfStartCodons is set to true in simple sequence", () => {
    // CTG and GTG only count as start codons when the flag is set.
    ["ctgtaa", "gtgtaa"].forEach(sequence => {
      const orfs = getOrfsFromSequence({
        sequence,
        minimumOrfSize: 0,
        forward: true,
        circular: false,
        useAdditionalOrfStartCodons: true
      });
      expectSingleOrf(orfs, {
        start: 0,
        end: 5,
        forward: true,
        frame: 0,
        internalStartCodonIndices: []
      });
    });
  });

  it("finds correct orfs in simple sequence with different capitalizations", () => {
    const orfs = getOrfsFromSequence({
      sequence: "ATGTAA",
      minimumOrfSize: 0,
      forward: true,
      circular: false
    });
    expectSingleOrf(orfs, {
      start: 0,
      end: 5,
      forward: true,
      frame: 0,
      internalStartCodonIndices: []
    });
  });

  it("finds a single correct orf in simple circular sequence", () => {
    // The start codon straddles the origin: "a" at index 5, then "tg" at 0-1.
    const orfs = getOrfsFromSequence({
      sequence: "tgtaaa",
      minimumOrfSize: 0,
      forward: true,
      circular: true
    });
    expectSingleOrf(orfs, {
      start: 5,
      end: 4,
      forward: true,
      frame: 2,
      internalStartCodonIndices: []
    });
  });

  it("finds multiple internal start codons correctly for orfs that span the origin", () => {
    const orfs = getOrfsFromSequence({
      sequence: "tgATGTAAatga",
      minimumOrfSize: 0,
      forward: true,
      circular: true
    });
    expectSingleOrf(orfs, {
      start: 8,
      end: 7,
      forward: true,
      frame: 2,
      internalStartCodonIndices: [2, 11]
    });
  });

  it("doesnt find orfs in simple sequence with no orfs", () => {
    // GTG is not a start codon unless useAdditionalOrfStartCodons is set.
    const orfs = getOrfsFromSequence({
      sequence: "gtgtaa",
      minimumOrfSize: 0,
      forward: true,
      circular: false
    });
    expect(orfs).to.be.an("array");
    expect(orfs).to.be.length(0);
  });
});
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import {modulatePositionByRange} from "@teselagen/range-utils";
|
|
2
|
+
|
|
3
|
+
/**
 * Locates the first case-insensitive occurrence of one sequence inside
 * another, treating the searched sequence as circular (a match may start near
 * the end and continue from index 0).
 *
 * @param {string} sequenceToFind
 * @param {string} sequenceToSearchIn
 * @return {object|null} null when there is no match, otherwise a range with
 *   0-based inclusive `.start` and `.end`; `.end` is passed through
 *   modulatePositionByRange so that it lies within sequenceToSearchIn.
 */
export default function getOverlapBetweenTwoSequences(
  sequenceToFind,
  sequenceToSearchIn
) {
  const haystack = sequenceToSearchIn.toLowerCase();
  const needle = sequenceToFind.toLowerCase();
  // Searching two back-to-back copies lets a plain substring search find
  // matches that wrap around the origin.
  const start = `${haystack}${haystack}`.indexOf(needle);
  if (start < 0) {
    return null;
  }
  const unwrappedEnd = start + needle.length - 1;
  const wholeSequence = { start: 0, end: haystack.length - 1 };
  return {
    start,
    end: modulatePositionByRange(unwrappedEnd, wholeSequence)
  };
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import {expect} from "chai";
|
|
2
|
+
import getOverlapBetweenTwoSequences from "./getOverlapBetweenTwoSequences.js";
|
|
3
|
+
describe("getOverlapBetweenTwoSequences", () => {
  it("should get the range overlap given a seq and a seq to search in", () => {
    // match lies entirely inside the searched sequence
    const insideMatch = getOverlapBetweenTwoSequences("gtt", "agttaa");
    expect(insideMatch).to.deep.equal({ start: 1, end: 3 });

    // match starts on the last base and wraps around to the beginning
    const wrappingMatch = getOverlapBetweenTwoSequences("gtt", "ttaaag");
    expect(wrappingMatch).to.deep.equal({ start: 5, end: 1 });
  });

  it("should return null if no overlap is found", () => {
    const noMatch = getOverlapBetweenTwoSequences("gtt", "agattaa");
    expect(noMatch).to.deep.equal(null);
  });

  it("should not care about case sensitivity", () => {
    const mixedCaseMatch = getOverlapBetweenTwoSequences("gTt", "agttaa");
    expect(mixedCaseMatch).to.deep.equal({ start: 1, end: 3 });
  });
});
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import getComplementSequenceString from "./getComplementSequenceString";
|
|
2
|
+
import {normalizePositionByRangeLength} from "@teselagen/range-utils";
|
|
3
|
+
import cutSequenceByRestrictionEnzyme from "./cutSequenceByRestrictionEnzyme";
|
|
4
|
+
|
|
5
|
+
/**
 * Lists the parts that can be cut out of a sequence by the given restriction
 * enzyme(s).
 *
 * Cutsites are collected for every enzyme via cutSequenceByRestrictionEnzyme.
 * With a single cutsite, one part running from that cut back to itself is
 * returned. With several, every pair of cutsites yields up to two parts (one
 * in each direction); a part whose start is after its end is only kept when
 * the sequence is circular.
 *
 * @param {Object} seqData
 * @param {string} seqData.sequence The bases to cut.
 * @param {boolean} [seqData.circular] Whether the sequence is circular.
 * @param {Object|Object[]} restrictionEnzymes One enzyme or an array of
 *   enzymes. An empty array yields no parts.
 * @return {Object[]} Part objects as built by
 *   getPartBetweenEnzymesWithInclusiveOverhangs; empty when nothing cuts.
 */
export default function getPossiblePartsFromSequenceAndEnzyme(
  seqData,
  restrictionEnzymes
) {
  // Array.isArray rather than a truthy `.length` check: the latter wrapped an
  // empty array as [[]] and handed an array on as if it were an enzyme.
  const enzymes = Array.isArray(restrictionEnzymes)
    ? restrictionEnzymes
    : [restrictionEnzymes];
  const bps = seqData.sequence;
  const seqLen = bps.length;
  const circular = seqData.circular;

  const cutsites = enzymes.reduce(
    (found, enzyme) =>
      found.concat(cutSequenceByRestrictionEnzyme(bps, circular, enzyme)),
    []
  );

  if (cutsites.length < 1) {
    return [];
  }
  if (cutsites.length === 1) {
    return [
      getPartBetweenEnzymesWithInclusiveOverhangs(
        cutsites[0],
        cutsites[0],
        seqLen
      )
    ];
  }

  const parts = [];
  pairwise(cutsites).forEach(([cut1, cut2]) => {
    const bothDirections = [
      getPartBetweenEnzymesWithInclusiveOverhangs(cut1, cut2, seqLen),
      getPartBetweenEnzymesWithInclusiveOverhangs(cut2, cut1, seqLen)
    ];
    bothDirections.forEach(part => {
      // only add origin spanning parts if the sequence is circular
      if (circular || !(part.start > part.end)) {
        parts.push(part);
      }
    });
  });
  return parts;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Builds the part that runs from one cutsite to another, overhangs included.
 *
 * The part starts at whichever of cut1's snip positions comes first and ends
 * one base before whichever of cut2's snip positions comes last; the end is
 * passed through normalizePositionByRangeLength with the sequence length
 * (presumably to wrap it into the sequence - confirm against range-utils).
 *
 * @param {Object} cut1 Cutsite opening the part.
 * @param {Object} cut2 Cutsite closing the part.
 * @param {number} seqLen Length of the sequence being cut.
 * @return {Object} The part: 0- and 1-based start/end, both cutsites, their
 *   relative offsets, and the overhang bases split into top/bottom strand.
 */
function getPartBetweenEnzymesWithInclusiveOverhangs(cut1, cut2, seqLen) {
  //the offset is always counting with 0 being at the top snip position
  const firstCutOffset = getEnzymeRelativeOffset(cut1.restrictionEnzyme);
  const secondCutOffset = getEnzymeRelativeOffset(cut2.restrictionEnzyme);

  let start;
  if (cut1.topSnipBeforeBottom) {
    start = cut1.topSnipPosition;
  } else {
    start = cut1.bottomSnipPosition;
  }

  let lastSnipOfSecondCut;
  if (cut2.topSnipBeforeBottom) {
    lastSnipOfSecondCut = cut2.bottomSnipPosition;
  } else {
    lastSnipOfSecondCut = cut2.topSnipPosition;
  }
  const end = normalizePositionByRangeLength(lastSnipOfSecondCut - 1, seqLen);

  // An overhang is attributed to exactly one strand depending on the sign of
  // the cut's offset; a zero offset leaves both strands empty.
  const part = {
    start,
    start1Based: start + 1,
    end,
    end1Based: end + 1,
    firstCut: cut1,
    firstCutOffset,
    firstCutOverhang: cut1.overhangBps,
    firstCutOverhangTop: "",
    firstCutOverhangBottom: "",
    secondCut: cut2,
    secondCutOffset,
    secondCutOverhang: cut2.overhangBps,
    secondCutOverhangTop: "",
    secondCutOverhangBottom: ""
  };
  if (firstCutOffset > 0) {
    part.firstCutOverhangTop = cut1.overhangBps;
  }
  if (firstCutOffset < 0) {
    part.firstCutOverhangBottom = getComplementSequenceString(cut1.overhangBps);
  }
  if (secondCutOffset < 0) {
    part.secondCutOverhangTop = cut2.overhangBps;
  }
  if (secondCutOffset > 0) {
    part.secondCutOverhangBottom = getComplementSequenceString(
      cut2.overhangBps
    );
  }
  return part;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Returns how far an enzyme's bottom-strand snip sits from its top-strand
 * snip, measured with the top snip position as zero.
 *
 * @param {Object} enzyme Object with numeric `bottomSnipOffset` and
 *   `topSnipOffset`.
 * @return {number} bottomSnipOffset minus topSnipOffset.
 */
function getEnzymeRelativeOffset(enzyme) {
  const bottomSnip = enzyme.bottomSnipOffset;
  const topSnip = enzyme.topSnipOffset;
  return bottomSnip - topSnip;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Returns every unordered pair of elements from a list.
 *
 * Pairs keep list order: all pairs beginning with the first element come
 * first, then all pairs beginning with the second, and so on.
 *
 * @param {Array} list
 * @return {Array[]} Two-element arrays [earlier, later]; empty when the list
 *   has fewer than two elements.
 */
function pairwise(list) {
  const pairs = [];
  for (let i = 0; i < list.length - 1; i++) {
    for (let j = i + 1; j < list.length; j++) {
      pairs.push([list[i], list[j]]);
    }
  }
  return pairs;
}
|