@teselagen/sequence-utils 0.1.22 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +2 -2
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import {ambiguous_dna_letters} from "./bioData";
|
|
2
|
+
|
|
3
|
+
/**
 * Guess whether the given sequence is DNA/RNA rather than protein.
 *
 * The sequence is considered DNA when the fraction of its characters that are
 * "DNA letters" is strictly greater than `threshold` (comparison is
 * case-insensitive). An empty or missing sequence defaults to DNA.
 *
 * @param {string} seq - the sequence to inspect
 * @param {Object} [options]
 * @param {number} [options.threshold=0.9] - fraction of DNA letters that must be exceeded
 * @param {boolean} [options.loose] - when truthy (and no dnaLetters are given), every
 *   letter of `ambiguous_dna_letters` plus "U" counts as DNA
 * @param {string[]} [options.dnaLetters] - explicit list of letters that count as DNA;
 *   for instance, adding "N" might be useful if you are expecting data with ambiguous
 *   bases. Takes priority over `loose`.
 * @returns {boolean} true if the sequence looks like DNA, false if it looks like protein
 */
export default function guessIfSequenceIsDnaAndNotProtein(seq, options = {}) {
  const { threshold = 0.9, loose, dnaLetters: customDnaLetters } = options;

  // A length 0 sequence defaults to DNA.
  if (!seq || !seq.length) return true;

  // The parentheses matter: without them `a || b ? x : y` parses as
  // `(a || b) ? x : y` and the caller-supplied letters would be discarded.
  const dnaLetters =
    customDnaLetters ||
    (loose
      ? [...ambiguous_dna_letters.split(""), "U"]
      : ["G", "A", "T", "C", "U"]);

  // Lookup table keyed by upper-cased letter so matching is case-insensitive.
  const dnaLetterMap = dnaLetters.reduce((acc, letter) => {
    acc[letter.toUpperCase()] = true;
    return acc;
  }, {});

  let count = 0;
  for (let index = 0; index < seq.length; index++) {
    if (dnaLetterMap[seq[index].toUpperCase()]) {
      count += 1;
    }
  }

  // Strictly greater than the threshold means DNA; otherwise protein.
  return count / seq.length > threshold;
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import chai from "chai";
|
|
2
|
+
import guessIfSequenceIsDnaAndNotProtein from "./guessIfSequenceIsDnaAndNotProtein";
|
|
3
|
+
chai.should();
|
|
4
|
+
// Table-driven spec: each entry is one `it` case. `options` is left undefined
// where the function should be called with its defaults.
describe("guessIfSequenceIsDnaAndNotProtein", () => {
  const scenarios = [
    {
      title: "should default to DNA for a length 0 sequecne",
      seq: "",
      expected: true
    },
    {
      title: "should correctly guess that a DNA seq is DNA",
      seq: "gtatacc",
      expected: true
    },
    {
      title: "should correctly guess that a DNA seq with some ambiguity is a DNA",
      seq: "gtatacctaacn",
      expected: true
    },
    {
      title:
        "should correctly guess that a seq with lots of ambiguity is a protein when in the default strict mode",
      seq: "gtatacybctaacn",
      options: { loose: false },
      expected: false
    },
    {
      title:
        "should correctly guess that a seq with lots of ambiguity is dna when in the loose mode",
      seq: "gtatacybctaacn",
      options: { loose: true },
      expected: true
    },
    {
      title:
        "should correctly guess that a DNA with lots of ambiguities is dna when the threshold is lower ",
      seq: "gtatacybctaacn",
      options: { threshold: 0.5 },
      expected: true
    },
    {
      title:
        "should correctly guess that a DNA with lots of ambiguity is a dna when the ambiguous letter is included ",
      seq: "gtatanccnnntaacn",
      options: { dnaLetters: ["g", "a", "t", "c", "n"] },
      expected: true
    }
  ];

  scenarios.forEach(({ title, seq, options, expected }) => {
    it(title, () => {
      guessIfSequenceIsDnaAndNotProtein(seq, options).should.equal(expected);
    });
  });
});
|
package/src/index.js
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import {
|
|
2
|
+
autoAnnotate,
|
|
3
|
+
convertApELikeRegexToRegex,
|
|
4
|
+
convertProteinSeqToDNAIupac,
|
|
5
|
+
} from './autoAnnotate';
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
genbankFeatureTypes,
|
|
9
|
+
getFeatureToColorMap,
|
|
10
|
+
getFeatureTypes,
|
|
11
|
+
getMergedFeatureMap,
|
|
12
|
+
} from './featureTypesAndColors';
|
|
13
|
+
|
|
14
|
+
export * from './computeDigestFragments';
|
|
15
|
+
export * from './diffUtils';
|
|
16
|
+
export * from './annotationTypes';
|
|
17
|
+
|
|
18
|
+
/* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
|
|
19
|
+
//tnr: these are deprecated exports and should no longer be used!
|
|
20
|
+
const FeatureTypes = getFeatureTypes();
|
|
21
|
+
const featureColors = getFeatureToColorMap();
|
|
22
|
+
export {
|
|
23
|
+
getFeatureToColorMap,
|
|
24
|
+
getFeatureTypes,
|
|
25
|
+
genbankFeatureTypes,
|
|
26
|
+
getMergedFeatureMap,
|
|
27
|
+
FeatureTypes,
|
|
28
|
+
featureColors,
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
/* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
|
|
32
|
+
export { autoAnnotate };
|
|
33
|
+
export { convertApELikeRegexToRegex };
|
|
34
|
+
export { convertProteinSeqToDNAIupac };
|
|
35
|
+
export * as bioData from './bioData';
|
|
36
|
+
export { default as getAllInsertionsInSeqReads } from './getAllInsertionsInSeqReads';
|
|
37
|
+
export { default as annotateSingleSeq } from './annotateSingleSeq';
|
|
38
|
+
export { default as getDegenerateDnaStringFromAAString } from './getDegenerateDnaStringFromAAString';
|
|
39
|
+
export { default as getDegenerateRnaStringFromAAString } from './getDegenerateRnaStringFromAAString';
|
|
40
|
+
export { default as getVirtualDigest } from './getVirtualDigest';
|
|
41
|
+
export { default as isEnzymeType2S } from './isEnzymeType2S';
|
|
42
|
+
export { default as insertGapsIntoRefSeq } from './insertGapsIntoRefSeq';
|
|
43
|
+
export { default as adjustBpsToReplaceOrInsert } from './adjustBpsToReplaceOrInsert';
|
|
44
|
+
export { default as calculatePercentGC } from './calculatePercentGC';
|
|
45
|
+
export { default as calculateTm } from './calculateTm';
|
|
46
|
+
export { default as cutSequenceByRestrictionEnzyme } from './cutSequenceByRestrictionEnzyme';
|
|
47
|
+
export { default as deleteSequenceDataAtRange } from './deleteSequenceDataAtRange';
|
|
48
|
+
export { default as DNAComplementMap } from './DNAComplementMap';
|
|
49
|
+
export { default as doesEnzymeChopOutsideOfRecognitionSite } from './doesEnzymeChopOutsideOfRecognitionSite';
|
|
50
|
+
export { default as aliasedEnzymesByName } from './aliasedEnzymesByName';
|
|
51
|
+
export { default as defaultEnzymesByName } from './defaultEnzymesByName';
|
|
52
|
+
export { default as generateSequenceData } from './generateSequenceData';
|
|
53
|
+
export { default as generateAnnotations } from './generateAnnotations';
|
|
54
|
+
export { default as filterAminoAcidSequenceString } from './filterAminoAcidSequenceString';
|
|
55
|
+
export { default as filterSequenceString } from './filterSequenceString';
|
|
56
|
+
export { default as findNearestRangeOfSequenceOverlapToPosition } from './findNearestRangeOfSequenceOverlapToPosition';
|
|
57
|
+
export { default as findOrfsInPlasmid } from './findOrfsInPlasmid';
|
|
58
|
+
export { default as findSequenceMatches } from './findSequenceMatches';
|
|
59
|
+
export { default as getAminoAcidDataForEachBaseOfDna } from './getAminoAcidDataForEachBaseOfDna';
|
|
60
|
+
export { default as getAminoAcidFromSequenceTriplet } from './getAminoAcidFromSequenceTriplet';
|
|
61
|
+
export { default as getAminoAcidStringFromSequenceString } from './getAminoAcidStringFromSequenceString';
|
|
62
|
+
export { default as getCodonRangeForAASliver } from './getCodonRangeForAASliver';
|
|
63
|
+
export { default as getComplementAminoAcidStringFromSequenceString } from './getComplementAminoAcidStringFromSequenceString';
|
|
64
|
+
export { default as getComplementSequenceAndAnnotations } from './getComplementSequenceAndAnnotations';
|
|
65
|
+
export { default as getComplementSequenceString } from './getComplementSequenceString';
|
|
66
|
+
export { default as getCutsitesFromSequence } from './getCutsitesFromSequence';
|
|
67
|
+
export { default as getCutsiteType } from './getCutsiteType';
|
|
68
|
+
export { default as getInsertBetweenVals } from './getInsertBetweenVals';
|
|
69
|
+
export { default as getLeftAndRightOfSequenceInRangeGivenPosition } from './getLeftAndRightOfSequenceInRangeGivenPosition';
|
|
70
|
+
export { default as getOrfsFromSequence } from './getOrfsFromSequence';
|
|
71
|
+
export { default as getOverlapBetweenTwoSequences } from './getOverlapBetweenTwoSequences';
|
|
72
|
+
export { default as getPossiblePartsFromSequenceAndEnzymes } from './getPossiblePartsFromSequenceAndEnzymes';
|
|
73
|
+
export { default as getReverseAminoAcidStringFromSequenceString } from './getReverseAminoAcidStringFromSequenceString';
|
|
74
|
+
export { default as getReverseComplementAminoAcidStringFromSequenceString } from './getReverseComplementAminoAcidStringFromSequenceString';
|
|
75
|
+
export { default as getReverseComplementAnnotation } from './getReverseComplementAnnotation';
|
|
76
|
+
export { default as getReverseComplementSequenceAndAnnotations } from './getReverseComplementSequenceAndAnnotations';
|
|
77
|
+
export { default as getReverseComplementSequenceString } from './getReverseComplementSequenceString';
|
|
78
|
+
export { default as getReverseSequenceString } from './getReverseSequenceString';
|
|
79
|
+
export { default as getSequenceDataBetweenRange } from './getSequenceDataBetweenRange';
|
|
80
|
+
export { default as guessIfSequenceIsDnaAndNotProtein } from './guessIfSequenceIsDnaAndNotProtein';
|
|
81
|
+
export { default as insertSequenceDataAtPosition } from './insertSequenceDataAtPosition';
|
|
82
|
+
export { default as insertSequenceDataAtPositionOrRange } from './insertSequenceDataAtPositionOrRange';
|
|
83
|
+
export { default as mapAnnotationsToRows } from './mapAnnotationsToRows';
|
|
84
|
+
export { default as prepareCircularViewData } from './prepareCircularViewData';
|
|
85
|
+
export { default as prepareRowData } from './prepareRowData';
|
|
86
|
+
export { default as proteinAlphabet } from './proteinAlphabet';
|
|
87
|
+
export { default as rotateSequenceDataToPosition } from './rotateSequenceDataToPosition';
|
|
88
|
+
export { default as rotateBpsToPosition } from './rotateBpsToPosition';
|
|
89
|
+
export { default as threeLetterSequenceStringToAminoAcidMap } from './threeLetterSequenceStringToAminoAcidMap';
|
|
90
|
+
export { default as tidyUpSequenceData } from './tidyUpSequenceData';
|
|
91
|
+
export { default as tidyUpAnnotation } from './tidyUpAnnotation';
|
|
92
|
+
export { default as condensePairwiseAlignmentDifferences } from './condensePairwiseAlignmentDifferences';
|
|
93
|
+
export { default as addGapsToSeqReads } from './addGapsToSeqReads';
|
|
94
|
+
export { default as calculateNebTm } from './calculateNebTm';
|
|
95
|
+
export { default as calculateNebTa } from './calculateNebTa';
|
|
96
|
+
export { default as getDigestFragmentsForCutsites } from './getDigestFragmentsForCutsites';
|
|
97
|
+
export { default as getDigestFragmentsForRestrictionEnzymes } from './getDigestFragmentsForRestrictionEnzymes';
|
|
98
|
+
export { default as convertDnaCaretPositionOrRangeToAA } from './convertDnaCaretPositionOrRangeToAA';
|
|
99
|
+
export { default as convertAACaretPositionOrRangeToDna } from './convertAACaretPositionOrRangeToDna';
|
|
100
|
+
export { default as aminoAcidToDegenerateDnaMap } from './aminoAcidToDegenerateDnaMap';
|
|
101
|
+
export { default as aminoAcidToDegenerateRnaMap } from './aminoAcidToDegenerateRnaMap';
|
|
102
|
+
export { default as degenerateDnaToAminoAcidMap } from './degenerateDnaToAminoAcidMap';
|
|
103
|
+
export { default as degenerateRnaToAminoAcidMap } from './degenerateRnaToAminoAcidMap';
|
|
104
|
+
export { default as getMassOfAaString } from './getMassOfAaString';
|
|
105
|
+
export { default as shiftAnnotationsByLen } from './shiftAnnotationsByLen';
|
|
106
|
+
export { default as adjustAnnotationsToInsert } from './adjustAnnotationsToInsert';
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import * as src from ".";
|
|
2
|
+
import fs from "fs";
|
|
3
|
+
|
|
4
|
+
// Guards against forgetting to re-export a newly added source file from index.js:
// every file in this directory must either be exported under its own file name
// (minus ".js") or match one of the explicitly ignored patterns below.
describe("index.js", () => {
  it(`should export all functions defined`, () => {
    // Files whose name contains any of these substrings are intentionally skipped.
    const ignoredFilePatterns = [
      ".test.js",
      "index.js",
      "prepareRowData_output1.json",
      "featureTypesAndColors",
      "diffUtils"
    ];

    return new Promise((resolve, reject) => {
      fs.readdir(__dirname, (err, files) => {
        // Surface directory read failures instead of crashing on an undefined `files`.
        if (err) {
          reject(err);
          return;
        }

        const missingExports = files.filter(file => {
          const isIgnored = ignoredFilePatterns.some(
            pattern => file.indexOf(pattern) > -1
          );
          return !isIgnored && !src[file.replace(".js", "")];
        });

        missingExports.forEach(file => {
          console.info(
            `Uh oh, it looks like you forgot to export (or explicitly ignore) this file:`,
            file
          );
        });

        // Reject rather than throw: an exception thrown inside this callback
        // would escape the Promise and not be reported as a test failure.
        if (missingExports.length) {
          reject(
            new Error(
              "Please make sure to export (or ignore) each file! Update index.js to export the file"
            )
          );
          return;
        }
        resolve();
      });
    });
  });
});
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import getAllInsertionsInSeqReads from "./getAllInsertionsInSeqReads.js";
|
|
2
|
+
|
|
3
|
+
// seqReads should be an array of objects [{name, seq, pos, cigar}, {name, seq, pos, cigar}, ...]
|
|
4
|
+
// add gaps in reference sequence where there are insertions
|
|
5
|
+
/**
 * Add gap characters ("-") to a reference sequence wherever the sequencing
 * reads contain insertions.
 *
 * @param {string} refSeq - the reference sequence, e.g. "GGGAGACACC"
 * @param {Array} seqReads - array of read objects [{name, seq, pos, cigar}, ...]
 * @returns {string} the reference sequence with gaps, e.g. "GGGA--GA-C--ACC"
 */
export default function insertGapsIntoRefSeq(refSeq, seqReads) {
  // one array slot per base: ["A", "T", "C", "G"...]
  const gappedBases = refSeq.split("");
  const insertions = getAllInsertionsInSeqReads(seqReads);

  insertions.forEach((insertion, insertionIndex) => {
    const { bpPos, number: gapCount } = insertion;

    // build a run of "-" as long as the insertion
    let gapRun = "";
    let added = 0;
    while (added < gapCount) {
      gapRun += "-";
      added += 1;
    }

    // the whole run goes in as ONE array element just before the (1-based) bp position
    gappedBases.splice(bpPos - 1, 0, gapRun);

    // the array grew by one element, so every later insertion moves over by one slot
    insertions.slice(insertionIndex + 1).forEach(laterInsertion => {
      laterInsertion.bpPos += 1;
    });
  });

  return gappedBases.join("");
}
|
|
25
|
+
|
|
26
|
+
// allInsertionsInSeqReads.forEach(insertion => {
|
|
27
|
+
// // adding gap at the bp pos of insertion
|
|
28
|
+
// refSeqWithGaps.splice(insertion - 1, 0, "-");
|
|
29
|
+
// });
|
|
30
|
+
// for (let i = 0; i < allInsertionsInSeqReads.length; i++) {
|
|
31
|
+
// refSeqWithGaps.splice(allInsertionsInSeqReads[i] - 1, 0, "-");
|
|
32
|
+
// for (let innerI = i + 1; innerI < allInsertionsInSeqReads.length; innerI++){
|
|
33
|
+
// if (refSeqWithGaps[i] - 1 !== "-") {
|
|
34
|
+
// // allInsertionsInSeqReads[innerI] += 1;
|
|
35
|
+
// allInsertionsInSeqReads[i + 1] += 1;
|
|
36
|
+
// }
|
|
37
|
+
// }
|
|
38
|
+
// }
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import insertGapsIntoRefSeq from "./insertGapsIntoRefSeq.js";
|
|
2
|
+
|
|
3
|
+
describe("insert gaps into ref seq from seq reads' insertions", () => {
  // Each row is [name, seq, pos, cigar]; expanded into read objects below.
  const readRows = [
    ["r1", "GATTGAC", 3, "2M2I3M"],
    ["r2", "GAGAGAC", 3, "7M"],
    ["r3", "GGGAGATCAC", 1, "6M1I3M"],
    ["r4", "GATTGAC", 3, "2M2I3M"],
    ["r5", "GAGC", 3, "3M1D1M"],
    ["r6", "GAGCTTACC", 3, "3M1D1M2I3M"],
    ["r7", "GGCATTTCC", 2, "2M3D2M3I2M"],
    ["r8", "GGATTGACATT", 1, "1D3M2I4M2I2D"],
    ["r9", "GGTTTGACCTTT", 1, "2M3I2D1M2D3M3I"]
  ];

  it("ref seq with all insertions", () => {
    const seqReads = readRows.map(([name, seq, pos, cigar]) => ({
      name,
      seq,
      pos,
      cigar
    }));
    expect(insertGapsIntoRefSeq("GGGAGACACC", seqReads)).toEqual(
      "GG---GA--GA-C--A---CC---"
    );
  });
});
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
//tnr: half finished test.
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
import chai from "chai";
|
|
5
|
+
import chaiSubset from "chai-subset";
|
|
6
|
+
|
|
7
|
+
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
8
|
+
import insertSequenceDataAtPosition from "./insertSequenceDataAtPosition";
|
|
9
|
+
|
|
10
|
+
chai.should();
|
|
11
|
+
chai.use(chaiSubset);
|
|
12
|
+
|
|
13
|
+
describe("insertSequenceData", () => {
  // Both cases insert the same 10 bp sequence at the very start (caret 0).
  it("inserts characters at correct caret position", () => {
    const insert = tidyUpSequenceData({ sequence: "atgagagaga" });
    // the target is an empty, tidied sequence
    const target = tidyUpSequenceData({});

    const result = insertSequenceDataAtPosition(insert, target, 0);

    const expectedLength = target.sequence.length + insert.sequence.length;
    result.sequence.length.should.equal(expectedLength);
  });

  it("inserts characters at correct caret position", () => {
    const insert = tidyUpSequenceData({ sequence: "atgagagaga" });
    const target = tidyUpSequenceData({
      sequence: "atgagagaga",
      features: [
        {
          start: 0,
          end: 9,
          locations: [{ start: 0, end: 3 }, { start: 5, end: 9 }]
        }
      ]
    });

    const result = insertSequenceDataAtPosition(insert, target, 0);
    const shift = insert.sequence.length;

    result.sequence.length.should.equal(target.sequence.length + shift);
    result.features.length.should.equal(1);

    // inserting before the feature pushes it (and each of its locations) right by `shift`
    const movedFeature = result.features[0];
    const originalFeature = target.features[0];
    movedFeature.start.should.equal(originalFeature.start + shift);
    ["start", "end"].forEach(edge => {
      [0, 1].forEach(locationIndex => {
        movedFeature.locations[locationIndex][edge].should.equal(
          originalFeature.locations[locationIndex][edge] + shift
        );
      });
    });
  });
});
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import {getRangeLength} from "@teselagen/range-utils";
|
|
2
|
+
import {map, cloneDeep} from "lodash";
|
|
3
|
+
import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
|
|
4
|
+
import rotateSequenceDataToPosition from "./rotateSequenceDataToPosition";
|
|
5
|
+
import {adjustRangeToDeletionOfAnotherRange} from "@teselagen/range-utils";
|
|
6
|
+
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
7
|
+
import {modifiableTypes} from "./annotationTypes";
|
|
8
|
+
import adjustBpsToReplaceOrInsert from "./adjustBpsToReplaceOrInsert";
|
|
9
|
+
import adjustAnnotationsToInsert from "./adjustAnnotationsToInsert";
|
|
10
|
+
|
|
11
|
+
/**
 * Insert sequence data into existing sequence data at a caret position, or
 * replace a selected range of the existing sequence with the inserted data.
 * The sequence, protein sequence, sizes, annotations (every type listed in
 * `modifiableTypes`) and chromatogram data (when present) are updated on a
 * deep clone of the tidied existing sequence data.
 *
 * @param {Object} _sequenceDataToInsert - sequence data to insert (tidied with tidyUpSequenceData)
 * @param {Object} _existingSequenceData - sequence data being inserted into (tidied with tidyUpSequenceData)
 * @param {number|{start: number, end: number}} caretPositionOrRange - a caret position, or a
 *   range (detected via `start > -1`) whose contents are deleted before the insert
 * @param {Object} [options] - also forwarded to tidyUpSequenceData
 * @param {boolean} [options.maintainOriginSplit] - see the comment at the top of the body
 * @returns {Object} the new sequence data (rotated when replacing around the origin
 *   with maintainOriginSplit set)
 */
export default function insertSequenceDataAtPositionOrRange(
  _sequenceDataToInsert,
  _existingSequenceData,
  caretPositionOrRange,
  options = {}
) {
  //maintainOriginSplit means that if you're inserting around the origin with n bps selected before the origin
  //when inserting new seq, n bps of the new seq should go in before the origin and the rest should be
  //inserted at the sequence start
  const { maintainOriginSplit } = options;
  let existingSequenceData = tidyUpSequenceData(_existingSequenceData, options);
  const sequenceDataToInsert = tidyUpSequenceData(
    _sequenceDataToInsert,
    options
  );
  const newSequenceData = cloneDeep(existingSequenceData);
  // annotation offsets are in bps, so a protein insert counts 3 bps per amino acid
  const insertLength = sequenceDataToInsert.proteinSequence
    ? sequenceDataToInsert.proteinSequence.length * 3
    : sequenceDataToInsert.sequence.length;
  let caretPosition = caretPositionOrRange;

  const isInsertSameLengthAsSelection =
    sequenceDataToInsert.sequence.length ===
    getRangeLength(caretPositionOrRange, existingSequenceData.sequence.length);

  if (
    caretPositionOrRange.start > -1 &&
    getRangeLength(
      caretPositionOrRange,
      existingSequenceData.sequence.length
    ) === existingSequenceData.sequence.length
  ) {
    //handle the case where we're deleting everything!
    existingSequenceData = tidyUpSequenceData(
      {
        ...existingSequenceData,
        // reset every modifiable annotation type to an empty list; the reducer
        // must return the accumulator (not the freshly assigned array) for the
        // spread to actually contain the cleared keys
        ...modifiableTypes.reduce((acc, type) => {
          acc[type] = [];
          return acc;
        }, {}),
        sequence: "",
        proteinSequence: "",
        chromatogramData: undefined
      },
      options
    );
    newSequenceData.chromatogramData = undefined;
  } else if (
    newSequenceData.chromatogramData &&
    newSequenceData.chromatogramData.baseTraces
  ) {
    //handle chromatogramData updates
    if (caretPositionOrRange && caretPositionOrRange.start > -1) {
      if (caretPositionOrRange.start > caretPositionOrRange.end) {
        // the range wraps around the origin: trim the tail piece first, then the head piece
        newSequenceData.chromatogramData = trimChromatogram({
          chromatogramData: newSequenceData.chromatogramData,
          range: {
            start: caretPositionOrRange.start,
            end: newSequenceData.sequence.length
          },
          justBaseCalls: isInsertSameLengthAsSelection
        });
        newSequenceData.chromatogramData = trimChromatogram({
          chromatogramData: newSequenceData.chromatogramData,
          range: {
            start: 0,
            end: caretPositionOrRange.end
          },
          justBaseCalls: isInsertSameLengthAsSelection
        });
      } else {
        newSequenceData.chromatogramData = trimChromatogram({
          chromatogramData: newSequenceData.chromatogramData,
          range: {
            start: caretPositionOrRange.start,
            end: caretPositionOrRange.end
          },
          justBaseCalls: isInsertSameLengthAsSelection
        });
      }
    }
    if (sequenceDataToInsert.sequence) {
      insertIntoChromatogram({
        chromatogramData: newSequenceData.chromatogramData,
        caretPosition:
          caretPositionOrRange.start > -1
            ? caretPositionOrRange.start
            : caretPositionOrRange,
        seqToInsert: sequenceDataToInsert.sequence,
        justBaseCalls: isInsertSameLengthAsSelection
      });
    }
  }

  //update the sequence
  newSequenceData.sequence = adjustBpsToReplaceOrInsert(
    existingSequenceData.sequence,
    sequenceDataToInsert.sequence,
    caretPositionOrRange
  );
  newSequenceData.size = newSequenceData.sequence.length;
  newSequenceData.proteinSequence = adjustBpsToReplaceOrInsert(
    existingSequenceData.proteinSequence,
    sequenceDataToInsert.proteinSequence,
    convertDnaCaretPositionOrRangeToAa(caretPositionOrRange)
  );
  newSequenceData.proteinSize = newSequenceData.proteinSequence.length;

  //handle the insert
  modifiableTypes.forEach(annotationType => {
    let existingAnnotations = existingSequenceData[annotationType];
    //update the annotations:
    //handle the delete if necessary
    if (caretPositionOrRange && caretPositionOrRange.start > -1) {
      //we have a range! so let's delete it!
      const range = caretPositionOrRange;
      // a range that wraps the origin leaves the insert at position 0
      caretPosition = range.start > range.end ? 0 : range.start;
      //update all annotations for the deletion
      existingAnnotations = adjustAnnotationsToDelete(
        existingAnnotations,
        range,
        existingSequenceData.sequence.length
      );
    }
    //first clear the newSequenceData's annotations
    newSequenceData[annotationType] = [];
    //in two steps adjust the annotations to the insert
    // step 1: shift the existing annotations to make room for the insert
    newSequenceData[annotationType] = newSequenceData[annotationType].concat(
      adjustAnnotationsToInsert(
        existingAnnotations,
        caretPosition,
        insertLength
      )
    );
    // step 2: shift the inserted annotations so they start at the caret
    newSequenceData[annotationType] = newSequenceData[annotationType].concat(
      adjustAnnotationsToInsert(
        sequenceDataToInsert[annotationType],
        0,
        caretPosition
      )
    );
  });
  if (
    maintainOriginSplit &&
    caretPositionOrRange &&
    caretPositionOrRange.start > caretPositionOrRange.end
  ) {
    //we're replacing around the origin and maintainOriginSplit=true
    //so rotate the resulting seqData n bps
    const caretPosToRotateTo =
      existingSequenceData.sequence.length - caretPositionOrRange.start;
    return rotateSequenceDataToPosition(
      newSequenceData,
      Math.min(caretPosToRotateTo, insertLength)
    );
  }
  return newSequenceData;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Adjust a list of annotations so they still line up after `range` has been
 * deleted from the sequence.
 *
 * Each annotation (and each entry of its optional `locations` array) is passed
 * through adjustRangeToDeletionOfAnotherRange. Falsy results are dropped, which
 * removes annotations/locations that were fully deleted.
 *
 * @param {Array<Object>} annotationsToBeAdjusted - annotations to adjust; an
 *   annotation may carry a `locations` array of sub-ranges.
 * @param {Object} range - the range being deleted.
 * @param {number} maxLength - forwarded to adjustRangeToDeletionOfAnotherRange
 *   (the visible caller passes the pre-deletion sequence length).
 * @returns {Array<Object>} the adjusted annotations that survived the deletion.
 */
function adjustAnnotationsToDelete(annotationsToBeAdjusted, range, maxLength) {
  // The annotation itself and every sub-location receive the same adjustment.
  const adjustForDeletion = rangeLike =>
    adjustRangeToDeletionOfAnotherRange(rangeLike, range, maxLength);

  const adjustedAnnotations = map(annotationsToBeAdjusted, annotation => {
    const adjustedAnnotation = adjustForDeletion(annotation);
    const survivingLocations =
      annotation.locations &&
      annotation.locations.map(loc => adjustForDeletion(loc)).filter(Boolean);

    // No sub-locations (or none survived): the adjusted annotation stands alone.
    if (!survivingLocations || !survivingLocations.length) {
      return adjustedAnnotation;
    }

    // The outer bounds are re-derived from the first and last surviving location.
    const firstLocation = survivingLocations[0];
    const lastLocation = survivingLocations[survivingLocations.length - 1];
    const rebuiltAnnotation = {
      ...adjustedAnnotation,
      start: firstLocation.start,
      end: lastLocation.end
    };
    // `locations` is only written back when more than one location survives;
    // with exactly one, whatever `locations` the adjusted annotation already
    // carried is left as-is.
    if (survivingLocations.length > 1) {
      rebuiltAnnotation.locations = survivingLocations;
    }
    return rebuiltAnnotation;
  });

  // Drop annotations that were fully deleted.
  return adjustedAnnotations.filter(Boolean);
}
|
|
193
|
+
|
|
194
|
+
/**
 * Insert placeholder chromatogram entries for newly inserted bases.
 *
 * `chromatogramData` is mutated in place; whichever of its `baseCalls`,
 * `baseTraces` and `qualNums` arrays exist are spliced at `caretPosition`.
 * Inserted bases get a quality number of 0 and all-zero traces.
 *
 * NOTE(review): `basePos` is not touched here although trimChromatogram
 * splices it — confirm whether inserted bases need `basePos` entries too.
 *
 * @param {Object} options
 * @param {Object} options.chromatogramData - chromatogram data to mutate.
 * @param {number} options.caretPosition - index at which to insert.
 * @param {string} options.seqToInsert - bases being inserted.
 * @param {boolean} options.justBaseCalls - when truthy only `baseCalls` is
 *   updated and traces/quality numbers are left alone.
 * @returns {Object} the same `chromatogramData` object that was passed in.
 */
function insertIntoChromatogram({
  chromatogramData,
  caretPosition,
  seqToInsert,
  justBaseCalls
}) {
  // Nothing to insert: return the untouched data so every code path yields
  // the same object (previously this path returned undefined).
  if (!seqToInsert.length) {
    return chromatogramData;
  }

  if (chromatogramData.baseCalls) {
    chromatogramData.baseCalls.splice(
      caretPosition,
      0,
      ...seqToInsert.split("")
    );
  }
  if (justBaseCalls) {
    //return early if just base calls
    return chromatogramData;
  }

  // Every channel gets its OWN zero-filled array. Sharing one array between
  // aTrace/cTrace/gTrace/tTrace would make a later in-place edit of one
  // channel silently change the other three.
  const makeBlankTrace = () => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];

  const baseTracesToInsert = [];
  const qualNumsToInsert = [];
  for (let index = 0; index < seqToInsert.length; index++) {
    qualNumsToInsert.push(0);
    baseTracesToInsert.push({
      aTrace: makeBlankTrace(),
      cTrace: makeBlankTrace(),
      gTrace: makeBlankTrace(),
      tTrace: makeBlankTrace()
    });
  }

  if (chromatogramData.baseTraces) {
    chromatogramData.baseTraces.splice(caretPosition, 0, ...baseTracesToInsert);
  }
  if (chromatogramData.qualNums) {
    chromatogramData.qualNums.splice(caretPosition, 0, ...qualNumsToInsert);
  }

  return chromatogramData;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Remove the entries covered by an inclusive `start..end` range from the
 * chromatogram arrays, mutating `chromatogramData` in place.
 *
 * `baseCalls` is always trimmed; `qualNums`, `baseTraces` and `basePos` are
 * trimmed as well unless `justBaseCalls` is truthy. Arrays that are absent
 * are skipped.
 *
 * NOTE(review): when `start > end` the computed delete count is not positive,
 * so `splice` removes nothing — confirm that wrap-around ranges never need to
 * be trimmed here.
 *
 * @param {Object} options
 * @param {Object} options.chromatogramData - chromatogram data to mutate.
 * @param {{start: number, end: number}} options.range - inclusive range to remove.
 * @param {boolean} options.justBaseCalls - restrict trimming to `baseCalls`.
 * @returns {Object} the same `chromatogramData` object that was passed in.
 */
function trimChromatogram({
  chromatogramData,
  range: { start, end },
  justBaseCalls
}) {
  const tracksToTrim = ["baseCalls"];
  if (!justBaseCalls) {
    tracksToTrim.push("qualNums", "baseTraces", "basePos");
  }

  for (const trackName of tracksToTrim) {
    const track = chromatogramData[trackName];
    if (track) {
      // The range is inclusive on both ends, hence the +1.
      track.splice(start, end - start + 1);
    }
  }

  return chromatogramData;
}
|