@teselagen/sequence-utils 0.1.22 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +4 -3
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
// tnrtodo: figure out where to insert this validation exactly..
|
|
2
|
+
import shortid from "shortid";
|
|
3
|
+
|
|
4
|
+
import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
|
|
5
|
+
import {cloneDeep, flatMap} from "lodash";
|
|
6
|
+
import { annotationTypes } from "./annotationTypes";
|
|
7
|
+
import filterSequenceString from "./filterSequenceString";
|
|
8
|
+
import tidyUpAnnotation from "./tidyUpAnnotation";
|
|
9
|
+
import filterAminoAcidSequenceString from "./filterAminoAcidSequenceString";
|
|
10
|
+
import getDegenerateDnaStringFromAaString from "./getDegenerateDnaStringFromAAString";
|
|
11
|
+
import {getFeatureTypes} from "./featureTypesAndColors";
|
|
12
|
+
|
|
13
|
+
/**
 * Returns a normalized deep copy of a sequence-data object.
 *
 * The input is never mutated. On the copy this function:
 *  - defaults `sequence` and `proteinSequence` to "";
 *  - for proteins, forces `circular` to false, optionally back-translates the
 *    amino acid sequence into a degenerate DNA `sequence`, and fills in
 *    `aminoAcidDataForEachBaseOfDNA`;
 *  - for RNA, replaces every t/T in `sequence` with "u";
 *  - optionally strips unwanted characters;
 *  - recomputes `size` / `proteinSize` (unless `noSequence` is set);
 *  - coerces `circular` to a strict boolean;
 *  - turns every annotation collection named in `annotationTypes` into an
 *    array and keeps only the annotations for which `tidyUpAnnotation`
 *    returns a truthy value;
 *  - fills in missing `aminoAcids` on translations;
 *  - optionally converts the annotation arrays into objects keyed by id.
 *
 * @param {Object} [pSeqData] - Sequence data to tidy; a falsy value is treated as `{}`.
 * @param {Object} [options]
 * @param {boolean} [options.annotationsAsObjects] - Return each annotation collection as an object keyed by annotation id instead of an array.
 * @param {boolean} [options.logMessages] - Log `response.messages` via `console.info` when it is non-empty.
 * @param {boolean} [options.removeUnwantedChars] - Filter the protein sequence (proteins) or the DNA/RNA sequence (everything else).
 * @param {string} [options.additionalValidChars] - Extra characters handed to `filterSequenceString`.
 * @param {boolean} [options.noTranslationData] - Skip the translation amino-acid pass entirely.
 * @param {Object} [options.charOverrides] - Passed as the third argument to `filterSequenceString`.
 * @param {boolean} [options.doNotProvideIdsForAnnotations] - With `annotationsAsObjects`, do not write generated ids back onto annotations.
 * @param {Object} [options.proteinFilterOptions] - Merged over `{ includeStopCodon: true }` for `filterAminoAcidSequenceString`.
 * @param {boolean} [options.noCdsTranslations] - Drop translations whose `translationType` is "CDS Feature".
 * @param {boolean} [options.convertAnnotationsFromAAIndices] - Forwarded to `tidyUpAnnotation`.
 *   Any other option is forwarded untouched to `tidyUpAnnotation` as well.
 * @returns {Object} The tidied deep copy of the sequence data.
 */
export default function tidyUpSequenceData(pSeqData, options = {}) {
  const {
    annotationsAsObjects,
    logMessages,
    removeUnwantedChars,
    additionalValidChars,
    noTranslationData,
    charOverrides,
    doNotProvideIdsForAnnotations,
    proteinFilterOptions,
    noCdsTranslations,
    convertAnnotationsFromAAIndices
  } = options;

  // Sequence data is usually treated as immutable, so all work happens on a deep clone.
  const seqData = cloneDeep(pSeqData) || {};
  const response = {
    messages: []
  };

  if (!seqData.sequence) {
    seqData.sequence = "";
  }
  if (!seqData.proteinSequence) {
    seqData.proteinSequence = "";
  }

  let needsBackTranslation = false;
  if (seqData.isProtein) {
    seqData.circular = false; // there are no circular proteins
    // proteinSequence was defaulted to "" above, so it is always set here and
    // seqData.sequence is never used as a stand-in for a missing protein sequence.
    if (
      !seqData.sequence ||
      seqData.sequence.length !== seqData.proteinSequence.length * 3
    ) {
      // No DNA, or DNA whose length cannot correspond to the protein: back-translate.
      // This is decided before any character filtering below.
      needsBackTranslation = true;
    }
  }

  if (seqData.isRna) {
    // Flip all t's to u's. NOTE: the match is case-insensitive but the
    // replacement is always lowercase, so "T" also becomes "u".
    seqData.sequence = seqData.sequence.replace(/t/gi, "u");
  }

  if (removeUnwantedChars) {
    if (seqData.isProtein) {
      seqData.proteinSequence = filterAminoAcidSequenceString(
        seqData.proteinSequence,
        { includeStopCodon: true, ...proteinFilterOptions }
      );
    } else {
      // RNA and mixed RNA/DNA sequences are additionally allowed to contain u's.
      const allowUracil = seqData.isRna || seqData.isMixedRnaAndDna;
      seqData.sequence = filterSequenceString(
        seqData.sequence,
        `${additionalValidChars || ""}${allowUracil ? "u" : ""}`,
        charOverrides
      );
    }
  }

  if (seqData.isProtein) {
    if (needsBackTranslation) {
      // Back-translate the (possibly filtered) AA sequence.
      seqData.sequence = getDegenerateDnaStringFromAaString(
        seqData.proteinSequence
      );
    }
    seqData.aminoAcidDataForEachBaseOfDNA = getAminoAcidDataForEachBaseOfDna(
      seqData.proteinSequence,
      true,
      null,
      true
    );
  }

  // With noSequence the caller-supplied sizes are authoritative and must not be
  // overwritten by the (empty) sequence lengths.
  if (!seqData.noSequence) {
    seqData.size = seqData.sequence.length;
    seqData.proteinSize = seqData.proteinSequence.length;
  }

  // Coerce the various "linear" spellings to a strict boolean.
  const isLinear =
    seqData.circular === "false" ||
    // Loose equality is deliberate: it matches both -1 and "-1".
    // eslint-disable-next-line eqeqeq
    seqData.circular == -1 ||
    seqData.circular === false ||
    (!seqData.circular && seqData.sequenceTypeCode !== "CIRCULAR_DNA");
  seqData.circular = !isLinear;

  const featureTypes = getFeatureTypes();

  annotationTypes.forEach(annotationType => {
    const collection = seqData[annotationType];
    if (!Array.isArray(collection)) {
      // `typeof null` is "object", so null has to be excluded explicitly;
      // otherwise reading its values would throw a TypeError.
      seqData[annotationType] =
        collection !== null && typeof collection === "object"
          ? Object.values(collection)
          : [];
    }
    // tidyUpAnnotation is called in mutative mode; its return value decides
    // whether the annotation is kept.
    seqData[annotationType] = seqData[annotationType].filter(annotation => {
      return tidyUpAnnotation(annotation, {
        ...options,
        featureTypes,
        sequenceData: seqData,
        convertAnnotationsFromAAIndices,
        mutative: true,
        annotationType
      });
    });
  });

  if (!noTranslationData) {
    seqData.translations = flatMap(seqData.translations, translation => {
      if (noCdsTranslations && translation.translationType === "CDS Feature") {
        // Returning [] from flatMap filters the CDS translation out.
        return [];
      }
      if (!translation.aminoAcids && !seqData.noSequence) {
        translation.aminoAcids = getAminoAcidDataForEachBaseOfDna(
          seqData.sequence,
          translation.forward,
          translation
        );
      }
      return translation;
    });
  }

  if (annotationsAsObjects) {
    annotationTypes.forEach(name => {
      seqData[name] = seqData[name].reduce((acc, item) => {
        // 0 is a legitimate id, so it is checked for explicitly.
        const hasId = Boolean(item.id) || item.id === 0;
        const itemId = hasId ? item.id : shortid();
        if (!hasId && !doNotProvideIdsForAnnotations) {
          item.id = itemId; // assign the newly created id to the item
        }
        acc[itemId] = item;
        return acc;
      }, {});
    });
  }

  // NOTE(review): nothing in this function pushes onto response.messages, so
  // this branch cannot log as written — confirm where messages should come from.
  if (logMessages && response.messages.length > 0) {
    console.info("tidyUpSequenceData messages:", response.messages);
  }
  return seqData;
}
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
import assert from "assert";
|
|
2
|
+
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
3
|
+
import chai from "chai";
|
|
4
|
+
import chaiSubset from "chai-subset";
|
|
5
|
+
chai.use(chaiSubset);
chai.should();

// ---------------------------------------------------------------------------
// Shared fixtures
// ---------------------------------------------------------------------------

// Annotation collections expected on a tidied, otherwise empty sequence.
const EMPTY_ANNOTATION_ARRAYS = {
  features: [],
  parts: [],
  translations: [],
  cutsites: [],
  orfs: []
};
const EMPTY_ANNOTATION_OBJECTS = {
  features: {},
  parts: {},
  translations: {},
  cutsites: {},
  orfs: {}
};

// GenBank-style notes payload. The long translation is written once and both
// the valid and the deliberately malformed JSON strings are built around it.
const AMPICILLIN_TRANSLATION =
  "MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW*";
const NOTES_TAIL = `,"note":["ORF frame 1"],"translation":["${AMPICILLIN_TRANSLATION}"],"ApEinfo_fwdcolor":["pink"],"ApEinfo_revcolor":["pink"],"ApEinfo_graphicformat":["arrow_data {{0 1 2 0 0 -1} {} 0}"]}`;
const VALID_NOTES_JSON = `{"gene":["Ampicillin"]${NOTES_TAIL}`;
// Missing the closing quote after "gene", so JSON parsing must fail.
const MALFORMED_NOTES_JSON = `{"gene:["Ampicillin"]${NOTES_TAIL}`;

// Tidy a sequence that consists of nothing but the given features.
const tidyFeatures = (features, options) =>
  tidyUpSequenceData({ features }, options);

describe("tidyUpSequenceData", () => {
  it("should remove unwanted chars if passed that option, while handling annotation start,end (and location start,end) truncation correctly", () => {
    const input = {
      sequence: "http://localhost:3344/Standalone",
      features: [
        {
          start: 3,
          end: 20,
          locations: [
            // the string start should be converted to an int
            { start: "3", end: 5 },
            { start: 10, end: 20 }
          ]
        }
      ]
    };

    const tidied = tidyUpSequenceData(input, { removeUnwantedChars: true });

    tidied.should.containSubset({
      sequence: "httcahstStandan",
      circular: false,
      features: [
        {
          start: 3,
          end: 14,
          locations: [
            { start: 3, end: 5 },
            { start: 10, end: 14 }
          ]
        }
      ]
    });
  });

  it("should handle a protein sequence being passed in with isProtein set to true", () => {
    const input = {
      isProtein: true,
      circular: true,
      proteinSequence: "gagiuhwgagalasjglj*.",
      features: [
        { start: 3, end: 10, forward: false },
        { start: 10, end: 20 },
        { name: "iDon'tFit", start: 25, end: 35 }
      ]
    };

    const tidied = tidyUpSequenceData(input, {
      convertAnnotationsFromAAIndices: true,
      removeUnwantedChars: true
    });

    // One entry per base of the final (gap) codon, which spans bases 51-53.
    const expectedGapCodonBases = [0, 1, 2].map(positionInCodon => ({
      aminoAcid: {
        value: ".",
        name: "Gap",
        threeLettersName: "Gap"
      },
      positionInCodon,
      aminoAcidIndex: 17,
      sequenceIndex: 51 + positionInCodon,
      codonRange: {
        start: 51,
        end: 53
      },
      fullCodon: true
    }));

    tidied.should.containSubset({
      aminoAcidDataForEachBaseOfDNA: expectedGapCodonBases,
      isProtein: true,
      size: 54, // size refers to the DNA length
      proteinSize: 18, // proteinSize refers to the amino acid length
      sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnggnytntrr...", // degenerate sequence
      proteinSequence: "gagiuhwgagalasgl*.",
      circular: false,
      features: [
        { start: 9, end: 32, forward: true },
        { start: 30, end: 53, forward: true },
        {
          name: "iDon'tFit",
          start: 51,
          end: 53,
          forward: true
        }
      ]
    });
  });

  it("isRna should make the t's converted to u's", () => {
    const tidied = tidyUpSequenceData({
      sequence: "tgatavagauugagcctttuuu",
      isRna: true
    });

    tidied.should.containSubset({
      sequence: "ugauavagauugagccuuuuuu",
      isRna: true
    });
  });

  it("should handle the noSequence option correctly and not truncate .size", () => {
    const tidied = tidyUpSequenceData({
      noSequence: true,
      size: 20
    });

    tidied.should.containSubset({
      noSequence: true,
      sequence: "",
      size: 20,
      circular: false,
      ...EMPTY_ANNOTATION_ARRAYS
    });
  });

  it("should add default fields to an empty sequence obj", () => {
    const tidied = tidyUpSequenceData({});

    tidied.should.containSubset({
      sequence: "",
      size: 0,
      circular: false,
      ...EMPTY_ANNOTATION_ARRAYS
    });
  });

  it("should add default fields to an empty sequence obj, and handle annotationsAsObjects=true", () => {
    const tidied = tidyUpSequenceData({}, { annotationsAsObjects: true });

    tidied.should.containSubset({
      sequence: "",
      size: 0,
      circular: false,
      ...EMPTY_ANNOTATION_OBJECTS
    });
  });

  it("should add ids to annotations", () => {
    const tidied = tidyFeatures([{ start: 4, end: 5 }, {}], {
      annotationsAsObjects: true
    });

    Object.keys(tidied.features).should.be.length(2);
  });

  it("should add feature type = misc_feature if no type is provided", () => {
    const [feature] = tidyFeatures([{ start: 4, end: 5 }]).features;

    feature.type.should.equal("misc_feature");
  });

  it("should try to auto-parse annotation.notes into JSON and gracefully handle errors", () => {
    const parsed = tidyFeatures([
      { start: 4, end: 5, notes: VALID_NOTES_JSON }
    ]);
    parsed.features[0].notes.gene[0].should.equal("Ampicillin");

    // Unparseable notes must come back untouched, as the original string.
    const unparsed = tidyFeatures([
      { start: 4, end: 5, notes: MALFORMED_NOTES_JSON }
    ]);
    unparsed.features[0].notes.should.equal(MALFORMED_NOTES_JSON);
  });

  it("should add feature type = misc_feature if an invalid type is provided", () => {
    const [feature] = tidyFeatures([
      { start: 4, end: 5, type: "idontexist" }
    ]).features;

    feature.type.should.equal("misc_feature");
  });

  it("should allow non-standard genbank feature types if allowNonStandardGenbankTypes=true", () => {
    const [feature] = tidyFeatures(
      [{ start: 4, end: 5, type: "idontexist" }],
      { allowNonStandardGenbankTypes: true }
    ).features;

    feature.type.should.equal("idontexist");
  });

  it("should normalize strange upper/lower casing in feature types", () => {
    const [feature] = tidyFeatures([{ start: 4, end: 5, type: "cDs" }])
      .features;

    feature.type.should.equal("CDS");
  });

  it("should not clobber existing feature types", () => {
    const [feature] = tidyFeatures([{ start: 4, end: 5, type: "CDS" }])
      .features;

    feature.type.should.equal("CDS");
  });

  it("should add correct color based on type for existing features colors", () => {
    const [feature] = tidyFeatures([{ start: 4, end: 5, type: "CDS" }])
      .features;

    feature.color.should.equal("#EF6500");
  });

  it("should not clobber existing feature colors", () => {
    const [feature] = tidyFeatures([{ start: 4, end: 5, color: "#f4f4f4" }])
      .features;

    feature.color.should.equal("#f4f4f4");
  });

  it("should add new ids to annotations if passed that option", () => {
    const tidied = tidyFeatures([{ start: 4, end: 5, id: 123 }, {}], {
      provideNewIdsForAnnotations: true
    });

    tidied.features[0].id.should.not.equal(123);
  });

  it("should not add ids even if the ids are missing if doNotProvideIdsForAnnotations=true", () => {
    const tidied = tidyFeatures([{ start: 4, end: 5 }, {}], {
      doNotProvideIdsForAnnotations: true
    });

    assert.strictEqual(tidied.features[0].id, undefined);
  });

  it("should add the annotationTypePlural field", () => {
    const [feature] = tidyFeatures([{ start: 4, end: 5, id: 123 }, {}], {
      provideNewIdsForAnnotations: true
    }).features;

    feature.id.should.not.equal(123);
    feature.annotationTypePlural.should.equal("features");
  });

  // Disabled: this test never had an assertion written for it.
  // it("should add amino acids to a bare translation obj", function() {
  //   const res = tidyUpSequenceData({
  //     sequence: "gtagagatagagataga",
  //     size: 0,
  //     circular: false,
  //     features: [],
  //     parts: [],
  //     translations: [
  //       {
  //         start: 0,
  //         end: 10
  //       }
  //     ],
  //     cutsites: [],
  //     orfs: []
  //   });
  //   // res.should.containSubset({})
  // });
});
|