@teselagen/sequence-utils 0.3.37 → 0.3.38-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DNAComplementMap.d.ts +1 -1
- package/addGapsToSeqReads.d.ts +16 -3
- package/adjustAnnotationsToInsert.d.ts +2 -1
- package/adjustBpsToReplaceOrInsert.d.ts +2 -1
- package/aliasedEnzymesByName.d.ts +37 -1
- package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
- package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
- package/annotateSingleSeq.d.ts +5 -4
- package/annotationTypes.d.ts +2 -2
- package/autoAnnotate.d.ts +17 -8
- package/bioData.d.ts +10 -58
- package/calculateEndStability.d.ts +1 -1
- package/calculateNebTa.d.ts +6 -1
- package/calculateNebTm.d.ts +6 -4
- package/calculatePercentGC.d.ts +1 -1
- package/calculateSantaLuciaTm.d.ts +28 -114
- package/calculateTm.d.ts +13 -1
- package/computeDigestFragments.d.ts +30 -24
- package/condensePairwiseAlignmentDifferences.d.ts +1 -1
- package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
- package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
- package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
- package/defaultEnzymesByName.d.ts +2 -1
- package/degenerateDnaToAminoAcidMap.d.ts +1 -1
- package/degenerateRnaToAminoAcidMap.d.ts +1 -1
- package/deleteSequenceDataAtRange.d.ts +2 -1
- package/diffUtils.d.ts +9 -7
- package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
- package/featureTypesAndColors.d.ts +19 -6
- package/filterSequenceString.d.ts +14 -10
- package/findApproxMatches.d.ts +7 -1
- package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
- package/findOrfsInPlasmid.d.ts +2 -11
- package/findSequenceMatches.d.ts +11 -1
- package/generateAnnotations.d.ts +2 -1
- package/generateSequenceData.d.ts +8 -13
- package/getAllInsertionsInSeqReads.d.ts +11 -1
- package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
- package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
- package/getAminoAcidStringFromSequenceString.d.ts +3 -1
- package/getCodonRangeForAASliver.d.ts +3 -4
- package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getComplementSequenceAndAnnotations.d.ts +5 -1
- package/getComplementSequenceString.d.ts +1 -1
- package/getCutsiteType.d.ts +2 -1
- package/getCutsitesFromSequence.d.ts +2 -1
- package/getDegenerateDnaStringFromAAString.d.ts +1 -1
- package/getDegenerateRnaStringFromAAString.d.ts +1 -1
- package/getDigestFragmentsForCutsites.d.ts +4 -1
- package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
- package/getInsertBetweenVals.d.ts +2 -1
- package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
- package/getOrfsFromSequence.d.ts +17 -11
- package/getOverlapBetweenTwoSequences.d.ts +2 -1
- package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
- package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAnnotation.d.ts +11 -1
- package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
- package/getReverseComplementSequenceString.d.ts +1 -1
- package/getReverseSequenceString.d.ts +1 -1
- package/getSequenceDataBetweenRange.d.ts +9 -1
- package/getVirtualDigest.d.ts +11 -10
- package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
- package/index.cjs +732 -483
- package/index.d.ts +8 -5
- package/index.js +732 -483
- package/index.umd.cjs +732 -483
- package/insertGapsIntoRefSeq.d.ts +2 -1
- package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
- package/isEnzymeType2S.d.ts +2 -1
- package/mapAnnotationsToRows.d.ts +9 -1
- package/package.json +9 -6
- package/prepareCircularViewData.d.ts +2 -1
- package/prepareRowData.d.ts +7 -3
- package/proteinAlphabet.d.ts +1 -1
- package/rotateBpsToPosition.d.ts +1 -1
- package/rotateSequenceDataToPosition.d.ts +3 -1
- package/shiftAnnotationsByLen.d.ts +4 -3
- package/src/DNAComplementMap.ts +32 -0
- package/src/addGapsToSeqReads.ts +436 -0
- package/src/adjustAnnotationsToInsert.ts +20 -0
- package/src/adjustBpsToReplaceOrInsert.ts +73 -0
- package/src/aliasedEnzymesByName.ts +7366 -0
- package/src/aminoAcidToDegenerateDnaMap.ts +32 -0
- package/src/aminoAcidToDegenerateRnaMap.ts +32 -0
- package/src/annotateSingleSeq.ts +37 -0
- package/src/annotationTypes.ts +23 -0
- package/src/autoAnnotate.test.js +0 -1
- package/src/autoAnnotate.ts +290 -0
- package/src/bioData.ts +65 -0
- package/src/calculateEndStability.ts +91 -0
- package/src/calculateNebTa.ts +46 -0
- package/src/calculateNebTm.ts +132 -0
- package/src/calculatePercentGC.ts +3 -0
- package/src/calculateSantaLuciaTm.ts +184 -0
- package/src/calculateTm.ts +242 -0
- package/src/computeDigestFragments.ts +238 -0
- package/src/condensePairwiseAlignmentDifferences.ts +85 -0
- package/src/convertAACaretPositionOrRangeToDna.ts +28 -0
- package/src/convertDnaCaretPositionOrRangeToAA.ts +28 -0
- package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
- package/src/defaultEnzymesByName.ts +280 -0
- package/src/degenerateDnaToAminoAcidMap.ts +5 -0
- package/src/degenerateRnaToAminoAcidMap.ts +5 -0
- package/src/deleteSequenceDataAtRange.ts +13 -0
- package/src/diffUtils.ts +80 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
- package/src/featureTypesAndColors.ts +167 -0
- package/src/filterSequenceString.ts +153 -0
- package/src/findApproxMatches.ts +58 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.ts +43 -0
- package/src/findOrfsInPlasmid.js +6 -1
- package/src/findOrfsInPlasmid.ts +31 -0
- package/src/findSequenceMatches.ts +154 -0
- package/src/generateAnnotations.ts +39 -0
- package/src/generateSequenceData.ts +212 -0
- package/src/getAllInsertionsInSeqReads.ts +100 -0
- package/src/getAminoAcidDataForEachBaseOfDna.ts +305 -0
- package/src/getAminoAcidFromSequenceTriplet.ts +27 -0
- package/src/getAminoAcidStringFromSequenceString.ts +36 -0
- package/src/getCodonRangeForAASliver.ts +73 -0
- package/src/getComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getComplementSequenceAndAnnotations.ts +25 -0
- package/src/getComplementSequenceString.ts +23 -0
- package/src/getCutsiteType.ts +18 -0
- package/src/getCutsitesFromSequence.ts +22 -0
- package/src/getDegenerateDnaStringFromAAString.ts +15 -0
- package/src/getDegenerateRnaStringFromAAString.ts +15 -0
- package/src/getDigestFragmentsForCutsites.ts +126 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
- package/src/getInsertBetweenVals.ts +31 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +40 -0
- package/src/getMassOfAaString.ts +29 -0
- package/src/getOrfsFromSequence.ts +132 -0
- package/src/getOverlapBetweenTwoSequences.ts +30 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.ts +149 -0
- package/src/getReverseAminoAcidStringFromSequenceString.ts +22 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getReverseComplementAnnotation.ts +33 -0
- package/src/getReverseComplementSequenceAndAnnotations.ts +46 -0
- package/src/getReverseComplementSequenceString.ts +18 -0
- package/src/getReverseSequenceString.ts +12 -0
- package/src/getSequenceDataBetweenRange.ts +154 -0
- package/src/getVirtualDigest.ts +139 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.ts +39 -0
- package/src/index.test.ts +43 -0
- package/src/index.ts +111 -0
- package/src/insertGapsIntoRefSeq.ts +43 -0
- package/src/insertSequenceDataAtPosition.ts +2 -0
- package/src/insertSequenceDataAtPositionOrRange.ts +328 -0
- package/src/isEnzymeType2S.ts +5 -0
- package/src/mapAnnotationsToRows.ts +256 -0
- package/src/prepareCircularViewData.ts +24 -0
- package/src/prepareRowData.ts +61 -0
- package/src/prepareRowData_output1.json +1 -0
- package/src/proteinAlphabet.ts +271 -0
- package/src/rotateBpsToPosition.ts +12 -0
- package/src/rotateSequenceDataToPosition.ts +54 -0
- package/src/shiftAnnotationsByLen.ts +24 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.ts +198 -0
- package/src/tidyUpAnnotation.ts +205 -0
- package/src/tidyUpSequenceData.ts +213 -0
- package/src/types.ts +109 -0
- package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
- package/tidyUpAnnotation.d.ts +13 -11
- package/tidyUpSequenceData.d.ts +15 -1
- package/types.d.ts +105 -0
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import { cloneDeep, get, some } from "lodash-es";
|
|
2
|
+
import { getFeatureToColorMap, getFeatureTypes } from "./featureTypesAndColors";
|
|
3
|
+
import shortid from "shortid";
|
|
4
|
+
import { Annotation, SequenceData } from "./types";
|
|
5
|
+
|
|
6
|
+
/**
 * Options bag for `tidyUpAnnotation`. Every member is optional.
 */
export interface TidyUpAnnotationOptions {
  /** Value copied verbatim onto `annotation.annotationTypePlural`. */
  annotationType?: string;
  /**
   * Sequence the annotation belongs to. Only `size`, `circular` and
   * `isProtein` are read from it.
   */
  sequenceData?: Partial<SequenceData>;
  /**
   * Accepted feature type names. When omitted, the list returned by
   * `getFeatureTypes()` is used instead.
   */
  featureTypes?: string[];
  /** Keep a type that is not in the accepted list instead of replacing it. */
  allowNonStandardGenbankTypes?: boolean;
  /**
   * When true, start/end (and each location's start/end) are converted with
   * `start * 3` and `end * 3 + 2` before range validation.
   */
  convertAnnotationsFromAAIndices?: boolean;
  /** Always overwrite `annotation.id` with a freshly generated id. */
  provideNewIdsForAnnotations?: boolean;
  /** Do not generate an id when the annotation has none. */
  doNotProvideIdsForAnnotations?: boolean;
  /** Array that human-readable notices about applied corrections are pushed onto. */
  messages?: string[];
  /** Modify the passed annotation in place instead of working on a deep clone. */
  mutative?: boolean;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Normalizes a single annotation so downstream code can rely on its shape.
 *
 * Steps, in order:
 *  1. rejects anything that is not an object (returns `false`);
 *  2. works on a deep clone unless `mutative` is set;
 *  3. guarantees a string `name` and (unless disabled) an `id`;
 *  4. coerces/clamps `start`/`end` on the annotation and on each of its
 *     `locations`;
 *  5. normalizes `forward`/`strand` to `true`/`1` or `false`/`-1`;
 *  6. normalizes `type` to the canonical spelling of an accepted feature type,
 *     falling back to `"misc_feature"`;
 *  7. parses `notes` when it is a JSON string;
 *  8. fills in a default `color` looked up by type.
 *
 * @param _annotation The annotation to tidy.
 * @param options See `TidyUpAnnotationOptions`. May be omitted entirely.
 * @returns The tidied annotation (the same instance when `mutative` is true,
 *   a clone otherwise), or `false` when `_annotation` is not an object.
 */
export default function tidyUpAnnotation(
  _annotation: Annotation,
  {
    sequenceData = {},
    convertAnnotationsFromAAIndices,
    annotationType,
    provideNewIdsForAnnotations,
    doNotProvideIdsForAnnotations,
    messages = [],
    mutative,
    allowNonStandardGenbankTypes,
    featureTypes
  }: TidyUpAnnotationOptions = {} // default added so callers may omit the options bag
) {
  const { size, circular, isProtein } = sequenceData;
  if (!_annotation || typeof _annotation !== "object") {
    messages.push("Invalid annotation detected and removed");
    return false;
  }
  let annotation = _annotation;
  if (!mutative) {
    annotation = cloneDeep(_annotation);
  }
  annotation.annotationTypePlural = annotationType;

  if (!annotation.name || typeof annotation.name !== "string") {
    messages.push(
      'Unable to detect valid name for annotation, setting name to "Untitled annotation"'
    );
    annotation.name = "Untitled annotation";
  }
  if (provideNewIdsForAnnotations) {
    annotation.id = shortid();
  }
  // An id of 0 is falsy but still counts as a valid id.
  if (!annotation.id && annotation.id !== 0 && !doNotProvideIdsForAnnotations) {
    annotation.id = shortid();
    messages.push(
      "Unable to detect valid ID for annotation, setting ID to " + annotation.id
    );
  }

  //run this for the annotation itself
  coerceLocation({
    isProtein,
    location: annotation,
    convertAnnotationsFromAAIndices,
    size,
    messages,
    circular,
    name: annotation.name
  });
  //and for each location
  annotation.locations &&
    annotation.locations.forEach(location => {
      coerceLocation({
        isProtein,
        location,
        convertAnnotationsFromAAIndices,
        size,
        messages,
        circular,
        name: annotation.name
      });
    });

  // Protein annotations are always forward; otherwise accept the various
  // boolean/number/string encodings of "forward strand".
  if (
    isProtein ||
    annotation.forward === true ||
    (annotation.forward as unknown) === "true" ||
    annotation.strand === 1 ||
    annotation.strand === "1" ||
    annotation.strand === "+"
  ) {
    annotation.forward = true;
    annotation.strand = 1;
  } else {
    annotation.forward = false;
    annotation.strand = -1;
  }

  // Type resolution happens in two independent steps:
  //  (a) look for a case-insensitive match among the accepted feature types
  //      and, if found, adopt its exact spelling;
  //  (b) only if nothing matched, consult the "allow non-standard types"
  //      switches.
  // Keeping (b) out of the search callback matters: inside the callback it
  // would stop the search at the first non-matching entry, so a known type
  // would never get its canonical spelling, and an empty list would ignore
  // the switch altogether.
  let typeIsAccepted = false;
  if (annotation.type && typeof annotation.type === "string") {
    const requestedType = annotation.type.toLowerCase();
    typeIsAccepted = some(featureTypes || getFeatureTypes(), featureType => {
      if (featureType.toLowerCase() === requestedType) {
        annotation.type = featureType; //this makes sure the annotation.type is being set to the exact value of the accepted featureType
        return true;
      }
      return false;
    });
    if (!typeIsAccepted) {
      typeIsAccepted = Boolean(
        allowNonStandardGenbankTypes ||
          (typeof window !== "undefined" &&
            get(window, "tg_allowNonStandardGenbankTypes")) ||
          (typeof global !== "undefined" &&
            get(global, "tg_allowNonStandardGenbankTypes"))
      );
    }
  }
  if (!typeIsAccepted) {
    messages.push(
      "Invalid annotation type detected: " +
        annotation.type +
        " for " +
        annotation.name +
        ". set type to misc_feature"
    );
    annotation.type = "misc_feature";
  }

  if (annotation.notes && typeof annotation.notes === "string") {
    try {
      annotation.notes = JSON.parse(annotation.notes);
    } catch (error) {
      // Best effort: notes stay a string when they are not valid JSON.
      console.info(
        `warning 33y00a0912 - couldn't parse notes for ${
          annotation.name || ""
        } ${annotation.notes}:`,
        error
      );
    }
  }

  if (!annotation.color) {
    annotation.color =
      getFeatureToColorMap()[
        annotation.type as keyof ReturnType<typeof getFeatureToColorMap>
      ];
  }
  return annotation;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Coerces `location.start` / `location.end` to integers and, when the
 * sequence `size` is known, clamps them into the valid index range
 * `0 .. size - 1`. The location object is modified in place.
 *
 * @param location Object whose `start`/`end` are rewritten.
 * @param convertAnnotationsFromAAIndices When true, start/end are converted
 *   with `start * 3` and `end * 3 + 2` before validation.
 * @param size Sequence length; when `undefined` no range validation is done.
 * @param isProtein When true an out-of-range start is reset to `size - 3`
 *   instead of `size - 1`.
 * @param messages Receives one notice per correction applied.
 * @param circular Only an explicit `false` triggers the wrap-around check.
 * @param name Annotation name used in the notices. Sub-locations carry no
 *   name of their own, so the owning annotation's name must be used.
 */
function coerceLocation({
  location,
  convertAnnotationsFromAAIndices,
  size,
  isProtein,
  messages,
  circular,
  name
}: {
  location: Annotation;
  convertAnnotationsFromAAIndices?: boolean;
  size?: number;
  isProtein?: boolean;
  messages: string[];
  circular?: boolean;
  name?: string;
}) {
  // String(...) first so numeric strings and numbers go through the same path.
  // NOTE(review): a missing start/end becomes NaN here and passes every range
  // check below unchanged — confirm whether callers rely on that.
  location.start = parseInt(String(location.start), 10);
  location.end = parseInt(String(location.end), 10);

  if (convertAnnotationsFromAAIndices) {
    location.start = location.start * 3;
    location.end = location.end * 3 + 2;
  }

  // Every remaining check needs the sequence length.
  if (size === undefined) {
    return;
  }
  const lastIndex = Math.max(0, size - 1);

  if (location.start < 0 || location.start > size - 1) {
    messages.push(
      "Invalid annotation start: " +
        location.start +
        " detected for " +
        name +
        " and set to size: " +
        size
    ); //the internal index is 0-based, users see it 1-based
    location.start = Math.max(0, size - (isProtein ? 3 : 1));
  }
  if (location.end < 0 || location.end > size - 1) {
    messages.push(
      "Invalid annotation end: " +
        location.end +
        " detected for " +
        name +
        " and set to seq size: " +
        size
    ); //the internal index is 0-based, users see it 1-based
    location.end = lastIndex;
  }
  if (location.start > location.end && circular === false) {
    // A start-after-end range cannot exist on a linear sequence, so the end
    // is pushed to the last valid index. (`size` itself is rejected as
    // out of range by the check above, so it must not be assigned here.)
    messages.push(
      "Invalid circular annotation detected for " +
        name +
        ". end set to seq size: " +
        size
    ); //the internal index is 0-based, users see it 1-based
    location.end = lastIndex;
  }
}
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
// tnrtodo: figure out where to insert this validation exactly..
|
|
2
|
+
import shortid from "shortid";
|
|
3
|
+
|
|
4
|
+
import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
|
|
5
|
+
import { cloneDeep, flatMap } from "lodash-es";
|
|
6
|
+
import { annotationTypes } from "./annotationTypes";
|
|
7
|
+
import filterSequenceString from "./filterSequenceString";
|
|
8
|
+
import tidyUpAnnotation from "./tidyUpAnnotation";
|
|
9
|
+
import getDegenerateDnaStringFromAaString from "./getDegenerateDnaStringFromAAString";
|
|
10
|
+
import { getFeatureTypes } from "./featureTypesAndColors";
|
|
11
|
+
import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
|
|
12
|
+
import { expandOrContractRangeByLength } from "@teselagen/range-utils";
|
|
13
|
+
|
|
14
|
+
import { SequenceData, Annotation } from "./types";
|
|
15
|
+
|
|
16
|
+
/**
 * Options bag for `tidyUpSequenceData`. Every member is optional.
 */
export interface TidyUpSequenceDataOptions {
  /** Skip the `filterSequenceString` pass over the sequence / protein sequence. */
  doNotRemoveInvalidChars?: boolean;
  /** Forwarded to `filterSequenceString` when filtering a non-protein sequence. */
  additionalValidChars?: string;
  /**
   * Used instead of the sequence data itself as the option source for
   * `filterSequenceString`.
   */
  topLevelSeqData?: Partial<SequenceData>;
  /**
   * For non-protein sequences, populate `proteinSequence` from
   * `getAminoAcidStringFromSequenceString(sequence)`.
   */
  includeProteinSequence?: boolean;
  /** Leave `translations` untouched (no range expansion, no amino acid data). */
  noTranslationData?: boolean;
  /** Drop translations whose `translationType` is `"CDS Feature"`. */
  noCdsTranslations?: boolean;
  /** Forwarded to `tidyUpAnnotation` for every annotation. */
  convertAnnotationsFromAAIndices?: boolean;
  /** Forwarded to `tidyUpAnnotation` (through the spread options). */
  allowNonStandardGenbankTypes?: boolean;
  /**
   * Forwarded to `tidyUpAnnotation`; additionally, with `annotationsAsObjects`
   * a generated key is not written back to `item.id`.
   */
  doNotProvideIdsForAnnotations?: boolean;
  /** Return each annotation list as an object keyed by annotation id. */
  annotationsAsObjects?: boolean;
  /** Print collected messages with `console.info` when there are any. */
  logMessages?: boolean;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Returns a cleaned-up deep copy of `pSeqData`; the input is not modified.
 *
 * The copy gets: string `sequence`/`proteinSequence`, optional invalid
 * character filtering, back-translation for protein data whose DNA does not
 * match, recomputed `size`/`proteinSize` (unless `noSequence`), a boolean
 * `circular`, every list named in `annotationTypes` as an array of
 * annotations that passed `tidyUpAnnotation`, and (unless disabled) amino
 * acid data on translations.
 *
 * @param pSeqData Sequence data to tidy; may be partial or falsy.
 * @param options See `TidyUpSequenceDataOptions`.
 * @returns The tidied copy.
 */
export default function tidyUpSequenceData(
  pSeqData: Partial<SequenceData>,
  options: TidyUpSequenceDataOptions = {}
) {
  const {
    annotationsAsObjects,
    logMessages,
    doNotRemoveInvalidChars,
    additionalValidChars,
    noTranslationData,
    includeProteinSequence,
    doNotProvideIdsForAnnotations,
    noCdsTranslations,
    convertAnnotationsFromAAIndices,
    topLevelSeqData
  } = options;
  let seqData = cloneDeep(pSeqData) as SequenceData; //sequence is usually immutable, so we clone it and return it
  // Shared sink for the notices produced while tidying annotations.
  const response: { messages: string[] } = {
    messages: []
  };
  if (!seqData) {
    seqData = { sequence: "" } as SequenceData;
  }
  if (!seqData.sequence) {
    seqData.sequence = "";
  }
  if (!seqData.proteinSequence) {
    seqData.proteinSequence = "";
  }
  let needsBackTranslation = false;
  if (seqData.isProtein) {
    seqData.circular = false; //there are no circular proteins..
    // NOTE(review): proteinSequence was defaulted to "" just above, so this
    // condition can never be true and the fallback never runs. Left as-is
    // because enabling it would change results; confirm the intended behavior.
    if (!seqData.proteinSequence && seqData.proteinSequence !== "") {
      seqData.proteinSequence = seqData.sequence; //if there is no proteinSequence, assign seqData.sequence
    }
    if (
      !seqData.sequence ||
      seqData.sequence.length !== seqData.proteinSequence.length * 3
    ) {
      //if we don't have a sequence or it is clear that the DNA sequence doesn't match the proteinSequence, add a back translation
      needsBackTranslation = true;
    }
  }
  if (seqData.isRna) {
    //flip all t's to u's
    seqData.sequence = seqData.sequence.replace(/t/gi, "u");
  }
  if (!doNotRemoveInvalidChars) {
    if (seqData.isProtein) {
      const [newSeq] = filterSequenceString(seqData.proteinSequence, {
        ...(topLevelSeqData || seqData),
        isProtein: true
      });
      seqData.proteinSequence = newSeq;
    } else {
      const [newSeq] = filterSequenceString(seqData.sequence, {
        additionalValidChars,
        ...(topLevelSeqData || seqData)
      });
      seqData.sequence = newSeq;
    }
  }
  if (seqData.isProtein) {
    if (needsBackTranslation) {
      //backtranslate the AA sequence
      seqData.sequence = getDegenerateDnaStringFromAaString(
        seqData.proteinSequence
      );
    }
    seqData.aminoAcidDataForEachBaseOfDNA = getAminoAcidDataForEachBaseOfDna(
      seqData.proteinSequence,
      true,
      null,
      true
    );
  } else if (includeProteinSequence) {
    seqData.proteinSequence = getAminoAcidStringFromSequenceString(
      seqData.sequence
    );
  }

  // With `noSequence` the stored sizes are trusted; otherwise they are derived.
  seqData.size = seqData.noSequence ? seqData.size : seqData.sequence.length;
  seqData.proteinSize = seqData.noSequence
    ? seqData.proteinSize
    : seqData.proteinSequence.length;
  // Collapse the accepted encodings of "linear" to a real boolean.
  if (
    seqData.circular === ("false" as unknown) ||
    /* eslint-disable eqeqeq*/

    seqData.circular == (-1 as unknown) ||
    /* eslint-enable eqeqeq*/
    seqData.circular === false ||
    (!seqData.circular && seqData.sequenceTypeCode !== "CIRCULAR_DNA")
  ) {
    seqData.circular = false;
  } else {
    seqData.circular = true;
  }
  const featureTypes = getFeatureTypes();

  annotationTypes.forEach(annotationType => {
    if (!Array.isArray(seqData[annotationType])) {
      if (
        seqData[annotationType] &&
        typeof seqData[annotationType] === "object"
      ) {
        // An object keyed by id is flattened to an array of its values.
        seqData[annotationType] = Object.keys(
          seqData[annotationType] as object
        ).map(key => {
          return (seqData[annotationType] as Record<string, unknown>)[key];
        });
      } else {
        seqData[annotationType] = [];
      }
    }
    seqData[annotationType] = (seqData[annotationType] as Annotation[]).filter(
      annotation => {
        // tidyUpAnnotation returns false for invalid entries, which removes them.
        return tidyUpAnnotation(annotation, {
          ...options,
          featureTypes,
          sequenceData: seqData,
          convertAnnotationsFromAAIndices,
          mutative: true,
          annotationType,
          // Collect the notices here; without this they are dropped and
          // `logMessages` can never print anything.
          messages: response.messages
        });
      }
    );
  });

  if (!noTranslationData) {
    seqData.translations = flatMap(seqData.translations, translation => {
      if (noCdsTranslations && translation.translationType === "CDS Feature") {
        //filter off cds translations
        return [];
      }
      // notes.codon_start[0] is 1-based; a missing value yields NaN, which `|| 0` turns into 0.
      const codonStart =
        (
          (translation?.notes as Record<string, unknown>)?.[
            "codon_start"
          ] as number[]
        )?.[0] - 1 || 0;
      const expandedRange = expandOrContractRangeByLength(
        translation,
        -codonStart,
        true,
        seqData.sequence.length
      );
      if (!expandedRange.aminoAcids && !seqData.noSequence) {
        expandedRange.aminoAcids = getAminoAcidDataForEachBaseOfDna(
          seqData.sequence,
          expandedRange.forward || false,
          expandedRange,
          false
        );
      }
      return expandedRange;
    });
  }

  if (annotationsAsObjects) {
    annotationTypes.forEach(name => {
      seqData[name] = (seqData[name] as Annotation[]).reduce(
        (acc: Record<string, Annotation>, item: Annotation) => {
          let itemId;
          if (item.id || item.id === 0) {
            itemId = item.id;
          } else {
            itemId = shortid();
            if (!doNotProvideIdsForAnnotations) {
              item.id = itemId; //assign the newly created id to the item
            }
          }
          acc[itemId] = item;
          return acc;
        },
        {}
      );
    });
  }
  if (logMessages && response.messages.length > 0) {
    console.info("tidyUpSequenceData messages:", response.messages);
  }
  return seqData;
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
// import { Range } from "@teselagen/range-utils";
|
|
2
|
+
// Temp fix:
|
|
3
|
+
/**
 * Local stand-in for the `Range` type of `@teselagen/range-utils`.
 * Carries a `start`/`end` pair plus optional extras; unknown extra keys are
 * allowed through the index signature.
 */
export interface Range {
  /** Start index of the range. */
  start: number;
  /** End index of the range. */
  end: number;
  /** Optional sub-ranges. */
  locations?: Range[];
  type?: string;
  aminoAcids?: unknown[];
  overlapsSelf?: boolean;
  yOffset?: number;
  [key: string]: unknown;
}
|
|
13
|
+
|
|
14
|
+
/**
 * A `Range` with the descriptive fields shared by all annotation kinds.
 * All added members are optional; unknown extra keys are allowed.
 */
export interface Annotation extends Range {
  /** Identifier; both strings and numbers (including 0) are accepted. */
  id?: string | number;
  name?: string;
  type?: string;
  /** Strand as a boolean. */
  forward?: boolean;
  /** Strand as a number or string (e.g. `1`, `-1`, `"1"`, `"+"`). */
  strand?: number | string;
  color?: string;
  /** Either a parsed notes object or a (JSON) string. */
  notes?: Record<string, unknown> | string;
  translationType?: string;
  annotationTypePlural?: string;
  [key: string]: unknown;
}
|
|
26
|
+
|
|
27
|
+
/**
 * A sequence together with its metadata and annotation lists.
 * Only `sequence` is required; unknown extra keys are allowed.
 */
export interface SequenceData {
  id?: string;
  name?: string;
  description?: string;

  /** The sequence string itself. */
  sequence: string;
  proteinSequence?: string;
  size?: number;
  proteinSize?: number;
  /** When true, `size`/`proteinSize` are not derived from the sequence strings. */
  noSequence?: boolean;

  circular?: boolean;
  isProtein?: boolean;
  isRna?: boolean;
  sequenceTypeCode?: string;

  features?: Annotation[];
  parts?: Annotation[];
  primers?: Annotation[];
  translations?: Annotation[];
  orfs?: Annotation[];
  cutsites?: Annotation[];
  guides?: Annotation[];

  aminoAcidDataForEachBaseOfDNA?: unknown[];
  chromatogramData?: ChromatogramData;
  [key: string]: unknown;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Chromatogram arrays attached to a sequence. Element types are left as
 * `unknown`; unknown extra keys are allowed.
 */
export interface ChromatogramData {
  baseCalls?: unknown[];
  basePos?: unknown[];
  baseTraces?: unknown[];
  qualNums?: unknown[];
  [key: string]: unknown;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Description of a restriction enzyme: its name, recognition site, the
 * forward/reverse regex sources and optional cut offsets. Unknown extra
 * keys are allowed.
 */
export interface RestrictionEnzyme {
  name: string;
  site: string;
  forwardRegex: string;
  reverseRegex: string;
  /** 0 or 1 */
  cutType?: number;
  topSnipOffset?: number;
  bottomSnipOffset?: number;
  /** upstream forward */
  usForward?: number;
  usReverse?: number;
  [key: string]: unknown;
}
|
|
72
|
+
|
|
73
|
+
/**
 * An `Annotation` describing where a restriction enzyme cuts.
 * Unknown extra keys are allowed.
 */
export interface CutSite extends Annotation {
  /** The enzyme this cut site belongs to. */
  restrictionEnzyme: RestrictionEnzyme;
  recognitionSiteRange?: Range;

  topSnipPosition: number | null;
  bottomSnipPosition: number | null;
  topSnipBeforeBottom?: boolean;

  upstreamTopSnip?: number | null;
  upstreamBottomSnip?: number | null;
  upstreamTopBeforeBottom?: boolean;

  overhangSize: number;
  overhangBps?: string;
  isOverhangIncludedInFragmentSize?: boolean;

  cutType?: number;
  cutsTwice?: boolean;
  [key: string]: unknown;
}
|
|
89
|
+
|
|
90
|
+
/**
 * A `Range` describing one digest fragment, bounded by the cut sites `cut1`
 * and `cut2`. Unknown extra keys are allowed.
 */
export interface DigestFragment extends Range {
  id: string;
  name: string;
  size: number;
  cut1: CutSite;
  cut2: CutSite;
  madeFromOneCutsite?: boolean;
  isFormedFromLinearEnd?: boolean;
  /** Optional callback taking no arguments and returning nothing. */
  onFragmentSelect?: () => void;
  [key: string]: unknown;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Amino acid information for a single position. The three core members are
 * always present but may be `null`; the positional members are optional.
 */
export interface AminoAcidData {
  aminoAcid: { value: string } | null;
  aminoAcidIndex: number | null;
  fullCodon: boolean | null;
  codonRange?: { start: number; end: number } | null;
  positionInCodon?: number | null;
  sequenceIndex?: number | null;
}
|