@teselagen/sequence-utils 0.3.37 → 0.3.38-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DNAComplementMap.d.ts +1 -1
- package/addGapsToSeqReads.d.ts +16 -3
- package/adjustAnnotationsToInsert.d.ts +2 -1
- package/adjustBpsToReplaceOrInsert.d.ts +2 -1
- package/aliasedEnzymesByName.d.ts +37 -1
- package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
- package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
- package/annotateSingleSeq.d.ts +5 -4
- package/annotationTypes.d.ts +2 -2
- package/autoAnnotate.d.ts +17 -8
- package/bioData.d.ts +10 -58
- package/calculateEndStability.d.ts +1 -1
- package/calculateNebTa.d.ts +6 -1
- package/calculateNebTm.d.ts +6 -4
- package/calculatePercentGC.d.ts +1 -1
- package/calculateSantaLuciaTm.d.ts +28 -114
- package/calculateTm.d.ts +13 -1
- package/computeDigestFragments.d.ts +30 -24
- package/condensePairwiseAlignmentDifferences.d.ts +1 -1
- package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
- package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
- package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
- package/defaultEnzymesByName.d.ts +2 -1
- package/degenerateDnaToAminoAcidMap.d.ts +1 -1
- package/degenerateRnaToAminoAcidMap.d.ts +1 -1
- package/deleteSequenceDataAtRange.d.ts +2 -1
- package/diffUtils.d.ts +9 -7
- package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
- package/featureTypesAndColors.d.ts +19 -6
- package/filterSequenceString.d.ts +14 -10
- package/findApproxMatches.d.ts +7 -1
- package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
- package/findOrfsInPlasmid.d.ts +2 -11
- package/findSequenceMatches.d.ts +11 -1
- package/generateAnnotations.d.ts +2 -1
- package/generateSequenceData.d.ts +8 -13
- package/getAllInsertionsInSeqReads.d.ts +11 -1
- package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
- package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
- package/getAminoAcidStringFromSequenceString.d.ts +3 -1
- package/getCodonRangeForAASliver.d.ts +3 -4
- package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getComplementSequenceAndAnnotations.d.ts +5 -1
- package/getComplementSequenceString.d.ts +1 -1
- package/getCutsiteType.d.ts +2 -1
- package/getCutsitesFromSequence.d.ts +2 -1
- package/getDegenerateDnaStringFromAAString.d.ts +1 -1
- package/getDegenerateRnaStringFromAAString.d.ts +1 -1
- package/getDigestFragmentsForCutsites.d.ts +4 -1
- package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
- package/getInsertBetweenVals.d.ts +2 -1
- package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
- package/getOrfsFromSequence.d.ts +17 -11
- package/getOverlapBetweenTwoSequences.d.ts +2 -1
- package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
- package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAnnotation.d.ts +11 -1
- package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
- package/getReverseComplementSequenceString.d.ts +1 -1
- package/getReverseSequenceString.d.ts +1 -1
- package/getSequenceDataBetweenRange.d.ts +9 -1
- package/getVirtualDigest.d.ts +11 -10
- package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
- package/index.cjs +732 -483
- package/index.d.ts +8 -5
- package/index.js +732 -483
- package/index.umd.cjs +732 -483
- package/insertGapsIntoRefSeq.d.ts +2 -1
- package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
- package/isEnzymeType2S.d.ts +2 -1
- package/mapAnnotationsToRows.d.ts +9 -1
- package/package.json +9 -6
- package/prepareCircularViewData.d.ts +2 -1
- package/prepareRowData.d.ts +7 -3
- package/proteinAlphabet.d.ts +1 -1
- package/rotateBpsToPosition.d.ts +1 -1
- package/rotateSequenceDataToPosition.d.ts +3 -1
- package/shiftAnnotationsByLen.d.ts +4 -3
- package/src/DNAComplementMap.ts +32 -0
- package/src/addGapsToSeqReads.ts +436 -0
- package/src/adjustAnnotationsToInsert.ts +20 -0
- package/src/adjustBpsToReplaceOrInsert.ts +73 -0
- package/src/aliasedEnzymesByName.ts +7366 -0
- package/src/aminoAcidToDegenerateDnaMap.ts +32 -0
- package/src/aminoAcidToDegenerateRnaMap.ts +32 -0
- package/src/annotateSingleSeq.ts +37 -0
- package/src/annotationTypes.ts +23 -0
- package/src/autoAnnotate.test.js +0 -1
- package/src/autoAnnotate.ts +290 -0
- package/src/bioData.ts +65 -0
- package/src/calculateEndStability.ts +91 -0
- package/src/calculateNebTa.ts +46 -0
- package/src/calculateNebTm.ts +132 -0
- package/src/calculatePercentGC.ts +3 -0
- package/src/calculateSantaLuciaTm.ts +184 -0
- package/src/calculateTm.ts +242 -0
- package/src/computeDigestFragments.ts +238 -0
- package/src/condensePairwiseAlignmentDifferences.ts +85 -0
- package/src/convertAACaretPositionOrRangeToDna.ts +28 -0
- package/src/convertDnaCaretPositionOrRangeToAA.ts +28 -0
- package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
- package/src/defaultEnzymesByName.ts +280 -0
- package/src/degenerateDnaToAminoAcidMap.ts +5 -0
- package/src/degenerateRnaToAminoAcidMap.ts +5 -0
- package/src/deleteSequenceDataAtRange.ts +13 -0
- package/src/diffUtils.ts +80 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
- package/src/featureTypesAndColors.ts +167 -0
- package/src/filterSequenceString.ts +153 -0
- package/src/findApproxMatches.ts +58 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.ts +43 -0
- package/src/findOrfsInPlasmid.js +6 -1
- package/src/findOrfsInPlasmid.ts +31 -0
- package/src/findSequenceMatches.ts +154 -0
- package/src/generateAnnotations.ts +39 -0
- package/src/generateSequenceData.ts +212 -0
- package/src/getAllInsertionsInSeqReads.ts +100 -0
- package/src/getAminoAcidDataForEachBaseOfDna.ts +305 -0
- package/src/getAminoAcidFromSequenceTriplet.ts +27 -0
- package/src/getAminoAcidStringFromSequenceString.ts +36 -0
- package/src/getCodonRangeForAASliver.ts +73 -0
- package/src/getComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getComplementSequenceAndAnnotations.ts +25 -0
- package/src/getComplementSequenceString.ts +23 -0
- package/src/getCutsiteType.ts +18 -0
- package/src/getCutsitesFromSequence.ts +22 -0
- package/src/getDegenerateDnaStringFromAAString.ts +15 -0
- package/src/getDegenerateRnaStringFromAAString.ts +15 -0
- package/src/getDigestFragmentsForCutsites.ts +126 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
- package/src/getInsertBetweenVals.ts +31 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +40 -0
- package/src/getMassOfAaString.ts +29 -0
- package/src/getOrfsFromSequence.ts +132 -0
- package/src/getOverlapBetweenTwoSequences.ts +30 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.ts +149 -0
- package/src/getReverseAminoAcidStringFromSequenceString.ts +22 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getReverseComplementAnnotation.ts +33 -0
- package/src/getReverseComplementSequenceAndAnnotations.ts +46 -0
- package/src/getReverseComplementSequenceString.ts +18 -0
- package/src/getReverseSequenceString.ts +12 -0
- package/src/getSequenceDataBetweenRange.ts +154 -0
- package/src/getVirtualDigest.ts +139 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.ts +39 -0
- package/src/index.test.ts +43 -0
- package/src/index.ts +111 -0
- package/src/insertGapsIntoRefSeq.ts +43 -0
- package/src/insertSequenceDataAtPosition.ts +2 -0
- package/src/insertSequenceDataAtPositionOrRange.ts +328 -0
- package/src/isEnzymeType2S.ts +5 -0
- package/src/mapAnnotationsToRows.ts +256 -0
- package/src/prepareCircularViewData.ts +24 -0
- package/src/prepareRowData.ts +61 -0
- package/src/prepareRowData_output1.json +1 -0
- package/src/proteinAlphabet.ts +271 -0
- package/src/rotateBpsToPosition.ts +12 -0
- package/src/rotateSequenceDataToPosition.ts +54 -0
- package/src/shiftAnnotationsByLen.ts +24 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.ts +198 -0
- package/src/tidyUpAnnotation.ts +205 -0
- package/src/tidyUpSequenceData.ts +213 -0
- package/src/types.ts +109 -0
- package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
- package/tidyUpAnnotation.d.ts +13 -11
- package/tidyUpSequenceData.d.ts +15 -1
- package/types.d.ts +105 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
2
|
+
import getReverseComplementAnnotation from "./getReverseComplementAnnotation";
|
|
3
|
+
import { annotationTypes } from "./annotationTypes";
|
|
4
|
+
import { map } from "lodash-es";
|
|
5
|
+
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
6
|
+
|
|
7
|
+
import getSequenceDataBetweenRange from "./getSequenceDataBetweenRange";
|
|
8
|
+
import { SequenceData, Range, Annotation } from "./types";
|
|
9
|
+
|
|
10
|
+
// ac.throw([ac.string,ac.bool],arguments);
|
|
11
|
+
/**
 * Produces the reverse complement of a sequence record together with its
 * annotations.
 *
 * The input is first cut down to `options.range` (when one is supplied) and
 * tidied. The bases are then reverse-complemented and every annotation of every
 * known annotation type is passed through `getReverseComplementAnnotation`
 * using the length of the tidied sequence. The merged result is tidied once
 * more before being returned.
 *
 * @param pSeqObj - Sequence record to reverse-complement.
 * @param options - Optional `range` restricting the operation; the whole object
 *   (with `doNotRemoveInvalidChars: true` as a default) is also forwarded to
 *   `tidyUpSequenceData`.
 * @returns The tidied, reverse-complemented sequence record.
 */
export default function getReverseComplementSequenceAndAnnoations(
  pSeqObj: SequenceData,
  options: { range?: Range; [key: string]: unknown } = {}
): SequenceData {
  const tidied = tidyUpSequenceData(
    getSequenceDataBetweenRange(pSeqObj, options.range || null),
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    { doNotRemoveInvalidChars: true, ...options } as any
  );

  const reversedBases = getReverseComplementSequenceString(tidied.sequence);

  // Flip each annotation list that is present on the tidied record.
  const flippedByType: Record<string, Annotation[]> = {};
  annotationTypes.forEach(type => {
    if (!tidied[type]) {
      return;
    }
    flippedByType[type] = map(tidied[type] as Annotation[], annotation =>
      getReverseComplementAnnotation(annotation, tidied.sequence.length)
    );
  });

  const newSeqObj = Object.assign(
    {},
    tidied,
    { sequence: reversedBases },
    flippedByType
  );

  return tidyUpSequenceData(newSeqObj, {
    doNotRemoveInvalidChars: true,
    ...options
  });
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import DNAComplementMap from "./DNAComplementMap";
|
|
2
|
+
|
|
3
|
+
// ac.throw([ac.string,ac.bool],arguments);
|
|
4
|
+
/**
 * Returns the reverse complement of `sequence`.
 *
 * Characters are read from the last index to the first and each one is
 * replaced by its entry in `DNAComplementMap`. A character with no (truthy)
 * entry in the map is copied through unchanged rather than raising an error.
 *
 * @param sequence - The string to reverse-complement.
 * @returns The reverse-complemented string.
 */
export default function getReverseComplementSequenceString(
  sequence: string
): string {
  const pieces: string[] = [];
  for (let idx = sequence.length; idx-- > 0; ) {
    const base = sequence[idx];
    const complement = (DNAComplementMap as Record<string, string>)[base];
    // Unknown characters fall back to themselves.
    pieces.push(complement ? complement : base);
  }
  return pieces.join("");
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * Returns `sequence` with its characters in reverse order.
 *
 * The reversal works on UTF-16 code units (the same units that index access on
 * a string yields), so the output has the same length as the input.
 *
 * @param sequence - The string to reverse.
 * @returns The reversed string; an empty input yields an empty string.
 */
export default function getReverseSequenceString(sequence: string): string {
  // The previous implementation carried a branch that re-assigned a character
  // to itself when it was falsy; that branch could never change the result and
  // has been removed.
  return sequence.split("").reverse().join("");
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { flatMap, extend, forEach, startCase } from "lodash-es";
|
|
2
|
+
import { getRangeLength, Range } from "@teselagen/range-utils";
|
|
3
|
+
import convertDnaCaretPositionOrRangeToAa from "./convertDnaCaretPositionOrRangeToAA";
|
|
4
|
+
import insertSequenceDataAtPosition from "./insertSequenceDataAtPosition";
|
|
5
|
+
import {
|
|
6
|
+
getSequenceWithinRange,
|
|
7
|
+
getZeroedRangeOverlaps
|
|
8
|
+
} from "@teselagen/range-utils";
|
|
9
|
+
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
10
|
+
import { annotationTypes } from "./annotationTypes";
|
|
11
|
+
import { Annotation, SequenceData } from "./types";
|
|
12
|
+
|
|
13
|
+
/**
 * Options accepted by `getSequenceDataBetweenRange`.
 */
interface GetSequenceDataBetweenRangeOptions {
  /** Annotation types (keyed by type name) whose annotations should be dropped entirely. */
  exclude?: Record<string, boolean>;
  /** Annotation types (keyed by type name) for which partially covered annotations should be dropped. */
  excludePartial?: Record<string, boolean>;
  /** Any further keys are accepted and passed along with the rest of the options. */
  [key: string]: unknown;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Extracts the part of a sequence record that lies inside `range`.
 *
 * - A falsy `range` returns `seqData` untouched.
 * - The record is tidied, and any `filtered<Type>` keys (one per annotation
 *   type) are removed from the tidied copy.
 * - `circular` is carried over only when the tidied sequence is exactly as long
 *   as the range; otherwise the result is marked non-circular.
 * - `sequence` and `proteinSequence` are sliced to the range (the protein slice
 *   uses the range converted by `convertDnaCaretPositionOrRangeToAa`).
 * - Each annotation type is either emptied (`options.exclude[type]`) or trimmed
 *   to the range via `getAnnotationsBetweenRange`.
 * - When `range.overlapsSelf` is set, the extracted bases are inserted into the
 *   tidied record at `range.start` and the function recurses over that
 *   extended record; annotations that start at 0, still overlap themselves and
 *   span the full range length are then flattened to end at the last base.
 *
 * @param seqData - Source sequence record.
 * @param range - Range to extract, or `null` to return the input as is.
 * @param options - `exclude` / `excludePartial` maps; the whole object is also
 *   forwarded to `tidyUpSequenceData`.
 * @returns The tidied sequence record restricted to `range`.
 */
export default function getSequenceDataBetweenRange(
  seqData: SequenceData,
  range: Range | null,
  options: GetSequenceDataBetweenRangeOptions = {}
): SequenceData {
  if (!range) return seqData;

  const { exclude = {}, excludePartial = {} } = options;
  const seqDataToUse = tidyUpSequenceData(seqData, {
    doNotRemoveInvalidChars: true,
    ...options
  });

  // Drop the derived "filteredXxx" collections from the working copy.
  annotationTypes.forEach(annotationType => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    delete (seqDataToUse as any)[`filtered${startCase(annotationType)}`];
  });

  const rangeCoversWholeSequence =
    seqDataToUse.sequence.length ===
    getRangeLength(range, seqData.sequence.length);
  const circular = rangeCoversWholeSequence ? seqDataToUse.circular : false;
  const sequence = getSequenceWithinRange(range, seqDataToUse.sequence);
  const proteinSequence = getSequenceWithinRange(
    convertDnaCaretPositionOrRangeToAa(range) as Range,
    seqDataToUse.proteinSequence || ""
  );

  const annotationsInRange: Record<string, Annotation[]> = {};
  annotationTypes.forEach(annotationType => {
    if (exclude[annotationType]) {
      // Caller is not interested in this annotation type at all.
      annotationsInRange[annotationType] = [];
      return;
    }
    annotationsInRange[annotationType] = getAnnotationsBetweenRange(
      seqDataToUse[annotationType] as Annotation[],
      range,
      seqDataToUse.sequence.length,
      excludePartial[annotationType]
    );
  });

  const seqDataToReturn = extend(
    {},
    seqDataToUse,
    { circular, sequence, proteinSequence },
    annotationsInRange
  );

  if (!range.overlapsSelf) {
    return tidyUpSequenceData(seqDataToReturn, {
      doNotRemoveInvalidChars: true,
      ...options
    });
  }

  // Self-overlapping range: splice the extracted bases into the working copy
  // and re-extract from the extended record.
  const extendedSeqData = insertSequenceDataAtPosition(
    // Wrapped in an object, as the insert helper is assumed to take sequence data.
    { sequence: (seqDataToReturn as SequenceData).sequence },
    seqDataToUse,
    range.start
  );
  const wrappedRange = {
    start: range.end + 1,
    end: range.end
  };
  const toRet = getSequenceDataBetweenRange(
    extendedSeqData,
    wrappedRange,
    options
  );

  // Annotations that still claim to overlap themselves but match the range
  // completely are adjusted so that they no longer overlap themselves.
  annotationTypes.forEach(annotationType => {
    forEach(toRet[annotationType] as Annotation[], ann => {
      const matchesWholeRange =
        ann.overlapsSelf &&
        ann.start === 0 &&
        getRangeLength(ann, seqDataToUse.sequence.length) ===
          getRangeLength(range, seqDataToUse.sequence.length);
      if (matchesWholeRange) {
        ann.overlapsSelf = false;
        ann.end = toRet.sequence.length - 1;
      }
    });
  });

  return tidyUpSequenceData(toRet, {
    doNotRemoveInvalidChars: true,
    ...options
  });
}
|
|
106
|
+
|
|
107
|
+
/**
 * Trims a list of annotations to the parts that overlap `range`.
 *
 * For each annotation, its `locations` (when non-empty) are trimmed first by a
 * recursive call and written back onto the annotation. The annotation is then
 * intersected with the range via `getZeroedRangeOverlaps`, producing one copy
 * of the annotation per overlap. With `shouldExcludePartial`, an annotation is
 * dropped when it yields more than one overlap or when its single overlap is
 * shorter or longer than the annotation itself.
 *
 * Afterwards, every surviving annotation that still has locations gets its
 * `start`/`end` reset to the first location's start and the last location's
 * end; a lone remaining location is removed from the annotation.
 *
 * @param annotationsToBeAdjusted - Annotations (or locations) to trim.
 * @param range - Range to intersect with.
 * @param maxLength - Sequence length used for range arithmetic.
 * @param shouldExcludePartial - Drop annotations that are only partly covered.
 * @returns The trimmed annotation copies.
 */
function getAnnotationsBetweenRange(
  annotationsToBeAdjusted: Annotation[],
  range: Range,
  maxLength: number,
  shouldExcludePartial?: boolean
): Annotation[] {
  const trimmed = flatMap(annotationsToBeAdjusted, annotation => {
    if (annotation.locations && annotation.locations.length) {
      annotation.locations = getAnnotationsBetweenRange(
        annotation.locations,
        range,
        maxLength,
        shouldExcludePartial
      );
    }

    // One or more overlaps may come back; each becomes its own annotation copy.
    const overlaps = getZeroedRangeOverlaps(annotation, range, maxLength).map(
      overlap => extend({}, annotation, overlap)
    );

    if (!shouldExcludePartial) {
      return overlaps;
    }
    if (overlaps.length > 1) {
      // Several overlaps means the annotation was split, i.e. it is partial.
      return [];
    }
    const onlyOverlap = overlaps[0];
    if (
      onlyOverlap &&
      getRangeLength(onlyOverlap, maxLength) !==
        getRangeLength(annotation, maxLength)
    ) {
      // A single overlap of a different length is a partial copy as well.
      return [];
    }
    return overlaps;
  });

  // Re-derive start/end from the trimmed locations.
  return trimmed.map(annotation => {
    const locations = annotation.locations;
    if (locations && locations.length) {
      annotation.start = locations[0].start;
      annotation.end = locations[locations.length - 1].end;

      if (locations.length === 1) delete annotation.locations;
    }
    return annotation;
  });
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
//UNDER CONSTRUCTION
|
|
2
|
+
|
|
3
|
+
import { get } from "lodash-es";
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
normalizePositionByRangeLength,
|
|
7
|
+
getRangeLength
|
|
8
|
+
} from "@teselagen/range-utils";
|
|
9
|
+
import { CutSite, DigestFragment } from "./types";
|
|
10
|
+
|
|
11
|
+
/**
 * Computes the fragments produced by cutting a sequence at the given cut sites.
 *
 * Cut sites are ordered by `topSnipPosition` (missing positions count as 0) and
 * paired up:
 * - unless `computeDigestDisabled` is set, each cut site is paired with the
 *   next one in order, the last one wrapping around to the first;
 * - when `computePartialDigest` is set and `computePartialDigestDisabled` is
 *   not, each cut site is additionally paired with every cut site.
 *
 * Each pair becomes a fragment from the first cut's position to one base before
 * the second cut's position. On a non-circular sequence, a fragment whose start
 * lies after its end is split into two pieces bounded by mock "End Of Seq" and
 * "Start Of Seq" cut sites. Fragments with a falsy `size` are reported in
 * `overlappingEnzymes` instead of `fragments`.
 *
 * The `cutsites` array passed in is not modified.
 *
 * @returns The two `...Disabled` flags echoed back, the `fragments`, and the
 *   zero-size fragments as `overlappingEnzymes`.
 */
export default function getVirtualDigest({
  cutsites,
  sequenceLength,
  isCircular,
  computePartialDigest,
  computePartialDigestDisabled,
  computeDigestDisabled
}: {
  cutsites: CutSite[];
  sequenceLength: number;
  isCircular: boolean;
  computePartialDigest?: boolean;
  computePartialDigestDisabled?: boolean;
  computeDigestDisabled?: boolean;
}) {
  const overlappingEnzymes: DigestFragment[] = [];
  const pairs: CutSite[][] = [];

  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the array owned by the caller.
  const sortedCutsites = [...cutsites].sort((a, b) => {
    return (a.topSnipPosition || 0) - (b.topSnipPosition || 0);
  });

  sortedCutsites.forEach((cutsite1, index) => {
    if (computePartialDigest && !computePartialDigestDisabled) {
      // Every combination is considered, including a cut site with itself.
      sortedCutsites.forEach(cutsite2 => {
        pairs.push([cutsite1, cutsite2]);
      });
    }
    if (!computeDigestDisabled) {
      const nextCutsite = sortedCutsites[index + 1];
      pairs.push([cutsite1, nextCutsite ? nextCutsite : sortedCutsites[0]]);
    }
  });

  const allFragments: DigestFragment[] = [];
  pairs.forEach(([cut1, cut2]) => {
    const start = normalizePositionByRangeLength(
      cut1.topSnipPosition || 0,
      sequenceLength
    );
    const end = normalizePositionByRangeLength(
      (cut2.topSnipPosition || 0) - 1,
      sequenceLength
    );

    if (!isCircular && start > end) {
      // The fragment would span the origin of a linear sequence, so it is
      // split into a piece up to the end and a piece from the start.
      const frag1 = {
        start: start,
        end: sequenceLength - 1,
        cut1,
        cut2: {
          type: "endOfSeq",
          restrictionEnzyme: {
            name: "End Of Seq"
          }
        } as unknown as CutSite // mock cut site marking the sequence end
      };
      const frag2 = {
        start: 0,
        end: end,
        cut1: {
          type: "startOfSeq",
          restrictionEnzyme: {
            name: "Start Of Seq"
          }
        } as unknown as CutSite, // mock cut site marking the sequence start
        cut2: cut2
      };

      allFragments.push(addSizeIdName(frag1, sequenceLength));
      allFragments.push(addSizeIdName(frag2, sequenceLength));
    } else {
      allFragments.push(
        addSizeIdName({ cut1, cut2, start, end }, sequenceLength)
      );
    }
  });

  // Zero-size fragments are reported separately as overlapping enzymes.
  const fragments: DigestFragment[] = [];
  allFragments.forEach(fragment => {
    if (!fragment.size) {
      overlappingEnzymes.push(fragment);
    } else {
      fragments.push(fragment);
    }
  });

  return {
    computePartialDigestDisabled,
    computeDigestDisabled,
    fragments,
    overlappingEnzymes
  };
}
|
|
114
|
+
|
|
115
|
+
/**
 * Decorates a raw fragment with its `size`, a display `name` and an `id`.
 *
 * - `size` is the length of the fragment's start/end range on a sequence of
 *   `sequenceLength` bases.
 * - `name` combines the restriction enzyme names of both cuts (falling back to
 *   "Untitled Cutsite") with the size.
 * - `id` is built from start, end and size, each followed by a dash.
 *
 * @param frag - Fragment bounds and the two cut sites delimiting it.
 * @param sequenceLength - Length of the sequence the fragment belongs to.
 * @returns The fragment with `size`, `name` and `id` added.
 */
function addSizeIdName(
  frag: { start: number; end: number; cut1: CutSite; cut2: CutSite },
  sequenceLength: number
): DigestFragment {
  const { start, end } = frag;
  const size = getRangeLength({ start, end }, sequenceLength);

  const firstEnzymeName = get(
    frag,
    "cut1.restrictionEnzyme.name",
    "Untitled Cutsite"
  );
  const secondEnzymeName = get(
    frag,
    "cut2.restrictionEnzyme.name",
    "Untitled Cutsite"
  );
  const name = `${firstEnzymeName} -- ${secondEnzymeName} ${size} bps`;
  const id = `${start}-${end}-${size}-`;

  return { ...frag, size, name, id };
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { ambiguous_dna_letters } from "./bioData";
|
|
2
|
+
|
|
3
|
+
/**
 * Guesses whether `seq` is a nucleotide sequence rather than a protein.
 *
 * The sequence is treated as DNA when the fraction of its characters that are
 * DNA letters (compared case-insensitively) is strictly greater than
 * `threshold`. An empty or missing sequence is reported as DNA.
 *
 * The DNA alphabet is chosen as follows:
 * 1. `options.dnaLetters` when supplied (for instance to allow `N`);
 * 2. otherwise, with `options.loose`, the ambiguous DNA letters plus `U`;
 * 3. otherwise `G`, `A`, `T`, `C` and `U`.
 *
 * @param seq - Sequence to classify.
 * @param options - `threshold` (default 0.9), `loose` and `dnaLetters`.
 * @returns `true` for DNA, `false` for protein.
 */
export default function guessIfSequenceIsDnaAndNotProtein(
  seq: string,
  options: { threshold?: number; loose?: boolean; dnaLetters?: string[] } = {}
): boolean {
  const { threshold = 0.9, loose } = options;
  if (!seq || !seq.length) return true;

  // The parentheses matter: without them `a || b ? x : y` is read as
  // `(a || b) ? x : y`, which discarded a caller-supplied alphabet.
  const dnaLetters =
    options.dnaLetters ||
    (loose
      ? [...ambiguous_dna_letters.split(""), "U"]
      : ["G", "A", "T", "C", "U"]);

  const dnaLetterMap = dnaLetters.reduce(
    (acc, letter) => {
      acc[letter.toUpperCase()] = true;
      return acc;
    },
    {} as Record<string, boolean>
  );

  let count = 0;
  for (let index = 0; index < seq.length; index++) {
    if (dnaLetterMap[seq[index].toUpperCase()]) {
      count += 1;
    }
  }

  return count / seq.length > threshold;
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import * as src from ".";
|
|
2
|
+
import fs from "fs";
|
|
3
|
+
|
|
4
|
+
// Guards against adding a source file without exporting it from the index:
// every file in this directory must either be exported under its own name or
// be listed in the ignore markers below.
describe("index.js", () => {
  it(`should export all functions defined`, () => {
    // Files whose name contains one of these markers are deliberately skipped.
    const ignoredMarkers = [
      ".test.ts",
      ".test.js",
      "index.ts",
      "index.js",
      "prepareRowData_output1.json",
      "featureTypesAndColors",
      "diffUtils",
      "types.ts",
      "shims.d.ts"
    ];

    // Read synchronously so that a directory read error, or the failure thrown
    // below, is raised inside the test body and reported as a test failure
    // instead of escaping from an asynchronous callback.
    const files = fs.readdirSync(__dirname);

    let passes = true;
    files.forEach(file => {
      if (ignoredMarkers.some(marker => file.indexOf(marker) > -1)) {
        return;
      }
      // eslint-disable-next-line @typescript-eslint/ban-ts-comment
      // @ts-ignore
      const funcOrObj = src[file.replace(".ts", "").replace(".js", "")];
      if (!funcOrObj) {
        console.info(
          `Uh oh, it looks like you forgot to export (or explicitly ignore) this file:`,
          file
        );
        passes = false;
      }
    });

    if (!passes) {
      throw new Error(
        "Please make sure to export (or ignore) each file! Update index.ts to export the file"
      );
    }
  });
});
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import {
|
|
2
|
+
autoAnnotate,
|
|
3
|
+
convertApELikeRegexToRegex,
|
|
4
|
+
convertProteinSeqToDNAIupac
|
|
5
|
+
} from "./autoAnnotate";
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
genbankFeatureTypes,
|
|
9
|
+
getFeatureToColorMap,
|
|
10
|
+
getFeatureTypes,
|
|
11
|
+
getMergedFeatureMap
|
|
12
|
+
} from "./featureTypesAndColors";
|
|
13
|
+
|
|
14
|
+
export * from "./computeDigestFragments";
|
|
15
|
+
export * from "./diffUtils";
|
|
16
|
+
export * from "./annotationTypes";
|
|
17
|
+
|
|
18
|
+
/* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
|
|
19
|
+
//tnr: these are deprecated exports and should no longer be used!
|
|
20
|
+
const FeatureTypes = getFeatureTypes();
|
|
21
|
+
const featureColors = getFeatureToColorMap();
|
|
22
|
+
export {
|
|
23
|
+
getFeatureToColorMap,
|
|
24
|
+
getFeatureTypes,
|
|
25
|
+
genbankFeatureTypes,
|
|
26
|
+
getMergedFeatureMap,
|
|
27
|
+
FeatureTypes,
|
|
28
|
+
featureColors
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
/* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
|
|
32
|
+
export { autoAnnotate };
|
|
33
|
+
export { convertApELikeRegexToRegex };
|
|
34
|
+
export { convertProteinSeqToDNAIupac };
|
|
35
|
+
export * as bioData from "./bioData";
|
|
36
|
+
export { default as getAllInsertionsInSeqReads } from "./getAllInsertionsInSeqReads";
|
|
37
|
+
export { default as annotateSingleSeq } from "./annotateSingleSeq";
|
|
38
|
+
export { default as getDegenerateDnaStringFromAAString } from "./getDegenerateDnaStringFromAAString";
|
|
39
|
+
export { default as getDegenerateRnaStringFromAAString } from "./getDegenerateRnaStringFromAAString";
|
|
40
|
+
export { default as getVirtualDigest } from "./getVirtualDigest";
|
|
41
|
+
export { default as isEnzymeType2S } from "./isEnzymeType2S";
|
|
42
|
+
export { default as insertGapsIntoRefSeq } from "./insertGapsIntoRefSeq";
|
|
43
|
+
export { default as findApproxMatches } from "./findApproxMatches";
|
|
44
|
+
export { default as adjustBpsToReplaceOrInsert } from "./adjustBpsToReplaceOrInsert";
|
|
45
|
+
export { default as calculatePercentGC } from "./calculatePercentGC";
|
|
46
|
+
export { default as calculateTm } from "./calculateTm";
|
|
47
|
+
export { default as cutSequenceByRestrictionEnzyme } from "./cutSequenceByRestrictionEnzyme";
|
|
48
|
+
export { default as deleteSequenceDataAtRange } from "./deleteSequenceDataAtRange";
|
|
49
|
+
export { default as DNAComplementMap } from "./DNAComplementMap";
|
|
50
|
+
export { default as doesEnzymeChopOutsideOfRecognitionSite } from "./doesEnzymeChopOutsideOfRecognitionSite";
|
|
51
|
+
export { default as aliasedEnzymesByName } from "./aliasedEnzymesByName";
|
|
52
|
+
export { default as defaultEnzymesByName } from "./defaultEnzymesByName";
|
|
53
|
+
export { default as generateSequenceData } from "./generateSequenceData";
|
|
54
|
+
export { default as generateAnnotations } from "./generateAnnotations";
|
|
55
|
+
export {
|
|
56
|
+
default as filterSequenceString,
|
|
57
|
+
filterRnaString
|
|
58
|
+
} from "./filterSequenceString";
|
|
59
|
+
export { default as findNearestRangeOfSequenceOverlapToPosition } from "./findNearestRangeOfSequenceOverlapToPosition";
|
|
60
|
+
export { default as findOrfsInPlasmid } from "./findOrfsInPlasmid";
|
|
61
|
+
export { default as findSequenceMatches } from "./findSequenceMatches";
|
|
62
|
+
export { default as getAminoAcidDataForEachBaseOfDna } from "./getAminoAcidDataForEachBaseOfDna";
|
|
63
|
+
export { default as getAminoAcidFromSequenceTriplet } from "./getAminoAcidFromSequenceTriplet";
|
|
64
|
+
export { default as getAminoAcidStringFromSequenceString } from "./getAminoAcidStringFromSequenceString";
|
|
65
|
+
export { default as getCodonRangeForAASliver } from "./getCodonRangeForAASliver";
|
|
66
|
+
export { default as getComplementAminoAcidStringFromSequenceString } from "./getComplementAminoAcidStringFromSequenceString";
|
|
67
|
+
export { default as getComplementSequenceAndAnnotations } from "./getComplementSequenceAndAnnotations";
|
|
68
|
+
export { default as getComplementSequenceString } from "./getComplementSequenceString";
|
|
69
|
+
export { default as getCutsitesFromSequence } from "./getCutsitesFromSequence";
|
|
70
|
+
export { default as getCutsiteType } from "./getCutsiteType";
|
|
71
|
+
export { default as getInsertBetweenVals } from "./getInsertBetweenVals";
|
|
72
|
+
export { default as getLeftAndRightOfSequenceInRangeGivenPosition } from "./getLeftAndRightOfSequenceInRangeGivenPosition";
|
|
73
|
+
export { default as getOrfsFromSequence } from "./getOrfsFromSequence";
|
|
74
|
+
export { default as getOverlapBetweenTwoSequences } from "./getOverlapBetweenTwoSequences";
|
|
75
|
+
export { default as getPossiblePartsFromSequenceAndEnzymes } from "./getPossiblePartsFromSequenceAndEnzymes";
|
|
76
|
+
export { default as getReverseAminoAcidStringFromSequenceString } from "./getReverseAminoAcidStringFromSequenceString";
|
|
77
|
+
export { default as getReverseComplementAminoAcidStringFromSequenceString } from "./getReverseComplementAminoAcidStringFromSequenceString";
|
|
78
|
+
export { default as getReverseComplementAnnotation } from "./getReverseComplementAnnotation";
|
|
79
|
+
export { default as getReverseComplementSequenceAndAnnotations } from "./getReverseComplementSequenceAndAnnotations";
|
|
80
|
+
export { default as getReverseComplementSequenceString } from "./getReverseComplementSequenceString";
|
|
81
|
+
export { default as getReverseSequenceString } from "./getReverseSequenceString";
|
|
82
|
+
export { default as getSequenceDataBetweenRange } from "./getSequenceDataBetweenRange";
|
|
83
|
+
export { default as guessIfSequenceIsDnaAndNotProtein } from "./guessIfSequenceIsDnaAndNotProtein";
|
|
84
|
+
export { default as insertSequenceDataAtPosition } from "./insertSequenceDataAtPosition";
|
|
85
|
+
export { default as insertSequenceDataAtPositionOrRange } from "./insertSequenceDataAtPositionOrRange";
|
|
86
|
+
export { default as mapAnnotationsToRows } from "./mapAnnotationsToRows";
|
|
87
|
+
export { default as prepareCircularViewData } from "./prepareCircularViewData";
|
|
88
|
+
export { default as prepareRowData } from "./prepareRowData";
|
|
89
|
+
export { default as proteinAlphabet } from "./proteinAlphabet";
|
|
90
|
+
export { default as rotateSequenceDataToPosition } from "./rotateSequenceDataToPosition";
|
|
91
|
+
export { default as rotateBpsToPosition } from "./rotateBpsToPosition";
|
|
92
|
+
export { default as threeLetterSequenceStringToAminoAcidMap } from "./threeLetterSequenceStringToAminoAcidMap";
|
|
93
|
+
export { default as tidyUpSequenceData } from "./tidyUpSequenceData";
|
|
94
|
+
export { default as tidyUpAnnotation } from "./tidyUpAnnotation";
|
|
95
|
+
export { default as condensePairwiseAlignmentDifferences } from "./condensePairwiseAlignmentDifferences";
|
|
96
|
+
export { default as addGapsToSeqReads } from "./addGapsToSeqReads";
|
|
97
|
+
export { default as calculateNebTm } from "./calculateNebTm";
|
|
98
|
+
export { default as calculateNebTa } from "./calculateNebTa";
|
|
99
|
+
export { default as calculateSantaLuciaTm } from "./calculateSantaLuciaTm";
|
|
100
|
+
export { default as calculateEndStability } from "./calculateEndStability";
|
|
101
|
+
export { default as getDigestFragmentsForCutsites } from "./getDigestFragmentsForCutsites";
|
|
102
|
+
export { default as getDigestFragmentsForRestrictionEnzymes } from "./getDigestFragmentsForRestrictionEnzymes";
|
|
103
|
+
export { default as convertDnaCaretPositionOrRangeToAA } from "./convertDnaCaretPositionOrRangeToAA";
|
|
104
|
+
export { default as convertAACaretPositionOrRangeToDna } from "./convertAACaretPositionOrRangeToDna";
|
|
105
|
+
export { default as aminoAcidToDegenerateDnaMap } from "./aminoAcidToDegenerateDnaMap";
|
|
106
|
+
export { default as aminoAcidToDegenerateRnaMap } from "./aminoAcidToDegenerateRnaMap";
|
|
107
|
+
export { default as degenerateDnaToAminoAcidMap } from "./degenerateDnaToAminoAcidMap";
|
|
108
|
+
export { default as degenerateRnaToAminoAcidMap } from "./degenerateRnaToAminoAcidMap";
|
|
109
|
+
export { default as getMassOfAaString } from "./getMassOfAaString";
|
|
110
|
+
export { default as shiftAnnotationsByLen } from "./shiftAnnotationsByLen";
|
|
111
|
+
export { default as adjustAnnotationsToInsert } from "./adjustAnnotationsToInsert";
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import getAllInsertionsInSeqReads, {
|
|
2
|
+
SeqRead
|
|
3
|
+
} from "./getAllInsertionsInSeqReads";
|
|
4
|
+
|
|
5
|
+
// seqReads should be an array of objects [{name, seq, pos, cigar}, {name, seq, pos, cigar}, ...]
|
|
6
|
+
// add gaps in reference sequence where there are insertions
|
|
7
|
+
/**
 * Inserts gap characters ("-") into a reference sequence wherever the aligned
 * reads contain insertions.
 *
 * The insertions are obtained from `getAllInsertionsInSeqReads(seqReads)`. For
 * each one, a run of `number` dashes is spliced in as a single element before
 * index `bpPos - 1` of the split reference, and the `bpPos` of every later
 * insertion is shifted by one to account for that extra element.
 *
 * @param refSeq - Reference sequence, e.g. "GGGAGACACC".
 * @param seqReads - Reads of the form `{name, seq, pos, cigar}`.
 * @returns The reference with gaps added, e.g. "GGGA--GA-C--ACC".
 */
export default function insertGapsIntoRefSeq(
  refSeq: string,
  seqReads: SeqRead[]
): string {
  // One array element per base: ["A", "T", "C", "G", ...]
  const refBases = refSeq.split("");
  const insertions = getAllInsertionsInSeqReads(seqReads);

  insertions.forEach((insertion, insertionIdx) => {
    const { bpPos, number: gapCount } = insertion;

    // Build the run of dashes for this insertion.
    let gapRun = "";
    let added = 0;
    while (added < gapCount) {
      gapRun += "-";
      added++;
    }

    refBases.splice(bpPos - 1, 0, gapRun);

    // The splice added one element, so later positions move right by one.
    insertions.slice(insertionIdx + 1).forEach(laterInsertion => {
      laterInsertion.bpPos += 1;
    });
  });

  return refBases.join("");
}
|
|
30
|
+
|
|
31
|
+
// allInsertionsInSeqReads.forEach(insertion => {
|
|
32
|
+
// // adding gap at the bp pos of insertion
|
|
33
|
+
// refSeqWithGaps.splice(insertion - 1, 0, "-");
|
|
34
|
+
// });
|
|
35
|
+
// for (let i = 0; i < allInsertionsInSeqReads.length; i++) {
|
|
36
|
+
// refSeqWithGaps.splice(allInsertionsInSeqReads[i] - 1, 0, "-");
|
|
37
|
+
// for (let innerI = i + 1; innerI < allInsertionsInSeqReads.length; innerI++){
|
|
38
|
+
// if (refSeqWithGaps[i] - 1 !== "-") {
|
|
39
|
+
// // allInsertionsInSeqReads[innerI] += 1;
|
|
40
|
+
// allInsertionsInSeqReads[i + 1] += 1;
|
|
41
|
+
// }
|
|
42
|
+
// }
|
|
43
|
+
// }
|