@teselagen/sequence-utils 0.3.36 → 0.3.38-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DNAComplementMap.d.ts +1 -1
- package/addGapsToSeqReads.d.ts +16 -3
- package/adjustAnnotationsToInsert.d.ts +2 -1
- package/adjustBpsToReplaceOrInsert.d.ts +2 -1
- package/aliasedEnzymesByName.d.ts +37 -1
- package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
- package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
- package/annotateSingleSeq.d.ts +5 -4
- package/annotationTypes.d.ts +2 -2
- package/autoAnnotate.d.ts +17 -8
- package/bioData.d.ts +10 -58
- package/calculateEndStability.d.ts +1 -1
- package/calculateNebTa.d.ts +6 -1
- package/calculateNebTm.d.ts +6 -4
- package/calculatePercentGC.d.ts +1 -1
- package/calculateSantaLuciaTm.d.ts +28 -114
- package/calculateTm.d.ts +13 -1
- package/computeDigestFragments.d.ts +30 -24
- package/condensePairwiseAlignmentDifferences.d.ts +1 -1
- package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
- package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
- package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
- package/defaultEnzymesByName.d.ts +2 -1
- package/degenerateDnaToAminoAcidMap.d.ts +1 -1
- package/degenerateRnaToAminoAcidMap.d.ts +1 -1
- package/deleteSequenceDataAtRange.d.ts +2 -1
- package/diffUtils.d.ts +9 -7
- package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
- package/featureTypesAndColors.d.ts +19 -6
- package/filterSequenceString.d.ts +14 -10
- package/findApproxMatches.d.ts +7 -1
- package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
- package/findOrfsInPlasmid.d.ts +2 -11
- package/findSequenceMatches.d.ts +11 -1
- package/generateAnnotations.d.ts +2 -1
- package/generateSequenceData.d.ts +8 -13
- package/getAllInsertionsInSeqReads.d.ts +11 -1
- package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
- package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
- package/getAminoAcidStringFromSequenceString.d.ts +3 -1
- package/getCodonRangeForAASliver.d.ts +3 -4
- package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getComplementSequenceAndAnnotations.d.ts +5 -1
- package/getComplementSequenceString.d.ts +1 -1
- package/getCutsiteType.d.ts +2 -1
- package/getCutsitesFromSequence.d.ts +2 -1
- package/getDegenerateDnaStringFromAAString.d.ts +1 -1
- package/getDegenerateRnaStringFromAAString.d.ts +1 -1
- package/getDigestFragmentsForCutsites.d.ts +4 -1
- package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
- package/getInsertBetweenVals.d.ts +2 -1
- package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
- package/getOrfsFromSequence.d.ts +17 -11
- package/getOverlapBetweenTwoSequences.d.ts +2 -1
- package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
- package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAnnotation.d.ts +11 -1
- package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
- package/getReverseComplementSequenceString.d.ts +1 -1
- package/getReverseSequenceString.d.ts +1 -1
- package/getSequenceDataBetweenRange.d.ts +9 -1
- package/getVirtualDigest.d.ts +11 -10
- package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
- package/index.cjs +733 -484
- package/index.d.ts +8 -5
- package/index.js +733 -484
- package/index.umd.cjs +733 -484
- package/insertGapsIntoRefSeq.d.ts +2 -1
- package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
- package/isEnzymeType2S.d.ts +2 -1
- package/mapAnnotationsToRows.d.ts +9 -1
- package/package.json +9 -6
- package/prepareCircularViewData.d.ts +2 -1
- package/prepareRowData.d.ts +7 -3
- package/proteinAlphabet.d.ts +1 -1
- package/rotateBpsToPosition.d.ts +1 -1
- package/rotateSequenceDataToPosition.d.ts +3 -1
- package/shiftAnnotationsByLen.d.ts +4 -3
- package/src/DNAComplementMap.ts +32 -0
- package/src/addGapsToSeqReads.ts +436 -0
- package/src/adjustAnnotationsToInsert.ts +20 -0
- package/src/adjustBpsToReplaceOrInsert.ts +73 -0
- package/src/aliasedEnzymesByName.ts +7366 -0
- package/src/aminoAcidToDegenerateDnaMap.ts +32 -0
- package/src/aminoAcidToDegenerateRnaMap.ts +32 -0
- package/src/annotateSingleSeq.ts +37 -0
- package/src/annotationTypes.ts +23 -0
- package/src/autoAnnotate.test.js +0 -1
- package/src/autoAnnotate.ts +290 -0
- package/src/bioData.ts +65 -0
- package/src/calculateEndStability.ts +91 -0
- package/src/calculateNebTa.ts +46 -0
- package/src/calculateNebTm.ts +132 -0
- package/src/calculatePercentGC.ts +3 -0
- package/src/calculateSantaLuciaTm.ts +184 -0
- package/src/calculateTm.ts +242 -0
- package/src/computeDigestFragments.ts +238 -0
- package/src/condensePairwiseAlignmentDifferences.ts +85 -0
- package/src/convertAACaretPositionOrRangeToDna.ts +28 -0
- package/src/convertDnaCaretPositionOrRangeToAA.ts +28 -0
- package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
- package/src/defaultEnzymesByName.ts +280 -0
- package/src/degenerateDnaToAminoAcidMap.ts +5 -0
- package/src/degenerateRnaToAminoAcidMap.ts +5 -0
- package/src/deleteSequenceDataAtRange.ts +13 -0
- package/src/diffUtils.ts +80 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
- package/src/featureTypesAndColors.js +1 -1
- package/src/featureTypesAndColors.ts +167 -0
- package/src/filterSequenceString.ts +153 -0
- package/src/findApproxMatches.ts +58 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.ts +43 -0
- package/src/findOrfsInPlasmid.js +6 -1
- package/src/findOrfsInPlasmid.ts +31 -0
- package/src/findSequenceMatches.ts +154 -0
- package/src/generateAnnotations.ts +39 -0
- package/src/generateSequenceData.ts +212 -0
- package/src/getAllInsertionsInSeqReads.ts +100 -0
- package/src/getAminoAcidDataForEachBaseOfDna.ts +305 -0
- package/src/getAminoAcidFromSequenceTriplet.ts +27 -0
- package/src/getAminoAcidStringFromSequenceString.ts +36 -0
- package/src/getCodonRangeForAASliver.ts +73 -0
- package/src/getComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getComplementSequenceAndAnnotations.ts +25 -0
- package/src/getComplementSequenceString.ts +23 -0
- package/src/getCutsiteType.ts +18 -0
- package/src/getCutsitesFromSequence.ts +22 -0
- package/src/getDegenerateDnaStringFromAAString.ts +15 -0
- package/src/getDegenerateRnaStringFromAAString.ts +15 -0
- package/src/getDigestFragmentsForCutsites.ts +126 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
- package/src/getInsertBetweenVals.ts +31 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +40 -0
- package/src/getMassOfAaString.ts +29 -0
- package/src/getOrfsFromSequence.ts +132 -0
- package/src/getOverlapBetweenTwoSequences.ts +30 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.ts +149 -0
- package/src/getReverseAminoAcidStringFromSequenceString.ts +22 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +10 -0
- package/src/getReverseComplementAnnotation.ts +33 -0
- package/src/getReverseComplementSequenceAndAnnotations.ts +46 -0
- package/src/getReverseComplementSequenceString.ts +18 -0
- package/src/getReverseSequenceString.ts +12 -0
- package/src/getSequenceDataBetweenRange.ts +154 -0
- package/src/getVirtualDigest.ts +139 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.ts +39 -0
- package/src/index.test.ts +43 -0
- package/src/index.ts +111 -0
- package/src/insertGapsIntoRefSeq.ts +43 -0
- package/src/insertSequenceDataAtPosition.ts +2 -0
- package/src/insertSequenceDataAtPositionOrRange.ts +328 -0
- package/src/isEnzymeType2S.ts +5 -0
- package/src/mapAnnotationsToRows.ts +256 -0
- package/src/prepareCircularViewData.ts +24 -0
- package/src/prepareRowData.ts +61 -0
- package/src/prepareRowData_output1.json +1 -0
- package/src/proteinAlphabet.ts +271 -0
- package/src/rotateBpsToPosition.ts +12 -0
- package/src/rotateSequenceDataToPosition.ts +54 -0
- package/src/shiftAnnotationsByLen.ts +24 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.ts +198 -0
- package/src/tidyUpAnnotation.ts +205 -0
- package/src/tidyUpSequenceData.ts +213 -0
- package/src/types.ts +109 -0
- package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
- package/tidyUpAnnotation.d.ts +13 -11
- package/tidyUpSequenceData.d.ts +15 -1
- package/types.d.ts +105 -0
package/src/diffUtils.ts
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { cloneDeep, forEach } from "lodash-es";
|
|
2
|
+
import { diff, patch, reverse, Delta } from "jsondiffpatch";
|
|
3
|
+
import { SequenceData } from "./types";
|
|
4
|
+
|
|
5
|
+
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
6
|
+
|
|
7
|
+
interface DiffOptions {
|
|
8
|
+
ignoreKeys?: string[];
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
/**
 * Builds a jsondiffpatch delta describing how `oldData` differs from `newData`.
 *
 * Both inputs are first passed through `tidyUpSequenceData` (annotations as
 * objects, no translation data, invalid characters kept). A fixed set of keys
 * plus any caller-supplied `ignoreKeys` are then removed from both tidied
 * copies so they never show up in the delta.
 *
 * @param oldData - The sequence record the delta starts from.
 * @param newData - The sequence record the delta leads to.
 * @param options - `ignoreKeys`: extra top-level keys to exclude from the diff.
 * @returns Whatever `diff` returns for the two cleaned records.
 */
const getDiffFromSeqs = (
  oldData: SequenceData,
  newData: SequenceData,
  { ignoreKeys = [] }: DiffOptions = {}
): Delta | undefined => {
  // Top-level keys that are always dropped before diffing, followed by the
  // caller's own additions.
  const keysToStrip = [
    "cutsites",
    "orfs",
    "filteredFeatures",
    "size",
    "fromFileUpload",
    "description",
    "materiallyAvailable",
    ...ignoreKeys
  ];

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const tidy = (seq: SequenceData): any =>
    tidyUpSequenceData(seq, {
      annotationsAsObjects: true,
      noTranslationData: true,
      doNotRemoveInvalidChars: true
    });

  // Tidy both records first, then strip them, so the order of operations
  // matches the two-phase flow this function has always used.
  const [cleanedOldData, cleanedNewData] = [oldData, newData].map(tidy);

  for (const cleaned of [cleanedOldData, cleanedNewData]) {
    for (const key of keysToStrip) {
      delete cleaned[key];
    }
    if (!cleaned.translations) continue;

    forEach(cleaned.translations, (translation, key) => {
      const hasNonUserType =
        translation.translationType &&
        translation.translationType !== "User Created";
      if (hasNonUserType) {
        // Translations with a type other than "User Created" are removed
        // entirely.
        delete cleaned.translations[key];
      } else {
        // Remaining translations are kept, minus their aminoAcids payload.
        delete translation.aminoAcids;
      }
    });
  }

  return diff(cleanedOldData, cleanedNewData);
};
|
|
58
|
+
|
|
59
|
+
/**
 * Applies a delta (as produced by `getDiffFromSeqs`) to a sequence record.
 *
 * `oldData` is deep-cloned and tidied before patching. Keys listed in
 * `ignoreKeys` are left out of the delta that gets applied; they are removed
 * from a private copy, so the caller's `diffData` object is never modified.
 *
 * @param oldData - The sequence record to patch (not mutated).
 * @param diffData - The delta to apply (not mutated).
 * @param options - `ignoreKeys`: top-level delta keys to skip when patching.
 * @returns The result of `patch` on the tidied copy of `oldData`.
 */
const patchSeqWithDiff = (
  oldData: SequenceData,
  diffData: Delta,
  { ignoreKeys = [] }: DiffOptions = {}
): SequenceData => {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  let deltaToApply: any = diffData;
  if (ignoreKeys.length && diffData) {
    // Work on a copy: deleting from `diffData` itself would silently alter an
    // object the caller may reuse (for example to reverse or re-apply it).
    deltaToApply = cloneDeep(diffData);
    for (const key of ignoreKeys) {
      delete deltaToApply[key];
    }
  }

  const tidyOld = tidyUpSequenceData(cloneDeep(oldData), {
    annotationsAsObjects: true,
    doNotRemoveInvalidChars: true
  });

  return patch(tidyOld, deltaToApply) as SequenceData;
};
|
|
75
|
+
|
|
76
|
+
/**
 * Returns the inverse of a sequence delta by delegating to jsondiffpatch's
 * `reverse`.
 *
 * @param diffData - The delta to invert.
 * @returns Whatever `reverse` returns for `diffData`.
 */
const reverseSeqDiff = (diffData: Delta): Delta | undefined =>
  reverse(diffData);
|
|
79
|
+
|
|
80
|
+
export { getDiffFromSeqs, patchSeqWithDiff, reverseSeqDiff };
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { RestrictionEnzyme } from "./types";
|
|
2
|
+
|
|
3
|
+
/**
 * Reports whether either snip offset of an enzyme is greater than the length
 * of its recognition site.
 *
 * Both `topSnipOffset` and `bottomSnipOffset` must be truthy for the check to
 * run; if either is missing or 0 the function returns `false`.
 *
 * @param enzyme - Enzyme with `site`, `topSnipOffset` and `bottomSnipOffset`.
 * @returns `true` when both offsets are truthy and at least one exceeds
 *   `enzyme.site.length`, otherwise `false`.
 */
export default function doesEnzymeChopOutsideOfRecognitionSite(
  enzyme: RestrictionEnzyme
): boolean {
  const { topSnipOffset, bottomSnipOffset } = enzyme;
  if (!topSnipOffset || !bottomSnipOffset) {
    return false;
  }
  const siteLength = enzyme.site.length;
  return topSnipOffset > siteLength || bottomSnipOffset > siteLength;
}
|
|
@@ -17,7 +17,7 @@ const genbankFeatureTypes = [
|
|
|
17
17
|
{ name: "regulatory", color: "#3F6C51" },
|
|
18
18
|
{ name: "SecStr", color: "#7B4B94" },
|
|
19
19
|
{ name: "Site", color: "#7D82B8" },
|
|
20
|
-
{ name: "telomere", color: "DE9151" },
|
|
20
|
+
{ name: "telomere", color: "#DE9151" },
|
|
21
21
|
{ name: "tmRNA", color: "#B7E3CC" },
|
|
22
22
|
{ name: "unsure", color: "#C4FFB2" },
|
|
23
23
|
{ name: "V_segment", color: "#D6F7A3" },
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import { get, keyBy, filter } from "lodash-es";
|
|
2
|
+
|
|
3
|
+
interface FeatureType {
|
|
4
|
+
name: string;
|
|
5
|
+
color: string;
|
|
6
|
+
isHidden?: boolean;
|
|
7
|
+
isGenbankStandardType?: boolean;
|
|
8
|
+
isOverridden?: boolean;
|
|
9
|
+
isCustomType?: boolean;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
const genbankFeatureTypes: FeatureType[] = [
|
|
13
|
+
{ name: "-10_signal", color: "#4ECDC4" },
|
|
14
|
+
{ name: "-35_signal", color: "#F7FFF7" },
|
|
15
|
+
{ name: "3'clip", color: "#FF6B6B" },
|
|
16
|
+
{ name: "3'UTR", color: "#FFE66D" },
|
|
17
|
+
{ name: "5'clip", color: "#3E517A" },
|
|
18
|
+
{ name: "5'UTR", color: "#BBBBBB" },
|
|
19
|
+
{ name: "D-loop", color: "#F13C73" },
|
|
20
|
+
{ name: "assembly_gap", color: "#DE9151" },
|
|
21
|
+
{ name: "centromere", color: "#F34213" },
|
|
22
|
+
{ name: "Het", color: "#BC5D2E" },
|
|
23
|
+
{ name: "mobile_element", color: "#6DB1BF" },
|
|
24
|
+
{ name: "ncRNA", color: "#FFEAEC" },
|
|
25
|
+
{ name: "proprotein", color: "#F39A9D" },
|
|
26
|
+
{ name: "regulatory", color: "#3F6C51" },
|
|
27
|
+
{ name: "SecStr", color: "#7B4B94" },
|
|
28
|
+
{ name: "Site", color: "#7D82B8" },
|
|
29
|
+
{ name: "telomere", color: "#DE9151" },
|
|
30
|
+
{ name: "tmRNA", color: "#B7E3CC" },
|
|
31
|
+
{ name: "unsure", color: "#C4FFB2" },
|
|
32
|
+
{ name: "V_segment", color: "#D6F7A3" },
|
|
33
|
+
{ name: "allele", color: "#D86D6D" },
|
|
34
|
+
{ name: "attenuator", color: "#6B7F9C" },
|
|
35
|
+
{ name: "C_region", color: "#B5D89D" },
|
|
36
|
+
{ name: "CAAT_signal", color: "#E9CD98" },
|
|
37
|
+
{ name: "CDS", color: "#EF6500" },
|
|
38
|
+
{ name: "conserved", color: "#A3A5F0" },
|
|
39
|
+
{ name: "D_segment", color: "#C060F7" },
|
|
40
|
+
{ name: "default", color: "#CCCCCC" },
|
|
41
|
+
{ name: "enhancer", color: "#38F872" },
|
|
42
|
+
{ name: "exon", color: "#95F844" },
|
|
43
|
+
{ name: "gap", color: "#F7D43C" },
|
|
44
|
+
{ name: "GC_signal", color: "#861F1F" },
|
|
45
|
+
{ name: "gene", color: "#684E27" },
|
|
46
|
+
{ name: "iDNA", color: "#A59B41" },
|
|
47
|
+
{ name: "intron", color: "#52963E" },
|
|
48
|
+
{ name: "J_region", color: "#369283" },
|
|
49
|
+
{ name: "LTR", color: "#31748F" },
|
|
50
|
+
{ name: "m_rna", color: "#FFFF00" },
|
|
51
|
+
{ name: "mat_peptide", color: "#353E8F" },
|
|
52
|
+
{ name: "misc_binding", color: "#006FEF" },
|
|
53
|
+
{ name: "misc_difference", color: "#5A368A" },
|
|
54
|
+
{ name: "misc_feature", color: "#006FEF" },
|
|
55
|
+
{ name: "misc_marker", color: "#8DCEB1" },
|
|
56
|
+
{ name: "misc_part", color: "#006FEF" },
|
|
57
|
+
{ name: "misc_recomb", color: "#DD97B4" },
|
|
58
|
+
{ name: "misc_RNA", color: "#BD0101" },
|
|
59
|
+
{ name: "misc_signal", color: "#FF9A04" },
|
|
60
|
+
{ name: "misc_structure", color: "#B3FF00" },
|
|
61
|
+
{ name: "modified_base", color: "#00F7FF" },
|
|
62
|
+
{ name: "mRNA", color: "#FFD900" },
|
|
63
|
+
{ name: "N_region", color: "#AE00FF" },
|
|
64
|
+
{ name: "old_sequence", color: "#F0A7FF" },
|
|
65
|
+
{ name: "operator", color: "#63004D" },
|
|
66
|
+
{ name: "operon", color: "#000653" },
|
|
67
|
+
{ name: "oriT", color: "#580000" },
|
|
68
|
+
{ name: "plasmid", color: "#00635E" },
|
|
69
|
+
{ name: "polyA_signal", color: "#BBBBBB" },
|
|
70
|
+
{ name: "polyA_site", color: "#003328" },
|
|
71
|
+
{ name: "precursor_RNA", color: "#443200" },
|
|
72
|
+
{ name: "prim_transcript", color: "#665E4C" },
|
|
73
|
+
{ name: "primer_bind", color: "#53d969" },
|
|
74
|
+
{ name: "promoter", color: "#31B440" },
|
|
75
|
+
{ name: "protein_bind", color: "#2E2E2E" },
|
|
76
|
+
{ name: "protein_domain", color: "#4D4B4B" },
|
|
77
|
+
{ name: "protein", color: "#696969" },
|
|
78
|
+
{ name: "RBS", color: "#BDFFCB" },
|
|
79
|
+
{ name: "rep_origin", color: "#878787" },
|
|
80
|
+
{ name: "repeat_region", color: "#966363" },
|
|
81
|
+
{ name: "repeat_unit", color: "#A16D8D" },
|
|
82
|
+
{ name: "rRNA", color: "#9BF0FF" },
|
|
83
|
+
{ name: "s_mutation", color: "#70A2FF" },
|
|
84
|
+
{ name: "S_region", color: "#FF74A9" },
|
|
85
|
+
{ name: "satellite", color: "#164E64" },
|
|
86
|
+
{ name: "scRNA", color: "#A057FF" },
|
|
87
|
+
{ name: "sig_peptide", color: "#2FFF8D" },
|
|
88
|
+
{ name: "snoRNA", color: "#296B14" },
|
|
89
|
+
{ name: "snRNA", color: "#A16249" },
|
|
90
|
+
{ name: "source", color: "#0B17BD" },
|
|
91
|
+
{ name: "start", color: "#D6A336" },
|
|
92
|
+
{ name: "stem_loop", color: "#67069E" },
|
|
93
|
+
{ name: "stop", color: "#D44FC9" },
|
|
94
|
+
{ name: "STS", color: "#597FE7" },
|
|
95
|
+
{ name: "tag", color: "#E419DA" },
|
|
96
|
+
{ name: "TATA_signal", color: "#EB2B2B" },
|
|
97
|
+
{ name: "terminator", color: "#F51600" },
|
|
98
|
+
{ name: "transit_peptide", color: "#24D491" },
|
|
99
|
+
{ name: "transposon", color: "#B6E436" },
|
|
100
|
+
{ name: "tRNA", color: "#D1456F" },
|
|
101
|
+
{ name: "V_region", color: "#7B5EE7" },
|
|
102
|
+
{ name: "variation", color: "#2EE455" }
|
|
103
|
+
];
|
|
104
|
+
|
|
105
|
+
/**
 * Builds a name-keyed map of feature types: the built-in GenBank list merged
 * with any overrides found under `tg_featureTypeOverrides` on `window` or,
 * failing that, on `global`.
 *
 * - Every built-in entry is tagged `isGenbankStandardType: true`.
 * - An override whose name matches a built-in entry is layered on top of it
 *   and tagged `isOverridden: true`.
 * - An override with an unknown name is tagged `isCustomType: true`.
 *
 * @returns The merged map, with overrides taking precedence over built-ins.
 */
const getMergedFeatureMap = (): Record<string, FeatureType> => {
  const standardByName = keyBy(
    genbankFeatureTypes.map(feat => ({ ...feat, isGenbankStandardType: true })),
    "name"
  );

  // Look on `window` first, then on `global`; the first truthy value wins and
  // an empty list is used when neither provides one.
  const readOverrides = (): unknown => {
    if (typeof window !== "undefined") {
      const fromWindow = get(window, "tg_featureTypeOverrides");
      if (fromWindow) return fromWindow;
    }
    if (typeof global !== "undefined") {
      const fromGlobal = get(global, "tg_featureTypeOverrides");
      if (fromGlobal) return fromGlobal;
    }
    return [];
  };
  const featureOverrides = readOverrides() as FeatureType[];

  const overridesByName = keyBy(
    featureOverrides.map(override => {
      const builtIn = standardByName[override.name];
      const marker = builtIn ? { isOverridden: true } : { isCustomType: true };
      return { ...builtIn, ...override, ...marker };
    }),
    "name"
  );

  return { ...standardByName, ...overridesByName };
};
|
|
135
|
+
|
|
136
|
+
/**
 * Maps feature type names to their colors, based on the merged feature map.
 *
 * @param options - `includeHidden`: when truthy, entries flagged `isHidden`
 *   are included; otherwise they are left out.
 * @returns An object of `name -> color`.
 */
const getFeatureToColorMap = ({
  includeHidden
}: { includeHidden?: boolean } = {}): Record<string, string> => {
  const visibleFeatures = filter(
    getMergedFeatureMap(),
    feat => includeHidden || !feat.isHidden
  );

  const colorsByName: Record<string, string> = {};
  for (const feat of visibleFeatures) {
    colorsByName[feat.name] = feat.color;
  }
  return colorsByName;
};
|
|
147
|
+
|
|
148
|
+
/**
 * Lists the names of all feature types in the merged feature map.
 *
 * @param options - `includeHidden`: when truthy, entries flagged `isHidden`
 *   are included; otherwise they are left out.
 * @returns The feature type names.
 */
const getFeatureTypes = ({
  includeHidden
}: { includeHidden?: boolean } = {}): string[] => {
  const visibleFeatures = filter(
    getMergedFeatureMap(),
    feat => includeHidden || !feat.isHidden
  );
  return visibleFeatures.map(feat => feat.name);
};
|
|
154
|
+
|
|
155
|
+
export { genbankFeatureTypes };
|
|
156
|
+
|
|
157
|
+
/**
 * Maps each built-in GenBank feature type name to its color. Overrides are
 * not consulted; only `genbankFeatureTypes` is read.
 *
 * @returns An object of `name -> color`.
 */
export function getGenbankFeatureToColorMap(): Record<string, string> {
  const colorsByName: Record<string, string> = {};
  for (const feat of genbankFeatureTypes) {
    colorsByName[feat.name] = feat.color;
  }
  return colorsByName;
}
|
|
164
|
+
|
|
165
|
+
export { getFeatureToColorMap };
|
|
166
|
+
export { getFeatureTypes };
|
|
167
|
+
export { getMergedFeatureMap };
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import { debounce, uniq } from "lodash-es";
|
|
2
|
+
import {
|
|
3
|
+
ambiguous_dna_letters,
|
|
4
|
+
ambiguous_rna_letters,
|
|
5
|
+
extended_protein_letters
|
|
6
|
+
} from "./bioData";
|
|
7
|
+
|
|
8
|
+
// Warnings collected since the last toast was shown.
let allWarnings: string[] = [];

// Shows the de-duplicated pending warnings in one toastr warning (only when
// `window.toastr` exists and there is something to show), then clears the
// queue. Debounced by 200ms.
const makeToast = debounce(() => {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const toastr = typeof window !== "undefined" && (window as any).toastr;
  if (toastr && allWarnings.length) {
    toastr.warning(uniq(allWarnings).join("\n"));
  }
  allWarnings = [];
}, 200);

/**
 * Queues warnings for display and restarts the debounce timer.
 *
 * @param warnings - Warning(s) appended to the pending queue via `concat`.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function showWarnings(warnings: any) {
  allWarnings = allWarnings.concat(warnings);
  makeToast.cancel();
  makeToast();
}
|
|
29
|
+
|
|
30
|
+
interface FilterSequenceStringOptions {
|
|
31
|
+
additionalValidChars?: string;
|
|
32
|
+
isOligo?: boolean;
|
|
33
|
+
name?: string;
|
|
34
|
+
isProtein?: boolean;
|
|
35
|
+
isRna?: boolean;
|
|
36
|
+
isMixedRnaAndDna?: boolean;
|
|
37
|
+
[key: string]: unknown;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
 * Removes characters that are not valid for the given sequence type and
 * applies any single-letter replacements defined for that type.
 *
 * Matching is case-insensitive: each input letter is lower-cased before it is
 * looked up, and the original casing is preserved in the output. Characters in
 * `additionalValidChars` are accepted in addition to the type's alphabet.
 *
 * Side effect: the generated warnings are also handed to `showWarnings`.
 *
 * @param sequenceString - Raw sequence text (defaults to "").
 * @param options - Sequence type flags (`isProtein`, `isOligo`, `isRna`,
 *   `isMixedRnaAndDna`), `additionalValidChars`, and an optional `name` used
 *   as a prefix in the invalid-character warning.
 * @returns A tuple of `[sanitizedSequence, warnings]`.
 */
export default function filterSequenceString(
  sequenceString = "",
  {
    additionalValidChars = "",
    isOligo,
    name,
    isProtein,
    isRna,
    isMixedRnaAndDna
  }: FilterSequenceStringOptions = {}
): [string, string[]] {
  const typeFlags = { isOligo, isProtein, isRna, isMixedRnaAndDna };
  const acceptedChars = getAcceptedChars(typeFlags);
  const replaceChars = getReplaceChars(typeFlags);

  // The accepted set is used as a plain string for `includes`, so the extra
  // characters are appended verbatim (no regex-style escaping, which would
  // make "\" itself valid). They are lower-cased because every input letter is
  // lower-cased before the lookup below.
  const chars = `${acceptedChars}${additionalValidChars.toLowerCase()}`;

  let sanitizedVal = "";
  const invalidChars: string[] = [];
  const warnings: string[] = [];
  const replaceCount: Record<string, number> = {};

  for (const letter of sequenceString.split("")) {
    const lowerLetter = letter.toLowerCase();
    const replacement = replaceChars && replaceChars[lowerLetter];
    if (replacement) {
      replaceCount[lowerLetter] = (replaceCount[lowerLetter] || 0) + 1;
      // Keep the casing of the letter being replaced.
      const isUpper = lowerLetter !== letter;
      sanitizedVal += isUpper ? replacement.toUpperCase() : replacement;
    } else if (chars.includes(lowerLetter)) {
      sanitizedVal += letter;
    } else {
      invalidChars.push(letter);
    }
  }

  // One warning per replaced letter, with a count when it occurred repeatedly.
  for (const letter of Object.keys(replaceCount)) {
    const count = replaceCount[letter];
    warnings.push(
      `Replaced "${letter}" with "${replaceChars[letter]}"${
        count > 1 ? ` ${count} times` : ""
      }`
    );
  }

  if (invalidChars.length > 0) {
    const listed = uniq(invalidChars)
      .map(c => (c === " " ? "space" : c))
      .slice(0, 100) // cap the list so the message stays readable
      .join(", ");
    warnings.push(
      `${
        name ? `Sequence ${name}: ` : ""
      }Invalid character(s) detected and removed: ${listed} `
    );
  }
  showWarnings(warnings);

  return [sanitizedVal, warnings];
}
|
|
111
|
+
|
|
112
|
+
/**
 * Returns the lower-case alphabet accepted for a sequence type.
 *
 * Precedence: protein, then oligo / RNA, then everything else (mixed RNA+DNA
 * and plain DNA share the same alphabet, so `isMixedRnaAndDna` does not need
 * to be inspected).
 *
 * @param options - Sequence type flags.
 * @returns A string whose characters are the accepted letters.
 */
export function getAcceptedChars({
  isOligo,
  isProtein,
  isRna
}: FilterSequenceStringOptions = {}) {
  if (isProtein) {
    return `${extended_protein_letters.toLowerCase()}`;
  }

  const rnaLetters = ambiguous_rna_letters.toLowerCase();
  if (isOligo || isRna) {
    // Oligos and RNA take the RNA alphabet plus "t".
    return rnaLetters + "t";
  }

  // Mixed RNA/DNA and plain DNA: RNA alphabet followed by the DNA alphabet.
  return rnaLetters + ambiguous_dna_letters.toLowerCase();
}
|
|
131
|
+
/**
 * Returns the single-letter replacements to apply for a sequence type.
 *
 * Only RNA (when neither `isProtein` nor `isOligo` is set) defines a
 * replacement: "t" becomes "u". Every other type yields an empty map.
 *
 * @param options - Sequence type flags.
 * @returns A map of lower-case letter to its lower-case replacement.
 */
export function getReplaceChars({
  isOligo,
  isProtein,
  isRna
}: FilterSequenceStringOptions = {}): Record<string, string> {
  if (isProtein || isOligo) {
    return {};
  }
  if (isRna) {
    return { t: "u" };
  }
  // Mixed RNA/DNA and plain DNA: nothing to replace.
  return {};
}
|
|
149
|
+
|
|
150
|
+
/**
 * Convenience wrapper: filters `s` as RNA and returns only the sanitized
 * string (the warnings from `filterSequenceString` are discarded).
 *
 * @param s - Raw sequence text.
 * @param o - Options forwarded to `filterSequenceString`; `isRna` is forced
 *   to `true`.
 * @returns The sanitized sequence.
 */
export const filterRnaString = (
  s: string,
  o: FilterSequenceStringOptions
): string => {
  const [sanitized] = filterSequenceString(s, { ...o, isRna: true });
  return sanitized;
};
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/** One approximate occurrence of the search sequence in the target. */
export interface ApproxMatch {
  /** 0-based start of the occurrence in the target sequence. */
  index: number;
  /** The target text that was compared against the search sequence. */
  match: string;
  /** 0-based offsets (relative to the search sequence) that differ. */
  mismatchPositions: number[];
  /** Same as `mismatchPositions.length`; kept for backwards compatibility. */
  numMismatches: number;
}

/**
 * Finds approximate (substitution-only) matches of a search sequence within a
 * target sequence. Comparison is exact per character (case-sensitive).
 *
 * @param searchSeq - The sequence to search for. An empty search sequence
 *   yields no matches.
 * @param targetSeq - The sequence to search within.
 * @param maxMismatches - Maximum number of mismatching positions allowed.
 * @param circular - Whether to treat the target as circular, so that
 *   occurrences may wrap past its end (default: false).
 * @returns One entry per start position whose window has at most
 *   `maxMismatches` mismatches, in ascending `index` order.
 */
export default function findApproxMatches(
  searchSeq: string,
  targetSeq: string,
  maxMismatches: number,
  circular = false
): ApproxMatch[] {
  const matches: ApproxMatch[] = [];
  const lenA = searchSeq.length;
  const lenB = targetSeq.length;

  // Nothing to look for. Without this guard every position would report a
  // zero-length "match", and `slice(0, lenA - 1)` below would become
  // `slice(0, -1)` and wrongly extend the target in circular mode.
  if (lenA === 0) {
    return matches;
  }

  // Extend targetSeq to simulate circularity, in case circular = true
  const targetSeqExtended = circular
    ? targetSeq + targetSeq.slice(0, lenA - 1)
    : targetSeq;
  const limit = circular ? lenB : lenB - lenA + 1;

  for (let i = 0; i < limit; i++) {
    const mismatchPositions: number[] = [];

    for (let j = 0; j < lenA; j++) {
      if (searchSeq[j] !== targetSeqExtended[i + j]) {
        mismatchPositions.push(j);
        // Stop scanning this window as soon as it is over budget.
        if (mismatchPositions.length > maxMismatches) break;
      }
    }

    if (mismatchPositions.length <= maxMismatches) {
      matches.push({
        index: i,
        match: targetSeqExtended.slice(i, i + lenA),
        mismatchPositions,
        numMismatches: mismatchPositions.length // Keep for backwards compatibility
      });
    }
  }

  return matches;
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { normalizeRange, Range } from "@teselagen/range-utils";
|
|
2
|
+
/**
 * Finds the occurrence of `overlapSequence` in `sequenceToSearch` whose start
 * is nearest to `positionStart`, matching case-insensitively.
 *
 * Unless `isLinear` is set, the sequence is treated as circular: occurrences
 * that wrap past the end are found and distance is measured the short way
 * around the origin. When two occurrences are equally near, the later one
 * wins.
 *
 * NOTE(review): `overlapSequence` is passed to `RegExp` unescaped, so regex
 * metacharacters in it are interpreted as a pattern — confirm callers only
 * pass plain sequence letters.
 *
 * @param sequenceToSearch - The sequence to search within.
 * @param overlapSequence - The sequence to look for.
 * @param positionStart - Position to measure distance from (default 0).
 * @param isLinear - When truthy, do not search or measure around the origin.
 * @returns The result of `normalizeRange` for the nearest occurrence, or
 *   `null` when `overlapSequence` is empty, longer than `sequenceToSearch`,
 *   or not found.
 */
function findNearestRangeOfSequenceOverlapToPosition(
  sequenceToSearch: string,
  overlapSequence: string,
  positionStart = 0,
  isLinear?: boolean
): Range | null {
  const seqLength = sequenceToSearch.length;
  // An empty pattern matches with zero length at every position; there is no
  // meaningful range to report, so bail out early.
  if (overlapSequence.length === 0 || seqLength < overlapSequence.length) {
    return null;
  }

  const regex = new RegExp(overlapSequence, "ig");
  // Doubling the sequence lets matches that span the origin be found. Built
  // once rather than on every loop iteration.
  const haystack = isLinear
    ? sequenceToSearch
    : sequenceToSearch + sequenceToSearch;

  let result: RegExpExecArray | null;
  let index: number | undefined;
  let distance = Infinity;

  while ((result = regex.exec(haystack))) {
    if (result.index > seqLength) break;
    // Resume one base after this hit so overlapping occurrences are also
    // considered (and the scan always advances).
    regex.lastIndex = result.index + 1;

    const linearDistance = Math.abs(result.index - positionStart);
    const newDistance = isLinear
      ? linearDistance //if linear, don't check around the origin
      : Math.min(linearDistance, Math.abs(linearDistance - seqLength));

    if (newDistance > distance) {
      // On a linear sequence, once hits start getting farther away they only
      // get farther, so stop. On a circular one a later hit can be closer
      // across the origin, so keep scanning.
      if (isLinear) break;
      continue;
    }
    index = result.index;
    distance = newDistance;
  }

  if (index === undefined) {
    return null;
  }

  //index is the closest range start
  return normalizeRange(
    {
      start: index,
      end: index + overlapSequence.length - 1
    },
    seqLength
  );
}
|
|
43
|
+
export default findNearestRangeOfSequenceOverlapToPosition;
|
package/src/findOrfsInPlasmid.js
CHANGED
|
@@ -4,8 +4,13 @@ export default function findOrfsInPlasmid(
|
|
|
4
4
|
sequence,
|
|
5
5
|
circular,
|
|
6
6
|
minimumOrfSize,
|
|
7
|
-
useAdditionalOrfStartCodons
|
|
7
|
+
useAdditionalOrfStartCodons,
|
|
8
|
+
isProteinOrOligo
|
|
8
9
|
) {
|
|
10
|
+
if (isProteinOrOligo) {
|
|
11
|
+
// we do not find ORFs in protein/oligo sequences
|
|
12
|
+
return [];
|
|
13
|
+
}
|
|
9
14
|
//tnr, we should do the parsing down of the orfs immediately after they're returned from this sequence
|
|
10
15
|
// const orfs1Forward = eliminateCircularOrfsThatOverlapWithNonCircularOrfs(getOrfsFromSequence(0, doubleForwardSequence, minimumOrfSize, true), maxLength);
|
|
11
16
|
const forwardOrfs = getOrfsFromSequence({
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import getOrfsFromSequence, { Orf } from "./getOrfsFromSequence";
|
|
2
|
+
|
|
3
|
+
/**
 * Finds open reading frames on both strands of a sequence by calling
 * `getOrfsFromSequence` once with `forward: true` and once with
 * `forward: false`.
 *
 * @param sequence - The sequence to scan.
 * @param circular - Forwarded to `getOrfsFromSequence` as `circular`.
 * @param minimumOrfSize - Forwarded to `getOrfsFromSequence`.
 * @param useAdditionalOrfStartCodons - Forwarded to `getOrfsFromSequence`.
 * @param isProteinOrOligo - When true, no search is performed and an empty
 *   list is returned. Optional (defaults to `false`) so that existing
 *   four-argument callers keep working.
 * @returns Forward-strand ORFs followed by reverse-strand ORFs.
 */
export default function findOrfsInPlasmid(
  sequence: string,
  circular: boolean,
  minimumOrfSize: number,
  useAdditionalOrfStartCodons: boolean,
  isProteinOrOligo = false
): Orf[] {
  if (isProteinOrOligo) {
    // we do not find ORFs in protein/oligo sequences
    return [];
  }

  // TODO (tnr): the ORFs should be pared down immediately after they are
  // returned from getOrfsFromSequence.
  const sharedOptions = {
    sequence,
    minimumOrfSize,
    circular,
    useAdditionalOrfStartCodons
  };
  const forwardOrfs = getOrfsFromSequence({ ...sharedOptions, forward: true });
  const reverseOrfs = getOrfsFromSequence({ ...sharedOptions, forward: false });
  return forwardOrfs.concat(reverseOrfs);
}
|