@teselagen/sequence-utils 0.3.36 → 0.3.38-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/DNAComplementMap.d.ts +1 -1
  2. package/addGapsToSeqReads.d.ts +16 -3
  3. package/adjustAnnotationsToInsert.d.ts +2 -1
  4. package/adjustBpsToReplaceOrInsert.d.ts +2 -1
  5. package/aliasedEnzymesByName.d.ts +37 -1
  6. package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
  7. package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
  8. package/annotateSingleSeq.d.ts +5 -4
  9. package/annotationTypes.d.ts +2 -2
  10. package/autoAnnotate.d.ts +17 -8
  11. package/bioData.d.ts +10 -58
  12. package/calculateEndStability.d.ts +1 -1
  13. package/calculateNebTa.d.ts +6 -1
  14. package/calculateNebTm.d.ts +6 -4
  15. package/calculatePercentGC.d.ts +1 -1
  16. package/calculateSantaLuciaTm.d.ts +28 -114
  17. package/calculateTm.d.ts +13 -1
  18. package/computeDigestFragments.d.ts +30 -24
  19. package/condensePairwiseAlignmentDifferences.d.ts +1 -1
  20. package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
  21. package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
  22. package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
  23. package/defaultEnzymesByName.d.ts +2 -1
  24. package/degenerateDnaToAminoAcidMap.d.ts +1 -1
  25. package/degenerateRnaToAminoAcidMap.d.ts +1 -1
  26. package/deleteSequenceDataAtRange.d.ts +2 -1
  27. package/diffUtils.d.ts +9 -7
  28. package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
  29. package/featureTypesAndColors.d.ts +19 -6
  30. package/filterSequenceString.d.ts +14 -10
  31. package/findApproxMatches.d.ts +7 -1
  32. package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
  33. package/findOrfsInPlasmid.d.ts +2 -11
  34. package/findSequenceMatches.d.ts +11 -1
  35. package/generateAnnotations.d.ts +2 -1
  36. package/generateSequenceData.d.ts +8 -13
  37. package/getAllInsertionsInSeqReads.d.ts +11 -1
  38. package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
  39. package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
  40. package/getAminoAcidStringFromSequenceString.d.ts +3 -1
  41. package/getCodonRangeForAASliver.d.ts +3 -4
  42. package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  43. package/getComplementSequenceAndAnnotations.d.ts +5 -1
  44. package/getComplementSequenceString.d.ts +1 -1
  45. package/getCutsiteType.d.ts +2 -1
  46. package/getCutsitesFromSequence.d.ts +2 -1
  47. package/getDegenerateDnaStringFromAAString.d.ts +1 -1
  48. package/getDegenerateRnaStringFromAAString.d.ts +1 -1
  49. package/getDigestFragmentsForCutsites.d.ts +4 -1
  50. package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
  51. package/getInsertBetweenVals.d.ts +2 -1
  52. package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
  53. package/getOrfsFromSequence.d.ts +17 -11
  54. package/getOverlapBetweenTwoSequences.d.ts +2 -1
  55. package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
  56. package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
  57. package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  58. package/getReverseComplementAnnotation.d.ts +11 -1
  59. package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
  60. package/getReverseComplementSequenceString.d.ts +1 -1
  61. package/getReverseSequenceString.d.ts +1 -1
  62. package/getSequenceDataBetweenRange.d.ts +9 -1
  63. package/getVirtualDigest.d.ts +11 -10
  64. package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
  65. package/index.cjs +733 -484
  66. package/index.d.ts +8 -5
  67. package/index.js +733 -484
  68. package/index.umd.cjs +733 -484
  69. package/insertGapsIntoRefSeq.d.ts +2 -1
  70. package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
  71. package/isEnzymeType2S.d.ts +2 -1
  72. package/mapAnnotationsToRows.d.ts +9 -1
  73. package/package.json +9 -6
  74. package/prepareCircularViewData.d.ts +2 -1
  75. package/prepareRowData.d.ts +7 -3
  76. package/proteinAlphabet.d.ts +1 -1
  77. package/rotateBpsToPosition.d.ts +1 -1
  78. package/rotateSequenceDataToPosition.d.ts +3 -1
  79. package/shiftAnnotationsByLen.d.ts +4 -3
  80. package/src/DNAComplementMap.ts +32 -0
  81. package/src/addGapsToSeqReads.ts +436 -0
  82. package/src/adjustAnnotationsToInsert.ts +20 -0
  83. package/src/adjustBpsToReplaceOrInsert.ts +73 -0
  84. package/src/aliasedEnzymesByName.ts +7366 -0
  85. package/src/aminoAcidToDegenerateDnaMap.ts +32 -0
  86. package/src/aminoAcidToDegenerateRnaMap.ts +32 -0
  87. package/src/annotateSingleSeq.ts +37 -0
  88. package/src/annotationTypes.ts +23 -0
  89. package/src/autoAnnotate.test.js +0 -1
  90. package/src/autoAnnotate.ts +290 -0
  91. package/src/bioData.ts +65 -0
  92. package/src/calculateEndStability.ts +91 -0
  93. package/src/calculateNebTa.ts +46 -0
  94. package/src/calculateNebTm.ts +132 -0
  95. package/src/calculatePercentGC.ts +3 -0
  96. package/src/calculateSantaLuciaTm.ts +184 -0
  97. package/src/calculateTm.ts +242 -0
  98. package/src/computeDigestFragments.ts +238 -0
  99. package/src/condensePairwiseAlignmentDifferences.ts +85 -0
  100. package/src/convertAACaretPositionOrRangeToDna.ts +28 -0
  101. package/src/convertDnaCaretPositionOrRangeToAA.ts +28 -0
  102. package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
  103. package/src/defaultEnzymesByName.ts +280 -0
  104. package/src/degenerateDnaToAminoAcidMap.ts +5 -0
  105. package/src/degenerateRnaToAminoAcidMap.ts +5 -0
  106. package/src/deleteSequenceDataAtRange.ts +13 -0
  107. package/src/diffUtils.ts +80 -0
  108. package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
  109. package/src/featureTypesAndColors.js +1 -1
  110. package/src/featureTypesAndColors.ts +167 -0
  111. package/src/filterSequenceString.ts +153 -0
  112. package/src/findApproxMatches.ts +58 -0
  113. package/src/findNearestRangeOfSequenceOverlapToPosition.ts +43 -0
  114. package/src/findOrfsInPlasmid.js +6 -1
  115. package/src/findOrfsInPlasmid.ts +31 -0
  116. package/src/findSequenceMatches.ts +154 -0
  117. package/src/generateAnnotations.ts +39 -0
  118. package/src/generateSequenceData.ts +212 -0
  119. package/src/getAllInsertionsInSeqReads.ts +100 -0
  120. package/src/getAminoAcidDataForEachBaseOfDna.ts +305 -0
  121. package/src/getAminoAcidFromSequenceTriplet.ts +27 -0
  122. package/src/getAminoAcidStringFromSequenceString.ts +36 -0
  123. package/src/getCodonRangeForAASliver.ts +73 -0
  124. package/src/getComplementAminoAcidStringFromSequenceString.ts +10 -0
  125. package/src/getComplementSequenceAndAnnotations.ts +25 -0
  126. package/src/getComplementSequenceString.ts +23 -0
  127. package/src/getCutsiteType.ts +18 -0
  128. package/src/getCutsitesFromSequence.ts +22 -0
  129. package/src/getDegenerateDnaStringFromAAString.ts +15 -0
  130. package/src/getDegenerateRnaStringFromAAString.ts +15 -0
  131. package/src/getDigestFragmentsForCutsites.ts +126 -0
  132. package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
  133. package/src/getInsertBetweenVals.ts +31 -0
  134. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +40 -0
  135. package/src/getMassOfAaString.ts +29 -0
  136. package/src/getOrfsFromSequence.ts +132 -0
  137. package/src/getOverlapBetweenTwoSequences.ts +30 -0
  138. package/src/getPossiblePartsFromSequenceAndEnzymes.ts +149 -0
  139. package/src/getReverseAminoAcidStringFromSequenceString.ts +22 -0
  140. package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +10 -0
  141. package/src/getReverseComplementAnnotation.ts +33 -0
  142. package/src/getReverseComplementSequenceAndAnnotations.ts +46 -0
  143. package/src/getReverseComplementSequenceString.ts +18 -0
  144. package/src/getReverseSequenceString.ts +12 -0
  145. package/src/getSequenceDataBetweenRange.ts +154 -0
  146. package/src/getVirtualDigest.ts +139 -0
  147. package/src/guessIfSequenceIsDnaAndNotProtein.ts +39 -0
  148. package/src/index.test.ts +43 -0
  149. package/src/index.ts +111 -0
  150. package/src/insertGapsIntoRefSeq.ts +43 -0
  151. package/src/insertSequenceDataAtPosition.ts +2 -0
  152. package/src/insertSequenceDataAtPositionOrRange.ts +328 -0
  153. package/src/isEnzymeType2S.ts +5 -0
  154. package/src/mapAnnotationsToRows.ts +256 -0
  155. package/src/prepareCircularViewData.ts +24 -0
  156. package/src/prepareRowData.ts +61 -0
  157. package/src/prepareRowData_output1.json +1 -0
  158. package/src/proteinAlphabet.ts +271 -0
  159. package/src/rotateBpsToPosition.ts +12 -0
  160. package/src/rotateSequenceDataToPosition.ts +54 -0
  161. package/src/shiftAnnotationsByLen.ts +24 -0
  162. package/src/threeLetterSequenceStringToAminoAcidMap.ts +198 -0
  163. package/src/tidyUpAnnotation.ts +205 -0
  164. package/src/tidyUpSequenceData.ts +213 -0
  165. package/src/types.ts +109 -0
  166. package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
  167. package/tidyUpAnnotation.d.ts +13 -11
  168. package/tidyUpSequenceData.d.ts +15 -1
  169. package/types.d.ts +105 -0
@@ -0,0 +1,80 @@
1
+ import { cloneDeep, forEach } from "lodash-es";
2
+ import { diff, patch, reverse, Delta } from "jsondiffpatch";
3
+ import { SequenceData } from "./types";
4
+
5
+ import tidyUpSequenceData from "./tidyUpSequenceData";
6
+
7
/** Options shared by the sequence diff / patch helpers in this module. */
interface DiffOptions {
  /**
   * Top-level keys to disregard: removed from both sequences before diffing,
   * or removed from a delta before it is applied.
   */
  ignoreKeys?: Array<string>;
}
10
+
11
/**
 * Computes a jsondiffpatch delta between two sequence data objects.
 *
 * Both inputs are first run through `tidyUpSequenceData`. A fixed list of
 * keys, plus any `ignoreKeys`, is then deleted from each tidied object so
 * those keys never show up in the delta. Translations whose `translationType`
 * is set to something other than "User Created" are dropped; the remaining
 * translations lose their `aminoAcids` property.
 *
 * @param oldData - The "before" sequence.
 * @param newData - The "after" sequence.
 * @param options - See {@link DiffOptions}.
 * @returns Whatever jsondiffpatch's `diff` returns for the two cleaned objects.
 */
const getDiffFromSeqs = (
  oldData: SequenceData,
  newData: SequenceData,
  { ignoreKeys = [] }: DiffOptions = {}
): Delta | undefined => {
  // A fresh options object per call, exactly as two separate literals would be.
  const makeTidyOptions = () => ({
    annotationsAsObjects: true,
    noTranslationData: true,
    doNotRemoveInvalidChars: true
  });

  const keysToStrip = [
    "cutsites",
    "orfs",
    "filteredFeatures",
    "size",
    "fromFileUpload",
    "description",
    "materiallyAvailable",
    ...ignoreKeys
  ];

  // Removes everything that must not take part in the comparison (in place).
  /* eslint-disable-next-line @typescript-eslint/no-explicit-any */
  const stripNonDiffableData = (seq: any): void => {
    for (const key of keysToStrip) {
      delete seq[key];
    }
    if (!seq.translations) {
      return;
    }
    forEach(seq.translations, (translation, key) => {
      const keepTranslation =
        !translation.translationType ||
        translation.translationType === "User Created";
      if (keepTranslation) {
        delete translation.aminoAcids;
      } else {
        delete seq.translations[key];
      }
    });
  };

  // Tidy both inputs first, then strip both, in that order.
  /* eslint-disable-next-line @typescript-eslint/no-explicit-any */
  const cleanedOldData: any = tidyUpSequenceData(oldData, makeTidyOptions());
  /* eslint-disable-next-line @typescript-eslint/no-explicit-any */
  const cleanedNewData: any = tidyUpSequenceData(newData, makeTidyOptions());

  stripNonDiffableData(cleanedOldData);
  stripNonDiffableData(cleanedNewData);

  return diff(cleanedOldData, cleanedNewData);
};
58
+
59
/**
 * Applies a delta (as produced by `getDiffFromSeqs`) to a sequence.
 *
 * The sequence is deep-cloned and tidied before patching, so `oldData` itself
 * is not handed to `patch`. Keys listed in `ignoreKeys` are removed from the
 * delta before it is applied; this is done on a deep copy so the caller's
 * `diffData` object is left untouched.
 *
 * @param oldData - The sequence to patch.
 * @param diffData - The delta to apply. An empty/undefined delta is passed
 *   straight through to `patch` without attempting to strip keys from it.
 * @param options - See {@link DiffOptions}.
 * @returns The result of `patch`, typed as `SequenceData`.
 */
const patchSeqWithDiff = (
  oldData: SequenceData,
  diffData: Delta,
  { ignoreKeys = [] }: DiffOptions = {}
): SequenceData => {
  let deltaToApply: Delta = diffData;
  if (diffData && ignoreKeys.length > 0) {
    // Copy before deleting: the delta belongs to the caller and may be reused
    // (e.g. reversed or re-applied) after this call.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const trimmedDelta: any = cloneDeep(diffData);
    ignoreKeys.forEach(k => {
      delete trimmedDelta[k];
    });
    deltaToApply = trimmedDelta;
  }

  const tidyOld = tidyUpSequenceData(cloneDeep(oldData), {
    annotationsAsObjects: true,
    doNotRemoveInvalidChars: true
  });

  return patch(tidyOld, deltaToApply) as SequenceData;
};
75
+
76
/**
 * Inverts a delta by delegating to jsondiffpatch's `reverse`.
 *
 * @param diffData - The delta to invert.
 * @returns The value returned by `reverse` for that delta.
 */
const reverseSeqDiff = (diffData: Delta): Delta | undefined =>
  reverse(diffData);
79
+
80
+ export { getDiffFromSeqs, patchSeqWithDiff, reverseSeqDiff };
@@ -0,0 +1,16 @@
1
+ import { RestrictionEnzyme } from "./types";
2
+
3
/**
 * Tells whether an enzyme cuts beyond its recognition site.
 *
 * Returns `true` only when both snip offsets are truthy (so neither is
 * missing nor `0`) and at least one of them is greater than the length of
 * `enzyme.site`; returns `false` otherwise.
 *
 * @param enzyme - The restriction enzyme to inspect.
 */
export default function doesEnzymeChopOutsideOfRecognitionSite(
  enzyme: RestrictionEnzyme
): boolean {
  const { topSnipOffset, bottomSnipOffset } = enzyme;

  // Both offsets must be present and non-zero before the site is consulted.
  if (!topSnipOffset || !bottomSnipOffset) {
    return false;
  }

  const siteLength = enzyme.site.length;
  return topSnipOffset > siteLength || bottomSnipOffset > siteLength;
}
@@ -17,7 +17,7 @@ const genbankFeatureTypes = [
17
17
  { name: "regulatory", color: "#3F6C51" },
18
18
  { name: "SecStr", color: "#7B4B94" },
19
19
  { name: "Site", color: "#7D82B8" },
20
- { name: "telomere", color: "DE9151" },
20
+ { name: "telomere", color: "#DE9151" },
21
21
  { name: "tmRNA", color: "#B7E3CC" },
22
22
  { name: "unsure", color: "#C4FFB2" },
23
23
  { name: "V_segment", color: "#D6F7A3" },
@@ -0,0 +1,167 @@
1
+ import { get, keyBy, filter } from "lodash-es";
2
+
3
/** One feature type entry: a name, its display colour, and optional flags. */
interface FeatureType {
  /** Feature type name; used as the key when the entries are indexed. */
  name: string;
  /** Colour string associated with the feature type. */
  color: string;
  /** When truthy, the type is left out unless `includeHidden` is requested. */
  isHidden?: boolean;
  /** Set to `true` on entries taken from `genbankFeatureTypes` when the merged map is built. */
  isGenbankStandardType?: boolean;
  /** Set to `true` on an override whose name matches a built-in type. */
  isOverridden?: boolean;
  /** Set to `true` on an override whose name matches no built-in type. */
  isCustomType?: boolean;
}
11
+
12
/**
 * Built-in feature types with their colours.
 *
 * Kept as a compact `[name, color]` table and expanded into
 * `{ name, color }` objects, in the listed order.
 */
const genbankFeatureTypes: FeatureType[] = (
  [
    ["-10_signal", "#4ECDC4"],
    ["-35_signal", "#F7FFF7"],
    ["3'clip", "#FF6B6B"],
    ["3'UTR", "#FFE66D"],
    ["5'clip", "#3E517A"],
    ["5'UTR", "#BBBBBB"],
    ["D-loop", "#F13C73"],
    ["assembly_gap", "#DE9151"],
    ["centromere", "#F34213"],
    ["Het", "#BC5D2E"],
    ["mobile_element", "#6DB1BF"],
    ["ncRNA", "#FFEAEC"],
    ["proprotein", "#F39A9D"],
    ["regulatory", "#3F6C51"],
    ["SecStr", "#7B4B94"],
    ["Site", "#7D82B8"],
    ["telomere", "#DE9151"],
    ["tmRNA", "#B7E3CC"],
    ["unsure", "#C4FFB2"],
    ["V_segment", "#D6F7A3"],
    ["allele", "#D86D6D"],
    ["attenuator", "#6B7F9C"],
    ["C_region", "#B5D89D"],
    ["CAAT_signal", "#E9CD98"],
    ["CDS", "#EF6500"],
    ["conserved", "#A3A5F0"],
    ["D_segment", "#C060F7"],
    ["default", "#CCCCCC"],
    ["enhancer", "#38F872"],
    ["exon", "#95F844"],
    ["gap", "#F7D43C"],
    ["GC_signal", "#861F1F"],
    ["gene", "#684E27"],
    ["iDNA", "#A59B41"],
    ["intron", "#52963E"],
    ["J_region", "#369283"],
    ["LTR", "#31748F"],
    ["m_rna", "#FFFF00"],
    ["mat_peptide", "#353E8F"],
    ["misc_binding", "#006FEF"],
    ["misc_difference", "#5A368A"],
    ["misc_feature", "#006FEF"],
    ["misc_marker", "#8DCEB1"],
    ["misc_part", "#006FEF"],
    ["misc_recomb", "#DD97B4"],
    ["misc_RNA", "#BD0101"],
    ["misc_signal", "#FF9A04"],
    ["misc_structure", "#B3FF00"],
    ["modified_base", "#00F7FF"],
    ["mRNA", "#FFD900"],
    ["N_region", "#AE00FF"],
    ["old_sequence", "#F0A7FF"],
    ["operator", "#63004D"],
    ["operon", "#000653"],
    ["oriT", "#580000"],
    ["plasmid", "#00635E"],
    ["polyA_signal", "#BBBBBB"],
    ["polyA_site", "#003328"],
    ["precursor_RNA", "#443200"],
    ["prim_transcript", "#665E4C"],
    ["primer_bind", "#53d969"],
    ["promoter", "#31B440"],
    ["protein_bind", "#2E2E2E"],
    ["protein_domain", "#4D4B4B"],
    ["protein", "#696969"],
    ["RBS", "#BDFFCB"],
    ["rep_origin", "#878787"],
    ["repeat_region", "#966363"],
    ["repeat_unit", "#A16D8D"],
    ["rRNA", "#9BF0FF"],
    ["s_mutation", "#70A2FF"],
    ["S_region", "#FF74A9"],
    ["satellite", "#164E64"],
    ["scRNA", "#A057FF"],
    ["sig_peptide", "#2FFF8D"],
    ["snoRNA", "#296B14"],
    ["snRNA", "#A16249"],
    ["source", "#0B17BD"],
    ["start", "#D6A336"],
    ["stem_loop", "#67069E"],
    ["stop", "#D44FC9"],
    ["STS", "#597FE7"],
    ["tag", "#E419DA"],
    ["TATA_signal", "#EB2B2B"],
    ["terminator", "#F51600"],
    ["transit_peptide", "#24D491"],
    ["transposon", "#B6E436"],
    ["tRNA", "#D1456F"],
    ["V_region", "#7B5EE7"],
    ["variation", "#2EE455"]
  ] as Array<[string, string]>
).map(([name, color]) => ({ name, color }));
104
+
105
/**
 * Builds a name-keyed map of feature types: the built-in
 * `genbankFeatureTypes` (each flagged `isGenbankStandardType`) merged with
 * any overrides found under `tg_featureTypeOverrides` on `window` or, failing
 * that, on `global`.
 *
 * An override whose name matches a built-in type is layered over it and
 * flagged `isOverridden`; any other override is flagged `isCustomType`.
 * Overrides win over built-ins in the returned map.
 */
const getMergedFeatureMap = (): Record<string, FeatureType> => {
  const standardByName = keyBy(
    genbankFeatureTypes.map(feat => ({ ...feat, isGenbankStandardType: true })),
    "name"
  );

  // Look on `window` first; only consult `global` when that yields nothing truthy.
  let featureOverrides: FeatureType[] = [];
  const fromWindow =
    typeof window !== "undefined" && get(window, "tg_featureTypeOverrides");
  if (fromWindow) {
    featureOverrides = fromWindow as unknown as FeatureType[];
  } else {
    const fromGlobal =
      typeof global !== "undefined" && get(global, "tg_featureTypeOverrides");
    if (fromGlobal) {
      featureOverrides = fromGlobal as unknown as FeatureType[];
    }
  }

  const overridesByName = keyBy(
    featureOverrides.map(override => {
      const standard = standardByName[override.name];
      const originFlag = standard
        ? { isOverridden: true }
        : { isCustomType: true };
      return { ...standard, ...override, ...originFlag };
    }),
    "name"
  );

  return { ...standardByName, ...overridesByName };
};
135
+
136
/**
 * Returns a `name -> color` lookup built from the merged feature map.
 *
 * @param options.includeHidden - When truthy, entries with `isHidden` set are
 *   included too; otherwise they are left out.
 */
const getFeatureToColorMap = ({
  includeHidden
}: { includeHidden?: boolean } = {}): Record<string, string> => {
  const colorByName: Record<string, string> = {};
  const selected = filter(
    getMergedFeatureMap(),
    feat => includeHidden || !feat.isHidden
  );
  for (const feat of selected) {
    colorByName[feat.name] = feat.color;
  }
  return colorByName;
};
147
+
148
/**
 * Lists the names of the feature types in the merged feature map.
 *
 * @param options.includeHidden - When truthy, entries with `isHidden` set are
 *   listed too; otherwise they are left out.
 */
const getFeatureTypes = ({
  includeHidden
}: { includeHidden?: boolean } = {}): string[] => {
  const selected = filter(
    getMergedFeatureMap(),
    feat => includeHidden || !feat.isHidden
  );
  return selected.map(feat => feat.name);
};
154
+
155
+ export { genbankFeatureTypes };
156
+
157
/**
 * Returns a `name -> color` lookup built from `genbankFeatureTypes` only
 * (no overrides are consulted).
 */
export function getGenbankFeatureToColorMap(): Record<string, string> {
  const colorByName: Record<string, string> = {};
  for (const feat of genbankFeatureTypes) {
    colorByName[feat.name] = feat.color;
  }
  return colorByName;
}
164
+
165
+ export { getFeatureToColorMap };
166
+ export { getFeatureTypes };
167
+ export { getMergedFeatureMap };
@@ -0,0 +1,153 @@
1
+ import { debounce, uniq } from "lodash-es";
2
+ import {
3
+ ambiguous_dna_letters,
4
+ ambiguous_rna_letters,
5
+ extended_protein_letters
6
+ } from "./bioData";
7
+
8
// Warnings collected since the last toast was flushed.
let allWarnings: string[] = [];

// Shows the collected warnings (de-duplicated, newline separated) through
// `window.toastr` when it is available, then empties the buffer either way.
const flushWarningsToToast = () => {
  const toastr =
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    typeof window !== "undefined" ? (window as any).toastr : undefined;
  if (toastr && allWarnings.length) {
    toastr.warning(uniq(allWarnings).join("\n"));
  }
  allWarnings = [];
};

// Debounced by 200ms.
const makeToast = debounce(flushWarningsToToast, 200);

// Queues warnings and (re)schedules the debounced toast: any pending
// invocation is cancelled before a new one is requested.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function showWarnings(warnings: any) {
  allWarnings = allWarnings.concat(warnings);
  makeToast.cancel();
  makeToast();
}
29
+
30
/** Options accepted by `filterSequenceString` and its alphabet helpers. */
interface FilterSequenceStringOptions {
  /** Extra characters to accept on top of the alphabet selected by the flags below. */
  additionalValidChars?: string;
  /** Select the oligo alphabet. */
  isOligo?: boolean;
  /** Sequence name; when set it prefixes the "invalid character(s)" warning. */
  name?: string;
  /** Select the protein alphabet (checked before the other flags). */
  isProtein?: boolean;
  /** Select the RNA alphabet; "t" is then replaced with "u". */
  isRna?: boolean;
  /** Select the mixed RNA/DNA alphabet. */
  isMixedRnaAndDna?: boolean;
  /** Any further properties are allowed and ignored by the helpers in this file. */
  [key: string]: unknown;
}
39
+
40
/**
 * Sanitizes a sequence string against the alphabet selected by the options.
 *
 * Each character is handled case-insensitively:
 *  - if `getReplaceChars` maps it, the replacement is emitted (upper-cased
 *    when the input character was upper case);
 *  - otherwise, if it belongs to `getAcceptedChars` or `additionalValidChars`,
 *    it is kept as-is;
 *  - otherwise it is dropped and reported.
 *
 * The resulting warnings are also forwarded to `showWarnings`.
 *
 * @param sequenceString - The raw sequence (defaults to "").
 * @param options - See {@link FilterSequenceStringOptions}.
 * @returns A tuple `[sanitizedSequence, warnings]`.
 */
export default function filterSequenceString(
  sequenceString = "",
  {
    additionalValidChars = "",
    isOligo,
    name,
    isProtein,
    isRna,
    isMixedRnaAndDna
  }: FilterSequenceStringOptions = {}
): [string, string[]] {
  const alphabetFlags = { isOligo, isProtein, isRna, isMixedRnaAndDna };
  const acceptedChars = getAcceptedChars(alphabetFlags);
  const replaceChars = getReplaceChars(alphabetFlags);

  // Input letters are lower-cased before lookup, so the extra characters must
  // be lower-cased too or upper-case entries could never match. They are
  // appended verbatim: joining them with "\" (a leftover from a regex-based
  // implementation) would make a literal backslash an accepted character.
  const validChars = `${acceptedChars}${additionalValidChars.toLowerCase()}`;

  let sanitizedVal = "";
  const invalidChars: string[] = [];
  const warnings: string[] = [];
  const replaceCount: Record<string, number> = {};

  for (const letter of sequenceString.split("")) {
    const lowerLetter = letter.toLowerCase();
    const replacement = replaceChars[lowerLetter];
    if (replacement) {
      replaceCount[lowerLetter] = (replaceCount[lowerLetter] || 0) + 1;
      // Preserve the case of the character being replaced.
      const isUpper = lowerLetter !== letter;
      sanitizedVal += isUpper ? replacement.toUpperCase() : replacement;
    } else if (validChars.includes(lowerLetter)) {
      sanitizedVal += letter;
    } else {
      invalidChars.push(letter);
    }
  }

  // One warning per replaced letter, with a count when it happened repeatedly.
  for (const letter of Object.keys(replaceCount)) {
    const count = replaceCount[letter];
    const timesSuffix = count > 1 ? ` ${count} times` : "";
    warnings.push(
      `Replaced "${letter}" with "${replaceChars[letter]}"${timesSuffix}`
    );
  }

  if (invalidChars.length > 0) {
    const prefix = name ? `Sequence ${name}: ` : "";
    // Report each distinct invalid character once, capped at 100 entries.
    const listed = uniq(invalidChars)
      .map(c => (c === " " ? "space" : c))
      .slice(0, 100)
      .join(", ");
    warnings.push(
      `${prefix}Invalid character(s) detected and removed: ${listed} `
    );
  }

  showWarnings(warnings);

  return [sanitizedVal, warnings];
}
111
+
112
/**
 * Returns the lower-cased set of characters accepted for a sequence kind.
 *
 * Precedence: protein, then oligo / RNA (RNA letters plus "t"), then
 * everything else. Mixed RNA/DNA and plain DNA both get the RNA letters
 * followed by the DNA letters, so `isMixedRnaAndDna` does not change the
 * result.
 */
export function getAcceptedChars({
  isOligo,
  isProtein,
  isRna
}: FilterSequenceStringOptions = {}): string {
  if (isProtein) {
    return `${extended_protein_letters.toLowerCase()}`;
  }
  if (isOligo || isRna) {
    return ambiguous_rna_letters.toLowerCase() + "t";
  }
  // mixed RNA/DNA, or just plain old dna
  return (
    ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
  );
}
131
/**
 * Returns the lower-case character replacements applied for a sequence kind.
 *
 * Only RNA (when neither `isProtein` nor `isOligo` is set) has a replacement:
 * "t" becomes "u". Every other kind gets an empty map.
 */
export function getReplaceChars({
  isOligo,
  isProtein,
  isRna
}: FilterSequenceStringOptions = {}): Record<string, string> {
  // Protein and oligo take precedence over the RNA flag.
  const isPlainRna = !isProtein && !isOligo && Boolean(isRna);
  if (isPlainRna) {
    return { t: "u" };
  }
  return {};
}
149
+
150
/**
 * Filters `s` as RNA: calls `filterSequenceString` with `isRna` forced to
 * `true` and returns only the sanitized string (the warnings are discarded).
 */
export const filterRnaString = (
  s: string,
  o: FilterSequenceStringOptions
): string => {
  const [sanitized] = filterSequenceString(s, { ...o, isRna: true });
  return sanitized;
};
@@ -0,0 +1,58 @@
1
/** A single hit reported by `findApproxMatches`. */
export interface ApproxMatch {
  /** 0-based start position of the hit within the target sequence. */
  index: number;
  /** The stretch of the target that was compared (wraps around when circular). */
  match: string;
  /** 0-based offsets into the search sequence where the characters differ. */
  mismatchPositions: number[];
  /** Always `mismatchPositions.length`; kept for backwards compatibility. */
  numMismatches: number;
}

/**
 * Finds approximate matches of a search sequence within a target sequence.
 *
 * Every start position is compared character by character (case-sensitive);
 * a position is reported when it has at most `maxMismatches` differing
 * characters.
 *
 * @param searchSeq - The sequence to search for.
 * @param targetSeq - The sequence to search within.
 * @param maxMismatches - Maximum number of mismatches allowed.
 * @param circular - Whether to treat the target as circular (default: false).
 *   When circular, every position of the target is a candidate start and the
 *   compared stretch wraps around the end as often as needed, so a search
 *   sequence longer than the target is compared against full-length windows.
 * @returns The hits, ordered by ascending `index`.
 */
export default function findApproxMatches(
  searchSeq: string,
  targetSeq: string,
  maxMismatches: number,
  circular = false
): ApproxMatch[] {
  const matches: ApproxMatch[] = [];
  const searchLen = searchSeq.length;
  const targetLen = targetSeq.length;

  // For circular targets, append enough wrapped-around characters that a
  // window starting at the last position is still `searchLen` long. The
  // target is repeated so this also holds when the search is longer than it.
  let haystack = targetSeq;
  if (circular && targetLen > 0 && searchLen > 1) {
    const overhang = searchLen - 1;
    haystack += targetSeq
      .repeat(Math.ceil(overhang / targetLen))
      .slice(0, overhang);
  }
  const startCount = circular ? targetLen : targetLen - searchLen + 1;

  for (let start = 0; start < startCount; start++) {
    const candidate = haystack.slice(start, start + searchLen);
    const mismatchPositions: number[] = [];

    for (let offset = 0; offset < searchLen; offset++) {
      if (searchSeq[offset] !== candidate[offset]) {
        mismatchPositions.push(offset);
        // No point in scanning further once the budget is exceeded.
        if (mismatchPositions.length > maxMismatches) break;
      }
    }

    if (mismatchPositions.length <= maxMismatches) {
      matches.push({
        index: start,
        match: candidate,
        mismatchPositions,
        numMismatches: mismatchPositions.length // Keep for backwards compatibility
      });
    }
  }

  return matches;
}
@@ -0,0 +1,43 @@
1
+ import { normalizeRange, Range } from "@teselagen/range-utils";
2
/**
 * Finds the occurrence of `overlapSequence` in `sequenceToSearch` whose start
 * is nearest to `positionStart` and returns it as a range.
 *
 * Matching is case-insensitive and `overlapSequence` is treated as literal
 * text. Unless `isLinear` is truthy the sequence is treated as circular:
 * occurrences spanning the end/start junction are found and distance is
 * measured the short way around. When two occurrences are equally near, the
 * later one wins.
 *
 * @param sequenceToSearch - The sequence to search within.
 * @param overlapSequence - The sequence to look for.
 * @param positionStart - Position to measure distance from (default 0).
 * @param isLinear - When truthy, do not search or measure across the origin.
 * @returns The range passed through `normalizeRange`, or `null` when
 *   `overlapSequence` is empty, longer than `sequenceToSearch`, or not found.
 */
function findNearestRangeOfSequenceOverlapToPosition(
  sequenceToSearch: string,
  overlapSequence: string,
  positionStart = 0,
  isLinear?: boolean
): Range | null {
  const seqLen = sequenceToSearch.length;
  // An empty pattern matches everywhere without advancing, which would make
  // the scan below loop forever; there is nothing meaningful to locate.
  if (overlapSequence.length === 0 || seqLen < overlapSequence.length) {
    return null;
  }

  // Escape regex metacharacters so the overlap is matched literally (the
  // range end below is derived from its literal length).
  const literalPattern = overlapSequence.replace(
    /[.*+?^${}()|[\]\\]/g,
    "\\$&"
  );
  const regex = new RegExp(literalPattern, "ig");
  // Doubling the sequence lets matches that wrap around the origin be found.
  const haystack = isLinear
    ? sequenceToSearch
    : sequenceToSearch + sequenceToSearch;

  let result: RegExpExecArray | null;
  let index: number | undefined;
  let distance = Infinity;
  while ((result = regex.exec(haystack)) !== null) {
    if (result.index > seqLen) break;
    const linearDistance = Math.abs(result.index - positionStart);
    const newDistance = isLinear
      ? linearDistance //if linear, don't check around the origin
      : Math.min(linearDistance, Math.abs(linearDistance - seqLen));
    if (newDistance > distance) {
      // Linear: matches come in ascending order, so once the distance grows
      // it keeps growing. Circular: a later match can be nearer again via
      // the origin, so keep scanning.
      if (isLinear) break;
      continue;
    }
    index = result.index;
    distance = newDistance;
  }

  if (index === undefined) {
    return null;
  }

  //index is the closest range start
  return normalizeRange(
    {
      start: index,
      end: index + overlapSequence.length - 1
    },
    seqLen
  );
}
43
+ export default findNearestRangeOfSequenceOverlapToPosition;
@@ -4,8 +4,13 @@ export default function findOrfsInPlasmid(
4
4
  sequence,
5
5
  circular,
6
6
  minimumOrfSize,
7
- useAdditionalOrfStartCodons
7
+ useAdditionalOrfStartCodons,
8
+ isProteinOrOligo
8
9
  ) {
10
+ if (isProteinOrOligo) {
11
+ // we do not find ORFs in protein/oligo sequences
12
+ return [];
13
+ }
9
14
  //tnr, we should do the parsing down of the orfs immediately after they're returned from this sequence
10
15
  // const orfs1Forward = eliminateCircularOrfsThatOverlapWithNonCircularOrfs(getOrfsFromSequence(0, doubleForwardSequence, minimumOrfSize, true), maxLength);
11
16
  const forwardOrfs = getOrfsFromSequence({
@@ -0,0 +1,31 @@
1
+ import getOrfsFromSequence, { Orf } from "./getOrfsFromSequence";
2
+
3
/**
 * Collects ORFs on both strands of a sequence.
 *
 * Calls `getOrfsFromSequence` once with `forward: true` and once with
 * `forward: false`, and returns the forward results followed by the reverse
 * results. Protein / oligo sequences yield an empty list without searching.
 *
 * @param sequence - The sequence to scan.
 * @param circular - Passed through to `getOrfsFromSequence`.
 * @param minimumOrfSize - Passed through to `getOrfsFromSequence`.
 * @param useAdditionalOrfStartCodons - Passed through to `getOrfsFromSequence`.
 * @param isProteinOrOligo - When truthy, no ORF search is performed.
 */
export default function findOrfsInPlasmid(
  sequence: string,
  circular: boolean,
  minimumOrfSize: number,
  useAdditionalOrfStartCodons: boolean,
  isProteinOrOligo: boolean
): Orf[] {
  // we do not find ORFs in protein/oligo sequences
  if (isProteinOrOligo) return [];

  //tnr, we should do the parsing down of the orfs immediately after they're returned from this sequence
  const sharedOptions = {
    sequence,
    minimumOrfSize,
    circular,
    useAdditionalOrfStartCodons
  };
  const forwardOrfs = getOrfsFromSequence({ ...sharedOptions, forward: true });
  const reverseOrfs = getOrfsFromSequence({ ...sharedOptions, forward: false });

  return forwardOrfs.concat(reverseOrfs);
}