@teselagen/sequence-utils 0.3.38-beta.3 → 0.3.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/DNAComplementMap.d.ts +1 -1
  2. package/addGapsToSeqReads.d.ts +3 -16
  3. package/adjustAnnotationsToInsert.d.ts +1 -2
  4. package/adjustBpsToReplaceOrInsert.d.ts +1 -2
  5. package/aliasedEnzymesByName.d.ts +1 -37
  6. package/aminoAcidToDegenerateDnaMap.d.ts +31 -1
  7. package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
  8. package/annotateSingleSeq.d.ts +4 -5
  9. package/annotationTypes.d.ts +2 -2
  10. package/autoAnnotate.d.ts +8 -17
  11. package/bioData.d.ts +58 -10
  12. package/calculateEndStability.d.ts +1 -1
  13. package/calculateNebTa.d.ts +1 -6
  14. package/calculateNebTm.d.ts +4 -6
  15. package/calculatePercentGC.d.ts +1 -1
  16. package/calculateSantaLuciaTm.d.ts +114 -28
  17. package/calculateTm.d.ts +1 -13
  18. package/computeDigestFragments.d.ts +24 -30
  19. package/condensePairwiseAlignmentDifferences.d.ts +1 -1
  20. package/convertAACaretPositionOrRangeToDna.d.ts +1 -2
  21. package/convertDnaCaretPositionOrRangeToAA.d.ts +1 -2
  22. package/cutSequenceByRestrictionEnzyme.d.ts +1 -2
  23. package/defaultEnzymesByName.d.ts +1 -2
  24. package/degenerateDnaToAminoAcidMap.d.ts +1 -1
  25. package/degenerateRnaToAminoAcidMap.d.ts +1 -1
  26. package/deleteSequenceDataAtRange.d.ts +1 -2
  27. package/diffUtils.d.ts +7 -9
  28. package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +1 -2
  29. package/featureTypesAndColors.d.ts +6 -19
  30. package/filterSequenceString.d.ts +10 -14
  31. package/findApproxMatches.d.ts +1 -7
  32. package/findNearestRangeOfSequenceOverlapToPosition.d.ts +1 -2
  33. package/findOrfsInPlasmid.d.ts +11 -2
  34. package/findSequenceMatches.d.ts +1 -11
  35. package/generateAnnotations.d.ts +1 -2
  36. package/generateSequenceData.d.ts +13 -8
  37. package/getAllInsertionsInSeqReads.d.ts +1 -11
  38. package/getAminoAcidDataForEachBaseOfDna.d.ts +5 -6
  39. package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
  40. package/getAminoAcidStringFromSequenceString.d.ts +1 -3
  41. package/getCodonRangeForAASliver.d.ts +4 -3
  42. package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  43. package/getComplementSequenceAndAnnotations.d.ts +1 -5
  44. package/getComplementSequenceString.d.ts +1 -1
  45. package/getCutsiteType.d.ts +1 -2
  46. package/getCutsitesFromSequence.d.ts +1 -2
  47. package/getDegenerateDnaStringFromAAString.d.ts +1 -1
  48. package/getDegenerateRnaStringFromAAString.d.ts +1 -1
  49. package/getDigestFragmentsForCutsites.d.ts +1 -4
  50. package/getDigestFragmentsForRestrictionEnzymes.d.ts +1 -8
  51. package/getInsertBetweenVals.d.ts +1 -2
  52. package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +1 -2
  53. package/getOrfsFromSequence.d.ts +11 -17
  54. package/getOverlapBetweenTwoSequences.d.ts +1 -2
  55. package/getPossiblePartsFromSequenceAndEnzymes.d.ts +1 -18
  56. package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
  57. package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  58. package/getReverseComplementAnnotation.d.ts +1 -11
  59. package/getReverseComplementSequenceAndAnnotations.d.ts +1 -5
  60. package/getReverseComplementSequenceString.d.ts +1 -1
  61. package/getReverseSequenceString.d.ts +1 -1
  62. package/getSequenceDataBetweenRange.d.ts +1 -9
  63. package/getVirtualDigest.d.ts +10 -11
  64. package/guessIfSequenceIsDnaAndNotProtein.d.ts +1 -5
  65. package/index.cjs +491 -728
  66. package/index.d.ts +5 -8
  67. package/index.js +491 -728
  68. package/index.umd.cjs +491 -728
  69. package/insertGapsIntoRefSeq.d.ts +1 -2
  70. package/insertSequenceDataAtPositionOrRange.d.ts +1 -10
  71. package/isEnzymeType2S.d.ts +1 -2
  72. package/mapAnnotationsToRows.d.ts +1 -9
  73. package/package.json +9 -12
  74. package/prepareCircularViewData.d.ts +1 -2
  75. package/prepareRowData.d.ts +3 -7
  76. package/proteinAlphabet.d.ts +1 -1
  77. package/rotateBpsToPosition.d.ts +1 -1
  78. package/rotateSequenceDataToPosition.d.ts +1 -3
  79. package/shiftAnnotationsByLen.d.ts +3 -4
  80. package/src/autoAnnotate.test.js +1 -0
  81. package/src/getSequenceDataBetweenRange.js +11 -2
  82. package/src/getSequenceDataBetweenRange.test.js +42 -0
  83. package/src/prepareRowData_output1.json +0 -1
  84. package/threeLetterSequenceStringToAminoAcidMap.d.ts +921 -11
  85. package/tidyUpAnnotation.d.ts +11 -13
  86. package/tidyUpSequenceData.d.ts +1 -15
  87. package/src/DNAComplementMap.ts +0 -32
  88. package/src/addGapsToSeqReads.ts +0 -436
  89. package/src/adjustAnnotationsToInsert.ts +0 -20
  90. package/src/adjustBpsToReplaceOrInsert.ts +0 -73
  91. package/src/aliasedEnzymesByName.ts +0 -7366
  92. package/src/aminoAcidToDegenerateDnaMap.ts +0 -32
  93. package/src/aminoAcidToDegenerateRnaMap.ts +0 -32
  94. package/src/annotateSingleSeq.ts +0 -37
  95. package/src/annotationTypes.ts +0 -23
  96. package/src/autoAnnotate.ts +0 -290
  97. package/src/bioData.ts +0 -65
  98. package/src/calculateEndStability.ts +0 -91
  99. package/src/calculateNebTa.ts +0 -46
  100. package/src/calculateNebTm.ts +0 -132
  101. package/src/calculatePercentGC.ts +0 -3
  102. package/src/calculateSantaLuciaTm.ts +0 -184
  103. package/src/calculateTm.ts +0 -242
  104. package/src/computeDigestFragments.ts +0 -238
  105. package/src/condensePairwiseAlignmentDifferences.ts +0 -85
  106. package/src/convertAACaretPositionOrRangeToDna.ts +0 -28
  107. package/src/convertDnaCaretPositionOrRangeToAA.ts +0 -28
  108. package/src/cutSequenceByRestrictionEnzyme.ts +0 -345
  109. package/src/defaultEnzymesByName.ts +0 -280
  110. package/src/degenerateDnaToAminoAcidMap.ts +0 -5
  111. package/src/degenerateRnaToAminoAcidMap.ts +0 -5
  112. package/src/deleteSequenceDataAtRange.ts +0 -13
  113. package/src/diffUtils.ts +0 -80
  114. package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +0 -16
  115. package/src/featureTypesAndColors.ts +0 -167
  116. package/src/filterSequenceString.ts +0 -153
  117. package/src/findApproxMatches.ts +0 -58
  118. package/src/findNearestRangeOfSequenceOverlapToPosition.ts +0 -43
  119. package/src/findOrfsInPlasmid.ts +0 -31
  120. package/src/findSequenceMatches.ts +0 -154
  121. package/src/generateAnnotations.ts +0 -39
  122. package/src/generateSequenceData.ts +0 -212
  123. package/src/getAllInsertionsInSeqReads.ts +0 -100
  124. package/src/getAminoAcidDataForEachBaseOfDna.ts +0 -305
  125. package/src/getAminoAcidFromSequenceTriplet.ts +0 -27
  126. package/src/getAminoAcidStringFromSequenceString.ts +0 -36
  127. package/src/getCodonRangeForAASliver.ts +0 -73
  128. package/src/getComplementAminoAcidStringFromSequenceString.ts +0 -10
  129. package/src/getComplementSequenceAndAnnotations.ts +0 -25
  130. package/src/getComplementSequenceString.ts +0 -23
  131. package/src/getCutsiteType.ts +0 -18
  132. package/src/getCutsitesFromSequence.ts +0 -22
  133. package/src/getDegenerateDnaStringFromAAString.ts +0 -15
  134. package/src/getDegenerateRnaStringFromAAString.ts +0 -15
  135. package/src/getDigestFragmentsForCutsites.ts +0 -126
  136. package/src/getDigestFragmentsForRestrictionEnzymes.ts +0 -50
  137. package/src/getInsertBetweenVals.ts +0 -31
  138. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +0 -40
  139. package/src/getMassOfAaString.ts +0 -29
  140. package/src/getOrfsFromSequence.ts +0 -132
  141. package/src/getOverlapBetweenTwoSequences.ts +0 -30
  142. package/src/getPossiblePartsFromSequenceAndEnzymes.ts +0 -149
  143. package/src/getReverseAminoAcidStringFromSequenceString.ts +0 -22
  144. package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +0 -10
  145. package/src/getReverseComplementAnnotation.ts +0 -33
  146. package/src/getReverseComplementSequenceAndAnnotations.ts +0 -46
  147. package/src/getReverseComplementSequenceString.ts +0 -18
  148. package/src/getReverseSequenceString.ts +0 -12
  149. package/src/getSequenceDataBetweenRange.ts +0 -154
  150. package/src/getVirtualDigest.ts +0 -139
  151. package/src/guessIfSequenceIsDnaAndNotProtein.ts +0 -39
  152. package/src/index.test.ts +0 -43
  153. package/src/index.ts +0 -111
  154. package/src/insertGapsIntoRefSeq.ts +0 -43
  155. package/src/insertSequenceDataAtPosition.ts +0 -2
  156. package/src/insertSequenceDataAtPositionOrRange.ts +0 -328
  157. package/src/isEnzymeType2S.ts +0 -5
  158. package/src/mapAnnotationsToRows.ts +0 -256
  159. package/src/prepareCircularViewData.ts +0 -24
  160. package/src/prepareRowData.ts +0 -61
  161. package/src/proteinAlphabet.ts +0 -271
  162. package/src/rotateBpsToPosition.ts +0 -12
  163. package/src/rotateSequenceDataToPosition.ts +0 -54
  164. package/src/shiftAnnotationsByLen.ts +0 -24
  165. package/src/threeLetterSequenceStringToAminoAcidMap.ts +0 -198
  166. package/src/tidyUpAnnotation.ts +0 -205
  167. package/src/tidyUpSequenceData.ts +0 -213
  168. package/src/types.ts +0 -109
  169. package/types.d.ts +0 -105
package/src/diffUtils.ts DELETED
@@ -1,80 +0,0 @@
1
- import { cloneDeep, forEach } from "lodash-es";
2
- import { diff, patch, reverse, Delta } from "jsondiffpatch";
3
- import { SequenceData } from "./types";
4
-
5
- import tidyUpSequenceData from "./tidyUpSequenceData";
6
-
7
/** Options shared by the sequence diff/patch helpers in this module. */
interface DiffOptions {
  /**
   * Top-level keys to leave out: removed from both sequences before diffing,
   * and removed from the delta before patching.
   */
  ignoreKeys?: string[];
}
10
-
11
/**
 * Computes a jsondiffpatch delta between two sequences.
 *
 * Both inputs are first passed through `tidyUpSequenceData` (annotations as
 * objects, no translation data, invalid characters kept). A fixed list of
 * keys plus any `ignoreKeys` is then deleted from each tidied copy.
 * Translations whose `translationType` is set to something other than
 * "User Created" are dropped; the remaining ones lose their `aminoAcids`.
 *
 * @param oldData - Sequence before the change.
 * @param newData - Sequence after the change.
 * @param options - Optional `ignoreKeys` to exclude from the comparison.
 * @returns Whatever `diff` returns for the two cleaned sequences.
 */
const getDiffFromSeqs = (
  oldData: SequenceData,
  newData: SequenceData,
  { ignoreKeys = [] }: DiffOptions = {}
): Delta | undefined => {
  /* eslint-disable-next-line @typescript-eslint/no-explicit-any */
  const tidyForDiff = (seqData: SequenceData): any =>
    tidyUpSequenceData(seqData, {
      annotationsAsObjects: true,
      noTranslationData: true,
      doNotRemoveInvalidChars: true
    });

  const cleanedOldData = tidyForDiff(oldData);
  const cleanedNewData = tidyForDiff(newData);

  const keysToDrop = [
    "cutsites",
    "orfs",
    "filteredFeatures",
    "size",
    "fromFileUpload",
    "description",
    "materiallyAvailable",
    ...ignoreKeys
  ];

  for (const seq of [cleanedOldData, cleanedNewData]) {
    for (const key of keysToDrop) {
      delete seq[key];
    }
    if (!seq.translations) {
      continue;
    }
    forEach(seq.translations, (translation, key) => {
      const isNotUserCreated =
        translation.translationType &&
        translation.translationType !== "User Created";
      if (isNotUserCreated) {
        delete seq.translations[key];
      } else {
        delete translation.aminoAcids;
      }
    });
  }

  return diff(cleanedOldData, cleanedNewData);
};
58
-
59
/**
 * Applies a delta to a sequence and returns the patched result.
 *
 * Every key in `ignoreKeys` is deleted from `diffData` itself (the caller's
 * delta object is modified in place). The delta is then applied to a tidied
 * deep clone of `oldData`, so `oldData` is not passed to `patch` directly.
 *
 * @param oldData - Sequence to patch; it is cloned before tidying.
 * @param diffData - Delta to apply.
 * @param options - Optional `ignoreKeys` to strip from the delta first.
 * @returns The value returned by `patch`, typed as `SequenceData`.
 */
const patchSeqWithDiff = (
  oldData: SequenceData,
  diffData: Delta,
  { ignoreKeys = [] }: DiffOptions = {}
): SequenceData => {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const deltaRecord = diffData as any;
  for (const ignoredKey of ignoreKeys) {
    delete deltaRecord[ignoredKey];
  }

  const tidyOld = tidyUpSequenceData(cloneDeep(oldData), {
    annotationsAsObjects: true,
    doNotRemoveInvalidChars: true
  });

  return patch(tidyOld, diffData) as SequenceData;
};
75
-
76
/**
 * Thin wrapper around jsondiffpatch's `reverse`.
 *
 * @param diffData - Delta to invert.
 * @returns The result of `reverse(diffData)`.
 */
const reverseSeqDiff = (diffData: Delta): Delta | undefined =>
  reverse(diffData);
79
-
80
- export { getDiffFromSeqs, patchSeqWithDiff, reverseSeqDiff };
@@ -1,16 +0,0 @@
1
- import { RestrictionEnzyme } from "./types";
2
-
3
/**
 * Reports whether an enzyme cuts beyond the end of its recognition site.
 *
 * The enzyme is considered to chop outside when EITHER snip offset is larger
 * than the length of `enzyme.site`. A missing offset is treated as 0, so it
 * can never by itself count as outside, but it no longer masks the other
 * strand's offset (previously both offsets had to be truthy, so an offset of
 * 0 or an absent offset forced the answer to `false`).
 *
 * @param enzyme - Enzyme with a `site` string and optional snip offsets.
 * @returns `true` if the top or bottom cut lies past the recognition site.
 */
export default function doesEnzymeChopOutsideOfRecognitionSite(
  enzyme: RestrictionEnzyme
): boolean {
  const siteLength = enzyme.site.length;
  // `|| 0` normalises undefined/null offsets without changing real numbers.
  const topOffset = enzyme.topSnipOffset || 0;
  const bottomOffset = enzyme.bottomSnipOffset || 0;
  return topOffset > siteLength || bottomOffset > siteLength;
}
@@ -1,167 +0,0 @@
1
- import { get, keyBy, filter } from "lodash-es";
2
-
3
/** One feature type entry: a name, its display color, and bookkeeping flags. */
interface FeatureType {
  /** Feature type name; used as the key when entries are merged. */
  name: string;
  /** Color string associated with the type (hex codes in the built-in list). */
  color: string;
  /** When truthy, the type is skipped unless `includeHidden` is requested. */
  isHidden?: boolean;
  /** Set to true by `getMergedFeatureMap` on every built-in GenBank entry. */
  isGenbankStandardType?: boolean;
  /** Set by `getMergedFeatureMap` when an override replaces a built-in entry. */
  isOverridden?: boolean;
  /** Set by `getMergedFeatureMap` when an override has no built-in counterpart. */
  isCustomType?: boolean;
}
11
-
12
/**
 * Built-in feature types and the color assigned to each.
 *
 * Entry order is preserved as-is. Note that a few colors are shared by more
 * than one type (for example "misc_binding", "misc_feature" and "misc_part").
 */
const genbankFeatureTypes: FeatureType[] = [
  { name: "-10_signal", color: "#4ECDC4" },
  { name: "-35_signal", color: "#F7FFF7" },
  { name: "3'clip", color: "#FF6B6B" },
  { name: "3'UTR", color: "#FFE66D" },
  { name: "5'clip", color: "#3E517A" },
  { name: "5'UTR", color: "#BBBBBB" },
  { name: "D-loop", color: "#F13C73" },
  { name: "assembly_gap", color: "#DE9151" },
  { name: "centromere", color: "#F34213" },
  { name: "Het", color: "#BC5D2E" },
  { name: "mobile_element", color: "#6DB1BF" },
  { name: "ncRNA", color: "#FFEAEC" },
  { name: "proprotein", color: "#F39A9D" },
  { name: "regulatory", color: "#3F6C51" },
  { name: "SecStr", color: "#7B4B94" },
  { name: "Site", color: "#7D82B8" },
  { name: "telomere", color: "#DE9151" },
  { name: "tmRNA", color: "#B7E3CC" },
  { name: "unsure", color: "#C4FFB2" },
  { name: "V_segment", color: "#D6F7A3" },
  { name: "allele", color: "#D86D6D" },
  { name: "attenuator", color: "#6B7F9C" },
  { name: "C_region", color: "#B5D89D" },
  { name: "CAAT_signal", color: "#E9CD98" },
  { name: "CDS", color: "#EF6500" },
  { name: "conserved", color: "#A3A5F0" },
  { name: "D_segment", color: "#C060F7" },
  { name: "default", color: "#CCCCCC" },
  { name: "enhancer", color: "#38F872" },
  { name: "exon", color: "#95F844" },
  { name: "gap", color: "#F7D43C" },
  { name: "GC_signal", color: "#861F1F" },
  { name: "gene", color: "#684E27" },
  { name: "iDNA", color: "#A59B41" },
  { name: "intron", color: "#52963E" },
  { name: "J_region", color: "#369283" },
  { name: "LTR", color: "#31748F" },
  { name: "m_rna", color: "#FFFF00" },
  { name: "mat_peptide", color: "#353E8F" },
  { name: "misc_binding", color: "#006FEF" },
  { name: "misc_difference", color: "#5A368A" },
  { name: "misc_feature", color: "#006FEF" },
  { name: "misc_marker", color: "#8DCEB1" },
  { name: "misc_part", color: "#006FEF" },
  { name: "misc_recomb", color: "#DD97B4" },
  { name: "misc_RNA", color: "#BD0101" },
  { name: "misc_signal", color: "#FF9A04" },
  { name: "misc_structure", color: "#B3FF00" },
  { name: "modified_base", color: "#00F7FF" },
  { name: "mRNA", color: "#FFD900" },
  { name: "N_region", color: "#AE00FF" },
  { name: "old_sequence", color: "#F0A7FF" },
  { name: "operator", color: "#63004D" },
  { name: "operon", color: "#000653" },
  { name: "oriT", color: "#580000" },
  { name: "plasmid", color: "#00635E" },
  { name: "polyA_signal", color: "#BBBBBB" },
  { name: "polyA_site", color: "#003328" },
  { name: "precursor_RNA", color: "#443200" },
  { name: "prim_transcript", color: "#665E4C" },
  { name: "primer_bind", color: "#53d969" },
  { name: "promoter", color: "#31B440" },
  { name: "protein_bind", color: "#2E2E2E" },
  { name: "protein_domain", color: "#4D4B4B" },
  { name: "protein", color: "#696969" },
  { name: "RBS", color: "#BDFFCB" },
  { name: "rep_origin", color: "#878787" },
  { name: "repeat_region", color: "#966363" },
  { name: "repeat_unit", color: "#A16D8D" },
  { name: "rRNA", color: "#9BF0FF" },
  { name: "s_mutation", color: "#70A2FF" },
  { name: "S_region", color: "#FF74A9" },
  { name: "satellite", color: "#164E64" },
  { name: "scRNA", color: "#A057FF" },
  { name: "sig_peptide", color: "#2FFF8D" },
  { name: "snoRNA", color: "#296B14" },
  { name: "snRNA", color: "#A16249" },
  { name: "source", color: "#0B17BD" },
  { name: "start", color: "#D6A336" },
  { name: "stem_loop", color: "#67069E" },
  { name: "stop", color: "#D44FC9" },
  { name: "STS", color: "#597FE7" },
  { name: "tag", color: "#E419DA" },
  { name: "TATA_signal", color: "#EB2B2B" },
  { name: "terminator", color: "#F51600" },
  { name: "transit_peptide", color: "#24D491" },
  { name: "transposon", color: "#B6E436" },
  { name: "tRNA", color: "#D1456F" },
  { name: "V_region", color: "#7B5EE7" },
  { name: "variation", color: "#2EE455" }
];
104
-
105
/**
 * Builds a name-keyed map of the built-in feature types merged with any
 * overrides found under `tg_featureTypeOverrides`.
 *
 * Overrides are read from `window` when it exists and yields a truthy value,
 * otherwise from `global`, otherwise none are applied. Built-in entries are
 * flagged `isGenbankStandardType`. An override whose name matches a built-in
 * entry is layered on top of it and flagged `isOverridden`; any other
 * override is flagged `isCustomType`.
 *
 * @returns Map from feature type name to its merged definition.
 */
const getMergedFeatureMap = (): Record<string, FeatureType> => {
  const standardTypes = genbankFeatureTypes.map(feature => ({
    ...feature,
    isGenbankStandardType: true
  }));
  const keyedGBFeats = keyBy(standardTypes, "name");

  let featureOverrides = (
    typeof window !== "undefined"
      ? get(window, "tg_featureTypeOverrides")
      : false
  ) as unknown as FeatureType[];
  if (!featureOverrides) {
    featureOverrides = (
      typeof global !== "undefined"
        ? get(global, "tg_featureTypeOverrides")
        : false
    ) as unknown as FeatureType[];
  }
  if (!featureOverrides) {
    featureOverrides = [];
  }

  const mappedOverrides = featureOverrides.map(override => {
    const originalGenbankFeat = keyedGBFeats[override.name];
    const originFlag = originalGenbankFeat
      ? { isOverridden: true }
      : { isCustomType: true };
    return { ...originalGenbankFeat, ...override, ...originFlag };
  });

  return { ...keyedGBFeats, ...keyBy(mappedOverrides, "name") };
};
135
-
136
/**
 * Maps each merged feature type name to its color.
 *
 * @param options - Pass `includeHidden: true` to keep entries whose
 *   `isHidden` flag is set; by default they are left out.
 * @returns Object keyed by feature type name with color values.
 */
const getFeatureToColorMap = ({
  includeHidden
}: { includeHidden?: boolean } = {}): Record<string, string> => {
  const shownFeatures = filter(
    getMergedFeatureMap(),
    feature => Boolean(includeHidden) || !feature.isHidden
  );
  const colorByName: Record<string, string> = {};
  for (const { name, color } of shownFeatures) {
    colorByName[name] = color;
  }
  return colorByName;
};
147
-
148
/**
 * Lists the names of the merged feature types.
 *
 * @param options - Pass `includeHidden: true` to keep entries whose
 *   `isHidden` flag is set; by default they are left out.
 * @returns Feature type names in the iteration order of the merged map.
 */
const getFeatureTypes = ({
  includeHidden
}: { includeHidden?: boolean } = {}): string[] => {
  const shownFeatures = filter(
    getMergedFeatureMap(),
    feature => Boolean(includeHidden) || !feature.isHidden
  );
  return shownFeatures.map(feature => feature.name);
};
154
-
155
- export { genbankFeatureTypes };
156
-
157
/**
 * Maps each built-in feature type name to its color, ignoring any overrides.
 *
 * @returns Object keyed by feature type name with color values.
 */
export function getGenbankFeatureToColorMap(): Record<string, string> {
  const colorByName: Record<string, string> = {};
  for (const featureType of genbankFeatureTypes) {
    colorByName[featureType.name] = featureType.color;
  }
  return colorByName;
}
164
-
165
- export { getFeatureToColorMap };
166
- export { getFeatureTypes };
167
- export { getMergedFeatureMap };
@@ -1,153 +0,0 @@
1
- import { debounce, uniq } from "lodash-es";
2
- import {
3
- ambiguous_dna_letters,
4
- ambiguous_rna_letters,
5
- extended_protein_letters
6
- } from "./bioData";
7
-
8
// Warnings collected since the last time the toast callback ran.
let allWarnings: string[] = [];

/**
 * Debounced (200 ms) callback that shows the collected warnings, de-duplicated
 * and joined by newlines, through `window.toastr.warning` when `window` and
 * `window.toastr` exist and there is something to show. The collected list is
 * cleared on every run, whether or not anything was shown.
 */
const makeToast = debounce(() => {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const toastr = typeof window !== "undefined" && (window as any).toastr;
  if (toastr && allWarnings.length) {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (window as any).toastr.warning(uniq(allWarnings).join("\n"));
  }
  allWarnings = [];
}, 200);

/**
 * Appends `warnings` to the collected list, cancels any pending debounced
 * call, and schedules a new one.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function showWarnings(warnings: any) {
  allWarnings = allWarnings.concat(warnings);
  makeToast.cancel();
  makeToast();
}
29
-
30
/** Options accepted by the sequence-string filtering helpers in this module. */
interface FilterSequenceStringOptions {
  /** Extra characters appended to the accepted character set. */
  additionalValidChars?: string;
  /** Selects the oligo character set (checked after `isProtein`). */
  isOligo?: boolean;
  /** Sequence name; used as a prefix in the invalid-character warning. */
  name?: string;
  /** Selects the protein character set (checked first). */
  isProtein?: boolean;
  /** Selects the RNA character set and enables the t -> u replacement. */
  isRna?: boolean;
  /** Selects the combined RNA + DNA character set. */
  isMixedRnaAndDna?: boolean;
  /** Any other keys are allowed and ignored by the helpers visible here. */
  [key: string]: unknown;
}
39
-
40
/**
 * Removes characters that are not valid for the given sequence type and
 * applies the type's single-character replacements.
 *
 * Each input character is handled case-insensitively:
 *  1. if `getReplaceChars` maps its lower-case form, the replacement is
 *     emitted, upper-cased when the input character was upper case;
 *  2. otherwise, if it is in the accepted set (`getAcceptedChars` plus
 *     `additionalValidChars`), it is kept unchanged;
 *  3. otherwise it is dropped and reported.
 *
 * `additionalValidChars` is taken literally and matched case-insensitively.
 * (It used to be joined with backslashes, which made "\" itself valid whenever
 * two or more extra characters were supplied, and upper-case extras could
 * never match because the comparison is made on the lower-cased letter.)
 *
 * The collected warnings are also handed to `showWarnings`.
 *
 * @param sequenceString - Raw sequence text; defaults to "".
 * @param options - Sequence-type flags, `additionalValidChars`, and `name`.
 * @returns A tuple of the sanitized sequence and the warning messages.
 */
export default function filterSequenceString(
  sequenceString = "",
  {
    additionalValidChars = "",
    isOligo,
    name,
    isProtein,
    isRna,
    isMixedRnaAndDna
  }: FilterSequenceStringOptions = {}
): [string, string[]] {
  const acceptedChars = getAcceptedChars({
    isOligo,
    isProtein,
    isRna,
    isMixedRnaAndDna
  });
  const replaceChars = getReplaceChars({
    isOligo,
    isProtein,
    isRna,
    isMixedRnaAndDna
  });

  // Lower-cased because every letter is lower-cased before the lookup below.
  const validChars = `${acceptedChars}${additionalValidChars.toLowerCase()}`;

  let sanitizedVal = "";
  const invalidChars: string[] = [];
  const warnings: string[] = [];
  const replaceCount: Record<string, number> = {};

  for (const letter of sequenceString.split("")) {
    const lowerLetter = letter.toLowerCase();
    const replacement = replaceChars && replaceChars[lowerLetter];
    if (replacement) {
      replaceCount[lowerLetter] = (replaceCount[lowerLetter] || 0) + 1;
      const isUpper = lowerLetter !== letter;
      sanitizedVal += isUpper ? replacement.toUpperCase() : replacement;
    } else if (validChars.includes(lowerLetter)) {
      sanitizedVal += letter;
    } else {
      invalidChars.push(letter);
    }
  }

  // One warning per replaced letter, with a count when it happened repeatedly.
  for (const letter of Object.keys(replaceCount)) {
    const count = replaceCount[letter];
    warnings.push(
      `Replaced "${letter}" with "${replaceChars[letter]}"${
        count > 1 ? ` ${count} times` : ""
      }`
    );
  }

  if (invalidChars.length) {
    const listed = uniq(invalidChars)
      .map(c => (c === " " ? "space" : c))
      .slice(0, 100)
      .join(", ");
    warnings.push(
      `${
        name ? `Sequence ${name}: ` : ""
      }Invalid character(s) detected and removed: ${listed} `
    );
  }
  showWarnings(warnings);

  return [sanitizedVal, warnings];
}
111
-
112
/**
 * Returns the lower-case characters accepted for a sequence type.
 *
 * Precedence is protein, then oligo / RNA, then everything else:
 *  - protein: the extended protein letters;
 *  - oligo or RNA: the ambiguous RNA letters plus "t";
 *  - mixed RNA/DNA and plain DNA: the ambiguous RNA letters followed by the
 *    ambiguous DNA letters (both cases yield the same set, so the
 *    `isMixedRnaAndDna` flag does not need to be inspected).
 *
 * @param options - Sequence-type flags.
 * @returns String whose characters are the accepted letters.
 */
export function getAcceptedChars({
  isOligo,
  isProtein,
  isRna
}: FilterSequenceStringOptions = {}): string {
  if (isProtein) {
    return extended_protein_letters.toLowerCase();
  }
  const rnaLetters = ambiguous_rna_letters.toLowerCase();
  if (isOligo || isRna) {
    return rnaLetters + "t";
  }
  return rnaLetters + ambiguous_dna_letters.toLowerCase();
}
131
/**
 * Returns the lower-case character replacements for a sequence type.
 *
 * Only RNA that is neither flagged as protein nor as oligo has a replacement
 * (t -> u); every other combination yields an empty map.
 *
 * @param options - Sequence-type flags.
 * @returns Map from a lower-case input character to its replacement.
 */
export function getReplaceChars({
  isOligo,
  isProtein,
  isRna
}: FilterSequenceStringOptions = {}): Record<string, string> {
  if (isProtein || isOligo) {
    return {};
  }
  if (isRna) {
    return { t: "u" };
  }
  return {};
}
149
-
150
/**
 * Filters a sequence string as RNA: forwards to `filterSequenceString` with
 * `isRna` forced to true and returns only the sanitized string.
 *
 * @param s - Raw sequence text.
 * @param o - Options to forward; any `isRna` value it carries is overridden.
 * @returns The sanitized sequence (warnings are discarded).
 */
export const filterRnaString = (
  s: string,
  o: FilterSequenceStringOptions
): string => {
  const filtered = filterSequenceString(s, { ...o, isRna: true });
  return filtered[0];
};
@@ -1,58 +0,0 @@
1
/** One approximate occurrence of the search sequence in the target. */
export interface ApproxMatch {
  /** 0-based start position of the occurrence in the target sequence. */
  index: number;
  /** The stretch of target sequence that was compared. */
  match: string;
  /** Positions, relative to the search sequence, that differ. */
  mismatchPositions: number[];
  /** Same as `mismatchPositions.length`; kept for backwards compatibility. */
  numMismatches: number;
}

/**
 * Finds approximate matches of a search sequence within a target sequence.
 *
 * The comparison is exact per character (case-sensitive) and allows
 * substitutions only, not insertions or deletions.
 *
 * When `circular` is true, every position of the target is tried as a start
 * and the comparison wraps around the end of the target as many times as the
 * search sequence requires. (Previously only a single partial copy of the
 * target was appended, so a search sequence more than one character longer
 * than the target was compared against missing characters.)
 *
 * @param searchSeq - The sequence to search for.
 * @param targetSeq - The sequence to search within.
 * @param maxMismatches - Maximum number of mismatches allowed.
 * @param circular - Whether to treat the target as circular (default: false).
 * @returns One entry per start position with at most `maxMismatches`
 *   mismatches, in ascending order of `index`.
 */
export default function findApproxMatches(
  searchSeq: string,
  targetSeq: string,
  maxMismatches: number,
  circular = false
): ApproxMatch[] {
  const matches: ApproxMatch[] = [];
  const searchLen = searchSeq.length;
  const targetLen = targetSeq.length;

  // A start at the last target position needs searchLen - 1 wrapped characters.
  const overhang = Math.max(searchLen - 1, 0);
  const targetSeqExtended =
    circular && targetLen > 0
      ? targetSeq +
        targetSeq.repeat(Math.ceil(overhang / targetLen)).slice(0, overhang)
      : targetSeq;
  const limit = circular ? targetLen : targetLen - searchLen + 1;

  for (let i = 0; i < limit; i++) {
    const candidate = targetSeqExtended.slice(i, i + searchLen);
    const mismatchPositions: number[] = [];

    for (let j = 0; j < searchLen; j++) {
      if (searchSeq[j] !== candidate[j]) {
        mismatchPositions.push(j);
        // No need to keep comparing once the budget is exceeded.
        if (mismatchPositions.length > maxMismatches) break;
      }
    }

    if (mismatchPositions.length <= maxMismatches) {
      matches.push({
        index: i,
        match: candidate,
        mismatchPositions,
        numMismatches: mismatchPositions.length // Keep for backwards compatibility
      });
    }
  }

  return matches;
}
@@ -1,43 +0,0 @@
1
- import { normalizeRange, Range } from "@teselagen/range-utils";
2
/**
 * Finds the occurrence of `overlapSequence` in `sequenceToSearch` whose start
 * is nearest to `positionStart` and returns it as a normalized range.
 *
 * Matching is case-insensitive and `overlapSequence` is treated as literal
 * text (regex metacharacters are escaped). Unless `isLinear` is truthy the
 * sequence is treated as circular: occurrences may wrap past the end, and
 * distance is measured the short way around the origin.
 *
 * Every occurrence is considered, including ones that overlap each other;
 * when two are equally near, the one starting later wins.
 *
 * @param sequenceToSearch - Sequence to search within.
 * @param overlapSequence - Literal text to look for.
 * @param positionStart - Position to measure distance from (default 0).
 * @param isLinear - Truthy to disable wrap-around matching and distances.
 * @returns The range passed through `normalizeRange`, or `null` when
 *   `overlapSequence` is empty, longer than the sequence, or not found.
 */
function findNearestRangeOfSequenceOverlapToPosition(
  sequenceToSearch: string,
  overlapSequence: string,
  positionStart = 0,
  isLinear?: boolean
): Range | null {
  const seqLength = sequenceToSearch.length;
  // An empty pattern would match everywhere (and never advance the regex).
  if (overlapSequence.length === 0 || seqLength < overlapSequence.length) {
    return null;
  }

  const literalPattern = overlapSequence.replace(
    /[.*+?^${}()|[\]\\]/g,
    "\\$&"
  );
  const regex = new RegExp(literalPattern, "ig");
  // Doubling the sequence lets matches that wrap past the end be found.
  const haystack = isLinear
    ? sequenceToSearch
    : sequenceToSearch + sequenceToSearch;

  let nearestIndex: number | undefined;
  let nearestDistance = Infinity;
  let result: RegExpExecArray | null;
  while ((result = regex.exec(haystack)) !== null) {
    // Starts in the second copy repeat starts already seen in the first.
    if (result.index >= seqLength) break;
    // Resume one character later so overlapping occurrences are not skipped.
    regex.lastIndex = result.index + 1;

    const linearDistance = Math.abs(result.index - positionStart);
    const distance = isLinear
      ? linearDistance //if linear, don't check around the origin
      : Math.min(linearDistance, Math.abs(linearDistance - seqLength));
    if (distance <= nearestDistance) {
      nearestIndex = result.index;
      nearestDistance = distance;
    }
  }

  if (nearestIndex === undefined) {
    return null;
  }

  return normalizeRange(
    {
      start: nearestIndex,
      end: nearestIndex + overlapSequence.length - 1
    },
    seqLength
  );
}
43
- export default findNearestRangeOfSequenceOverlapToPosition;
@@ -1,31 +0,0 @@
1
- import getOrfsFromSequence, { Orf } from "./getOrfsFromSequence";
2
-
3
/**
 * Collects the ORFs reported by `getOrfsFromSequence` for both directions.
 *
 * @param sequence - Sequence to scan.
 * @param circular - Forwarded to `getOrfsFromSequence`.
 * @param minimumOrfSize - Forwarded to `getOrfsFromSequence`.
 * @param useAdditionalOrfStartCodons - Forwarded to `getOrfsFromSequence`.
 * @param isProteinOrOligo - When truthy no search is done and `[]` is returned.
 * @returns Forward results followed by reverse results.
 */
export default function findOrfsInPlasmid(
  sequence: string,
  circular: boolean,
  minimumOrfSize: number,
  useAdditionalOrfStartCodons: boolean,
  isProteinOrOligo: boolean
): Orf[] {
  // we do not find ORFs in protein/oligo sequences
  if (isProteinOrOligo) {
    return [];
  }

  // Same search for each direction; only the `forward` flag differs.
  const findInDirection = (forward: boolean): Orf[] =>
    getOrfsFromSequence({
      sequence,
      minimumOrfSize,
      forward,
      circular,
      useAdditionalOrfStartCodons
    });

  const forwardOrfs = findInDirection(true);
  const reverseOrfs = findInDirection(false);
  return forwardOrfs.concat(reverseOrfs);
}