@teselagen/sequence-utils 0.3.36 → 0.3.38-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/DNAComplementMap.d.ts +1 -1
  2. package/addGapsToSeqReads.d.ts +16 -3
  3. package/adjustAnnotationsToInsert.d.ts +2 -1
  4. package/adjustBpsToReplaceOrInsert.d.ts +2 -1
  5. package/aliasedEnzymesByName.d.ts +37 -1
  6. package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
  7. package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
  8. package/annotateSingleSeq.d.ts +5 -4
  9. package/annotationTypes.d.ts +2 -2
  10. package/autoAnnotate.d.ts +17 -8
  11. package/bioData.d.ts +10 -58
  12. package/calculateEndStability.d.ts +1 -1
  13. package/calculateNebTa.d.ts +6 -1
  14. package/calculateNebTm.d.ts +6 -4
  15. package/calculatePercentGC.d.ts +1 -1
  16. package/calculateSantaLuciaTm.d.ts +28 -114
  17. package/calculateTm.d.ts +13 -1
  18. package/computeDigestFragments.d.ts +30 -24
  19. package/condensePairwiseAlignmentDifferences.d.ts +1 -1
  20. package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
  21. package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
  22. package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
  23. package/defaultEnzymesByName.d.ts +2 -1
  24. package/degenerateDnaToAminoAcidMap.d.ts +1 -1
  25. package/degenerateRnaToAminoAcidMap.d.ts +1 -1
  26. package/deleteSequenceDataAtRange.d.ts +2 -1
  27. package/diffUtils.d.ts +9 -7
  28. package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
  29. package/featureTypesAndColors.d.ts +19 -6
  30. package/filterSequenceString.d.ts +14 -10
  31. package/findApproxMatches.d.ts +7 -1
  32. package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
  33. package/findOrfsInPlasmid.d.ts +2 -11
  34. package/findSequenceMatches.d.ts +11 -1
  35. package/generateAnnotations.d.ts +2 -1
  36. package/generateSequenceData.d.ts +8 -13
  37. package/getAllInsertionsInSeqReads.d.ts +11 -1
  38. package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
  39. package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
  40. package/getAminoAcidStringFromSequenceString.d.ts +3 -1
  41. package/getCodonRangeForAASliver.d.ts +3 -4
  42. package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  43. package/getComplementSequenceAndAnnotations.d.ts +5 -1
  44. package/getComplementSequenceString.d.ts +1 -1
  45. package/getCutsiteType.d.ts +2 -1
  46. package/getCutsitesFromSequence.d.ts +2 -1
  47. package/getDegenerateDnaStringFromAAString.d.ts +1 -1
  48. package/getDegenerateRnaStringFromAAString.d.ts +1 -1
  49. package/getDigestFragmentsForCutsites.d.ts +4 -1
  50. package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
  51. package/getInsertBetweenVals.d.ts +2 -1
  52. package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
  53. package/getOrfsFromSequence.d.ts +17 -11
  54. package/getOverlapBetweenTwoSequences.d.ts +2 -1
  55. package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
  56. package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
  57. package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  58. package/getReverseComplementAnnotation.d.ts +11 -1
  59. package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
  60. package/getReverseComplementSequenceString.d.ts +1 -1
  61. package/getReverseSequenceString.d.ts +1 -1
  62. package/getSequenceDataBetweenRange.d.ts +9 -1
  63. package/getVirtualDigest.d.ts +11 -10
  64. package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
  65. package/index.cjs +733 -484
  66. package/index.d.ts +8 -5
  67. package/index.js +733 -484
  68. package/index.umd.cjs +733 -484
  69. package/insertGapsIntoRefSeq.d.ts +2 -1
  70. package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
  71. package/isEnzymeType2S.d.ts +2 -1
  72. package/mapAnnotationsToRows.d.ts +9 -1
  73. package/package.json +9 -6
  74. package/prepareCircularViewData.d.ts +2 -1
  75. package/prepareRowData.d.ts +7 -3
  76. package/proteinAlphabet.d.ts +1 -1
  77. package/rotateBpsToPosition.d.ts +1 -1
  78. package/rotateSequenceDataToPosition.d.ts +3 -1
  79. package/shiftAnnotationsByLen.d.ts +4 -3
  80. package/src/DNAComplementMap.ts +32 -0
  81. package/src/addGapsToSeqReads.ts +436 -0
  82. package/src/adjustAnnotationsToInsert.ts +20 -0
  83. package/src/adjustBpsToReplaceOrInsert.ts +73 -0
  84. package/src/aliasedEnzymesByName.ts +7366 -0
  85. package/src/aminoAcidToDegenerateDnaMap.ts +32 -0
  86. package/src/aminoAcidToDegenerateRnaMap.ts +32 -0
  87. package/src/annotateSingleSeq.ts +37 -0
  88. package/src/annotationTypes.ts +23 -0
  89. package/src/autoAnnotate.test.js +0 -1
  90. package/src/autoAnnotate.ts +290 -0
  91. package/src/bioData.ts +65 -0
  92. package/src/calculateEndStability.ts +91 -0
  93. package/src/calculateNebTa.ts +46 -0
  94. package/src/calculateNebTm.ts +132 -0
  95. package/src/calculatePercentGC.ts +3 -0
  96. package/src/calculateSantaLuciaTm.ts +184 -0
  97. package/src/calculateTm.ts +242 -0
  98. package/src/computeDigestFragments.ts +238 -0
  99. package/src/condensePairwiseAlignmentDifferences.ts +85 -0
  100. package/src/convertAACaretPositionOrRangeToDna.ts +28 -0
  101. package/src/convertDnaCaretPositionOrRangeToAA.ts +28 -0
  102. package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
  103. package/src/defaultEnzymesByName.ts +280 -0
  104. package/src/degenerateDnaToAminoAcidMap.ts +5 -0
  105. package/src/degenerateRnaToAminoAcidMap.ts +5 -0
  106. package/src/deleteSequenceDataAtRange.ts +13 -0
  107. package/src/diffUtils.ts +80 -0
  108. package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
  109. package/src/featureTypesAndColors.js +1 -1
  110. package/src/featureTypesAndColors.ts +167 -0
  111. package/src/filterSequenceString.ts +153 -0
  112. package/src/findApproxMatches.ts +58 -0
  113. package/src/findNearestRangeOfSequenceOverlapToPosition.ts +43 -0
  114. package/src/findOrfsInPlasmid.js +6 -1
  115. package/src/findOrfsInPlasmid.ts +31 -0
  116. package/src/findSequenceMatches.ts +154 -0
  117. package/src/generateAnnotations.ts +39 -0
  118. package/src/generateSequenceData.ts +212 -0
  119. package/src/getAllInsertionsInSeqReads.ts +100 -0
  120. package/src/getAminoAcidDataForEachBaseOfDna.ts +305 -0
  121. package/src/getAminoAcidFromSequenceTriplet.ts +27 -0
  122. package/src/getAminoAcidStringFromSequenceString.ts +36 -0
  123. package/src/getCodonRangeForAASliver.ts +73 -0
  124. package/src/getComplementAminoAcidStringFromSequenceString.ts +10 -0
  125. package/src/getComplementSequenceAndAnnotations.ts +25 -0
  126. package/src/getComplementSequenceString.ts +23 -0
  127. package/src/getCutsiteType.ts +18 -0
  128. package/src/getCutsitesFromSequence.ts +22 -0
  129. package/src/getDegenerateDnaStringFromAAString.ts +15 -0
  130. package/src/getDegenerateRnaStringFromAAString.ts +15 -0
  131. package/src/getDigestFragmentsForCutsites.ts +126 -0
  132. package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
  133. package/src/getInsertBetweenVals.ts +31 -0
  134. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +40 -0
  135. package/src/getMassOfAaString.ts +29 -0
  136. package/src/getOrfsFromSequence.ts +132 -0
  137. package/src/getOverlapBetweenTwoSequences.ts +30 -0
  138. package/src/getPossiblePartsFromSequenceAndEnzymes.ts +149 -0
  139. package/src/getReverseAminoAcidStringFromSequenceString.ts +22 -0
  140. package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +10 -0
  141. package/src/getReverseComplementAnnotation.ts +33 -0
  142. package/src/getReverseComplementSequenceAndAnnotations.ts +46 -0
  143. package/src/getReverseComplementSequenceString.ts +18 -0
  144. package/src/getReverseSequenceString.ts +12 -0
  145. package/src/getSequenceDataBetweenRange.ts +154 -0
  146. package/src/getVirtualDigest.ts +139 -0
  147. package/src/guessIfSequenceIsDnaAndNotProtein.ts +39 -0
  148. package/src/index.test.ts +43 -0
  149. package/src/index.ts +111 -0
  150. package/src/insertGapsIntoRefSeq.ts +43 -0
  151. package/src/insertSequenceDataAtPosition.ts +2 -0
  152. package/src/insertSequenceDataAtPositionOrRange.ts +328 -0
  153. package/src/isEnzymeType2S.ts +5 -0
  154. package/src/mapAnnotationsToRows.ts +256 -0
  155. package/src/prepareCircularViewData.ts +24 -0
  156. package/src/prepareRowData.ts +61 -0
  157. package/src/prepareRowData_output1.json +1 -0
  158. package/src/proteinAlphabet.ts +271 -0
  159. package/src/rotateBpsToPosition.ts +12 -0
  160. package/src/rotateSequenceDataToPosition.ts +54 -0
  161. package/src/shiftAnnotationsByLen.ts +24 -0
  162. package/src/threeLetterSequenceStringToAminoAcidMap.ts +198 -0
  163. package/src/tidyUpAnnotation.ts +205 -0
  164. package/src/tidyUpSequenceData.ts +213 -0
  165. package/src/types.ts +109 -0
  166. package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
  167. package/tidyUpAnnotation.d.ts +13 -11
  168. package/tidyUpSequenceData.d.ts +15 -1
  169. package/types.d.ts +105 -0
@@ -0,0 +1,32 @@
1
/**
 * Lookup from a lower-case one-letter amino acid code (plus the gap "-",
 * the placeholder "." and the stop "*") to one lower-case, three-letter
 * IUPAC degenerate DNA codon.
 */
const aminoAcidToDegenerateDnaMap: Record<string, string> = {
  "-": "---",
  ".": "...",
  "*": "trr",
  a: "gcn",
  // b = D or N: aay + gay => ray
  b: "ray",
  c: "tgy",
  d: "gay",
  e: "gar",
  f: "tty",
  g: "ggn",
  h: "cay",
  i: "ath",
  // j = L or I: ytn + ath => htn
  j: "htn",
  k: "aar",
  // y = c/t, n = a/g/c/t
  l: "ytn",
  m: "atg",
  n: "aay",
  // Pyrrolysine, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2933860/
  o: "tag",
  p: "ccn",
  q: "car",
  // m = a/c, n = a/g/c/t
  r: "mgn",
  // w = a/t, s = c/g, n = a/g/c/t
  s: "wsn",
  t: "acn",
  // Selenocysteine, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2933860/
  u: "tga",
  v: "gtn",
  w: "tgg",
  // unknown amino acid
  x: "nnn",
  y: "tay",
  // z = E or Q: gar + car => sar
  z: "sar"
};
export default aminoAcidToDegenerateDnaMap;
@@ -0,0 +1,32 @@
1
/**
 * Lookup from a lower-case one-letter amino acid code (plus the gap "-",
 * the placeholder "." and the stop "*") to one lower-case, three-letter
 * IUPAC degenerate RNA codon. This is the RNA counterpart of the DNA table:
 * every thymine position is written as "u".
 */
const aminoAcidToDegenerateRnaMap = {
  "-": "---",
  ".": "...",
  // Stop codon: written with "u" like every other entry in this RNA table
  // (it was previously "trr", which leaked a DNA base into RNA output).
  "*": "urr",
  a: "gcn",
  b: "ray",
  c: "ugy",
  d: "gay",
  e: "gar",
  f: "uuy",
  g: "ggn",
  h: "cay",
  i: "auh",
  j: "hun",
  k: "aar",
  l: "yun",
  m: "aug",
  n: "aay",
  o: "uag",
  p: "ccn",
  q: "car",
  r: "mgn",
  s: "wsn",
  t: "acn",
  u: "uga",
  v: "gun",
  w: "ugg",
  x: "nnn",
  y: "uay",
  z: "sar"
};
export default aminoAcidToDegenerateRnaMap;
@@ -0,0 +1,37 @@
1
+ import { autoAnnotate } from "./autoAnnotate";
2
+ import { SequenceData } from "./types";
3
+
4
/**
 * Runs `autoAnnotate` for a single search sequence against a single full
 * sequence and returns whatever was found for the full sequence.
 *
 * The existing `features` of `fullSeq` are passed along as its current
 * annotations, and names are not compared (`compareName: false`).
 *
 * @returns `{ matches }` where `matches` is the entry `autoAnnotate` produced
 *   for the full sequence, or an empty array when there is none.
 */
function annotateSingleSeq({
  fullSeq,
  searchSeq
}: {
  fullSeq: SequenceData;
  searchSeq: SequenceData;
}) {
  // Sequences without an id get a fixed placeholder key.
  const fullSeqId = fullSeq.id || "fullSeqId";
  const searchSeqId = searchSeq.id || "searchSeqId";

  const target = {
    sequence: fullSeq.sequence,
    circular: fullSeq.circular,
    annotations: fullSeq.features || []
  };
  const query = { ...searchSeq, id: searchSeqId };

  const results = autoAnnotate({
    seqsToAnnotateById: { [fullSeqId]: target },
    annotationsToCheckById: { [searchSeqId]: query },
    compareName: false
  });

  if (!results || !results[fullSeqId]) {
    return { matches: [] };
  }
  return {
    matches: results[fullSeqId]
  };
}
export default annotateSingleSeq;
@@ -0,0 +1,23 @@
1
/** Every annotation collection name listed for a sequence, in this order. */
export const annotationTypes = [
  "features",
  "warnings",
  "assemblyPieces",
  "lineageAnnotations",
  "parts",
  "cutsites",
  "orfs",
  "translations",
  "primers",
  "guides"
];

/**
 * The annotation collection names listed as modifiable. Compared with
 * `annotationTypes` this list has no "cutsites" or "orfs" entry and uses its
 * own ordering.
 */
export const modifiableTypes = [
  "features",
  "assemblyPieces",
  "lineageAnnotations",
  "warnings",
  "parts",
  "translations",
  "primers",
  "guides"
];
@@ -1,4 +1,3 @@
1
- /* eslint-disable no-unused-expressions */
2
1
  import {
3
2
  autoAnnotate,
4
3
  convertApELikeRegexToRegex,
@@ -0,0 +1,290 @@
1
+ /* Copyright (C) 2018 TeselaGen Biotechnology, Inc. */
2
+ import { forEach, omitBy } from "lodash-es";
3
+ import { ambiguous_dna_values } from "./bioData";
4
+ import aminoAcidToDegenerateDnaMap from "./aminoAcidToDegenerateDnaMap";
5
+ import {
6
+ normalizePositionByRangeLength,
7
+ reversePositionInRange
8
+ } from "@teselagen/range-utils";
9
+ import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
10
+ import { Annotation } from "./types";
11
+
12
/**
 * Finds where each candidate annotation occurs in each target sequence.
 *
 * Every `annotationsToCheckById[*].sequence` is compiled as a global,
 * case-insensitive regular expression and matched against the forward strand
 * and the reverse complement of every non-empty sequence in
 * `seqsToAnnotateById`. Circular sequences are searched on the doubled string
 * so matches that wrap around the origin are found. Overlapping matches are
 * reported, but a match that lies inside the previous match is skipped.
 *
 * seqsToAnnotateById must not be length = 0
 *
 * @param seqsToAnnotateById target sequences keyed by id, with the
 *   annotations they already carry
 * @param annotationsToCheckById candidate annotations keyed by id; `sequence`
 *   is used as regex source
 * @param compareName when truthy, an existing annotation only counts as a
 *   duplicate if its name matches as well as start/end/direction
 * @param warnIfMoreThan when set, annotation ids that were added more than
 *   this many times to one sequence are listed under `__more_than_warnings`
 * @returns a record keyed by sequence id holding the new annotations sorted
 *   by start, plus (optionally) a `__more_than_warnings` record mapping
 *   sequence id to the offending annotation ids
 */
function autoAnnotate({
  seqsToAnnotateById,
  annotationsToCheckById,
  compareName,
  warnIfMoreThan
}: {
  seqsToAnnotateById: Record<
    string,
    { sequence: string; annotations: Annotation[]; circular?: boolean }
  >;
  annotationsToCheckById: Record<string, { sequence: string; id: string }>;
  compareName?: boolean;
  warnIfMoreThan?: number;
}) {
  const annotationsToAddBySeqId: Record<string, Annotation[]> = {};

  forEach(annotationsToCheckById, ann => {
    // One stateful (global) regex per annotation, shared by every scan below.
    const reg = new RegExp(ann.sequence, "gi");
    forEach(
      omitBy(seqsToAnnotateById, s => !s.sequence.length),
      ({ circular, sequence }, id) => {
        function getMatches({
          seqToMatchAgainst,
          isReverse,
          seqLen
        }: {
          seqToMatchAgainst: string;
          isReverse?: boolean;
          seqLen: number;
        }) {
          let match: RegExpExecArray | null;
          let lastMatch: { start: number; end: number } | undefined;
          // Always scan from the beginning. A previous scan may have returned
          // early (see below) and left `lastIndex` pointing into the middle
          // of its own string, which would make this scan skip matches.
          reg.lastIndex = 0;
          try {
            while ((match = reg.exec(seqToMatchAgainst))) {
              const { index: matchStart, 0: matchSeq } = match;
              // Matches starting in the duplicated half of a circular
              // sequence are repeats of ones already handled.
              if (matchStart >= seqLen) return;
              const matchEnd = matchStart + matchSeq.length;
              if (lastMatch) {
                // Skip a match fully contained in the previous one.
                if (matchStart > lastMatch.start && matchEnd <= lastMatch.end) {
                  reg.lastIndex = match.index + 1;
                  continue;
                }
              }
              lastMatch = {
                start: matchStart,
                end: matchEnd
              };
              const range = {
                start: matchStart,
                end: normalizePositionByRangeLength(matchEnd - 1, seqLen, false)
              };
              if (!annotationsToAddBySeqId[id])
                annotationsToAddBySeqId[id] = [];
              annotationsToAddBySeqId[id].push({
                ...(isReverse
                  ? {
                      // Map reverse-complement coordinates back onto the
                      // forward strand.
                      start: reversePositionInRange(range.end, seqLen, false),
                      end: reversePositionInRange(range.start, seqLen, false)
                    }
                  : range),
                strand: isReverse ? -1 : 1,
                id: ann.id
              });

              // Step one base past the match start so overlapping matches
              // are still found.
              reg.lastIndex = match.index + 1;
            }
          } catch (error) {
            console.error(`error:`, error);
          }
        }
        const seqLen = sequence.length;

        const revSeq = getReverseComplementSequenceString(sequence);
        getMatches({
          seqLen,
          seqToMatchAgainst: circular ? sequence + sequence : sequence
        });
        getMatches({
          seqLen,
          isReverse: true,
          seqToMatchAgainst: circular ? revSeq + revSeq : revSeq
        });
      }
    );
  });

  //loop through all patterns and get all matches

  const toReturn: Record<string, Annotation[] | Record<string, string[]>> = {};

  forEach(annotationsToAddBySeqId, (anns, id) => {
    const origSeq = seqsToAnnotateById[id];
    // Index the annotations already on the sequence so duplicates can be
    // dropped.
    const alreadyExistingAnnsByStartEnd: Record<string, Annotation> = {};
    forEach(origSeq.annotations, ann => {
      alreadyExistingAnnsByStartEnd[
        getStartEndStr(
          { ...ann, strand: typeof ann.strand === "string" ? 1 : ann.strand },
          { compareName }
        )
      ] = ann;
    });
    const warningCounter: Record<string, number> = {};
    const toAdd = anns
      .filter(ann => {
        const alreadyExistingAnn =
          alreadyExistingAnnsByStartEnd[
            getStartEndStr(
              {
                ...ann,
                strand: typeof ann.strand === "string" ? 1 : ann.strand
              },
              { compareName }
            )
          ];
        if (alreadyExistingAnn) return false;
        if (warnIfMoreThan && ann.id !== undefined) {
          warningCounter[String(ann.id)] =
            (warningCounter[String(ann.id)] || 0) + 1;
        }
        return true;
      })
      .sort((a, b) => a.start - b.start);
    if (toAdd.length) {
      toReturn[id] = toAdd;
    }
    if (warnIfMoreThan) {
      forEach(warningCounter, (num, annId) => {
        if (num > warnIfMoreThan) {
          const warnings =
            (toReturn["__more_than_warnings"] as Record<string, string[]>) ||
            {};
          warnings[id] = warnings[id] || [];
          warnings[id].push(annId);
          toReturn["__more_than_warnings"] = warnings;
        }
      });
    }
  });
  return toReturn;
}
153
+
154
/**
 * Builds the lookup key used to detect duplicate annotations:
 * `<start>-<end>-<rev|for>-<name or empty>`.
 *
 * An annotation counts as reverse when `strand` is -1 or `forward` is
 * explicitly `false`. The name segment is only filled in when `compareName`
 * is truthy.
 */
function getStartEndStr(
  {
    start,
    end,
    name,
    strand,
    forward
  }: {
    start: number;
    end: number;
    name?: string;
    strand?: number;
    forward?: boolean;
  },
  { compareName }: { compareName: boolean | undefined }
) {
  const direction = strand === -1 || forward === false ? "rev" : "for";
  const nameSegment = compareName ? name : "";
  return `${start}-${end}-${direction}-${nameSegment}`;
}
175
+
176
/**
 * Translates an ApE-like search pattern into a regular-expression source
 * string, character by character:
 *
 * - a letter with several expansions in `ambiguous_dna_values` becomes a
 *   character class (or `.` when it expands to four bases);
 * - `#` becomes a lazy run of "anything but the previous literal character"
 *   (or `.*?` when there is no previous literal);
 * - `<` turns everything built so far into an optional group whose
 *   alternatives are the successively shorter endings of that text;
 * - `>` turns everything after it into an optional group whose alternatives
 *   are the successively shorter beginnings of that text;
 * - any other character is copied through unchanged.
 *
 * Only the first "(" and the first ")" of the input are stripped before
 * conversion.
 *
 * Throws an Error when `#`, `<` or `>` appears after a `>`, or when `<`
 * appears twice.
 */
function convertApELikeRegexToRegex(regString = "") {
  let built = "";
  let textAfterRightCaret = "";
  let rightCaretAlternatives = "";
  let textBeforeRightCaret = "";
  let prevBp: string | undefined;
  let sawLeftCaret = false;
  let sawRightCaret = false;

  // Once ">" has been seen, every appended piece extends the optional tail
  // and adds one more (longer) alternative in front of the existing ones.
  const trackAfterRightCaret = (justAdded: string) => {
    if (!sawRightCaret) return;
    textAfterRightCaret += justAdded;
    const separator = rightCaretAlternatives.length ? "|" : "";
    rightCaretAlternatives = `${textAfterRightCaret}${separator}${rightCaretAlternatives}`;
  };

  // Splits the text built so far into units (single characters or bracketed
  // classes with whatever trails them) and returns an optional group listing
  // every ending of that text, longest first.
  const toOptionalLeadingGroup = (soFar: string) => {
    const groups: string[] = [];
    let holder = "";
    let isGroupClosed = true;
    let closingBraceHit = false;
    for (let index = 0; index < soFar.length; index++) {
      const char = soFar[index];
      const nextChar = soFar[index + 1];
      if (char === "[") {
        isGroupClosed = false;
      } else if (char === "]" || closingBraceHit) {
        closingBraceHit = true;
        if (ambiguous_dna_values[nextChar] || nextChar === "[") {
          isGroupClosed = true;
          closingBraceHit = false;
        }
      }
      holder += char;
      if (isGroupClosed) {
        groups.push(holder);
        holder = "";
      }
    }
    let ending = "";
    let alternatives = "";
    for (let i = groups.length - 1; i >= 0; i--) {
      ending = groups[i] + ending;
      alternatives = `${ending}${alternatives.length ? "|" : ""}${alternatives}`;
    }
    return `(${alternatives})?`;
  };

  for (const bp of regString.replace("(", "").replace(")", "")) {
    const ambigVal = ambiguous_dna_values[bp.toUpperCase()];
    if (ambigVal && ambigVal.length > 1) {
      const valToUse = ambigVal.length === 4 ? "." : `[${ambigVal}]`;
      built += valToUse;
      trackAfterRightCaret(valToUse);
    } else if (bp === "#") {
      if (sawRightCaret) throw new Error("Error converting regex");
      const valToUse = prevBp ? `[^${prevBp}]*?` : `.*?`;
      built += valToUse;
      trackAfterRightCaret(valToUse);
    } else if (bp === "<") {
      if (sawRightCaret) throw new Error("Error converting to regex");
      if (sawLeftCaret) throw new Error("Error converting to regex");
      built = toOptionalLeadingGroup(built);
      sawLeftCaret = true;
    } else if (bp === ">") {
      if (sawRightCaret) throw new Error("Error converting regex");
      sawRightCaret = true;
      textBeforeRightCaret = built;
    } else {
      built += bp;
      trackAfterRightCaret(bp);
      prevBp = bp;
    }
  }

  return sawRightCaret
    ? `${textBeforeRightCaret}(${rightCaretAlternatives})?`
    : built;
}
271
/**
 * Replaces each character of `sequence` with its entry in
 * `aminoAcidToDegenerateDnaMap` (looked up case-insensitively). Characters
 * without an entry are copied through unchanged.
 */
function convertProteinSeqToDNAIupac(sequence: string) {
  let dna = "";
  for (const residue of sequence) {
    dna += aminoAcidToDegenerateDnaMap[residue.toLowerCase()] || residue;
  }
  return dna;
}
285
+
286
+ export {
287
+ convertProteinSeqToDNAIupac,
288
+ convertApELikeRegexToRegex,
289
+ autoAnnotate
290
+ };
package/src/bioData.ts ADDED
@@ -0,0 +1,65 @@
1
+ //Adapted from biopython. Check the BIOPYTHON_LICENSE for licensing info
2
+
3
/** The 20 one-letter amino acid codes. */
export const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
/** `protein_letters` followed by U and X. */
export const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
/** `protein_letters` followed by B, X, Z, J, U, O and the stop symbol "*". */
export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";

/** DNA letters including the ambiguity codes. */
export const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
/** The four plain DNA bases. */
export const unambiguous_dna_letters = "GATC";
/** Same as `ambiguous_dna_letters` but with U in place of T. */
export const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
/** The four plain RNA bases. */
export const unambiguous_rna_letters = "GAUC";
/** The four plain DNA bases followed by B, D, S and W. */
export const extended_dna_letters = "GATCBDSW";
11
+
12
/**
 * For each upper-case DNA letter (and "."), the string of concrete bases it
 * can stand for. Plain bases and "." map to themselves; X and N both map to
 * all four bases.
 */
export const ambiguous_dna_values: Record<string, string> = {
  ".": ".",
  // plain bases
  A: "A",
  C: "C",
  G: "G",
  T: "T",
  // two-base codes
  M: "AC",
  R: "AG",
  W: "AT",
  S: "CG",
  Y: "CT",
  K: "GT",
  // three-base codes
  V: "ACG",
  H: "ACT",
  D: "AGT",
  B: "CGT",
  // any base
  X: "GATC",
  N: "GATC"
};
31
+
32
/**
 * For each upper-case amino acid letter, the string of concrete residues it
 * can stand for. Most letters map to themselves; B, J and Z list two
 * residues each and X lists the 20 standard residues. The "*", "." and "-"
 * entries hold backslash-escaped text.
 */
export const extended_protein_values: Record<string, string> = {
  A: "A",
  B: "ND",
  C: "C",
  D: "D",
  E: "E",
  F: "F",
  G: "G",
  H: "H",
  I: "I",
  J: "IL",
  K: "K",
  L: "L",
  M: "M",
  N: "N",
  O: "O",
  P: "P",
  Q: "Q",
  R: "R",
  S: "S",
  T: "T",
  U: "U",
  V: "V",
  W: "W",
  // TODO - Include U and O in the possible values of X?
  // This could alter the extended_protein_weight_ranges ...
  // by MP: Won't do this, because they are so rare.
  X: "ACDEFGHIKLMNPQRSTVWY",
  Y: "Y",
  Z: "QE",
  "*": "\\*\\.",
  ".": "\\.",
  "-": "\\-"
};
@@ -0,0 +1,91 @@
1
+ import {
2
+ isValidSequence,
3
+ SANTA_LUCIA_NN,
4
+ SANTA_LUCIA_INIT,
5
+ SantaLuciaParams
6
+ } from "./calculateSantaLuciaTm";
7
+
8
+ /**
9
+ * Calculate End Stability (3' end stability) of a primer
10
+ *
11
+ * The maximum stability for the last five 3' bases of a left or right primer.
12
+ * Bigger numbers mean more stable 3' ends. The value is the maximum delta G
13
+ * (kcal/mol) for duplex disruption for the five 3' bases.
14
+ *
15
+ * According to Primer3 documentation:
16
+ * - Most stable 5mer duplex: GCGCG = 6.86 kcal/mol (SantaLucia 1998)
17
+ * - Most labile 5mer duplex: TATAT = 0.86 kcal/mol (SantaLucia 1998)
18
+ *
19
+ * @param {string} sequence - DNA sequence (5' to 3')
20
+ * @returns {number} - Delta G (kcal/mol) for the last 5 bases at 3' end
21
+ * @throws {Error} Invalid sequence or too short.
22
+ */
23
+ export default function calculateEndStability(
24
+ sequence: string
25
+ ): number | string {
26
+ try {
27
+ const seq = sequence?.toUpperCase().trim();
28
+
29
+ if (!isValidSequence(seq)) {
30
+ throw new Error("Invalid sequence: contains non-DNA characters");
31
+ }
32
+
33
+ if (seq.length < 5) {
34
+ throw new Error(
35
+ "Sequence too short: minimum length is 5 bases for end stability calculation"
36
+ );
37
+ }
38
+
39
+ const last5Bases = seq.substring(seq.length - 5);
40
+
41
+ let deltaH = 0; // kcal/mol
42
+ let deltaS = 0; // cal/K·mol
43
+
44
+ // Calculate nearest-neighbor contributions for the 4 dinucleotides
45
+ for (let i = 0; i < 4; i++) {
46
+ const dinucleotide = last5Bases.substring(i, i + 2);
47
+
48
+ if (dinucleotide.includes("N")) {
49
+ continue;
50
+ }
51
+
52
+ const params = (SANTA_LUCIA_NN as Record<string, SantaLuciaParams>)[
53
+ dinucleotide
54
+ ];
55
+ if (params) {
56
+ deltaH += params.dH;
57
+ deltaS += params.dS;
58
+ }
59
+ }
60
+
61
+ // Add initiation parameters for terminal base pairs
62
+ const firstBase = last5Bases[0];
63
+ const lastBase = last5Bases[last5Bases.length - 1];
64
+
65
+ // Terminal GC or AT initiation
66
+ if (firstBase === "G" || firstBase === "C") {
67
+ deltaH += SANTA_LUCIA_INIT["GC"].dH;
68
+ deltaS += SANTA_LUCIA_INIT["GC"].dS;
69
+ } else {
70
+ deltaH += SANTA_LUCIA_INIT["AT"].dH;
71
+ deltaS += SANTA_LUCIA_INIT["AT"].dS;
72
+ }
73
+
74
+ if (lastBase === "G" || lastBase === "C") {
75
+ deltaH += SANTA_LUCIA_INIT["GC"].dH;
76
+ deltaS += SANTA_LUCIA_INIT["GC"].dS;
77
+ } else {
78
+ deltaH += SANTA_LUCIA_INIT["AT"].dH;
79
+ deltaS += SANTA_LUCIA_INIT["AT"].dS;
80
+ }
81
+
82
+ // Calculate deltaG at 37°C (310.15 K)
83
+ // deltaG = deltaH - T * deltaS
84
+ const T = 310.15; // 37°C in Kelvin
85
+ const deltaG = deltaH - (T * deltaS) / 1000; // Result in kcal/mol
86
+
87
+ return Math.round(Math.abs(deltaG) * 100) / 100;
88
+ } catch (e) {
89
+ return `Error calculating end stability for sequence ${sequence}. ${e}`;
90
+ }
91
+ }
@@ -0,0 +1,46 @@
1
+ import calculateTm from "./calculateNebTm";
2
+
3
/** Optional settings for `calculateNebTa`. */
interface NebTaOptions {
  /** Forwarded unchanged to the Tm calculation. */
  monovalentCationConc?: number;
  /** Only the value "Q5" changes the result; see `calculateNebTa`. */
  polymerase?: string;
}

/**
 * Derives an annealing temperature for a primer pair from the lower of the
 * two primers' melting temperatures.
 *
 * - polymerase "Q5": lower Tm + 1, capped at 72
 * - anything else: lower Tm - 3
 *
 * This function does not throw: when the input is not exactly two sequences,
 * or a Tm comes back as something other than a number, the failure is
 * reported through the returned string.
 *
 * @param sequences exactly two primer sequences
 * @param primerConc primer concentration, forwarded to the Tm calculation
 * @returns the annealing temperature, or an error message string
 */
export default function calculateNebTa(
  sequences: string[],
  primerConc: number,
  { monovalentCationConc, polymerase }: NebTaOptions = {}
): number | string {
  try {
    if (sequences.length !== 2) {
      throw new Error(
        `${sequences.length} sequences received when 2 primers were expected`
      );
    }

    // The Tm helper may hand back a non-number (e.g. an error message);
    // treat that as a failure of the whole calculation.
    const meltingTemperatures = sequences.map(seq => {
      const tm = calculateTm(seq, { monovalentCationConc, primerConc });
      if (typeof tm !== "number") {
        throw new Error(`Invalid Tm calculated for ${seq}: ${tm}`);
      }
      return tm;
    });

    const [lowerMeltingTemp] = meltingTemperatures.sort((a, b) => a - b);

    if (polymerase !== "Q5") {
      return lowerMeltingTemp - 3;
    }
    // Ta = Tm_lower+1°C is standard for Q5
    // "Annealing temperature for experiments with this enzyme should typically not exceed 72°C"
    return Math.min(lowerMeltingTemp + 1, 72);
  } catch (err) {
    return `Error calculating annealing temperature: ${err}`;
  }
}