@teselagen/sequence-utils 0.3.37 → 0.3.38-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. package/DNAComplementMap.d.ts +1 -1
  2. package/addGapsToSeqReads.d.ts +16 -3
  3. package/adjustAnnotationsToInsert.d.ts +2 -1
  4. package/adjustBpsToReplaceOrInsert.d.ts +2 -1
  5. package/aliasedEnzymesByName.d.ts +37 -1
  6. package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
  7. package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
  8. package/annotateSingleSeq.d.ts +5 -4
  9. package/annotationTypes.d.ts +2 -2
  10. package/autoAnnotate.d.ts +17 -8
  11. package/bioData.d.ts +10 -58
  12. package/calculateEndStability.d.ts +1 -1
  13. package/calculateNebTa.d.ts +6 -1
  14. package/calculateNebTm.d.ts +6 -4
  15. package/calculatePercentGC.d.ts +1 -1
  16. package/calculateSantaLuciaTm.d.ts +28 -114
  17. package/calculateTm.d.ts +13 -1
  18. package/computeDigestFragments.d.ts +30 -24
  19. package/condensePairwiseAlignmentDifferences.d.ts +1 -1
  20. package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
  21. package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
  22. package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
  23. package/defaultEnzymesByName.d.ts +2 -1
  24. package/degenerateDnaToAminoAcidMap.d.ts +1 -1
  25. package/degenerateRnaToAminoAcidMap.d.ts +1 -1
  26. package/deleteSequenceDataAtRange.d.ts +2 -1
  27. package/diffUtils.d.ts +9 -7
  28. package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
  29. package/featureTypesAndColors.d.ts +19 -6
  30. package/filterSequenceString.d.ts +14 -10
  31. package/findApproxMatches.d.ts +7 -1
  32. package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
  33. package/findOrfsInPlasmid.d.ts +2 -11
  34. package/findSequenceMatches.d.ts +11 -1
  35. package/generateAnnotations.d.ts +2 -1
  36. package/generateSequenceData.d.ts +8 -13
  37. package/getAllInsertionsInSeqReads.d.ts +11 -1
  38. package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
  39. package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
  40. package/getAminoAcidStringFromSequenceString.d.ts +3 -1
  41. package/getCodonRangeForAASliver.d.ts +3 -4
  42. package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  43. package/getComplementSequenceAndAnnotations.d.ts +5 -1
  44. package/getComplementSequenceString.d.ts +1 -1
  45. package/getCutsiteType.d.ts +2 -1
  46. package/getCutsitesFromSequence.d.ts +2 -1
  47. package/getDegenerateDnaStringFromAAString.d.ts +1 -1
  48. package/getDegenerateRnaStringFromAAString.d.ts +1 -1
  49. package/getDigestFragmentsForCutsites.d.ts +4 -1
  50. package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
  51. package/getInsertBetweenVals.d.ts +2 -1
  52. package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
  53. package/getOrfsFromSequence.d.ts +17 -11
  54. package/getOverlapBetweenTwoSequences.d.ts +2 -1
  55. package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
  56. package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
  57. package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  58. package/getReverseComplementAnnotation.d.ts +11 -1
  59. package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
  60. package/getReverseComplementSequenceString.d.ts +1 -1
  61. package/getReverseSequenceString.d.ts +1 -1
  62. package/getSequenceDataBetweenRange.d.ts +9 -1
  63. package/getVirtualDigest.d.ts +11 -10
  64. package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
  65. package/index.cjs +732 -483
  66. package/index.d.ts +8 -5
  67. package/index.js +732 -483
  68. package/index.umd.cjs +732 -483
  69. package/insertGapsIntoRefSeq.d.ts +2 -1
  70. package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
  71. package/isEnzymeType2S.d.ts +2 -1
  72. package/mapAnnotationsToRows.d.ts +9 -1
  73. package/package.json +9 -6
  74. package/prepareCircularViewData.d.ts +2 -1
  75. package/prepareRowData.d.ts +7 -3
  76. package/proteinAlphabet.d.ts +1 -1
  77. package/rotateBpsToPosition.d.ts +1 -1
  78. package/rotateSequenceDataToPosition.d.ts +3 -1
  79. package/shiftAnnotationsByLen.d.ts +4 -3
  80. package/src/DNAComplementMap.ts +32 -0
  81. package/src/addGapsToSeqReads.ts +436 -0
  82. package/src/adjustAnnotationsToInsert.ts +20 -0
  83. package/src/adjustBpsToReplaceOrInsert.ts +73 -0
  84. package/src/aliasedEnzymesByName.ts +7366 -0
  85. package/src/aminoAcidToDegenerateDnaMap.ts +32 -0
  86. package/src/aminoAcidToDegenerateRnaMap.ts +32 -0
  87. package/src/annotateSingleSeq.ts +37 -0
  88. package/src/annotationTypes.ts +23 -0
  89. package/src/autoAnnotate.test.js +0 -1
  90. package/src/autoAnnotate.ts +290 -0
  91. package/src/bioData.ts +65 -0
  92. package/src/calculateEndStability.ts +91 -0
  93. package/src/calculateNebTa.ts +46 -0
  94. package/src/calculateNebTm.ts +132 -0
  95. package/src/calculatePercentGC.ts +3 -0
  96. package/src/calculateSantaLuciaTm.ts +184 -0
  97. package/src/calculateTm.ts +242 -0
  98. package/src/computeDigestFragments.ts +238 -0
  99. package/src/condensePairwiseAlignmentDifferences.ts +85 -0
  100. package/src/convertAACaretPositionOrRangeToDna.ts +28 -0
  101. package/src/convertDnaCaretPositionOrRangeToAA.ts +28 -0
  102. package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
  103. package/src/defaultEnzymesByName.ts +280 -0
  104. package/src/degenerateDnaToAminoAcidMap.ts +5 -0
  105. package/src/degenerateRnaToAminoAcidMap.ts +5 -0
  106. package/src/deleteSequenceDataAtRange.ts +13 -0
  107. package/src/diffUtils.ts +80 -0
  108. package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
  109. package/src/featureTypesAndColors.ts +167 -0
  110. package/src/filterSequenceString.ts +153 -0
  111. package/src/findApproxMatches.ts +58 -0
  112. package/src/findNearestRangeOfSequenceOverlapToPosition.ts +43 -0
  113. package/src/findOrfsInPlasmid.js +6 -1
  114. package/src/findOrfsInPlasmid.ts +31 -0
  115. package/src/findSequenceMatches.ts +154 -0
  116. package/src/generateAnnotations.ts +39 -0
  117. package/src/generateSequenceData.ts +212 -0
  118. package/src/getAllInsertionsInSeqReads.ts +100 -0
  119. package/src/getAminoAcidDataForEachBaseOfDna.ts +305 -0
  120. package/src/getAminoAcidFromSequenceTriplet.ts +27 -0
  121. package/src/getAminoAcidStringFromSequenceString.ts +36 -0
  122. package/src/getCodonRangeForAASliver.ts +73 -0
  123. package/src/getComplementAminoAcidStringFromSequenceString.ts +10 -0
  124. package/src/getComplementSequenceAndAnnotations.ts +25 -0
  125. package/src/getComplementSequenceString.ts +23 -0
  126. package/src/getCutsiteType.ts +18 -0
  127. package/src/getCutsitesFromSequence.ts +22 -0
  128. package/src/getDegenerateDnaStringFromAAString.ts +15 -0
  129. package/src/getDegenerateRnaStringFromAAString.ts +15 -0
  130. package/src/getDigestFragmentsForCutsites.ts +126 -0
  131. package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
  132. package/src/getInsertBetweenVals.ts +31 -0
  133. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +40 -0
  134. package/src/getMassOfAaString.ts +29 -0
  135. package/src/getOrfsFromSequence.ts +132 -0
  136. package/src/getOverlapBetweenTwoSequences.ts +30 -0
  137. package/src/getPossiblePartsFromSequenceAndEnzymes.ts +149 -0
  138. package/src/getReverseAminoAcidStringFromSequenceString.ts +22 -0
  139. package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +10 -0
  140. package/src/getReverseComplementAnnotation.ts +33 -0
  141. package/src/getReverseComplementSequenceAndAnnotations.ts +46 -0
  142. package/src/getReverseComplementSequenceString.ts +18 -0
  143. package/src/getReverseSequenceString.ts +12 -0
  144. package/src/getSequenceDataBetweenRange.ts +154 -0
  145. package/src/getVirtualDigest.ts +139 -0
  146. package/src/guessIfSequenceIsDnaAndNotProtein.ts +39 -0
  147. package/src/index.test.ts +43 -0
  148. package/src/index.ts +111 -0
  149. package/src/insertGapsIntoRefSeq.ts +43 -0
  150. package/src/insertSequenceDataAtPosition.ts +2 -0
  151. package/src/insertSequenceDataAtPositionOrRange.ts +328 -0
  152. package/src/isEnzymeType2S.ts +5 -0
  153. package/src/mapAnnotationsToRows.ts +256 -0
  154. package/src/prepareCircularViewData.ts +24 -0
  155. package/src/prepareRowData.ts +61 -0
  156. package/src/prepareRowData_output1.json +1 -0
  157. package/src/proteinAlphabet.ts +271 -0
  158. package/src/rotateBpsToPosition.ts +12 -0
  159. package/src/rotateSequenceDataToPosition.ts +54 -0
  160. package/src/shiftAnnotationsByLen.ts +24 -0
  161. package/src/threeLetterSequenceStringToAminoAcidMap.ts +198 -0
  162. package/src/tidyUpAnnotation.ts +205 -0
  163. package/src/tidyUpSequenceData.ts +213 -0
  164. package/src/types.ts +109 -0
  165. package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
  166. package/tidyUpAnnotation.d.ts +13 -11
  167. package/tidyUpSequenceData.d.ts +15 -1
  168. package/types.d.ts +105 -0
@@ -0,0 +1,126 @@
1
+ import {
2
+ normalizePositionByRangeLength,
3
+ getRangeLength
4
+ } from "@teselagen/range-utils";
5
+ import { CutSite, DigestFragment } from "./types";
6
+
7
/**
 * Computes the fragments produced by cutting a sequence at the given cut sites.
 *
 * Cut sites are ordered by their top-strand snip position. For a linear
 * sequence two pseudo cut sites (type "START_OR_END_OF_SEQ") are added at
 * position 0 and at `sequenceLength` so that the leading and trailing pieces
 * are reported as fragments too. For a circular sequence the last cut site is
 * paired with the first one.
 *
 * The caller's `cutsites` array is NOT modified; sorting happens on a copy.
 *
 * @param sequenceLength length of the sequence being digested
 * @param circular whether the sequence is circular
 * @param cutsites cut sites found on the sequence (any order)
 * @param opts.computePartialDigests when true, every cut site is additionally
 *   paired with the other cut sites (not just its immediate neighbour)
 * @returns one fragment per cut-site pair, in pairing order; an empty array
 *   when `cutsites` is empty
 */
export default function getDigestFragmentsForCutsites(
  sequenceLength: number,
  circular: boolean,
  cutsites: CutSite[],
  opts: { computePartialDigests?: boolean } = {}
): DigestFragment[] {
  if (!cutsites.length) return [];

  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the array owned by the caller.
  const byTopSnip = [...cutsites].sort(
    (a, b) => (a.topSnipPosition || 0) - (b.topSnipPosition || 0)
  );

  //if linear, add 2 fake cutsites for the start and end of the seq
  const sortedCutsites: CutSite[] = circular
    ? byTopSnip
    : [
        makeSequenceBoundaryCutsite("START_OF_SEQ", 0),
        ...byTopSnip,
        makeSequenceBoundaryCutsite("END_OF_SEQ", sequenceLength)
      ];

  const pairs: CutSite[][] = [];
  sortedCutsites.forEach((cutsite1, index) => {
    const nextCutsite = sortedCutsites[index + 1];
    if (!circular && !nextCutsite) {
      return; //don't push a pair if the sequence is linear and we've reached the end of our cutsites array
    }
    if (opts.computePartialDigests) {
      sortedCutsites.forEach((cutsite2, index2) => {
        // the direct neighbour is pushed below; index 0 is never used as a partial-digest partner
        if (index2 === index + 1 || index2 === 0) {
          return;
        }
        pairs.push([cutsite1, cutsite2]);
      });
    }
    // wrap around to the first cut site when there is no next one (circular only)
    pairs.push([cutsite1, nextCutsite ? nextCutsite : sortedCutsites[0]]);
  });

  return pairs.map(([cut1, cut2]): DigestFragment => {
    const start = normalizePositionByRangeLength(
      cut1.topSnipPosition || 0,
      sequenceLength
    );
    const end = normalizePositionByRangeLength(
      (cut2.topSnipPosition || 0) - 1,
      sequenceLength
    );
    const fragmentRange = { start, end };
    const size = getRangeLength(fragmentRange, sequenceLength);
    const id = start + "-" + end + "-" + size + "-";

    return {
      // I don't think we can determine containsFive/ThreePrimeRecognitionSite until the inclusion/exclusion of the overhangs is done
      cut1: {
        ...cut1,
        isOverhangIncludedInFragmentSize:
          cut1.type !== "START_OR_END_OF_SEQ" &&
          cut1.overhangSize > 0 &&
          cut1.topSnipBeforeBottom
      },
      cut2: {
        ...cut2,
        isOverhangIncludedInFragmentSize:
          cut2.type !== "START_OR_END_OF_SEQ" &&
          cut2.overhangSize > 0 &&
          !cut2.topSnipBeforeBottom
      },
      ...fragmentRange,
      size,
      id,
      name: `${cut1.restrictionEnzyme.name} -- ${cut2.restrictionEnzyme.name} ${size} bps`
    };
  });
}

/**
 * Builds the zero-overhang pseudo cut site that marks one end of a linear sequence.
 */
function makeSequenceBoundaryCutsite(
  name: "START_OF_SEQ" | "END_OF_SEQ",
  position: number
): CutSite {
  return {
    start: position,
    end: position,
    topSnipPosition: position,
    bottomSnipPosition: position,
    overhangSize: 0,
    type: "START_OR_END_OF_SEQ",
    name,
    restrictionEnzyme: {
      name,
      site: "",
      forwardRegex: "",
      reverseRegex: ""
    }
  };
}
@@ -0,0 +1,50 @@
1
+ import { computeDigestFragments } from "./computeDigestFragments";
2
+ import getCutsitesFromSequence from "./getCutsitesFromSequence";
3
+ import { CutSite, RestrictionEnzyme } from "./types";
4
+ import { flatMap, uniqBy } from "lodash-es";
5
+
6
/**
 * Digests `sequence` with one or more restriction enzymes and returns the
 * resulting fragments, de-duplicated by start/end/size and ordered by start
 * position (larger fragments first when starts are equal).
 *
 * `computePartialDigests` is accepted as an alias of `computePartialDigest`.
 * For circular sequences with partial digests enabled, the first fragment
 * whose size equals the full sequence length is dropped.
 */
export default function getDigestFragmentsForRestrictionEnzymes(
  sequence: string,
  circular: boolean,
  contextEnzymes: RestrictionEnzyme[] | RestrictionEnzyme,
  options?: {
    computePartialDigest?: boolean;
    computePartialDigests?: boolean; // alias
    computeDigestDisabled?: boolean; // corrected spelling if needed, but keeping as is
    computePartialDigestDisabled?: boolean;
    includeOverAndUnderHangs?: boolean;
  }
) {
  const enzymeList = Array.isArray(contextEnzymes)
    ? contextEnzymes
    : [contextEnzymes];
  const wantsPartialDigest =
    options?.computePartialDigest || options?.computePartialDigests;

  const cutsitesByName = getCutsitesFromSequence(sequence, circular, enzymeList);
  const { fragments: allFragments } = computeDigestFragments({
    cutsites: flatMap(cutsitesByName) as CutSite[],
    sequenceLength: sequence.length,
    circular,
    ...options,
    computePartialDigest: wantsPartialDigest
  });

  const fragments = uniqBy(
    allFragments,
    fragment => `${fragment.start}-${fragment.end}-${fragment.size}`
  );

  if (circular && wantsPartialDigest) {
    // filter out the full length fragment if it's a duplicate
    const fullLengthAt = fragments.findIndex(f => f.size === sequence.length);
    if (fullLengthAt > -1) {
      fragments.splice(fullLengthAt, 1);
    }
  }

  return fragments.sort((a, b) => a.start - b.start || b.size - a.size);
}
@@ -0,0 +1,31 @@
1
+ import {
2
+ normalizePositionByRangeLength1Based,
3
+ Range
4
+ } from "@teselagen/range-utils";
5
+
6
/**
 * Returns the pair of 1-based positions an insertion would sit between.
 *
 * - With an active selection (`selectionLayer.start > -1`) the pair is the
 *   selection start and `selectionLayer.end + 2`.
 * - Otherwise, with a caret (`caretPosition > -1`) the pair is the caret and
 *   the caret + 1.
 * - With neither, the pair is `[sequenceLength, 1]`.
 *
 * Both values are passed through `normalizePositionByRangeLength1Based`.
 */
export default function getInsertBetweenVals(
  caretPosition: number,
  selectionLayer: Range,
  sequenceLength: number
): [number, number] {
  const toOneBased = (position: number) =>
    normalizePositionByRangeLength1Based(position, sequenceLength);

  const hasSelection = selectionLayer.start > -1;
  if (hasSelection) {
    return [toOneBased(selectionLayer.start), toOneBased(selectionLayer.end + 2)];
  }

  const hasCaret = caretPosition > -1;
  if (hasCaret) {
    return [toOneBased(caretPosition), toOneBased(caretPosition + 1)];
  }

  return [sequenceLength, 1];
}
@@ -0,0 +1,40 @@
1
+ import {
2
+ isPositionWithinRange,
3
+ getSequenceWithinRange,
4
+ normalizePositionByRangeLength,
5
+ isPositionCloserToRangeStartThanRangeEnd,
6
+ Range
7
+ } from "@teselagen/range-utils";
8
+
9
/**
 * Splits the part of `sequence` covered by `range` into the bases left and
 * right of `position`.
 *
 * When `position` is inside the range, the left side runs from `range.start`
 * to `position - 1` and the right side from `position` to `range.end`. When it
 * is outside, the whole range goes to the right side if the position is closer
 * to the range start, otherwise to the left side.
 */
export default function getLeftAndRightOfSequenceInRangeGivenPosition(
  range: Range,
  position: number,
  sequence: string
): { leftHandSide: string; rightHandSide: string } {
  const seqLen = sequence.length;

  if (isPositionWithinRange(position, range, seqLen)) {
    const leftHandSide = getSequenceWithinRange(
      {
        start: range.start,
        end: normalizePositionByRangeLength(position - 1, seqLen)
      },
      sequence
    ) as string;
    const rightHandSide = getSequenceWithinRange(
      { start: position, end: range.end },
      sequence
    ) as string;
    return { leftHandSide, rightHandSide };
  }

  const closerToStart = isPositionCloserToRangeStartThanRangeEnd(
    position,
    range,
    seqLen
  );
  const wholeRange = getSequenceWithinRange(range, sequence) as string;
  return closerToStart
    ? { leftHandSide: "", rightHandSide: wholeRange }
    : { leftHandSide: wholeRange, rightHandSide: "" };
}
@@ -0,0 +1,29 @@
1
+ import proteinAlphabet from "./proteinAlphabet";
2
+
3
/**
 * Sums the masses of the amino acids in a string.
 *
 * Each character is looked up in `proteinAlphabet` and its `mass` is added.
 * For a non-empty string a constant 18.0153 is added once to the (optionally
 * divided) sum — presumably the mass of one water molecule; confirm against
 * the data source.
 *
 * @param aaString A string of amino acid characters
 * @param numsAfterDecimal the number of digits to keep after the decimal point
 * @param divideByThree divide the summed residue mass by three; this is
 * useful in situations where nucleotides are converted to amino acids in a
 * way that makes every amino acid appear three times
 * @returns The rounded mass of the amino acid string (0 for an empty string)
 */
export default function getMassOfAaString(
  aaString: string,
  numsAfterDecimal = 2,
  divideByThree = false
) {
  const massTable = proteinAlphabet as Record<string, { mass: number }>;

  let total = aaString
    .split("")
    .reduce((runningSum, residue) => runningSum + massTable[residue].mass, 0);

  if (divideByThree) {
    total = total / 3;
  }
  if (aaString.length > 0) {
    total += 18.0153;
  }

  const scale = 10 ** numsAfterDecimal;
  return Math.round(total * scale) / scale;
}
@@ -0,0 +1,132 @@
1
+ import shortid from "shortid";
2
+ import { Annotation } from "./types";
3
+ import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
4
+
5
+ interface GetOrfsOptions {
6
+ sequence: string;
7
+ minimumOrfSize: number;
8
+ forward: boolean;
9
+ circular?: boolean;
10
+ useAdditionalOrfStartCodons?: boolean;
11
+ }
12
+
13
+ export interface Orf extends Annotation {
14
+ internalStartCodonIndices: number[];
15
+ frame: number;
16
+ // annotationTypePlural: string; // Annotation might already have this or it's extra
17
+ isOrf: boolean;
18
+ remove?: boolean;
19
+ length: number;
20
+ }
21
+
22
+ /**
23
+ * @private
24
+ * Finds ORFs in a given DNA forward in a given frame.
25
+ * frame - The frame to look in.
26
+ * sequence - The dna sequence.
27
+ * minimumOrfSize - The minimum length of ORF to return.
28
+ * forward - Should we find forward facing orfs or reverse facing orfs
29
+ * return - The list of ORFs found.
30
+ */
31
+ export default function getOrfsFromSequence(options: GetOrfsOptions): Orf[] {
32
+ let sequence = options.sequence;
33
+ const minimumOrfSize = options.minimumOrfSize;
34
+ const forward = options.forward;
35
+ const circular = options.circular;
36
+ const useAdditionalOrfStartCodons = options.useAdditionalOrfStartCodons;
37
+
38
+ const originalSequenceLength = sequence.length;
39
+ if (!forward) {
40
+ //we reverse the sequence
41
+ sequence = getReverseComplementSequenceString(sequence);
42
+ }
43
+
44
+ if (circular) {
45
+ //we'll pass in double the sequence and then trim excess orfs
46
+ sequence += sequence;
47
+ }
48
+ const re = useAdditionalOrfStartCodons
49
+ ? /(?=((?:A[TU]G|G[TU]G|C[TU]G)(?:.{3})*?(?:[TU]AG|[TU]AA|[TU]GA)))/gi
50
+ : /(?=((?:A[TU]G)(?:.{3})*?(?:[TU]AG|[TU]AA|[TU]GA)))/gi;
51
+ let m: RegExpExecArray | null;
52
+ const orfRanges: Orf[] = [];
53
+ //loop through orf hits!
54
+ /* eslint-disable no-cond-assign*/
55
+
56
+ while ((m = re.exec(sequence)) !== null) {
57
+ //stuff to get the regex to work
58
+ if (m.index === re.lastIndex) {
59
+ re.lastIndex++;
60
+ }
61
+ //orf logic:
62
+ const orfLength = m[1].length;
63
+ if (orfLength >= minimumOrfSize) {
64
+ //only keep orfs >= to the minimum size
65
+ const start = m.index;
66
+ let end = orfLength + start - 1;
67
+ //normalize the end if it is greater than the original sequence length
68
+ if (end >= originalSequenceLength) {
69
+ end -= originalSequenceLength;
70
+ }
71
+ if (start < originalSequenceLength) {
72
+ //only keep orfs that *begin* before the original sequence length (only the case when dealing with circular orfs)
73
+ orfRanges.push({
74
+ start: start,
75
+ end: end,
76
+ length: m[1].length,
77
+ internalStartCodonIndices: [],
78
+ frame: start % 3,
79
+ forward: forward,
80
+ // annotationTypePlural: "orfs",
81
+ isOrf: true,
82
+ id: shortid(),
83
+ type: "orf",
84
+ name: "ORF"
85
+ } as Orf);
86
+ }
87
+ }
88
+ }
89
+ // pair down the orfs to remove duplicates
90
+ // and deal with revComp orfs
91
+ const orfEnds: Record<number, number> = {};
92
+ orfRanges.forEach((orf, index) => {
93
+ const indexOfAlreadyExistingOrf = orfEnds[orf.end];
94
+
95
+ if (typeof indexOfAlreadyExistingOrf !== "undefined") {
96
+ let internalOrf = orf;
97
+ let containingOrf = orfRanges[indexOfAlreadyExistingOrf];
98
+ if (containingOrf.length < internalOrf.length) {
99
+ internalOrf = orfRanges[indexOfAlreadyExistingOrf];
100
+ containingOrf = orf;
101
+ orfEnds[orf.end] = index;
102
+ }
103
+ const internalStartCodonIndex = forward
104
+ ? internalOrf.start
105
+ : originalSequenceLength - internalOrf.start - 1; //use either the start or the end depending on the direction of the internalOrf
106
+ //we know because of how the regex works that larger orfs come first in the array
107
+ containingOrf.internalStartCodonIndices = [
108
+ ...containingOrf.internalStartCodonIndices,
109
+ ...internalOrf.internalStartCodonIndices,
110
+ internalStartCodonIndex
111
+ ];
112
+ //set a flag that we'll use to remove all these shorter, duplicated orfs
113
+ internalOrf.remove = true;
114
+ } else {
115
+ orfEnds[orf.end] = index;
116
+ if (!forward) {
117
+ //this check needs to come after the above assignment of orfEnds
118
+ //flip the start and ends
119
+ const endHolder = orf.end; //temp variable
120
+ orf.end = originalSequenceLength - orf.start - 1;
121
+ orf.start = originalSequenceLength - endHolder - 1;
122
+ }
123
+ }
124
+ });
125
+ const nonDuplicatedOrfRanges = orfRanges.filter(orf => {
126
+ if (!orf.remove) {
127
+ return true;
128
+ }
129
+ return false;
130
+ });
131
+ return nonDuplicatedOrfRanges;
132
+ }
@@ -0,0 +1,30 @@
1
+ import { modulatePositionByRange, Range } from "@teselagen/range-utils";
2
+
3
/**
 * Locates one sequence inside another, comparing case-insensitively and
 * allowing the match to wrap past the end of the searched sequence (the
 * searched sequence is treated as if it were repeated once).
 *
 * @param {string} sequenceToFind
 * @param {string} sequenceToSearchIn
 * @return {object || null} null if no overlap exists or a range object with .start and .end properties
 */
export default function getOverlapBetweenTwoSequences(
  sequenceToFind: string,
  sequenceToSearchIn: string
): Range | null {
  const haystack = sequenceToSearchIn.toLowerCase();
  const needle = sequenceToFind.toLowerCase();

  // searching the doubled haystack finds matches that wrap around the end
  const matchStart = (haystack + haystack).indexOf(needle);
  if (matchStart === -1) {
    return null;
  }

  const wholeHaystack = { start: 0, end: haystack.length - 1 };
  return {
    start: matchStart,
    end: modulatePositionByRange(matchStart + needle.length - 1, wholeHaystack)
  };
}
@@ -0,0 +1,149 @@
1
+ import getComplementSequenceString from "./getComplementSequenceString";
2
+ import { normalizePositionByRangeLength } from "@teselagen/range-utils";
3
+ import cutSequenceByRestrictionEnzyme from "./cutSequenceByRestrictionEnzyme";
4
+ import { CutSite, RestrictionEnzyme, SequenceData } from "./types";
5
+
6
+ export interface PartBetweenEnzymes {
7
+ start: number;
8
+ start1Based: number;
9
+ end: number;
10
+ end1Based: number;
11
+ firstCut: CutSite;
12
+ firstCutOffset: number;
13
+ firstCutOverhang: string;
14
+ firstCutOverhangTop: string;
15
+ firstCutOverhangBottom: string;
16
+ secondCut: CutSite;
17
+ secondCutOffset: number;
18
+ secondCutOverhang: string;
19
+ secondCutOverhangTop: string;
20
+ secondCutOverhangBottom: string;
21
+ }
22
+
23
+ export default function getPossiblePartsFromSequenceAndEnzymes(
24
+ seqData: SequenceData,
25
+ restrictionEnzymes: RestrictionEnzyme | RestrictionEnzyme[]
26
+ ): PartBetweenEnzymes[] {
27
+ // ac.throw([
28
+ // ac.string,
29
+ // ac.bool,
30
+ // ac.shape({
31
+ // "name": ac.string,
32
+ // "site": ac.string,
33
+ // "forwardRegex": ac.string,
34
+ // "reverseRegex": ac.string,
35
+ // "topSnipOffset": ac.number,
36
+ // "bottomSnipOffset": ac.number
37
+ // })
38
+ // ], arguments);
39
+ const enzymes = Array.isArray(restrictionEnzymes)
40
+ ? restrictionEnzymes
41
+ : [restrictionEnzymes];
42
+
43
+ const bps = seqData.sequence;
44
+ const seqLen = bps.length;
45
+ const circular = seqData.circular || false;
46
+ let cutsites: CutSite[] = [];
47
+ enzymes.forEach(enzyme => {
48
+ const newCutsites = cutSequenceByRestrictionEnzyme(bps, circular, enzyme);
49
+ cutsites = cutsites.concat(newCutsites);
50
+ });
51
+ const parts: PartBetweenEnzymes[] = [];
52
+ if (cutsites.length < 1) {
53
+ return parts;
54
+ } else if (cutsites.length === 1) {
55
+ parts.push(
56
+ getPartBetweenEnzymesWithInclusiveOverhangs(
57
+ cutsites[0],
58
+ cutsites[0],
59
+ seqLen
60
+ )
61
+ );
62
+ return parts;
63
+ } else {
64
+ const pairs = pairwise(cutsites);
65
+ pairs.forEach(pair => {
66
+ const cut1 = pair[0];
67
+ const cut2 = pair[1];
68
+ const part1 = getPartBetweenEnzymesWithInclusiveOverhangs(
69
+ cut1,
70
+ cut2,
71
+ seqLen
72
+ );
73
+ const part2 = getPartBetweenEnzymesWithInclusiveOverhangs(
74
+ cut2,
75
+ cut1,
76
+ seqLen
77
+ );
78
+ if (circular || !(part1.start > part1.end)) {
79
+ //only add origin spanning parts if the sequence is circular
80
+ parts.push(part1);
81
+ }
82
+ if (circular || !(part2.start > part2.end)) {
83
+ //only add origin spanning parts if the sequence is circular
84
+ parts.push(part2);
85
+ }
86
+ });
87
+ return parts;
88
+ }
89
+ }
90
+
91
+ function getPartBetweenEnzymesWithInclusiveOverhangs(
92
+ cut1: CutSite,
93
+ cut2: CutSite,
94
+ seqLen: number
95
+ ): PartBetweenEnzymes {
96
+ const firstCutOffset = getEnzymeRelativeOffset(cut1.restrictionEnzyme);
97
+ const secondCutOffset = getEnzymeRelativeOffset(cut2.restrictionEnzyme);
98
+ const start =
99
+ (cut1.topSnipBeforeBottom
100
+ ? cut1.topSnipPosition
101
+ : cut1.bottomSnipPosition) || 0;
102
+ const end = normalizePositionByRangeLength(
103
+ (cut2.topSnipBeforeBottom
104
+ ? cut2.bottomSnipPosition || 0
105
+ : cut2.topSnipPosition || 0) - 1,
106
+ seqLen
107
+ );
108
+ return {
109
+ start,
110
+ start1Based: start + 1,
111
+ end,
112
+ end1Based: end + 1,
113
+ firstCut: cut1,
114
+ //the offset is always counting with 0 being at the top snip position
115
+ firstCutOffset,
116
+ firstCutOverhang: cut1.overhangBps || "",
117
+ firstCutOverhangTop: firstCutOffset > 0 ? cut1.overhangBps || "" : "",
118
+ firstCutOverhangBottom:
119
+ firstCutOffset < 0
120
+ ? getComplementSequenceString(cut1.overhangBps || "")
121
+ : "",
122
+ secondCut: cut2,
123
+ //the offset is always counting with 0 being at the top snip position
124
+ secondCutOffset,
125
+ secondCutOverhang: cut2.overhangBps || "",
126
+ secondCutOverhangTop: secondCutOffset < 0 ? cut2.overhangBps || "" : "",
127
+ secondCutOverhangBottom:
128
+ secondCutOffset > 0
129
+ ? getComplementSequenceString(cut2.overhangBps || "")
130
+ : ""
131
+ };
132
+ }
133
+
134
+ function getEnzymeRelativeOffset(enzyme: RestrictionEnzyme): number {
135
+ //the offset is always counting with 0 being at the top snip position
136
+ return (enzyme.bottomSnipOffset || 0) - (enzyme.topSnipOffset || 0);
137
+ }
138
+
139
+ function pairwise<T>(list: T[]): T[][] {
140
+ if (list.length < 2) {
141
+ return [];
142
+ }
143
+ const first = list[0],
144
+ rest = list.slice(1),
145
+ pairs = rest.map(x => {
146
+ return [first, x];
147
+ });
148
+ return pairs.concat(pairwise(rest));
149
+ }
@@ -0,0 +1,22 @@
1
+ import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
2
+
3
/**
 * Builds the amino acid string for a sequence from the per-base amino acid
 * data returned by `getAminoAcidDataForEachBaseOfDna` (called with
 * `false, null, false`). Bases that are not part of a full codon are ignored;
 * every other base writes its amino acid's value at its amino acid index.
 */
export default function getReverseAminoAcidStringFromSequenceString(
  sequenceString: string
) {
  const perBaseData = getAminoAcidDataForEachBaseOfDna(
    sequenceString,
    false,
    null,
    false
  );

  const valueByAminoAcidIndex: string[] = [];
  for (const baseData of perBaseData) {
    if (baseData.fullCodon) {
      valueByAminoAcidIndex[baseData.aminoAcidIndex] = baseData.aminoAcid.value;
    }
  }
  return valueByAminoAcidIndex.join("");
}
@@ -0,0 +1,10 @@
1
+ import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
2
+ import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
3
+
4
/**
 * Translates the reverse complement of a sequence: the sequence is first
 * reverse-complemented and the result is passed to
 * `getAminoAcidStringFromSequenceString`.
 */
export default function getReverseComplementAminoAcidStringFromSequenceString(
  sequenceString: string
): string {
  const reverseComplement = getReverseComplementSequenceString(sequenceString);
  return getAminoAcidStringFromSequenceString(reverseComplement);
}
@@ -0,0 +1,33 @@
1
+ import { Annotation } from "./types";
2
+
3
/**
 * Returns a copy of `annotation` as it would sit on the reverse complement of
 * a sequence of `sequenceLength` bases.
 *
 * note this function assumes that the entire sequence (or subsequence) is
 * being reverse complemented. Worked example:
 *
 *   0123456789
 *   -feature--   normal               (start 1, end 7, length 10)
 *   --erutaef-   reverse complemented (start 2 = 10 - (7+1), end 8 = 10 - (1+1))
 *
 * `forward` is negated, `strand` becomes -1 when it was 1 and 1 otherwise, and
 * each entry of `locations` (when present) is mirrored the same way; the order
 * of the locations is left as is.
 */
export default function getReverseComplementAnnotation(
  annotation: Annotation,
  sequenceLength: number
) {
  // position p on the original strand maps to sequenceLength - (p + 1)
  const mirror = (position: number) => sequenceLength - (position + 1);

  const mirroredLocations = annotation.locations && {
    locations: annotation.locations.map(location => ({
      start: mirror(location.end),
      end: mirror(location.start)
    }))
  };

  return Object.assign({}, annotation, {
    start: mirror(annotation.end),
    end: mirror(annotation.start),
    forward: !annotation.forward,
    strand: annotation.strand === 1 ? -1 : 1,
    ...mirroredLocations
  });
}