@teselagen/sequence-utils 0.3.37 → 0.3.38-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. package/DNAComplementMap.d.ts +1 -1
  2. package/addGapsToSeqReads.d.ts +16 -3
  3. package/adjustAnnotationsToInsert.d.ts +2 -1
  4. package/adjustBpsToReplaceOrInsert.d.ts +2 -1
  5. package/aliasedEnzymesByName.d.ts +37 -1
  6. package/aminoAcidToDegenerateDnaMap.d.ts +1 -31
  7. package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
  8. package/annotateSingleSeq.d.ts +5 -4
  9. package/annotationTypes.d.ts +2 -2
  10. package/autoAnnotate.d.ts +17 -8
  11. package/bioData.d.ts +10 -58
  12. package/calculateEndStability.d.ts +1 -1
  13. package/calculateNebTa.d.ts +6 -1
  14. package/calculateNebTm.d.ts +6 -4
  15. package/calculatePercentGC.d.ts +1 -1
  16. package/calculateSantaLuciaTm.d.ts +28 -114
  17. package/calculateTm.d.ts +13 -1
  18. package/computeDigestFragments.d.ts +30 -24
  19. package/condensePairwiseAlignmentDifferences.d.ts +1 -1
  20. package/convertAACaretPositionOrRangeToDna.d.ts +2 -1
  21. package/convertDnaCaretPositionOrRangeToAA.d.ts +2 -1
  22. package/cutSequenceByRestrictionEnzyme.d.ts +2 -1
  23. package/defaultEnzymesByName.d.ts +2 -1
  24. package/degenerateDnaToAminoAcidMap.d.ts +1 -1
  25. package/degenerateRnaToAminoAcidMap.d.ts +1 -1
  26. package/deleteSequenceDataAtRange.d.ts +2 -1
  27. package/diffUtils.d.ts +9 -7
  28. package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +2 -1
  29. package/featureTypesAndColors.d.ts +19 -6
  30. package/filterSequenceString.d.ts +14 -10
  31. package/findApproxMatches.d.ts +7 -1
  32. package/findNearestRangeOfSequenceOverlapToPosition.d.ts +2 -1
  33. package/findOrfsInPlasmid.d.ts +2 -11
  34. package/findSequenceMatches.d.ts +11 -1
  35. package/generateAnnotations.d.ts +2 -1
  36. package/generateSequenceData.d.ts +8 -13
  37. package/getAllInsertionsInSeqReads.d.ts +11 -1
  38. package/getAminoAcidDataForEachBaseOfDna.d.ts +6 -5
  39. package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
  40. package/getAminoAcidStringFromSequenceString.d.ts +3 -1
  41. package/getCodonRangeForAASliver.d.ts +3 -4
  42. package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  43. package/getComplementSequenceAndAnnotations.d.ts +5 -1
  44. package/getComplementSequenceString.d.ts +1 -1
  45. package/getCutsiteType.d.ts +2 -1
  46. package/getCutsitesFromSequence.d.ts +2 -1
  47. package/getDegenerateDnaStringFromAAString.d.ts +1 -1
  48. package/getDegenerateRnaStringFromAAString.d.ts +1 -1
  49. package/getDigestFragmentsForCutsites.d.ts +4 -1
  50. package/getDigestFragmentsForRestrictionEnzymes.d.ts +8 -1
  51. package/getInsertBetweenVals.d.ts +2 -1
  52. package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +2 -1
  53. package/getOrfsFromSequence.d.ts +17 -11
  54. package/getOverlapBetweenTwoSequences.d.ts +2 -1
  55. package/getPossiblePartsFromSequenceAndEnzymes.d.ts +18 -1
  56. package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
  57. package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  58. package/getReverseComplementAnnotation.d.ts +11 -1
  59. package/getReverseComplementSequenceAndAnnotations.d.ts +5 -1
  60. package/getReverseComplementSequenceString.d.ts +1 -1
  61. package/getReverseSequenceString.d.ts +1 -1
  62. package/getSequenceDataBetweenRange.d.ts +9 -1
  63. package/getVirtualDigest.d.ts +11 -10
  64. package/guessIfSequenceIsDnaAndNotProtein.d.ts +5 -1
  65. package/index.cjs +732 -483
  66. package/index.d.ts +8 -5
  67. package/index.js +732 -483
  68. package/index.umd.cjs +732 -483
  69. package/insertGapsIntoRefSeq.d.ts +2 -1
  70. package/insertSequenceDataAtPositionOrRange.d.ts +10 -1
  71. package/isEnzymeType2S.d.ts +2 -1
  72. package/mapAnnotationsToRows.d.ts +9 -1
  73. package/package.json +9 -6
  74. package/prepareCircularViewData.d.ts +2 -1
  75. package/prepareRowData.d.ts +7 -3
  76. package/proteinAlphabet.d.ts +1 -1
  77. package/rotateBpsToPosition.d.ts +1 -1
  78. package/rotateSequenceDataToPosition.d.ts +3 -1
  79. package/shiftAnnotationsByLen.d.ts +4 -3
  80. package/src/DNAComplementMap.ts +32 -0
  81. package/src/addGapsToSeqReads.ts +436 -0
  82. package/src/adjustAnnotationsToInsert.ts +20 -0
  83. package/src/adjustBpsToReplaceOrInsert.ts +73 -0
  84. package/src/aliasedEnzymesByName.ts +7366 -0
  85. package/src/aminoAcidToDegenerateDnaMap.ts +32 -0
  86. package/src/aminoAcidToDegenerateRnaMap.ts +32 -0
  87. package/src/annotateSingleSeq.ts +37 -0
  88. package/src/annotationTypes.ts +23 -0
  89. package/src/autoAnnotate.test.js +0 -1
  90. package/src/autoAnnotate.ts +290 -0
  91. package/src/bioData.ts +65 -0
  92. package/src/calculateEndStability.ts +91 -0
  93. package/src/calculateNebTa.ts +46 -0
  94. package/src/calculateNebTm.ts +132 -0
  95. package/src/calculatePercentGC.ts +3 -0
  96. package/src/calculateSantaLuciaTm.ts +184 -0
  97. package/src/calculateTm.ts +242 -0
  98. package/src/computeDigestFragments.ts +238 -0
  99. package/src/condensePairwiseAlignmentDifferences.ts +85 -0
  100. package/src/convertAACaretPositionOrRangeToDna.ts +28 -0
  101. package/src/convertDnaCaretPositionOrRangeToAA.ts +28 -0
  102. package/src/cutSequenceByRestrictionEnzyme.ts +345 -0
  103. package/src/defaultEnzymesByName.ts +280 -0
  104. package/src/degenerateDnaToAminoAcidMap.ts +5 -0
  105. package/src/degenerateRnaToAminoAcidMap.ts +5 -0
  106. package/src/deleteSequenceDataAtRange.ts +13 -0
  107. package/src/diffUtils.ts +80 -0
  108. package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +16 -0
  109. package/src/featureTypesAndColors.ts +167 -0
  110. package/src/filterSequenceString.ts +153 -0
  111. package/src/findApproxMatches.ts +58 -0
  112. package/src/findNearestRangeOfSequenceOverlapToPosition.ts +43 -0
  113. package/src/findOrfsInPlasmid.js +6 -1
  114. package/src/findOrfsInPlasmid.ts +31 -0
  115. package/src/findSequenceMatches.ts +154 -0
  116. package/src/generateAnnotations.ts +39 -0
  117. package/src/generateSequenceData.ts +212 -0
  118. package/src/getAllInsertionsInSeqReads.ts +100 -0
  119. package/src/getAminoAcidDataForEachBaseOfDna.ts +305 -0
  120. package/src/getAminoAcidFromSequenceTriplet.ts +27 -0
  121. package/src/getAminoAcidStringFromSequenceString.ts +36 -0
  122. package/src/getCodonRangeForAASliver.ts +73 -0
  123. package/src/getComplementAminoAcidStringFromSequenceString.ts +10 -0
  124. package/src/getComplementSequenceAndAnnotations.ts +25 -0
  125. package/src/getComplementSequenceString.ts +23 -0
  126. package/src/getCutsiteType.ts +18 -0
  127. package/src/getCutsitesFromSequence.ts +22 -0
  128. package/src/getDegenerateDnaStringFromAAString.ts +15 -0
  129. package/src/getDegenerateRnaStringFromAAString.ts +15 -0
  130. package/src/getDigestFragmentsForCutsites.ts +126 -0
  131. package/src/getDigestFragmentsForRestrictionEnzymes.ts +50 -0
  132. package/src/getInsertBetweenVals.ts +31 -0
  133. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +40 -0
  134. package/src/getMassOfAaString.ts +29 -0
  135. package/src/getOrfsFromSequence.ts +132 -0
  136. package/src/getOverlapBetweenTwoSequences.ts +30 -0
  137. package/src/getPossiblePartsFromSequenceAndEnzymes.ts +149 -0
  138. package/src/getReverseAminoAcidStringFromSequenceString.ts +22 -0
  139. package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +10 -0
  140. package/src/getReverseComplementAnnotation.ts +33 -0
  141. package/src/getReverseComplementSequenceAndAnnotations.ts +46 -0
  142. package/src/getReverseComplementSequenceString.ts +18 -0
  143. package/src/getReverseSequenceString.ts +12 -0
  144. package/src/getSequenceDataBetweenRange.ts +154 -0
  145. package/src/getVirtualDigest.ts +139 -0
  146. package/src/guessIfSequenceIsDnaAndNotProtein.ts +39 -0
  147. package/src/index.test.ts +43 -0
  148. package/src/index.ts +111 -0
  149. package/src/insertGapsIntoRefSeq.ts +43 -0
  150. package/src/insertSequenceDataAtPosition.ts +2 -0
  151. package/src/insertSequenceDataAtPositionOrRange.ts +328 -0
  152. package/src/isEnzymeType2S.ts +5 -0
  153. package/src/mapAnnotationsToRows.ts +256 -0
  154. package/src/prepareCircularViewData.ts +24 -0
  155. package/src/prepareRowData.ts +61 -0
  156. package/src/prepareRowData_output1.json +1 -0
  157. package/src/proteinAlphabet.ts +271 -0
  158. package/src/rotateBpsToPosition.ts +12 -0
  159. package/src/rotateSequenceDataToPosition.ts +54 -0
  160. package/src/shiftAnnotationsByLen.ts +24 -0
  161. package/src/threeLetterSequenceStringToAminoAcidMap.ts +198 -0
  162. package/src/tidyUpAnnotation.ts +205 -0
  163. package/src/tidyUpSequenceData.ts +213 -0
  164. package/src/types.ts +109 -0
  165. package/threeLetterSequenceStringToAminoAcidMap.d.ts +11 -921
  166. package/tidyUpAnnotation.d.ts +13 -11
  167. package/tidyUpSequenceData.d.ts +15 -1
  168. package/types.d.ts +105 -0
@@ -1 +1,2 @@
1
- export default function insertGapsIntoRefSeq(refSeq: any, seqReads: any): any;
1
+ import { SeqRead } from './getAllInsertionsInSeqReads';
2
+ export default function insertGapsIntoRefSeq(refSeq: string, seqReads: SeqRead[]): string;
@@ -1 +1,10 @@
1
- export default function insertSequenceDataAtPositionOrRange(_sequenceDataToInsert: any, _existingSequenceData: any, caretPositionOrRange: any, options?: {}): any;
1
+ import { Range } from '../../range-utils/src/index.ts';
2
+ import { SequenceData } from './types';
3
+ interface InsertSequenceDataOptions {
4
+ maintainOriginSplit?: boolean;
5
+ doNotRemoveInvalidChars?: boolean;
6
+ topLevelSeqData?: SequenceData;
7
+ [key: string]: unknown;
8
+ }
9
+ export default function insertSequenceDataAtPositionOrRange(_sequenceDataToInsert: SequenceData, _existingSequenceData: SequenceData, caretPositionOrRange: number | Range, options?: InsertSequenceDataOptions): SequenceData;
10
+ export {};
@@ -1 +1,2 @@
1
- export default function isEnzymeType2S(e: any): boolean;
1
+ import { RestrictionEnzyme } from './types';
2
+ export default function isEnzymeType2S(e: RestrictionEnzyme): boolean;
@@ -1 +1,9 @@
1
- export default function mapAnnotationsToRows(annotations: any, sequenceLength: any, bpsPerRow: any, { splitForwardReverse }?: {}): {};
1
+ import { Annotation } from './types';
2
+ export interface MappedAnnotation extends Annotation {
3
+ yOffset?: number;
4
+ enclosingRangeType?: "beginning" | "end" | "beginningAndEnd";
5
+ annotation?: Annotation;
6
+ }
7
+ export default function mapAnnotationsToRows(annotations: Annotation[], sequenceLength: number, bpsPerRow: number, { splitForwardReverse }?: {
8
+ splitForwardReverse?: boolean;
9
+ }): Record<string | number, MappedAnnotation[]>;
package/package.json CHANGED
@@ -1,20 +1,23 @@
1
1
  {
2
2
  "name": "@teselagen/sequence-utils",
3
- "version": "0.3.37",
3
+ "version": "0.3.38-beta.1",
4
4
  "type": "module",
5
5
  "dependencies": {
6
+ "@teselagen/range-utils": "0.3.14-beta.1",
6
7
  "escape-string-regexp": "5.0.0",
7
8
  "jsondiffpatch": "0.7.3",
8
- "string-splice": "^1.3.0",
9
9
  "lodash-es": "^4.17.21",
10
- "shortid": "2.2.16",
11
- "@teselagen/range-utils": "0.3.13"
10
+ "shortid": "2.2.16"
12
11
  },
13
12
  "exports": {
14
13
  ".": {
15
- "import": "./index.js",
16
- "require": "./index.cjs"
14
+ "import": "./src/index.ts",
15
+ "require": "./src/index.ts"
17
16
  }
18
17
  },
18
+ "devDependencies": {
19
+ "@types/lodash-es": "^4.17.12",
20
+ "@types/shortid": "^2.2.0"
21
+ },
19
22
  "license": "MIT"
20
23
  }
@@ -1 +1,2 @@
1
- export default function prepareCircularViewData(sequenceData: any): any;
1
+ import { SequenceData } from './types';
2
+ export default function prepareCircularViewData(sequenceData: SequenceData): SequenceData;
@@ -1,6 +1,10 @@
1
- export default function prepareRowData(sequenceData: any, bpsPerRow: any): {
1
+ import { MappedAnnotation } from './mapAnnotationsToRows';
2
+ import { SequenceData } from './types';
3
+ export interface RowData {
2
4
  rowNumber: number;
3
5
  start: number;
4
6
  end: number;
5
- sequence: any;
6
- }[];
7
+ sequence: string;
8
+ [key: string]: MappedAnnotation[] | number | string;
9
+ }
10
+ export default function prepareRowData(sequenceData: SequenceData, bpsPerRow: number): RowData[];
@@ -1,4 +1,3 @@
1
- export default proteinAlphabet;
2
1
  declare const proteinAlphabet: {
3
2
  A: {
4
3
  value: string;
@@ -261,3 +260,4 @@ declare const proteinAlphabet: {
261
260
  mass: number;
262
261
  };
263
262
  };
263
+ export default proteinAlphabet;
@@ -1 +1 @@
1
- export default function rotateBpsToPosition(bps: any, caretPosition: any): any;
1
+ export default function rotateBpsToPosition(bps: string, caretPosition: number): string;
@@ -1 +1,3 @@
1
- export default function rotateSequenceDataToPosition(sequenceData: any, caretPosition: any, options: any): any;
1
+ import { TidyUpSequenceDataOptions } from './tidyUpSequenceData';
2
+ import { SequenceData } from './types';
3
+ export default function rotateSequenceDataToPosition(sequenceData: SequenceData, caretPosition: number, options?: TidyUpSequenceDataOptions): SequenceData;
@@ -1,5 +1,6 @@
1
+ import { SequenceData } from './types';
1
2
  export default function shiftAnnotationsByLen({ seqData, caretPosition, insertLength }: {
2
- seqData: any;
3
- caretPosition: any;
4
- insertLength: any;
3
+ seqData: SequenceData;
4
+ caretPosition: number;
5
+ insertLength: number;
5
6
  }): void;
@@ -0,0 +1,32 @@
1
/**
 * Lookup table from a single nucleotide character to its complement.
 *
 * - Case is preserved: lowercase keys map to lowercase values and uppercase
 *   keys to uppercase values.
 * - `u`/`U` map to `a`/`A`, while `a`/`A` map to `t`/`T`, so the mapping is
 *   not symmetric for `u`/`U`.
 * - The ambiguity codes r/y, d/h, k/m and v/b are paired with each other.
 * - "." maps to itself.
 *
 * Characters that are not listed here (for example s, w and n) have no entry,
 * so a direct lookup yields `undefined`; callers decide how to treat them.
 */
const DNAComplementMap = {
  ".": ".",
  a: "t",
  t: "a",
  u: "a",
  c: "g",
  g: "c",
  A: "T",
  T: "A",
  U: "A",
  C: "G",
  G: "C",
  r: "y",
  R: "Y",
  y: "r",
  Y: "R",
  d: "h",
  D: "H",
  h: "d",
  H: "D",
  k: "m",
  K: "M",
  m: "k",
  M: "K",
  v: "b",
  V: "B",
  b: "v",
  B: "V"
  //tnrtodo add more letters here
};
31
+
32
+ export default DNAComplementMap;
@@ -0,0 +1,436 @@
1
+ import insertGapsIntoRefSeq from "./insertGapsIntoRefSeq";
2
+
3
+ import { cloneDeep } from "lodash-es";
4
+
5
+ // bam.seq: NTGTAAGTCGTGAAAAAANCNNNCATATTNCGGAGGTAAAAATGAAAA...
6
+ // bam.pos: 43
7
+ // bam.cigar: 36M2D917M3I17M7I2M1I6M5I4M1D6M12I8M
8
+ // (note: bam.cigar is null if the sequencing read is unaligned)
9
+ // bam.reversed: true (if reversed)
10
+
11
+ // refSeq should be an object { name, sequence }
12
+ // seqReads should be an array of objects [{name, seq, pos, cigar}, {name, seq, pos, cigar}, ...]
13
+ // add gaps into sequencing reads before starting bp pos and from own deletions & all seq reads' insertions, minus own insertions
14
/**
 * One sequencing read as consumed by addGapsToSeqReads.
 */
interface SeqRead {
  // Read name; copied unchanged onto the corresponding output entry.
  name: string;
  // Bases of the read; split into characters and padded with "-" gaps.
  seq: string;
  // Position where the read starts aligning to the reference sequence.
  // NOTE(review): the arithmetic in addGapsToSeqReads treats this as 1-based — confirm.
  pos: number;
  // CIGAR string; only the S, M, D and I components are parsed.
  // NOTE(review): typed as string, but the file's header comment says it is
  // null for unaligned reads and addGapsToSeqReads checks for null at runtime.
  cigar: string;
  // Copied unchanged onto the corresponding output entry.
  reversed?: boolean;
}
21
+
22
/**
 * Pads a reference sequence and its aligned sequencing reads with "-" gap
 * characters so that all returned sequences line up with each other.
 *
 * Side effect: reads without a CIGAR string (unaligned reads) are removed from
 * the `seqReads` array that was passed in.
 *
 * @param refSeq - Reference sequence as `{ name, sequence }`.
 * @param seqReads - Reads as `{ name, seq, pos, cigar, reversed? }`.
 * @returns An array whose first entry is the gapped, upper-cased reference
 * sequence, followed by one entry per read carrying the gapped sequence plus
 * the read's `name`, `reversed` and `cigar`,
 * e.g. `[{ name: "ref seq", sequence: "GG---GA--GA-C--A---CC---" },
 *        { name: "r1", sequence: "-----GATTGA-C-----------" }, ...]`.
 */
export default function addGapsToSeqReads(
  refSeq: { name: string; sequence: string },
  seqReads: SeqRead[]
) {
  // remove unaligned seq reads for now
  // Walk backwards: splicing while walking forwards skips the element that
  // slides into the removed slot, which left a second consecutive unaligned
  // read in place and made the `.cigar.match(...)` calls below throw.
  for (let i = seqReads.length - 1; i >= 0; i--) {
    if (seqReads[i].cigar == null) {
      seqReads.splice(i, 1);
    }
  }

  const refSeqWithGaps = insertGapsIntoRefSeq(refSeq.sequence, seqReads);
  // first object is reference sequence with gaps, to be followed by seq reads with gaps
  const seqReadsWithGaps: {
    name: string;
    sequence: string;
    reversed?: boolean;
    cigar?: string;
  }[] = [{ name: refSeq.name, sequence: refSeqWithGaps.toUpperCase() }];
  seqReads.forEach((seqRead: SeqRead) => {
    // get all insertions in seq reads
    // (rebuilt for every read because the entries are mutated further down)
    const allInsertionsInSeqReads: { bpPos: number; number: number }[] = [];
    seqReads.forEach((read: SeqRead) => {
      // split cigar string at S, M, D, or I (soft-clipped, match, deletion, or insertion), e.g. ["5S", "2M", "3I", "39M", "3D"..."9S"]
      const splitSeqRead = read.cigar.match(/([0-9]*[SMDI])/g);
      if (!splitSeqRead) return;
      // adjust read.pos, aka bp pos where the seq read starts aligning to the ref seq, if bps have been soft-clipped from the beginning of the seq read
      let adjustedSeqReadPos = cloneDeep(read.pos);
      if (splitSeqRead[0].slice(-1) === "S") {
        // # in #S at beginning of array, i.e. number of soft-clipped base pairs at beginning of the seq read
        const numOfBeginningSoftClipped = splitSeqRead[0].slice(0, -1);
        adjustedSeqReadPos = read.pos - Number(numOfBeginningSoftClipped);
      }
      for (let componentI = 0; componentI < splitSeqRead.length; componentI++) {
        if (splitSeqRead[componentI].slice(-1) === "I") {
          let bpPosOfInsertion = adjustedSeqReadPos;
          const numberOfInsertions = Number(
            splitSeqRead[componentI].slice(0, -1)
          );
          // earlier insertions do not advance the position here
          for (let i = 0; i < componentI; i++) {
            if (splitSeqRead[i].slice(-1) !== "I") {
              const previousComponentNumber = Number(
                splitSeqRead[i].slice(0, -1)
              );
              bpPosOfInsertion += previousComponentNumber;
            }
          }
          const insertionInfo = {
            // keeping bpPos 1-based
            bpPos: bpPosOfInsertion,
            number: numberOfInsertions
          };
          allInsertionsInSeqReads.push(insertionInfo);
        }
      }
    });

    // 1) add gaps before starting bp pos
    const splitSeqReadChunk = seqRead.cigar.match(/([0-9]*[SMDI])/g);
    if (!splitSeqReadChunk) return;
    let adjustedSeqReadPos = cloneDeep(seqRead.pos);
    if (splitSeqReadChunk[0].slice(-1) === "S") {
      // # in #S at beginning of array, i.e. number of soft-clipped base pairs at beginning of the seq read
      const numOfBeginningSoftClipped = splitSeqReadChunk[0].slice(0, -1);
      adjustedSeqReadPos = seqRead.pos - Number(numOfBeginningSoftClipped);
    }
    let eachSeqReadWithGaps = seqRead.seq.split("");
    if (adjustedSeqReadPos > 0) {
      eachSeqReadWithGaps.unshift("-".repeat(adjustedSeqReadPos - 1));
    }
    // join + split turns multi-character chunks back into one element per character
    eachSeqReadWithGaps = eachSeqReadWithGaps.join("").split("");

    // 2) add own deletions to own sequence
    // get own deletions
    const ownDeletions = [];
    for (
      let componentI = 0;
      componentI < splitSeqReadChunk.length;
      componentI++
    ) {
      if (splitSeqReadChunk[componentI].slice(-1) === "D") {
        let bpPosOfDeletion = adjustedSeqReadPos;
        const numberOfDeletions = Number(
          splitSeqReadChunk[componentI].slice(0, -1)
        );
        for (let i = 0; i < componentI; i++) {
          const previousComponentNumber = Number(
            splitSeqReadChunk[i].slice(0, -1)
          );
          bpPosOfDeletion += previousComponentNumber;
        }
        const deletionInfo = {
          // keeping bpPos 1-based
          bpPos: bpPosOfDeletion,
          number: numberOfDeletions
        };
        ownDeletions.push(deletionInfo);
      }
    }
    // sort deletions by ascending bp pos
    const sortedOwnDeletions = ownDeletions.sort((a, b) => {
      return a.bpPos - b.bpPos;
    });
    // add own deletions to own sequence
    for (let ownD = 0; ownD < sortedOwnDeletions.length; ownD++) {
      const bpPosOfDeletion = sortedOwnDeletions[ownD].bpPos;
      const numberOfDeletions = sortedOwnDeletions[ownD].number;
      // adding gaps at the bp pos
      let deletionGaps = "";
      for (let gapD = 0; gapD < numberOfDeletions; gapD++) {
        deletionGaps += "-";
      }
      eachSeqReadWithGaps.splice(bpPosOfDeletion - 1, 0, deletionGaps);
      eachSeqReadWithGaps = eachSeqReadWithGaps.join("").split("");
    }
    eachSeqReadWithGaps = eachSeqReadWithGaps.join("").split("");

    // 3) remove own insertions from own sequence
    // get own insertions
    const ownInsertions = [];
    const ownInsertionsBp = [];
    for (
      let componentI = 0;
      componentI < splitSeqReadChunk.length;
      componentI++
    ) {
      if (splitSeqReadChunk[componentI].slice(-1) === "I") {
        let bpPosOfInsertion = adjustedSeqReadPos;
        const numberOfInsertions = Number(
          splitSeqReadChunk[componentI].slice(0, -1)
        );
        const nucleotides = [];
        for (let i = 0; i < componentI; i++) {
          const previousComponentNumber = Number(
            splitSeqReadChunk[i].slice(0, -1)
          );
          bpPosOfInsertion += previousComponentNumber;
        }
        // remember the inserted bases so they can be re-added in step 5
        for (let nucI = 0; nucI < numberOfInsertions; nucI++) {
          nucleotides.push(eachSeqReadWithGaps[bpPosOfInsertion - 1 + nucI]);
        }
        const insertionInfo = {
          // keeping bpPos 1-based
          bpPos: bpPosOfInsertion,
          number: numberOfInsertions
        };
        const insertionInfoBp = {
          // keeping bpPos 1-based
          bpPos: bpPosOfInsertion,
          number: numberOfInsertions,
          nucleotides: nucleotides
        };
        ownInsertions.push(insertionInfo);
        ownInsertionsBp.push(insertionInfoBp);
      }
    }
    // snapshot taken before the positions in ownInsertions are shifted below
    const ownInsertionsCompare = JSON.parse(JSON.stringify(ownInsertions));
    // sort own insertions by ascending bp pos
    const sortedOwnInsertions = ownInsertions.sort((a, b) => {
      return a.bpPos - b.bpPos;
    });
    const sortedOwnInsertionsBp = ownInsertionsBp.sort((a, b) => {
      return a.bpPos - b.bpPos;
    });
    // remove own insertions from own sequence
    for (let ownI = 0; ownI < sortedOwnInsertions.length; ownI++) {
      const bpPosOfInsertion = sortedOwnInsertions[ownI].bpPos;
      const numberOfInsertions = sortedOwnInsertions[ownI].number;
      for (let numI = 0; numI < numberOfInsertions; numI++) {
        eachSeqReadWithGaps.splice(bpPosOfInsertion - 1, 1);
      }
      // later insertions move left by the number of bases just removed
      for (let posI = ownI + 1; posI < sortedOwnInsertions.length; posI++) {
        sortedOwnInsertions[posI].bpPos -= numberOfInsertions;
      }
    }

    // 4) add other seq reads' insertions to seq read
    // get other seq reads' insertions (i.e. all insertions minus duplicates minus own insertions)
    let otherInsertions = allInsertionsInSeqReads.sort((a, b) => {
      return a.bpPos - b.bpPos;
    });
    // combine duplicates within all insertions, remove own insertions from all insertions, combine overlap between other insertions & own insertions
    // first, combine duplicates within all insertions
    otherInsertions = otherInsertions.filter(
      (object, index) =>
        index ===
        otherInsertions.findIndex(
          obj => JSON.stringify(obj) === JSON.stringify(object)
        )
    );
    // 'i < otherInsertions.length - 1' because when at the end of the array, there is no 'i + 1' to compare to
    for (let i = 0; i < otherInsertions.length - 1; i++) {
      // The bounds check is re-evaluated after every splice: without it, merging
      // the last two entries left no element at i + 1 and the comparison threw.
      while (
        i + 1 < otherInsertions.length &&
        otherInsertions[i].bpPos === otherInsertions[i + 1].bpPos
      ) {
        if (otherInsertions[i].number > otherInsertions[i + 1].number) {
          // remove the one with fewer number of gaps from array
          otherInsertions.splice(i + 1, 1);
        } else if (otherInsertions[i].number < otherInsertions[i + 1].number) {
          otherInsertions.splice(i, 1);
        } else if (
          otherInsertions[i].number === otherInsertions[i + 1].number
        ) {
          otherInsertions.splice(i, 1);
        }
      }
    }
    // then remove own insertions from all insertions
    for (let otherI = 0; otherI < ownInsertionsCompare.length; otherI++) {
      const insertionInfoIndex = otherInsertions.findIndex(
        e => e.bpPos === ownInsertionsCompare[otherI].bpPos
      );
      if (insertionInfoIndex !== -1) {
        if (
          otherInsertions[insertionInfoIndex].number >
          ownInsertionsCompare[otherI].number
        ) {
          otherInsertions[insertionInfoIndex].number =
            otherInsertions[insertionInfoIndex].number -
            ownInsertionsCompare[otherI].number;
        } else if (
          otherInsertions[insertionInfoIndex].number <=
          ownInsertionsCompare[otherI].number
        ) {
          otherInsertions.splice(insertionInfoIndex, 1);
          // re-check the same own insertion against the remaining entries
          otherI--;
        }
      }
    }
    // then combine overlap between other insertions & own insertions
    for (let overlapI = 0; overlapI < sortedOwnInsertions.length; overlapI++) {
      const insertionInfoIndex = otherInsertions.findIndex(
        e => e.bpPos === sortedOwnInsertions[overlapI].bpPos
      );
      if (insertionInfoIndex !== -1) {
        if (
          otherInsertions[insertionInfoIndex].number >
          sortedOwnInsertions[overlapI].number
        ) {
          otherInsertions[insertionInfoIndex].number =
            otherInsertions[insertionInfoIndex].number -
            sortedOwnInsertions[overlapI].number;
        } else if (
          otherInsertions[insertionInfoIndex].number <=
          sortedOwnInsertions[overlapI].number
        ) {
          otherInsertions.splice(insertionInfoIndex, 1);
          // re-check the same own insertion against the remaining entries
          overlapI--;
        }
      }
    }
    // adjust own insertions according to other seq reads' insertions to be added (i.e. for all other reads' insertions with smaller bp pos, +1 to that own insertion's bp pos)
    const adjustedOwnInsertionsBp = JSON.parse(
      JSON.stringify(sortedOwnInsertionsBp)
    );
    for (let ownI = 0; ownI < adjustedOwnInsertionsBp.length; ownI++) {
      let previousInserts = 0;
      for (let i = 0; i < ownI; i++) {
        previousInserts += adjustedOwnInsertionsBp[i].number - 1;
      }
      adjustedOwnInsertionsBp[ownI].bpPos =
        adjustedOwnInsertionsBp[ownI].bpPos - previousInserts;
      sortedOwnInsertionsBp[ownI].bpPos =
        sortedOwnInsertionsBp[ownI].bpPos - previousInserts;
    }
    for (let otherI = 0; otherI < otherInsertions.length; otherI++) {
      for (let ownI = 0; ownI < adjustedOwnInsertionsBp.length; ownI++) {
        if (
          otherInsertions[otherI].bpPos <= sortedOwnInsertionsBp[ownI].bpPos
        ) {
          adjustedOwnInsertionsBp[ownI].bpPos += 1;
        }
      }
    }
    // add other seq reads' insertions to sequence
    for (
      let otherI = 0;
      otherI < otherInsertions.length &&
      otherInsertions[otherI].bpPos <= eachSeqReadWithGaps.length;
      otherI++
    ) {
      const bpPosOfInsertion = otherInsertions[otherI].bpPos;
      const numberOfInsertions = otherInsertions[otherI].number;
      // adding gaps at the bp pos
      let insertionGaps = "";
      for (let gapI = 0; gapI < numberOfInsertions; gapI++) {
        insertionGaps += "-";
      }
      // the whole gap run is inserted as ONE array element, hence the +1 shift below
      eachSeqReadWithGaps.splice(bpPosOfInsertion - 1, 0, insertionGaps);
      for (let posI = otherI + 1; posI < otherInsertions.length; posI++) {
        otherInsertions[posI].bpPos += 1;
      }
    }

    // 5) add own insertions to own sequence
    for (let ownI = 0; ownI < adjustedOwnInsertionsBp.length; ownI++) {
      const bpPosOfInsertion = adjustedOwnInsertionsBp[ownI].bpPos;
      const nucleotides = adjustedOwnInsertionsBp[ownI].nucleotides.join("");
      eachSeqReadWithGaps.splice(bpPosOfInsertion - 1, 0, nucleotides);
    }

    // 6) add gaps after seq read for ref seq's length = seq read's length
    eachSeqReadWithGaps = eachSeqReadWithGaps.join("").split("");
    if (eachSeqReadWithGaps.length < refSeqWithGaps.length) {
      eachSeqReadWithGaps.push(
        "-".repeat(refSeqWithGaps.length - eachSeqReadWithGaps.length)
      );
    }

    // once joined, eachSeqReadWithGaps is a string such as "GGGA--GA-C--ACC"
    seqReadsWithGaps.push({
      name: seqRead.name,
      sequence: eachSeqReadWithGaps.join(""),
      reversed: seqRead.reversed,
      cigar: seqRead.cigar
    });
  });

  // 7) add gaps before starting bp pos
  // add gaps based on any seq reads that extend beyond beginning of the ref seq due to soft-clipped reads
  // a) get the lengths of bps that extend beyond the beginning of the ref seq among all seq reads
  const seqReadLengthsBeforeRefSeqStart: number[] = [];
  seqReads.forEach(seq => {
    const splitSeqReadChunk = seq.cigar.match(/([0-9]*[SMDI])/g);
    if (!splitSeqReadChunk) return;
    let adjustedSeqReadPos = cloneDeep(seq.pos);
    if (splitSeqReadChunk[0].slice(-1) === "S") {
      // # in #S at beginning of array, i.e. number of soft-clipped base pairs at beginning of the seq read
      const numOfBeginningSoftClipped = splitSeqReadChunk[0].slice(0, -1);
      adjustedSeqReadPos = seq.pos - Number(numOfBeginningSoftClipped);
      // number of gaps to add if soft-clipped reads extend beyond beginning of ref seq
      if (adjustedSeqReadPos < 0) {
        seqReadLengthsBeforeRefSeqStart.push(Math.abs(adjustedSeqReadPos));
      }
    }
    // number of gaps to add if seqRead.pos is negative (not sure if this is possible with bowtie2 outputs)
    // if (seq.pos < 0) {
    //   seqReadLengthsBeforeRefSeqStart.push(Math.abs(seq.pos))
    // }
  });
  // b) add gaps (to both ref seq and seq reads) based on any seq reads that extend beyond beginning of ref seq due to soft-clipped reads
  let longestSeqReadLength = 0;
  // index 0 is the ref seq, so entry i corresponds to seqReads[i - 1]
  for (let i = 1; i < seqReadsWithGaps.length; i++) {
    // turn seq read into an array ["A", "T", "C", "G"...]
    const eachSeqReadWithGaps = seqReadsWithGaps[i].sequence.split("");
    const splitSeqReadChunk = seqReads[i - 1].cigar.match(/([0-9]*[SMDI])/g);
    if (!splitSeqReadChunk) continue;
    let adjustedSeqReadPos = cloneDeep(seqReads[i - 1].pos);
    // longest length of bps that extend beyond the beginning of the ref seq among all seq reads
    if (seqReadLengthsBeforeRefSeqStart.length > 0) {
      longestSeqReadLength = Math.max(...seqReadLengthsBeforeRefSeqStart);
    }
    if (splitSeqReadChunk[0].slice(-1) === "S") {
      // # in #S at beginning of array, i.e. number of soft-clipped base pairs at beginning of the seq read
      const numOfBeginningSoftClipped = splitSeqReadChunk[0].slice(0, -1);
      adjustedSeqReadPos =
        seqReads[i - 1].pos - Number(numOfBeginningSoftClipped);
      if (adjustedSeqReadPos > 0) {
        if (longestSeqReadLength > 0) {
          eachSeqReadWithGaps.unshift("-".repeat(longestSeqReadLength + 1));
        }
        seqReadsWithGaps[i].sequence = eachSeqReadWithGaps.join("");
      } else if (adjustedSeqReadPos < 0) {
        if (longestSeqReadLength > 0) {
          eachSeqReadWithGaps.unshift(
            "-".repeat(longestSeqReadLength - Math.abs(adjustedSeqReadPos))
          );
        }
        seqReadsWithGaps[i].sequence = eachSeqReadWithGaps.join("");
      }
    } else {
      if (longestSeqReadLength > 0) {
        eachSeqReadWithGaps.unshift("-".repeat(longestSeqReadLength + 1));
      }
      seqReadsWithGaps[i].sequence = eachSeqReadWithGaps.join("");
    }
  }

  // add gaps before ref seq based on the longest length of soft-clipped reads that extend beyond beginning of ref seq
  if (longestSeqReadLength > 0) {
    const splitRefSeqWithGaps = seqReadsWithGaps[0].sequence.split("");
    splitRefSeqWithGaps.unshift("-".repeat(longestSeqReadLength + 1));
    seqReadsWithGaps[0].sequence = splitRefSeqWithGaps.join("");
  }

  // 8) check if any seq read is longer than the ref seq, make ref seq & seq reads all the same length
  const lengthsOfLongerSeqReads = [];
  for (let i = 1; i < seqReadsWithGaps.length; i++) {
    const refEntry = seqReadsWithGaps[0];
    if (seqReadsWithGaps[i].sequence.length > refEntry.sequence.length) {
      lengthsOfLongerSeqReads.push(seqReadsWithGaps[i].sequence.length);
    }
  }
  if (lengthsOfLongerSeqReads.length > 0) {
    const longestLength = Math.max(...lengthsOfLongerSeqReads);
    for (let i = 0; i < seqReadsWithGaps.length; i++) {
      if (seqReadsWithGaps[i].sequence.length < longestLength) {
        seqReadsWithGaps[i].sequence += "-".repeat(
          longestLength - seqReadsWithGaps[i].sequence.length
        );
      }
    }
  }
  // if any seq read shorter than ref seq, make ref seq & seq reads all the same length
  for (let i = 1; i < seqReadsWithGaps.length; i++) {
    const refEntry = seqReadsWithGaps[0];
    if (seqReadsWithGaps[i].sequence.length < refEntry.sequence.length) {
      seqReadsWithGaps[i].sequence += "-".repeat(
        refEntry.sequence.length - seqReadsWithGaps[i].sequence.length
      );
    }
  }

  // seqReadsWithGaps is an array of objects containing the ref seq with gaps first and then all seq reads with gaps
  // e.g. [{ name: "ref seq", sequence: "GG---GA--GA-C--A---CC---"}, { name: "r1", sequence: "-----GATTGA-C-----------"}...]
  return seqReadsWithGaps;
}
@@ -0,0 +1,20 @@
1
+ import { adjustRangeToInsert } from "@teselagen/range-utils";
2
+ import { map } from "lodash-es";
3
+ import { Annotation } from "./types";
4
+
5
/**
 * Applies adjustRangeToInsert to every annotation, and to each entry of an
 * annotation's `locations` when it has any, for an insert of `insertLength`
 * at `insertStart`.
 *
 * @param annotationsToBeAdjusted - Annotations as an array or keyed by string.
 * @param insertStart - Passed through to adjustRangeToInsert.
 * @param insertLength - Passed through to adjustRangeToInsert.
 * @returns A new array with one adjusted object per input annotation.
 */
export default function adjustAnnotationsToInsert(
  annotationsToBeAdjusted: Record<string, Annotation> | Annotation[],
  insertStart: number,
  insertLength: number
) {
  // The same adjustment is applied to an annotation and to its sub-locations.
  const adjust = (range: Annotation) =>
    adjustRangeToInsert(range, insertStart, insertLength);

  return map(annotationsToBeAdjusted, (annotation: Annotation) => {
    const { locations } = annotation;
    return {
      ...adjust(annotation),
      // only emit a `locations` key when the annotation actually has one
      ...(locations && {
        locations: locations.map((loc: Annotation) => adjust(loc))
      })
    };
  });
}
@@ -0,0 +1,73 @@
1
+ import {
2
+ splitRangeIntoTwoPartsIfItIsCircular,
3
+ getSequenceWithinRange,
4
+ getRangeLength,
5
+ invertRange,
6
+ isPositionWithinRange,
7
+ Range
8
+ } from "@teselagen/range-utils";
9
+
10
/**
 * Inserts `insertString` into `bpString` at a caret position, or replaces the
 * bases covered by a range with it.
 *
 * @param bpString - The original sequence string.
 * @param insertString - Text to insert; defaults to "".
 * @param caretPositionOrRange - A caret index, or a range whose `start` is
 * greater than -1. Anything else is handled through the caret path.
 * @returns The resulting sequence string.
 */
export default function adjustBpsToReplaceOrInsert(
  bpString: string,
  insertString = "",
  caretPositionOrRange: number | Range
) {
  // Caret path: plain insertion with nothing removed.
  if (
    typeof caretPositionOrRange === "number" ||
    !caretPositionOrRange ||
    !(caretPositionOrRange.start > -1)
  ) {
    return spliceString(
      bpString,
      caretPositionOrRange as number,
      0,
      insertString
    );
  }

  const totalLength = bpString.length;

  // The range covers the whole sequence, so only the insert remains.
  if (getRangeLength(caretPositionOrRange, totalLength) === totalLength) {
    return insertString;
  }

  // Invert the range to get what is kept, split in two if it wraps around.
  const keptRanges = splitRangeIntoTwoPartsIfItIsCircular(
    invertRange(caretPositionOrRange as unknown as Range, totalLength) as Range,
    totalLength
  );
  const hasSingleKeptRange = keptRanges.length === 1;

  let rebuilt = "";
  keptRanges.forEach((keptRange, position) => {
    rebuilt += getSequenceWithinRange(keptRange, bpString);
    if (!hasSingleKeptRange) {
      // two kept pieces: the insert goes right after the first one
      if (position === 0) rebuilt += insertString;
      return;
    }
    // one kept piece: append the insert if the piece contains position 0,
    // otherwise prepend it
    rebuilt = isPositionWithinRange(0, keptRange, totalLength, true, true)
      ? rebuilt + insertString
      : insertString + rebuilt;
  });
  return rebuilt;
}
58
+
59
/**
 * String counterpart of Array.prototype.splice: drops `count` characters of
 * `str` starting at `index` and puts `add` in their place.
 *
 * A negative `index` counts back from the end of `str` and is clamped to 0.
 * A falsy `add` is treated as the empty string.
 */
const spliceString = (
  str: string,
  index: number,
  count: number,
  add: string
) => {
  const cutAt = index < 0 ? Math.max(str.length + index, 0) : index;
  const head = str.slice(0, cutAt);
  const tail = str.slice(cutAt + count);
  return head + (add || "") + tail;
};