@teselagen/sequence-utils 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/index.js +12030 -26126
  2. package/index.mjs +12119 -26124
  3. package/index.umd.js +24056 -38154
  4. package/package.json +2 -2
  5. package/src/DNAComplementMap.js +32 -0
  6. package/src/addGapsToSeqReads.js +417 -0
  7. package/src/addGapsToSeqReads.test.js +358 -0
  8. package/src/adjustAnnotationsToInsert.js +19 -0
  9. package/src/adjustBpsToReplaceOrInsert.js +50 -0
  10. package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
  11. package/src/aliasedEnzymesByName.js +7363 -0
  12. package/src/aminoAcidToDegenerateDnaMap.js +32 -0
  13. package/src/aminoAcidToDegenerateRnaMap.js +32 -0
  14. package/src/aminoAcidToDnaRna.test.js +27 -0
  15. package/src/annotateSingleSeq.js +29 -0
  16. package/src/annotateSingleSeq.test.js +64 -0
  17. package/src/annotationTypes.js +23 -0
  18. package/src/autoAnnotate.js +242 -0
  19. package/src/autoAnnotate.test.js +1039 -0
  20. package/src/bioData.js +431 -0
  21. package/src/calculateNebTa.js +34 -0
  22. package/src/calculateNebTa.test.js +57 -0
  23. package/src/calculateNebTm.js +127 -0
  24. package/src/calculateNebTm.test.js +32 -0
  25. package/src/calculatePercentGC.js +3 -0
  26. package/src/calculatePercentGC.test.js +14 -0
  27. package/src/calculateTm.js +297 -0
  28. package/src/calculateTm.test.js +7 -0
  29. package/src/computeDigestFragments.js +179 -0
  30. package/src/computeDigestFragments.test.js +73 -0
  31. package/src/condensePairwiseAlignmentDifferences.js +85 -0
  32. package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
  33. package/src/convertAACaretPositionOrRangeToDna.js +24 -0
  34. package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
  35. package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
  36. package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
  37. package/src/cutSequenceByRestrictionEnzyme.js +301 -0
  38. package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
  39. package/src/defaultEnzymesByName.js +278 -0
  40. package/src/degenerateDnaToAminoAcidMap.js +5 -0
  41. package/src/degenerateRnaToAminoAcidMap.js +5 -0
  42. package/src/deleteSequenceDataAtRange.js +5 -0
  43. package/src/deleteSequenceDataAtRange.test.js +146 -0
  44. package/src/diffUtils.js +64 -0
  45. package/src/diffUtils.test.js +74 -0
  46. package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
  47. package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
  48. package/src/featureTypesAndColors.js +152 -0
  49. package/src/featureTypesAndColors.test.js +52 -0
  50. package/src/filterAminoAcidSequenceString.js +13 -0
  51. package/src/filterAminoAcidSequenceString.test.js +22 -0
  52. package/src/filterSequenceString.js +22 -0
  53. package/src/filterSequenceString.test.js +13 -0
  54. package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
  55. package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
  56. package/src/findOrfsInPlasmid.js +26 -0
  57. package/src/findSequenceMatches.js +133 -0
  58. package/src/findSequenceMatches.test.js +286 -0
  59. package/src/generateAnnotations.js +34 -0
  60. package/src/generateSequenceData.js +206 -0
  61. package/src/generateSequenceData.test.js +22 -0
  62. package/src/getAllInsertionsInSeqReads.js +83 -0
  63. package/src/getAllInsertionsInSeqReads.test.js +26 -0
  64. package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
  65. package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
  66. package/src/getAminoAcidFromSequenceTriplet.js +22 -0
  67. package/src/getAminoAcidStringFromSequenceString.js +18 -0
  68. package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
  69. package/src/getCodonRangeForAASliver.js +63 -0
  70. package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
  71. package/src/getComplementSequenceAndAnnotations.js +20 -0
  72. package/src/getComplementSequenceString.js +19 -0
  73. package/src/getComplementSequenceString.test.js +13 -0
  74. package/src/getCutsiteType.js +10 -0
  75. package/src/getCutsitesFromSequence.js +17 -0
  76. package/src/getDegenerateDnaStringFromAAString.js +8 -0
  77. package/src/getDegenerateRnaStringFromAAString.js +8 -0
  78. package/src/getDigestFragmentsForCutsites.js +105 -0
  79. package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
  80. package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
  81. package/src/getInsertBetweenVals.js +28 -0
  82. package/src/getInsertBetweenVals.test.js +33 -0
  83. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
  84. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
  85. package/src/getMassOfAaString.js +24 -0
  86. package/src/getMassofAaString.test.js +18 -0
  87. package/src/getOrfsFromSequence.js +124 -0
  88. package/src/getOrfsFromSequence.test.js +210 -0
  89. package/src/getOverlapBetweenTwoSequences.js +30 -0
  90. package/src/getOverlapBetweenTwoSequences.test.js +23 -0
  91. package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
  92. package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
  93. package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
  94. package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
  95. package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
  96. package/src/getReverseComplementAnnotation.js +23 -0
  97. package/src/getReverseComplementAnnotation.test.js +44 -0
  98. package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
  99. package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
  100. package/src/getReverseComplementSequenceString.js +17 -0
  101. package/src/getReverseComplementSequenceString.test.js +11 -0
  102. package/src/getReverseSequenceString.js +12 -0
  103. package/src/getReverseSequenceString.test.js +9 -0
  104. package/src/getSequenceDataBetweenRange.js +131 -0
  105. package/src/getSequenceDataBetweenRange.test.js +474 -0
  106. package/src/getVirtualDigest.js +125 -0
  107. package/src/getVirtualDigest.test.js +134 -0
  108. package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
  109. package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
  110. package/src/index.js +106 -0
  111. package/src/index.test.js +38 -0
  112. package/src/insertGapsIntoRefSeq.js +38 -0
  113. package/src/insertGapsIntoRefSeq.test.js +20 -0
  114. package/src/insertSequenceDataAtPosition.js +2 -0
  115. package/src/insertSequenceDataAtPosition.test.js +75 -0
  116. package/src/insertSequenceDataAtPositionOrRange.js +249 -0
  117. package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
  118. package/src/isEnzymeType2S.js +3 -0
  119. package/src/mapAnnotationsToRows.js +174 -0
  120. package/src/mapAnnotationsToRows.test.js +425 -0
  121. package/src/prepareCircularViewData.js +17 -0
  122. package/src/prepareCircularViewData.test.js +196 -0
  123. package/src/prepareRowData.js +41 -0
  124. package/src/prepareRowData.test.js +36 -0
  125. package/src/prepareRowData_output1.json +391 -0
  126. package/src/proteinAlphabet.js +257 -0
  127. package/src/rotateBpsToPosition.js +13 -0
  128. package/src/rotateBpsToPosition.test.js +6 -0
  129. package/src/rotateSequenceDataToPosition.js +48 -0
  130. package/src/rotateSequenceDataToPosition.test.js +71 -0
  131. package/src/shiftAnnotationsByLen.js +17 -0
  132. package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
  133. package/src/tidyUpAnnotation.js +182 -0
  134. package/src/tidyUpSequenceData.js +169 -0
  135. package/src/tidyUpSequenceData.test.js +332 -0
package/package.json CHANGED
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "name": "@teselagen/sequence-utils",
3
- "version": "0.1.21",
3
+ "version": "0.1.23",
4
4
  "type": "commonjs",
5
5
  "dependencies": {
6
- "@teselagen/range-utils": "0.1.20",
6
+ "@teselagen/range-utils": "0.1.22",
7
7
  "bson-objectid": "^2.0.4",
8
8
  "escape-string-regexp": "^5.0.0",
9
9
  "jsondiffpatch-rc": "0.4.2",
@@ -0,0 +1,32 @@
1
/**
 * Lookup table from a nucleotide letter to its complement.
 *
 * Covers the unambiguous bases (a/c/g/t, with u complementing to a), the
 * IUPAC ambiguity codes, and "." (mapped to itself). Lower-case keys map to
 * lower-case complements and upper-case keys to upper-case complements.
 *
 * Letters that are not keys of this map yield `undefined` on lookup.
 */
const DNAComplementMap = {
  ".": ".",
  a: "t",
  t: "a",
  u: "a",
  c: "g",
  g: "c",
  A: "T",
  T: "A",
  U: "A",
  C: "G",
  G: "C",
  // purine (A/G) <-> pyrimidine (C/T)
  r: "y",
  R: "Y",
  y: "r",
  Y: "R",
  // not-C (A/G/T) <-> not-G (A/C/T)
  d: "h",
  D: "H",
  h: "d",
  H: "D",
  // keto (G/T) <-> amino (A/C)
  k: "m",
  K: "M",
  m: "k",
  M: "K",
  // not-T (A/C/G) <-> not-A (C/G/T)
  v: "b",
  V: "B",
  b: "v",
  B: "V",
  // self-complementary IUPAC codes: strong (C/G), weak (A/T), any base
  s: "s",
  S: "S",
  w: "w",
  W: "W",
  n: "n",
  N: "N"
};

export default DNAComplementMap;
@@ -0,0 +1,417 @@
1
+ import insertGapsIntoRefSeq from "./insertGapsIntoRefSeq.js";
2
+
3
+ import {cloneDeep} from "lodash";
4
+
5
+ // bam.seq: NTGTAAGTCGTGAAAAAANCNNNCATATTNCGGAGGTAAAAATGAAAA...
6
+ // bam.pos: 43
7
+ // bam.cigar: 36M2D917M3I17M7I2M1I6M5I4M1D6M12I8M
8
+ // (note: bam.cigar is null if the sequencing read is unaligned)
9
+ // bam.reversed: true (if reversed)
10
+
11
+ // refSeq should be an object { name, sequence }
12
+ // seqReads should be an array of objects [{name, seq, pos, cigar}, {name, seq, pos, cigar}, ...]
13
+ // add gaps into sequencing reads before starting bp pos and from own deletions & all seq reads' insertions, minus own insertions
14
/**
 * Pads a reference sequence and its aligned sequencing reads with "-" so that
 * every returned sequence has the same length.
 *
 * For each aligned read the function:
 *   1) adds leading gaps up to the read's (soft-clip adjusted) start position,
 *   2) adds gaps for the read's own deletions,
 *   3) temporarily removes the read's own insertions,
 *   4) adds gaps for insertions that appear in the other reads,
 *   5) puts the read's own inserted bases back,
 *   6) pads the end up to the gapped reference length.
 * Afterwards all sequences are shifted right if a soft-clipped read starts
 * before the reference, and finally padded to one common length.
 *
 * @param {{name: string, sequence: string}} refSeq reference sequence
 * @param {Array<{name: string, seq: string, pos: number, cigar: ?string, reversed: (boolean|undefined)}>} seqReads
 *   sequencing reads; `pos` is the 1-based start position and `cigar` is made of
 *   S/M/D/I components. Reads whose `cigar` is null/undefined (unaligned) are
 *   REMOVED FROM THIS ARRAY IN PLACE, so that result index `i` corresponds to
 *   `seqReads[i - 1]` after the call.
 * @returns {Array<{name: string, sequence: string, reversed: (boolean|undefined), cigar: (string|undefined)}>}
 *   the gapped, upper-cased reference first (only `name` and `sequence`),
 *   followed by one entry per aligned read, e.g.
 *   [{ name: "ref seq", sequence: "GG---GA--GA-C--A---CC---" },
 *    { name: "r1", sequence: "-----GATTGA-C-----------", ... }]
 */
export default function addGapsToSeqReads(refSeq, seqReads) {
  // Remove unaligned reads for now. Walk backwards: a forward index loop that
  // splices skips the element following each removed one, which left every
  // second consecutive unaligned read in place and crashed on its null cigar.
  for (let i = seqReads.length - 1; i >= 0; i--) {
    if (seqReads[i].cigar == null) {
      seqReads.splice(i, 1);
    }
  }

  const refSeqWithGaps = insertGapsIntoRefSeq(refSeq.sequence, seqReads);
  // first object is the reference sequence with gaps, followed by the reads
  const seqReadsWithGaps = [
    { name: refSeq.name, sequence: refSeqWithGaps.toUpperCase() }
  ];

  // Parse every cigar once; the results are reused by all later steps.
  const parsedReads = seqReads.map(seqRead => parseSeqReadAlignment(seqRead));
  const insertionsInAllReads = collectInsertionsInSeqReads(parsedReads);

  seqReads.forEach((seqRead, readIndex) => {
    const { cigarComponents, adjustedSeqReadPos } = parsedReads[readIndex];

    // 1) add gaps before starting bp pos
    let eachSeqReadWithGaps = seqRead.seq.split("");
    if (adjustedSeqReadPos > 0) {
      eachSeqReadWithGaps.unshift("-".repeat(adjustedSeqReadPos - 1));
    }
    eachSeqReadWithGaps = eachSeqReadWithGaps.join("").split("");

    // Locate own deletions and insertions. bpPos is kept 1-based and is the
    // adjusted start plus the lengths of ALL preceding cigar components.
    const ownDeletions = [];
    const ownInsertionSpans = [];
    let componentStart = adjustedSeqReadPos;
    cigarComponents.forEach(component => {
      const operation = component.slice(-1);
      const length = Number(component.slice(0, -1));
      if (operation === "D") {
        ownDeletions.push({ bpPos: componentStart, number: length });
      } else if (operation === "I") {
        ownInsertionSpans.push({ bpPos: componentStart, number: length });
      }
      componentStart += length;
    });

    // 2) add own deletions to own sequence, in ascending bp pos order
    ownDeletions.sort((a, b) => a.bpPos - b.bpPos);
    ownDeletions.forEach(deletion => {
      eachSeqReadWithGaps.splice(
        deletion.bpPos - 1,
        0,
        "-".repeat(deletion.number)
      );
      // re-split so the multi-character gap run becomes one element per gap
      eachSeqReadWithGaps = eachSeqReadWithGaps.join("").split("");
    });

    // 3) remove own insertions from own sequence, remembering the inserted bases
    const ownInsertions = [];
    const ownInsertionsBp = [];
    ownInsertionSpans.forEach(span => {
      const nucleotides = [];
      for (let nucI = 0; nucI < span.number; nucI++) {
        nucleotides.push(eachSeqReadWithGaps[span.bpPos - 1 + nucI]);
      }
      ownInsertions.push({ bpPos: span.bpPos, number: span.number });
      ownInsertionsBp.push({
        bpPos: span.bpPos,
        number: span.number,
        nucleotides
      });
    });
    // snapshot of the positions before they are shifted below
    const ownInsertionsCompare = ownInsertions.map(insertion => ({
      bpPos: insertion.bpPos,
      number: insertion.number
    }));
    const sortedOwnInsertions = ownInsertions.sort(
      (a, b) => a.bpPos - b.bpPos
    );
    const sortedOwnInsertionsBp = ownInsertionsBp.sort(
      (a, b) => a.bpPos - b.bpPos
    );
    for (let ownI = 0; ownI < sortedOwnInsertions.length; ownI++) {
      const bpPosOfInsertion = sortedOwnInsertions[ownI].bpPos;
      const numberOfInsertions = sortedOwnInsertions[ownI].number;
      for (let numI = 0; numI < numberOfInsertions; numI++) {
        eachSeqReadWithGaps.splice(bpPosOfInsertion - 1, 1);
      }
      // later own insertions move left by the number of bases just removed
      for (let posI = ownI + 1; posI < sortedOwnInsertions.length; posI++) {
        sortedOwnInsertions[posI].bpPos -= numberOfInsertions;
      }
    }

    // 4) add other seq reads' insertions to seq read
    // Work on per-read copies: the entries are mutated below.
    const otherInsertions = collapseInsertionsByPosition(
      insertionsInAllReads
        .map(insertion => ({
          bpPos: insertion.bpPos,
          number: insertion.number
        }))
        .sort((a, b) => a.bpPos - b.bpPos)
    );
    // remove own insertions (original positions) from all insertions ...
    subtractOwnInsertions(otherInsertions, ownInsertionsCompare);
    // ... then combine overlap between other insertions & own insertions
    // (positions shifted in step 3).
    // NOTE(review): an own insertion whose original and shifted positions are
    // equal (e.g. the first one) is subtracted by both passes — this mirrors
    // the previous behaviour; confirm whether the double subtraction is intended.
    subtractOwnInsertions(otherInsertions, sortedOwnInsertions);

    // adjust own insertions according to the other reads' insertions to be
    // added (each other insertion at a smaller-or-equal bp pos adds ONE array
    // element in front of it, hence +1)
    const adjustedOwnInsertionsBp = sortedOwnInsertionsBp.map(insertion => ({
      bpPos: insertion.bpPos,
      number: insertion.number,
      nucleotides: insertion.nucleotides
    }));
    let previousInserts = 0;
    for (let ownI = 0; ownI < adjustedOwnInsertionsBp.length; ownI++) {
      adjustedOwnInsertionsBp[ownI].bpPos -= previousInserts;
      sortedOwnInsertionsBp[ownI].bpPos -= previousInserts;
      previousInserts += adjustedOwnInsertionsBp[ownI].number - 1;
    }
    otherInsertions.forEach(otherInsertion => {
      for (let ownI = 0; ownI < adjustedOwnInsertionsBp.length; ownI++) {
        if (otherInsertion.bpPos <= sortedOwnInsertionsBp[ownI].bpPos) {
          adjustedOwnInsertionsBp[ownI].bpPos += 1;
        }
      }
    });
    // add other seq reads' insertions to the sequence; stop at the first one
    // that lies beyond the current end of the read
    for (
      let otherI = 0;
      otherI < otherInsertions.length &&
      otherInsertions[otherI].bpPos <= eachSeqReadWithGaps.length;
      otherI++
    ) {
      // the whole gap run is inserted as a single array element ...
      eachSeqReadWithGaps.splice(
        otherInsertions[otherI].bpPos - 1,
        0,
        "-".repeat(otherInsertions[otherI].number)
      );
      // ... so the following insertions shift by exactly one element
      for (let posI = otherI + 1; posI < otherInsertions.length; posI++) {
        otherInsertions[posI].bpPos += 1;
      }
    }

    // 5) add own insertions back to own sequence
    adjustedOwnInsertionsBp.forEach(insertion => {
      eachSeqReadWithGaps.splice(
        insertion.bpPos - 1,
        0,
        insertion.nucleotides.join("")
      );
    });

    // 6) add gaps after seq read so that its length matches the gapped ref seq
    let sequence = eachSeqReadWithGaps.join("");
    if (sequence.length < refSeqWithGaps.length) {
      sequence += "-".repeat(refSeqWithGaps.length - sequence.length);
    }

    // sequence is a string like "GGGA--GA-C--ACC"
    seqReadsWithGaps.push({
      name: seqRead.name,
      sequence,
      reversed: seqRead.reversed,
      cigar: seqRead.cigar
    });
  });

  // 7) shift everything right if soft-clipped reads extend beyond the
  //    beginning of the ref seq
  // a) lengths by which soft-clipped reads start before the ref seq
  const seqReadLengthsBeforeRefSeqStart = parsedReads
    .filter(
      parsedRead =>
        parsedRead.startsSoftClipped && parsedRead.adjustedSeqReadPos < 0
    )
    .map(parsedRead => Math.abs(parsedRead.adjustedSeqReadPos));
  const longestSeqReadLength =
    seqReadLengthsBeforeRefSeqStart.length > 0
      ? Math.max(...seqReadLengthsBeforeRefSeqStart)
      : 0;
  // b) add the leading gaps to both the seq reads and the ref seq
  if (longestSeqReadLength > 0) {
    parsedReads.forEach((parsedRead, readIndex) => {
      const { startsSoftClipped, adjustedSeqReadPos } = parsedRead;
      let numberOfLeadingGaps = 0;
      if (!startsSoftClipped || adjustedSeqReadPos > 0) {
        numberOfLeadingGaps = longestSeqReadLength + 1;
      } else if (adjustedSeqReadPos < 0) {
        numberOfLeadingGaps =
          longestSeqReadLength - Math.abs(adjustedSeqReadPos);
      }
      // NOTE(review): a soft-clipped read whose adjusted start is exactly 0
      // gets no leading gaps (behaviour unchanged) — confirm this is intended.
      const gappedRead = seqReadsWithGaps[readIndex + 1];
      gappedRead.sequence =
        "-".repeat(numberOfLeadingGaps) + gappedRead.sequence;
    });
    seqReadsWithGaps[0].sequence =
      "-".repeat(longestSeqReadLength + 1) + seqReadsWithGaps[0].sequence;
  }

  // 8) make ref seq & seq reads all the same length (that of the longest one)
  const targetLength = Math.max(
    ...seqReadsWithGaps.map(gapped => gapped.sequence.length)
  );
  seqReadsWithGaps.forEach(gapped => {
    if (gapped.sequence.length < targetLength) {
      gapped.sequence += "-".repeat(targetLength - gapped.sequence.length);
    }
  });

  return seqReadsWithGaps;
}

// Splits a read's cigar into its S/M/D/I components (e.g. ["5S", "2M", "3I"])
// and computes the start position adjusted for leading soft-clipped bases.
function parseSeqReadAlignment(seqRead) {
  const cigarComponents = seqRead.cigar.match(/([0-9]*[SMDI])/g);
  const startsSoftClipped = cigarComponents[0].slice(-1) === "S";
  // the number in a leading "#S" is the count of soft-clipped bps at the start
  const adjustedSeqReadPos = startsSoftClipped
    ? seqRead.pos - Number(cigarComponents[0].slice(0, -1))
    : seqRead.pos;
  return { cigarComponents, startsSoftClipped, adjustedSeqReadPos };
}

// Lists every insertion of every read as { bpPos, number }. Here bpPos
// (1-based) is the adjusted start plus the lengths of the preceding
// NON-insertion components only.
function collectInsertionsInSeqReads(parsedReads) {
  const insertions = [];
  parsedReads.forEach(parsedRead => {
    let bpPos = parsedRead.adjustedSeqReadPos;
    parsedRead.cigarComponents.forEach(component => {
      const length = Number(component.slice(0, -1));
      if (component.slice(-1) === "I") {
        insertions.push({ bpPos, number: length });
      } else {
        bpPos += length;
      }
    });
  });
  return insertions;
}

// Given insertions sorted by bpPos, keeps a single entry per bpPos carrying
// the largest `number` (exact duplicates collapse as well). Never indexes past
// the end of the list, unlike the splice-in-a-while-loop it replaces.
function collapseInsertionsByPosition(sortedInsertions) {
  const collapsed = [];
  sortedInsertions.forEach(insertion => {
    const last = collapsed[collapsed.length - 1];
    if (last !== undefined && last.bpPos === insertion.bpPos) {
      if (insertion.number > last.number) {
        last.number = insertion.number;
      }
    } else {
      collapsed.push(insertion);
    }
  });
  return collapsed;
}

// For each own insertion, finds the entry of `otherInsertions` at the same
// bpPos and either reduces its gap count by the own count (when larger) or
// removes it (when smaller or equal). Mutates `otherInsertions`.
function subtractOwnInsertions(otherInsertions, ownInsertions) {
  ownInsertions.forEach(ownInsertion => {
    let matchIndex = otherInsertions.findIndex(
      e => e.bpPos === ownInsertion.bpPos
    );
    while (matchIndex !== -1) {
      const match = otherInsertions[matchIndex];
      if (match.number > ownInsertion.number) {
        match.number -= ownInsertion.number;
        break;
      }
      otherInsertions.splice(matchIndex, 1);
      // look again in case another entry sits at the same bp pos
      matchIndex = otherInsertions.findIndex(
        e => e.bpPos === ownInsertion.bpPos
      );
    }
  });
}