@teselagen/sequence-utils 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +2 -2
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
import addGapsToSeqReads from "./addGapsToSeqReads.js";
|
|
2
|
+
|
|
3
|
+
// Test suite for addGapsToSeqReads: each case passes a reference sequence plus
// sequencing reads (seq / pos / cigar / reversed) and checks the gapped
// alignment that comes back (ref seq first, then one entry per aligned read).
describe("cigar strings to gapped alignments", () => {
  it("adds gaps into sequencing reads before starting bp pos and from own deletions & other seq reads' insertions", () => {
    const refSeq = { name: "ref seq", sequence: "GGGAGACACC" };
    const seqReads = [
      { name: "r1", seq: "GATTGAC", pos: 3, cigar: "2M2I3M", reversed: false },
      { name: "r2", seq: "GAGAGAC", pos: 3, cigar: "7M", reversed: false },
      {
        name: "r3",
        seq: "GGGAGATCAC",
        pos: 1,
        cigar: "6M1I3M",
        reversed: false
      },
      { name: "r4", seq: "GATTGAC", pos: 3, cigar: "2M2I3M", reversed: false },
      { name: "r5", seq: "GAGC", pos: 3, cigar: "3M1D1M", reversed: false },
      {
        name: "r6",
        seq: "GAGCTTACC",
        pos: 3,
        cigar: "3M1D1M2I3M",
        reversed: true
      },
      {
        name: "r7",
        seq: "GGCATTTCC",
        pos: 2,
        cigar: "2M3D2M3I2M",
        reversed: true
      },
      {
        name: "r8",
        seq: "GGATTGACATT",
        pos: 1,
        cigar: "1D3M2I4M2I2D",
        reversed: true
      },
      {
        name: "r9",
        seq: "GGTTTGACCTTT",
        pos: 1,
        cigar: "2M3I2D1M2D3M3I",
        reversed: true
      }
    ];
    const result = addGapsToSeqReads(refSeq, seqReads);
    expect(result).toEqual([
      // ref seq first
      { name: "ref seq", sequence: "GG---GA--GA-C--A---CC---" },
      // then seq reads
      {
        name: "r1",
        sequence: "-----GATTGA-C-----------",
        cigar: "2M2I3M",
        reversed: false
      },
      {
        name: "r2",
        sequence: "-----GA--GA-G--A---C----",
        cigar: "7M",
        reversed: false
      },
      {
        name: "r3",
        sequence: "GG---GA--GATC--A---C----",
        cigar: "6M1I3M",
        reversed: false
      },
      {
        name: "r4",
        sequence: "-----GATTGA-C-----------",
        cigar: "2M2I3M",
        reversed: false
      },
      {
        name: "r5",
        sequence: "-----GA--G--C-----------",
        cigar: "3M1D1M",
        reversed: false
      },
      {
        name: "r6",
        sequence: "-----GA--G--CTTA---CC---",
        cigar: "3M1D1M2I3M",
        reversed: true
      },
      {
        name: "r7",
        sequence: "-G---G------C--ATTTCC---",
        cigar: "2M3D2M3I2M",
        reversed: true
      },
      {
        name: "r8",
        sequence: "-G---GATTGA-C--A-TT-----",
        cigar: "1D3M2I4M2I2D",
        reversed: true
      },
      {
        name: "r9",
        sequence: "GGTTT----G-----A---CCTTT",
        cigar: "2M3I2D1M2D3M3I",
        reversed: true
      }
    ]);
  });

  it("removes unaligned seq reads (seqRead.pos = 0, seqRead.cigar = null)", () => {
    const refSeq = { name: "ref seq", sequence: "GGACCGGAACAGGAAGCAAGGGACAG" };
    const seqReads = [
      {
        name: "r1",
        seq: "GAAGCAAGGGACSSSSS",
        pos: 13,
        cigar: "12M5S",
        reversed: false
      },
      // unaligned read: expected to be absent from the result
      { name: "r2", seq: "ZZZZZ", pos: 0, cigar: null, reversed: false }
    ];
    const result = addGapsToSeqReads(refSeq, seqReads);
    expect(result).toEqual([
      // ref seq first
      { name: "ref seq", sequence: "GGACCGGAACAGGAAGCAAGGGACAG---" },
      // then seq reads
      {
        name: "r1",
        sequence: "------------GAAGCAAGGGACSSSSS",
        cigar: "12M5S",
        reversed: false
      }
    ]);
  });

  it("adjusts bp pos of alignment with the ref seq (seqRead.pos) if there are soft-clipped reads at the beginning of a seq read (#S at start of seqRead.cigar)...seq read aligned near the beginning of the ref seq", () => {
    const refSeq = { name: "ref seq", sequence: "GGGAGACACC" };
    const seqReads = [
      {
        name: "r1",
        seq: "SSGATTGAC",
        pos: 3,
        cigar: "2S2M2I3M",
        reversed: false
      }
    ];
    const result = addGapsToSeqReads(refSeq, seqReads);
    expect(result).toEqual([
      // ref seq first
      { name: "ref seq", sequence: "GGGA--GACACC" },
      // then seq reads
      {
        name: "r1",
        sequence: "SSGATTGAC---",
        cigar: "2S2M2I3M",
        reversed: false
      }
    ]);
  });

  it("adjusts bp pos of alignment with the ref seq (seqRead.pos) if there are soft-clipped reads at the beginning of a seq read (#S at start of seqRead.cigar)...seq read aligned near the middle of the ref seq", () => {
    const refSeq = { name: "ref seq", sequence: "GGACCGGAACAGGAAGCAAGGGACAG" };
    const seqReads = [
      {
        name: "r1",
        seq: "SSSGAAGCAAG",
        pos: 13,
        cigar: "3S8M",
        reversed: false
      }
    ];
    const result = addGapsToSeqReads(refSeq, seqReads);
    expect(result).toEqual([
      // ref seq first
      { name: "ref seq", sequence: "GGACCGGAACAGGAAGCAAGGGACAG" },
      // then seq reads
      {
        name: "r1",
        sequence: "---------SSSGAAGCAAG------",
        cigar: "3S8M",
        reversed: false
      }
    ]);
  });

  it("adjusts bp pos of alignment with the ref seq (seqRead.pos) if there are soft-clipped reads at the beginning of a seq read (#S at start of seqRead.cigar)...multiple seq reads with #S at the start", () => {
    const refSeq = { name: "ref seq", sequence: "GGACCGGAACAGGAAGCAAGGGACAG" };
    const seqReads = [
      {
        name: "r1",
        seq: "SSACTTCGGAACAGGAAG",
        pos: 3,
        cigar: "2S2M2I12M",
        reversed: false
      },
      {
        name: "r2",
        seq: "SSSGAAGCAAG",
        pos: 13,
        cigar: "3S8M",
        reversed: false
      }
    ];
    const result = addGapsToSeqReads(refSeq, seqReads);
    expect(result).toEqual([
      // ref seq first
      { name: "ref seq", sequence: "GGAC--CGGAACAGGAAGCAAGGGACAG" },
      // then seq reads
      {
        name: "r1",
        sequence: "SSACTTCGGAACAGGAAG----------",
        cigar: "2S2M2I12M",
        reversed: false
      },
      {
        name: "r2",
        sequence: "-----------SSSGAAGCAAG------",
        cigar: "3S8M",
        reversed: false
      }
    ]);
  });

  it("adjusts bp pos of alignment with the ref seq (seqRead.pos) if there are soft-clipped reads at the beginning of a seq read (#S at start of seqRead.cigar)...soft-clipped reads before the beginning of the ref seq", () => {
    const refSeq = { name: "ref seq", sequence: "GGGAGACACC" };
    const seqReads = [
      {
        name: "r1",
        seq: "SSSGGGATTGAC",
        pos: 1,
        cigar: "3S4M2I3M",
        reversed: false
      }
    ];
    const result = addGapsToSeqReads(refSeq, seqReads);
    expect(result).toEqual([
      // ref seq first
      { name: "ref seq", sequence: "---GGGA--GACACC" },
      // then seq reads
      {
        name: "r1",
        sequence: "SSSGGGATTGAC---",
        cigar: "3S4M2I3M",
        reversed: false
      }
    ]);
  });

  it("works with soft-clipped reads at the end of a seq read (#S at end of seqRead.cigar)", () => {
    const refSeq = { name: "ref seq", sequence: "GGACCGGAACAGGAAGCAAGGGACAG" };
    const seqReads = [
      {
        name: "r1",
        // NOTE(review): seq is 11 chars (8 bases + 3 "S") but the cigar below
        // describes 12M5S = 17 chars; "8M3S" would match this seq. Left as-is
        // because the expectation echoes the same cigar -- confirm intent.
        seq: "GAAGCAAGSSS",
        pos: 13,
        cigar: "12M5S",
        reversed: false
      }
    ];
    const result = addGapsToSeqReads(refSeq, seqReads);
    expect(result).toEqual([
      // ref seq first
      { name: "ref seq", sequence: "GGACCGGAACAGGAAGCAAGGGACAG" },
      // then seq reads
      {
        name: "r1",
        sequence: "------------GAAGCAAGSSS---",
        cigar: "12M5S",
        reversed: false
      }
    ]);
  });

  it("accounts for soft-clipped reads at the end of a seq read (#S at end of seqRead.cigar) that make seqRead.sequence longer than refSeq.sequence, by making ref seq & seq reads all the same/longest length", () => {
    const refSeq = { name: "ref seq", sequence: "GGACCGGAACAGGAAGCAAGGGACAG" };
    const seqReads = [
      {
        name: "r1",
        seq: "GAAGCAAGGGACSSSSS",
        pos: 13,
        cigar: "12M5S",
        reversed: false
      },
      { name: "r2", seq: "GCAAG", pos: 16, cigar: "5M", reversed: false }
    ];
    const result = addGapsToSeqReads(refSeq, seqReads);
    expect(result).toEqual([
      // ref seq first
      { name: "ref seq", sequence: "GGACCGGAACAGGAAGCAAGGGACAG---" },
      // then seq reads
      {
        name: "r1",
        sequence: "------------GAAGCAAGGGACSSSSS",
        cigar: "12M5S",
        reversed: false
      },
      {
        name: "r2",
        sequence: "---------------GCAAG---------",
        cigar: "5M",
        reversed: false
      }
    ]);
  });

  // Title suffix differs from the single-read case above so the two tests can
  // be told apart in failure reports (they previously shared one title).
  it("adjusts bp pos of alignment with the ref seq (seqRead.pos) if there are soft-clipped reads at the beginning of a seq read (#S at start of seqRead.cigar)...soft-clipped reads before the beginning of the ref seq, mixed with other seq reads", () => {
    const refSeq = { name: "ref seq", sequence: "GGGAGACACC" };
    const seqReads = [
      {
        name: "r1",
        seq: "SSSGGGATTGAC",
        pos: 1,
        cigar: "3S4M2I3M",
        reversed: false
      },
      { name: "r2", seq: "GGAGAC", pos: 2, cigar: "6M", reversed: false },
      {
        name: "r3",
        seq: "SSGGGATTGAC",
        pos: 1,
        cigar: "2S4M2I3M",
        reversed: false
      },
      { name: "r4", seq: "SSCAC", pos: 7, cigar: "2S3M", reversed: false },
      { name: "r5", seq: "SSSSSCAC", pos: 7, cigar: "5S3M", reversed: false }
    ];
    const result = addGapsToSeqReads(refSeq, seqReads);
    expect(result).toEqual([
      // ref seq first
      { name: "ref seq", sequence: "---GGGA--GACACC" },
      // then seq reads
      {
        name: "r1",
        sequence: "SSSGGGATTGAC---",
        cigar: "3S4M2I3M",
        reversed: false
      },
      { name: "r2", sequence: "----GGA--GAC---", cigar: "6M", reversed: false },
      {
        name: "r3",
        sequence: "-SSGGGATTGAC---",
        cigar: "2S4M2I3M",
        reversed: false
      },
      {
        name: "r4",
        sequence: "---------SSCAC-",
        cigar: "2S3M",
        reversed: false
      },
      {
        name: "r5",
        sequence: "----SSS--SSCAC-",
        cigar: "5S3M",
        reversed: false
      }
    ]);
  });
});
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import {adjustRangeToInsert} from "@teselagen/range-utils";
|
|
2
|
+
import {map} from "lodash";
|
|
3
|
+
|
|
4
|
+
/**
 * Runs every annotation through `adjustRangeToInsert` for the given insert,
 * and does the same for each entry of the annotation's `locations` array when
 * the annotation has one.
 *
 * @param {Array|Object} annotationsToBeAdjusted - collection of annotations,
 *   iterated with lodash `map`.
 * @param {number} insertStart - forwarded unchanged to `adjustRangeToInsert`.
 * @param {number} insertLength - forwarded unchanged to `adjustRangeToInsert`.
 * @returns {Array<Object>} one new object per annotation, built from the
 *   properties of the adjusted range, with `locations` replaced by the
 *   adjusted locations when the input annotation had a truthy `locations`.
 */
export default function adjustAnnotationsToInsert(
  annotationsToBeAdjusted,
  insertStart,
  insertLength
) {
  // Single place that binds the insert parameters to the range adjuster.
  const shiftForInsert = range =>
    adjustRangeToInsert(range, insertStart, insertLength);

  return map(annotationsToBeAdjusted, annotation => {
    // Copy so the object returned by adjustRangeToInsert is never mutated.
    const adjusted = { ...shiftForInsert(annotation) };
    if (annotation.locations) {
      adjusted.locations = annotation.locations.map(loc => shiftForInsert(loc));
    }
    return adjusted;
  });
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import {
|
|
2
|
+
splitRangeIntoTwoPartsIfItIsCircular,
|
|
3
|
+
getSequenceWithinRange,
|
|
4
|
+
getRangeLength,
|
|
5
|
+
invertRange,
|
|
6
|
+
isPositionWithinRange,
|
|
7
|
+
} from "@teselagen/range-utils";
|
|
8
|
+
|
|
9
|
+
import spliceString from "string-splice";
|
|
10
|
+
|
|
11
|
+
/**
 * Returns `bpString` with `insertString` either spliced in at a caret
 * position or substituted for a selected range.
 *
 * @param {string} bpString - the original sequence.
 * @param {string} [insertString=""] - the characters to put in.
 * @param {number|{start: number, end: number}} caretPositionOrRange - when it
 *   is an object whose `start` is greater than -1 it is handled as a range to
 *   replace; any other value is handed to `spliceString` as the insert index.
 * @returns {string} the edited sequence.
 */
export default function adjustBpsToReplaceOrInsert(
  bpString,
  insertString = "",
  caretPositionOrRange
) {
  const isRange = caretPositionOrRange && caretPositionOrRange.start > -1;

  if (!isRange) {
    // Caret position passed: plain insert, nothing is removed (delete count 0).
    return spliceString(bpString, caretPositionOrRange, 0, insertString);
  }

  const seqLength = bpString.length;

  // The range spans the entire sequence, so only the insert is left.
  if (getRangeLength(caretPositionOrRange, seqLength) === seqLength) {
    return insertString;
  }

  // The inverted range is the part of the sequence that is kept; it is split
  // into separate pieces when its start is after its end.
  // NOTE(review): exact semantics of invertRange /
  // splitRangeIntoTwoPartsIfItIsCircular live in @teselagen/range-utils.
  const keptRanges = splitRangeIntoTwoPartsIfItIsCircular(
    invertRange(caretPositionOrRange, seqLength)
  );

  if (keptRanges.length === 1) {
    const [onlyKept] = keptRanges;
    const keptBps = "" + getSequenceWithinRange(onlyKept, bpString);
    // If the kept piece contains position 0 the insert goes after it,
    // otherwise the insert goes in front of it.
    return isPositionWithinRange(0, onlyKept, seqLength, true, true)
      ? keptBps + insertString
      : insertString + keptBps;
  }

  // Several kept pieces: the insert sits right after the first one.
  let rebuilt = "";
  for (let i = 0; i < keptRanges.length; i++) {
    rebuilt += getSequenceWithinRange(keptRanges[i], bpString);
    if (i === 0) {
      rebuilt += insertString;
    }
  }
  return rebuilt;
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
//tnr: half finished test.
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
import chai from "chai";
|
|
5
|
+
import chaiSubset from "chai-subset";
|
|
6
|
+
|
|
7
|
+
import adjustBpsToReplaceOrInsert from "./adjustBpsToReplaceOrInsert";
|
|
8
|
+
|
|
9
|
+
// Enable chai's `.should` assertion style and register the chai-subset plugin.
chai.should();
chai.use(chaiSubset);

// Covers both call shapes of adjustBpsToReplaceOrInsert: a numeric caret
// position (pure insert) and a {start, end} range (replace).
describe("adjustBpsToReplaceOrInsert", () => {
  // Each caret-position case has its own title so a failure report
  // identifies which position broke.
  it("inserts characters at correct caret position 0", () => {
    adjustBpsToReplaceOrInsert("tttgggaaaccc", "xxx", 0).should.equal(
      "xxxtttgggaaaccc"
    );
  });
  it("inserts characters at correct caret position 3", () => {
    adjustBpsToReplaceOrInsert("tttgggaaaccc", "xxx", 3).should.equal(
      "tttxxxgggaaaccc"
    );
  });

  it("inserts characters at correct caret position 12", () => {
    adjustBpsToReplaceOrInsert("tttgggaaaccc", "xxx", 12).should.equal(
      "tttgggaaacccxxx"
    );
  });
  it("can replace whole sequence with upper case", () => {
    adjustBpsToReplaceOrInsert("tttgggaaaccc", "TTTGGGAAACCC", {
      start: 0,
      end: 11
    }).should.equal("TTTGGGAAACCC");
  });
  it("can replace whole sequence with just a couple chars", () => {
    adjustBpsToReplaceOrInsert("tttgggaaaccc", "xx", {
      start: 0,
      end: 11
    }).should.equal("xx");
  });
  it("inserts characters at correct range 0 0", () => {
    adjustBpsToReplaceOrInsert("tttgggaaaccc", "xxx", {
      start: 0,
      end: 0
    }).should.equal("xxxttgggaaaccc");
  });
  it("inserts characters at correct range 11 11", () => {
    adjustBpsToReplaceOrInsert("tttgggaaaccc", "xxx", {
      start: 11,
      end: 11
    }).should.equal("tttgggaaaccxxx");
  });
  // start > end: the range covers the last and the first base.
  it("inserts characters at correct range 11 0", () => {
    adjustBpsToReplaceOrInsert("tttgggaaaccc", "xxx", {
      start: 11,
      end: 0
    }).should.equal("xxxttgggaaacc");
  });
});
|