@teselagen/sequence-utils 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +2 -2
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
// this is throwing a weird eslint error
|
|
2
|
+
|
|
3
|
+
//
|
|
4
|
+
|
|
5
|
+
import generateAnnotations from "./generateAnnotations";
|
|
6
|
+
|
|
7
|
+
/**
 * Builds a randomly generated sequence-data object (handy for demos and tests).
 *
 * @param {Object} [options]
 * @param {boolean} [options.isProtein] When truthy a protein sequence is
 *   generated instead of a DNA sequence, the result is never circular, and
 *   `translations` / `primers` are left undefined.
 * @param {number} [options.sequenceLength=1000] Length passed to the sequence
 *   generator and used to bound the generated annotations.
 * @param {number} [options.numFeatures] Number of features (10 when falsy).
 * @param {number} [options.numParts] Number of parts (10 when falsy).
 * @param {number} [options.numPrimers] Number of primers (10 when falsy).
 * @param {number} [options.numTranslations] Number of translations (5 when falsy).
 * @returns {Object} An object with `circular`, `name`, `description`,
 *   `isProtein`, `sequence`, `proteinSequence`, `translations`, `features`,
 *   `primers` and `parts`. Note that `sequence` is `false` for proteins and
 *   `proteinSequence` is falsy for DNA (both come from `&&` short-circuits).
 */
export default function generateSequenceData({
  isProtein,
  sequenceLength = 1000,
  numFeatures,
  numParts,
  numPrimers,
  numTranslations
} = {}) {
  const proteinSequence = isProtein && generateSequence(sequenceLength, true);
  const sequence = !isProtein && generateSequence(sequenceLength);

  // NOTE(review): the meaning of the generateAnnotations arguments is not
  // visible from this file; presumably (count, start, end, maxLength) --
  // confirm against ./generateAnnotations.
  const lastIndex = sequenceLength - 1;
  const defaultMaxLength = sequenceLength / 3;

  return {
    circular: isProtein ? false : Math.random() > 0.5,
    // Math.random has to be *called*: multiplying the function object itself
    // yields NaN, which made every generated name "p-NaN".
    name: `p-${Math.floor(Math.random() * 100)}`,
    description: "",
    isProtein,
    sequence,
    proteinSequence,
    translations: isProtein
      ? undefined
      : generateAnnotations(
          numTranslations || 5,
          0,
          lastIndex,
          defaultMaxLength
        ),
    features: generateAnnotations(
      numFeatures || 10,
      0,
      lastIndex,
      defaultMaxLength
    ),
    primers: isProtein
      ? undefined
      : generateAnnotations(numPrimers || 10, 0, lastIndex, 50),
    parts: generateAnnotations(numParts || 10, 0, lastIndex, defaultMaxLength)
  };
}
|
|
50
|
+
|
|
51
|
+
// export default tidyUpSequenceData(exampleData)
|
|
52
|
+
|
|
53
|
+
/**
 * Generates a random sequence string.
 *
 * @param {number} [m=9] Number of characters to generate.
 * @param {boolean} [isProtein] When truthy, characters are drawn from the 20
 *   standard one-letter amino-acid codes; otherwise from the DNA bases "gatc".
 * @returns {string} A random string of exactly `m` characters.
 */
function generateSequence(m = 9, isProtein) {
  // The protein alphabet used to be the empty string, so charAt() always
  // returned "" and protein sequences came back empty whatever `m` was.
  const alphabet = isProtein ? "ACDEFGHIKLMNPQRSTVWY" : "gatc";
  let generated = "";
  for (let i = 0; i < m; i++) {
    generated += alphabet.charAt(Math.floor(Math.random() * alphabet.length));
  }
  return generated;
}
|
|
61
|
+
|
|
62
|
+
// tnr: this is used to generate a very large, multi-featured sequence
|
|
63
|
+
// var string = "ggggcccccgggggccc";
|
|
64
|
+
// var reallyLongFakeSequence = "";
|
|
65
|
+
// for (var i = 1; i < 100000; i++) {
|
|
66
|
+
// reallyLongFakeSequence += string;
|
|
67
|
+
// if (i % 100 === 0) {
|
|
68
|
+
// reallyLongFakeSequence += 'taafatg';
|
|
69
|
+
// sequenceData.features.push({
|
|
70
|
+
// id: i,
|
|
71
|
+
// start: parseInt(i * 10),
|
|
72
|
+
// end: parseInt(i * 10 + 100),
|
|
73
|
+
// name: 'cooljim',
|
|
74
|
+
// color: 'green',
|
|
75
|
+
// forward: true,
|
|
76
|
+
// annotationType: "feature"
|
|
77
|
+
// });
|
|
78
|
+
// }
|
|
79
|
+
// }
|
|
80
|
+
// sequenceData.sequence += reallyLongFakeSequence;
|
|
81
|
+
//
|
|
82
|
+
// export default function() {
|
|
83
|
+
// var baseSeqData = {
|
|
84
|
+
//
|
|
85
|
+
// }
|
|
86
|
+
// function seqGen() {
|
|
87
|
+
//
|
|
88
|
+
// }
|
|
89
|
+
// }
|
|
90
|
+
// "features" : [
|
|
91
|
+
// {
|
|
92
|
+
// "name" : "1",
|
|
93
|
+
// "type" : "misc_feature",
|
|
94
|
+
// "start" : 1,
|
|
95
|
+
// "end" : 1,
|
|
96
|
+
// "strand" : 1,
|
|
97
|
+
// "notes" : [],
|
|
98
|
+
// "color": 'blue'
|
|
99
|
+
// },
|
|
100
|
+
// {
|
|
101
|
+
// "name" : "2",
|
|
102
|
+
// "type" : "misc_feature",
|
|
103
|
+
// "start" : 1,
|
|
104
|
+
// "end" : 1,
|
|
105
|
+
// "strand" : 1,
|
|
106
|
+
// "notes" : [],
|
|
107
|
+
// "color": 'blue'
|
|
108
|
+
// },
|
|
109
|
+
// {
|
|
110
|
+
// "name" : "3",
|
|
111
|
+
// "type" : "misc_feature",
|
|
112
|
+
// "start" : 1,
|
|
113
|
+
// "end" : 1,
|
|
114
|
+
// "strand" : 1,
|
|
115
|
+
// "notes" : [],
|
|
116
|
+
// "color": 'blue'
|
|
117
|
+
// },
|
|
118
|
+
// {
|
|
119
|
+
// "name" : "4",
|
|
120
|
+
// "type" : "misc_feature",
|
|
121
|
+
// "start" : 1,
|
|
122
|
+
// "end" : 14,
|
|
123
|
+
// "strand" : 1,
|
|
124
|
+
// "notes" : [],
|
|
125
|
+
// "color": 'blue'
|
|
126
|
+
// },
|
|
127
|
+
// {
|
|
128
|
+
// "name" : "5",
|
|
129
|
+
// "type" : "misc_feature",
|
|
130
|
+
// "start" : 1,
|
|
131
|
+
// "end" : 1,
|
|
132
|
+
// "strand" : 1,
|
|
133
|
+
// "notes" : [],
|
|
134
|
+
// "color": 'blue'
|
|
135
|
+
// },
|
|
136
|
+
// {
|
|
137
|
+
// "name" : "6",
|
|
138
|
+
// "type" : "misc_feature",
|
|
139
|
+
// "id" : "5590c1978fafgw979df000a4f02c7a",
|
|
140
|
+
// "start" : 4,
|
|
141
|
+
// "end" : 6,
|
|
142
|
+
// "strand" : 1,
|
|
143
|
+
// "notes" : [],
|
|
144
|
+
// "color": 'orange'
|
|
145
|
+
// },
|
|
146
|
+
// {
|
|
147
|
+
// "name" : "housemouserousepouse",
|
|
148
|
+
// "type" : "misc_feature",
|
|
149
|
+
// "id" : "5590c197897fs9df000a4f02c7a",
|
|
150
|
+
// "start" : 4,
|
|
151
|
+
// "end" : 6,
|
|
152
|
+
// "strand" : 1,
|
|
153
|
+
// "notes" : [],
|
|
154
|
+
// "color": 'orange'
|
|
155
|
+
// },
|
|
156
|
+
// {
|
|
157
|
+
// "name" : "housemouserousepouse",
|
|
158
|
+
// "type" : "misc_feature",
|
|
159
|
+
// "id" : "5590c1978979dasdfaf000a4f02c7a",
|
|
160
|
+
// "start" : 4,
|
|
161
|
+
// "end" : 6,
|
|
162
|
+
// "strand" : 1,
|
|
163
|
+
// "notes" : [],
|
|
164
|
+
// "color": 'orange'
|
|
165
|
+
// },
|
|
166
|
+
// {
|
|
167
|
+
// "name" : "housemouserousepouse",
|
|
168
|
+
// "type" : "misc_feature",
|
|
169
|
+
// "id" : "5590c197faas8979df000a4f02c7a",
|
|
170
|
+
// "start" : 4,
|
|
171
|
+
// "end" : 6,
|
|
172
|
+
// "strand" : 1,
|
|
173
|
+
// "notes" : [],
|
|
174
|
+
// "color": 'orange'
|
|
175
|
+
// },
|
|
176
|
+
// {
|
|
177
|
+
// "name" : "housemouserousepouse",
|
|
178
|
+
// "type" : "misc_feature",
|
|
179
|
+
// "id" : "5590c1978979df000a4f02c7aasd",
|
|
180
|
+
// "start" : 4,
|
|
181
|
+
// "end" : 6,
|
|
182
|
+
// "strand" : 1,
|
|
183
|
+
// "notes" : [],
|
|
184
|
+
// "color": 'orange'
|
|
185
|
+
// },
|
|
186
|
+
// {
|
|
187
|
+
// "name" : "house",
|
|
188
|
+
// "type" : "misc_feature",
|
|
189
|
+
// "id" : "5590c1978979df000a4f02c7b",
|
|
190
|
+
// "start" : 70,
|
|
191
|
+
// "end" : 90,
|
|
192
|
+
// "strand" : 1,
|
|
193
|
+
// "notes" : [],
|
|
194
|
+
// "color": 'green'
|
|
195
|
+
// },
|
|
196
|
+
// {
|
|
197
|
+
// "name" : "weer",
|
|
198
|
+
// "type" : "misc_feature",
|
|
199
|
+
// "id" : "5590c1d88979df000a4f02f5c",
|
|
200
|
+
// "start" : 3,
|
|
201
|
+
// "end" : 69,
|
|
202
|
+
// "strand" : 1,
|
|
203
|
+
// "notes" : [],
|
|
204
|
+
// "color": 'red'
|
|
205
|
+
// }
|
|
206
|
+
// ],
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import generateSequenceData from "./generateSequenceData";
|
|
2
|
+
import chai from "chai";
|
|
3
|
+
import chaiSubset from "chai-subset";
|
|
4
|
+
import {map} from "lodash";
|
|
5
|
+
|
|
6
|
+
// Enable chai's `should` assertion style and the chai-subset plugin.
chai.should();
chai.use(chaiSubset);

// Smoke tests for the random sequence-data generator.
describe("generateSequenceData", () => {
  it("should generate some nice random data", () => {
    const { sequence } = generateSequenceData({ sequenceLength: 100 });
    sequence.length.should.equal(100);
  });

  it("numFeatures should work", () => {
    const options = { sequenceLength: 100, numFeatures: 100 };
    const generated = generateSequenceData(options);
    // lodash `map` with no iteratee turns the features collection into an array.
    const featureList = map(generated.features);
    featureList.length.should.equal(100);
  });
});
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
// seqReads should be an array of objects [{name, seq, pos, cigar}, {name, seq, pos, cigar}, ...]
|
|
2
|
+
/**
 * Collects the insertions described by the CIGAR strings of a set of aligned
 * reads and merges insertions that start at the same position.
 *
 * For every "I" component, the reported position is the read's `pos` plus the
 * lengths of all non-insertion (M / D) components that precede it in the
 * CIGAR string. When several insertions share a position only the largest
 * one is kept.
 *
 * @param {Array<{pos: number, cigar: string}>} seqReads Reads to inspect;
 *   only `pos` and `cigar` are read here. Reads whose CIGAR is missing or
 *   contains no M/D/I component (e.g. "*") contribute nothing.
 * @returns {Array<{bpPos: number, number: number}>} Insertions sorted by
 *   ascending `bpPos`, one entry per distinct position.
 */
export default function getAllInsertionsInSeqReads(seqReads) {
  const allInsertionsInSeqReads = [];

  seqReads.forEach(seqRead => {
    // Split the CIGAR string at M, D, or I, e.g. ["2M", "3I", "39M", "3D"].
    // String#match returns null when nothing matches, so fall back to [].
    const components = (seqRead.cigar || "").match(/([0-9]*[MDI])/g) || [];

    // Running position: pos + lengths of every M/D component seen so far
    // (keeps bpPos in the same base as seqRead.pos).
    let bpPos = seqRead.pos;
    components.forEach(component => {
      const length = Number(component.slice(0, -1));
      if (component.slice(-1) === "I") {
        allInsertionsInSeqReads.push({ bpPos, number: length });
      } else {
        bpPos += length;
      }
    });
  });

  // Sort insertions by ascending bp pos so equal positions are adjacent.
  allInsertionsInSeqReads.sort((a, b) => a.bpPos - b.bpPos);

  // Combine duplicate/overlapping insertions: keep the largest per position.
  const mergedInsertions = [];
  for (const insertion of allInsertionsInSeqReads) {
    const lastIndex = mergedInsertions.length - 1;
    const last = mergedInsertions[lastIndex];
    if (last && last.bpPos === insertion.bpPos) {
      if (insertion.number > last.number) {
        mergedInsertions[lastIndex] = insertion;
      }
    } else {
      mergedInsertions.push(insertion);
    }
  }

  return mergedInsertions;
}
|
|
56
|
+
|
|
57
|
+
// function getAllInsertionsInSeqReads(seqReads) {
|
|
58
|
+
// let allInsertionBpPosInSeqReads = [];
|
|
59
|
+
// seqReads.forEach(seqRead => {
|
|
60
|
+
// // split cigar string at M, D, or I (match, deletion, or insertion)
|
|
61
|
+
// // ["2M", "3I", "39M", "3D"...]
|
|
62
|
+
// const splitSeqRead = seqRead.cigar.match(/([0-9]*[MDI])/g)
|
|
63
|
+
// splitSeqRead.forEach(component => {
|
|
64
|
+
// // keeping bpPos 1-based
|
|
65
|
+
// let bpPosOfInsertion = seqRead.pos;
|
|
66
|
+
// if (component.slice(-1) === "I") {
|
|
67
|
+
// const numberOfInsertions = Number(component.slice(0, -1));
|
|
68
|
+
// const componentIndex = splitSeqRead.indexOf(component);
|
|
69
|
+
// for (let i = 0; i < componentIndex; i++) {
|
|
70
|
+
// const previousComponentNumber = Number(splitSeqRead[i].slice(0, -1));
|
|
71
|
+
// bpPosOfInsertion += previousComponentNumber;
|
|
72
|
+
// }
|
|
73
|
+
// for (let i = 1; i <= numberOfInsertions; i++) {
|
|
74
|
+
// allInsertionBpPosInSeqReads.push(bpPosOfInsertion - i);
|
|
75
|
+
// }
|
|
76
|
+
// }
|
|
77
|
+
// });
|
|
78
|
+
// });
|
|
79
|
+
// // allInsertionBpPosInSeqReads should be an array of bp pos [6, 15, 9, 2, 23...]
|
|
80
|
+
// // remove duplicates, organize in ascending order
|
|
81
|
+
// const uniqueInsertionBpPos = [...new Set(allInsertionBpPosInSeqReads)].sort(function(a, b) { return a - b });
|
|
82
|
+
// return uniqueInsertionBpPos;
|
|
83
|
+
// }
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import getAllInsertionsInSeqReads from "./getAllInsertionsInSeqReads.js";
|
|
2
|
+
|
|
3
|
+
// Checks position calculation and merging of insertions across several reads.
describe("get bp pos of all insertions in seq reads after bowtie2 alignment", () => {
  it("allInsertionsInSeqReads should be an array of objects [{bpPos: bp pos of insertion, number: # of insertions}, {bpPos, number}, ...]", () => {
    const alignedReads = [
      { name: "r1", seq: "GATTGAC", pos: 3, cigar: "2M2I3M" },
      { name: "r2", seq: "GAGAGAC", pos: 3, cigar: "7M" },
      { name: "r3", seq: "GGGAGATCAC", pos: 1, cigar: "6M1I3M" },
      { name: "r4", seq: "GATTGAC", pos: 3, cigar: "2M2I3M" },
      { name: "r5", seq: "GAGC", pos: 3, cigar: "3M1D1M" },
      { name: "r6", seq: "GAGCTTACC", pos: 3, cigar: "3M1D1M2I3M" },
      { name: "r7", seq: "GGCATTTCC", pos: 2, cigar: "2M3D2M3I2M" },
      { name: "r8", seq: "GGATTGACATT", pos: 1, cigar: "1D3M2I4M2I2D" },
      { name: "r9", seq: "GGTTTGACCTTT", pos: 1, cigar: "2M3I2D1M2D3M3I" }
    ];
    // One entry per distinct insertion position, largest insertion kept.
    const expectedInsertions = [
      { bpPos: 3, number: 3 },
      { bpPos: 5, number: 2 },
      { bpPos: 7, number: 1 },
      { bpPos: 8, number: 2 },
      { bpPos: 9, number: 3 },
      { bpPos: 11, number: 3 }
    ];

    const actualInsertions = getAllInsertionsInSeqReads(alignedReads);

    expect(actualInsertions).toEqual(expectedInsertions);
  });
});
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import {translateRange, getSequenceWithinRange} from "@teselagen/range-utils";
|
|
2
|
+
import revComp from "./getReverseComplementSequenceString";
|
|
3
|
+
import getAA from "./getAminoAcidFromSequenceTriplet";
|
|
4
|
+
|
|
5
|
+
//
|
|
6
|
+
import proteinAlphabet from "./proteinAlphabet";
|
|
7
|
+
|
|
8
|
+
// ac.throw([ac.string,ac.bool],arguments);
|
|
9
|
+
/**
 * @private
 * Produces one amino-acid record per base of the (sub)sequence, describing
 * which amino acid the base belongs to and where it sits inside its codon.
 *
 * @param {String} originalSequenceString The full sequence. DNA characters by
 *   default, or amino-acid characters when `isProteinSequence` is truthy (each
 *   amino-acid character is then counted as 3 bases).
 * @param {boolean} forward Translate left-to-right when truthy; otherwise each
 *   triplet is reverse-complemented and amino-acid indices count down.
 * @param {Object} [optionalSubrangeRange] When given, only the part of the
 *   sequence returned by `getSequenceWithinRange` for this range is
 *   translated, and its `start` is used as the offset when mapping codon
 *   ranges back onto the original sequence.
 * @param {boolean} [isProteinSequence] The sequence already consists of
 *   amino-acid characters; they are looked up (upper-cased) in
 *   `proteinAlphabet` instead of being translated from triplets.
 * @return {Array<Object>} One entry per base, each shaped like
 *   `{aminoAcid, positionInCodon, aminoAcidIndex, sequenceIndex, codonRange, fullCodon}`.
 *   Bases that do not belong to a complete codon get `fullCodon: false` and
 *   the amino acid returned by `getAA("xxx")`.
 * @throws {Error} If the number of generated entries does not match the
 *   number of bases.
 */
export default function getAminoAcidDataForEachBaseOfDna(
  originalSequenceString,
  forward,
  optionalSubrangeRange,
  isProteinSequence
) {
  // Every amino-acid character stands for three bases.
  const basesPerChar = isProteinSequence ? 3 : 1;
  const fullLength = originalSequenceString.length * basesPerChar;

  const sequenceString = optionalSubrangeRange
    ? getSequenceWithinRange(optionalSubrangeRange, originalSequenceString)
    : originalSequenceString;
  const startOffset = optionalSubrangeRange ? optionalSubrangeRange.start : 0;
  const bpLength = sequenceString.length * basesPerChar;

  // Maps a range expressed relative to the subsequence back onto the original.
  const toOriginalCoordinates = range =>
    translateRange(range, startOffset, fullLength);

  const perBaseData = [];
  let codonRange;

  // In reverse, the bases that cannot form a full codon sit at the *start* of
  // the string and amino-acid numbering begins at the highest index.
  const leadingGap = forward ? 0 : bpLength % 3;
  let aminoAcidIndex = forward ? 0 : Math.floor((bpLength - 1) / 3);

  if (!forward) {
    codonRange = toOriginalCoordinates({ start: 0, end: leadingGap - 1 });
    for (let i = 0; i < leadingGap; i++) {
      perBaseData.push({
        aminoAcid: getAA("xxx"), // placeholder triplet for an incomplete codon
        positionInCodon: leadingGap - i - 1,
        aminoAcidIndex,
        sequenceIndex: codonRange.start + i,
        codonRange,
        fullCodon: false
      });
    }
    if (leadingGap > 0) {
      aminoAcidIndex--;
    }
  }

  // Complete codons: three entries each, sharing amino acid and codon range.
  for (
    let codonStart = leadingGap;
    codonStart + 2 < bpLength;
    codonStart += 3
  ) {
    let aminoAcid;
    if (isProteinSequence) {
      aminoAcid = proteinAlphabet[sequenceString[codonStart / 3].toUpperCase()];
    } else {
      const triplet = sequenceString.slice(codonStart, codonStart + 3);
      aminoAcid = getAA(forward ? triplet : revComp(triplet));
    }

    codonRange = toOriginalCoordinates({
      start: codonStart,
      end: codonStart + 2
    });

    for (let offset = 0; offset < 3; offset++) {
      perBaseData.push({
        aminoAcid,
        // codon positions run 0,1,2 going forward and 2,1,0 in reverse
        positionInCodon: forward ? offset : 2 - offset,
        aminoAcidIndex,
        sequenceIndex: codonRange.start + offset,
        codonRange,
        fullCodon: true
      });
    }

    aminoAcidIndex += forward ? 1 : -1;
  }

  // Trailing bases that do not fill a codon (only occurs when going forward,
  // since the reverse direction handled its leftover bases up front).
  const uncoveredCount = bpLength - perBaseData.length;
  codonRange = toOriginalCoordinates({
    start: bpLength - uncoveredCount,
    end: bpLength - 1
  });
  for (let j = 0; j < uncoveredCount; j++) {
    perBaseData.push({
      aminoAcid: getAA("xxx"), // placeholder triplet for an incomplete codon
      positionInCodon: j,
      aminoAcidIndex,
      sequenceIndex: codonRange.start + j,
      fullCodon: false,
      codonRange
    });
  }

  // Sanity check: exactly one entry per base.
  if (perBaseData.length !== bpLength) {
    throw new Error("something went wrong!");
  }
  return perBaseData;
}
|