@teselagen/sequence-utils 0.3.38-beta.2 → 0.3.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DNAComplementMap.d.ts +1 -1
- package/addGapsToSeqReads.d.ts +3 -16
- package/adjustAnnotationsToInsert.d.ts +1 -2
- package/adjustBpsToReplaceOrInsert.d.ts +1 -2
- package/aliasedEnzymesByName.d.ts +1 -37
- package/aminoAcidToDegenerateDnaMap.d.ts +31 -1
- package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
- package/annotateSingleSeq.d.ts +4 -5
- package/annotationTypes.d.ts +2 -2
- package/autoAnnotate.d.ts +8 -17
- package/bioData.d.ts +58 -10
- package/calculateEndStability.d.ts +1 -1
- package/calculateNebTa.d.ts +1 -6
- package/calculateNebTm.d.ts +4 -6
- package/calculatePercentGC.d.ts +1 -1
- package/calculateSantaLuciaTm.d.ts +114 -28
- package/calculateTm.d.ts +1 -13
- package/computeDigestFragments.d.ts +24 -30
- package/condensePairwiseAlignmentDifferences.d.ts +1 -1
- package/convertAACaretPositionOrRangeToDna.d.ts +1 -2
- package/convertDnaCaretPositionOrRangeToAA.d.ts +1 -2
- package/cutSequenceByRestrictionEnzyme.d.ts +1 -2
- package/defaultEnzymesByName.d.ts +1 -2
- package/degenerateDnaToAminoAcidMap.d.ts +1 -1
- package/degenerateRnaToAminoAcidMap.d.ts +1 -1
- package/deleteSequenceDataAtRange.d.ts +1 -2
- package/diffUtils.d.ts +7 -9
- package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +1 -2
- package/featureTypesAndColors.d.ts +6 -19
- package/filterSequenceString.d.ts +10 -14
- package/findApproxMatches.d.ts +1 -7
- package/findNearestRangeOfSequenceOverlapToPosition.d.ts +1 -2
- package/findOrfsInPlasmid.d.ts +11 -2
- package/findSequenceMatches.d.ts +1 -11
- package/generateAnnotations.d.ts +1 -2
- package/generateSequenceData.d.ts +13 -8
- package/getAllInsertionsInSeqReads.d.ts +1 -11
- package/getAminoAcidDataForEachBaseOfDna.d.ts +5 -6
- package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
- package/getAminoAcidStringFromSequenceString.d.ts +1 -3
- package/getCodonRangeForAASliver.d.ts +4 -3
- package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getComplementSequenceAndAnnotations.d.ts +1 -5
- package/getComplementSequenceString.d.ts +1 -1
- package/getCutsiteType.d.ts +1 -2
- package/getCutsitesFromSequence.d.ts +1 -2
- package/getDegenerateDnaStringFromAAString.d.ts +1 -1
- package/getDegenerateRnaStringFromAAString.d.ts +1 -1
- package/getDigestFragmentsForCutsites.d.ts +1 -4
- package/getDigestFragmentsForRestrictionEnzymes.d.ts +1 -8
- package/getInsertBetweenVals.d.ts +1 -2
- package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +1 -2
- package/getOrfsFromSequence.d.ts +11 -17
- package/getOverlapBetweenTwoSequences.d.ts +1 -2
- package/getPossiblePartsFromSequenceAndEnzymes.d.ts +1 -18
- package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAnnotation.d.ts +1 -11
- package/getReverseComplementSequenceAndAnnotations.d.ts +1 -5
- package/getReverseComplementSequenceString.d.ts +1 -1
- package/getReverseSequenceString.d.ts +1 -1
- package/getSequenceDataBetweenRange.d.ts +1 -9
- package/getVirtualDigest.d.ts +10 -11
- package/guessIfSequenceIsDnaAndNotProtein.d.ts +1 -5
- package/index.cjs +491 -728
- package/index.d.ts +5 -8
- package/index.js +491 -728
- package/index.umd.cjs +491 -728
- package/insertGapsIntoRefSeq.d.ts +1 -2
- package/insertSequenceDataAtPositionOrRange.d.ts +1 -10
- package/isEnzymeType2S.d.ts +1 -2
- package/mapAnnotationsToRows.d.ts +1 -9
- package/package.json +9 -12
- package/prepareCircularViewData.d.ts +1 -2
- package/prepareRowData.d.ts +3 -7
- package/proteinAlphabet.d.ts +1 -1
- package/rotateBpsToPosition.d.ts +1 -1
- package/rotateSequenceDataToPosition.d.ts +1 -3
- package/shiftAnnotationsByLen.d.ts +3 -4
- package/src/autoAnnotate.test.js +1 -0
- package/src/getSequenceDataBetweenRange.js +11 -2
- package/src/getSequenceDataBetweenRange.test.js +42 -0
- package/src/prepareRowData_output1.json +0 -1
- package/threeLetterSequenceStringToAminoAcidMap.d.ts +921 -11
- package/tidyUpAnnotation.d.ts +11 -13
- package/tidyUpSequenceData.d.ts +1 -15
- package/src/DNAComplementMap.ts +0 -32
- package/src/addGapsToSeqReads.ts +0 -436
- package/src/adjustAnnotationsToInsert.ts +0 -20
- package/src/adjustBpsToReplaceOrInsert.ts +0 -73
- package/src/aliasedEnzymesByName.ts +0 -7366
- package/src/aminoAcidToDegenerateDnaMap.ts +0 -32
- package/src/aminoAcidToDegenerateRnaMap.ts +0 -32
- package/src/annotateSingleSeq.ts +0 -37
- package/src/annotationTypes.ts +0 -23
- package/src/autoAnnotate.ts +0 -290
- package/src/bioData.ts +0 -65
- package/src/calculateEndStability.ts +0 -91
- package/src/calculateNebTa.ts +0 -46
- package/src/calculateNebTm.ts +0 -132
- package/src/calculatePercentGC.ts +0 -3
- package/src/calculateSantaLuciaTm.ts +0 -184
- package/src/calculateTm.ts +0 -242
- package/src/computeDigestFragments.ts +0 -238
- package/src/condensePairwiseAlignmentDifferences.ts +0 -85
- package/src/convertAACaretPositionOrRangeToDna.ts +0 -28
- package/src/convertDnaCaretPositionOrRangeToAA.ts +0 -28
- package/src/cutSequenceByRestrictionEnzyme.ts +0 -345
- package/src/defaultEnzymesByName.ts +0 -280
- package/src/degenerateDnaToAminoAcidMap.ts +0 -5
- package/src/degenerateRnaToAminoAcidMap.ts +0 -5
- package/src/deleteSequenceDataAtRange.ts +0 -13
- package/src/diffUtils.ts +0 -80
- package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +0 -16
- package/src/featureTypesAndColors.ts +0 -167
- package/src/filterSequenceString.ts +0 -153
- package/src/findApproxMatches.ts +0 -58
- package/src/findNearestRangeOfSequenceOverlapToPosition.ts +0 -43
- package/src/findOrfsInPlasmid.ts +0 -31
- package/src/findSequenceMatches.ts +0 -154
- package/src/generateAnnotations.ts +0 -39
- package/src/generateSequenceData.ts +0 -212
- package/src/getAllInsertionsInSeqReads.ts +0 -100
- package/src/getAminoAcidDataForEachBaseOfDna.ts +0 -305
- package/src/getAminoAcidFromSequenceTriplet.ts +0 -27
- package/src/getAminoAcidStringFromSequenceString.ts +0 -36
- package/src/getCodonRangeForAASliver.ts +0 -73
- package/src/getComplementAminoAcidStringFromSequenceString.ts +0 -10
- package/src/getComplementSequenceAndAnnotations.ts +0 -25
- package/src/getComplementSequenceString.ts +0 -23
- package/src/getCutsiteType.ts +0 -18
- package/src/getCutsitesFromSequence.ts +0 -22
- package/src/getDegenerateDnaStringFromAAString.ts +0 -15
- package/src/getDegenerateRnaStringFromAAString.ts +0 -15
- package/src/getDigestFragmentsForCutsites.ts +0 -126
- package/src/getDigestFragmentsForRestrictionEnzymes.ts +0 -50
- package/src/getInsertBetweenVals.ts +0 -31
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +0 -40
- package/src/getMassOfAaString.ts +0 -29
- package/src/getOrfsFromSequence.ts +0 -132
- package/src/getOverlapBetweenTwoSequences.ts +0 -30
- package/src/getPossiblePartsFromSequenceAndEnzymes.ts +0 -149
- package/src/getReverseAminoAcidStringFromSequenceString.ts +0 -22
- package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +0 -10
- package/src/getReverseComplementAnnotation.ts +0 -33
- package/src/getReverseComplementSequenceAndAnnotations.ts +0 -46
- package/src/getReverseComplementSequenceString.ts +0 -18
- package/src/getReverseSequenceString.ts +0 -12
- package/src/getSequenceDataBetweenRange.ts +0 -154
- package/src/getVirtualDigest.ts +0 -139
- package/src/guessIfSequenceIsDnaAndNotProtein.ts +0 -39
- package/src/index.test.ts +0 -43
- package/src/index.ts +0 -111
- package/src/insertGapsIntoRefSeq.ts +0 -43
- package/src/insertSequenceDataAtPosition.ts +0 -2
- package/src/insertSequenceDataAtPositionOrRange.ts +0 -328
- package/src/isEnzymeType2S.ts +0 -5
- package/src/mapAnnotationsToRows.ts +0 -256
- package/src/prepareCircularViewData.ts +0 -24
- package/src/prepareRowData.ts +0 -61
- package/src/proteinAlphabet.ts +0 -271
- package/src/rotateBpsToPosition.ts +0 -12
- package/src/rotateSequenceDataToPosition.ts +0 -54
- package/src/shiftAnnotationsByLen.ts +0 -24
- package/src/threeLetterSequenceStringToAminoAcidMap.ts +0 -198
- package/src/tidyUpAnnotation.ts +0 -205
- package/src/tidyUpSequenceData.ts +0 -213
- package/src/types.ts +0 -109
- package/types.d.ts +0 -105
|
@@ -1,305 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
translateRange,
|
|
3
|
-
getSequenceWithinRange,
|
|
4
|
-
flipContainedRange,
|
|
5
|
-
isPositionWithinRange,
|
|
6
|
-
Range
|
|
7
|
-
} from "@teselagen/range-utils";
|
|
8
|
-
import revComp from "./getReverseComplementSequenceString";
|
|
9
|
-
import getAA from "./getAminoAcidFromSequenceTriplet";
|
|
10
|
-
|
|
11
|
-
//
|
|
12
|
-
import proteinAlphabet from "./proteinAlphabet";
|
|
13
|
-
|
|
14
|
-
/**
 * @private
 * Scans sequenceString forward from index and collects up to three bases that
 * lie inside one of the exonRange entries. Bases outside every exon range are
 * stepped over, but they still count towards basesRead.
 * @param {Number} index The index of the sequenceString to start scanning at
 * @param {String} sequenceString The dna sequenceString.
 * @param {Object[]} exonRange Ranges of the sequenceString whose bases belong to exons.
 * @return {Object} The collected bases, the number of positions scanned, and where the collected bases sit
 * @property {String} triplet The collected bases (shorter than 3 when the sequence ends first)
 * @property {Number} basesRead The number of positions scanned, skipped bases included
 * @property {Number[]} codonPositions The sequenceString positions of the collected bases
 */
function getNextTriplet(
  index: number,
  sequenceString: string,
  exonRange: Range[]
) {
  const sequenceLength = sequenceString.length;
  // Positions are relative to the coding sequence start, introns included.
  const codonPositions: number[] = [];
  let triplet = "";
  let cursor = index;

  // Keep scanning until three exon bases are collected or the sequence ends.
  while (cursor < sequenceLength && triplet.length < 3) {
    const position = cursor;
    // NOTE(review): the trailing `true, false` flags are passed straight to
    // isPositionWithinRange; their meaning is defined in @teselagen/range-utils.
    const inExon = exonRange.some(exon =>
      isPositionWithinRange(position, exon, sequenceLength, true, false)
    );
    if (inExon) {
      triplet += sequenceString[position];
      codonPositions.push(position);
    }
    cursor++;
  }

  return { triplet, basesRead: cursor - index, codonPositions };
}
|
|
62
|
-
|
|
63
|
-
/**
 * @private
 * Derives the values getAminoAcidDataForEachBaseOfDna works with from its arguments.
 * @param {String} originalSequenceString The dna (or protein) sequenceString.
 * @param {boolean} forward Whether the translation is in the forward direction.
 * @param {Object} optionalSubrangeRange The range of the sequenceString to translate, or null for all of it.
 * @param {boolean} isProteinSequence Whether the sequenceString is a protein sequence.
 * @return {Object} The derived properties
 * @property {String} sequenceString
 *  - The part of originalSequenceString selected by optionalSubrangeRange (all of it when no range is given).
 *  - If !isProteinSequence and !forward, the reverse complement of that part.
 * @property {Object} translationRange start/end of optionalSubrangeRange, or the whole sequence when no range is given.
 * @property {Number} originalSequenceStringLength originalSequenceString.length, multiplied by 3 for protein sequences.
 * @property {Number} sequenceStringLength sequenceString.length, multiplied by 3 for protein sequences.
 * @property {Object[]} exonRange The exon ranges shifted to be relative to translationRange.start (and flipped when !forward).
 */
function getTranslatedSequenceProperties(
  originalSequenceString: string,
  forward: boolean,
  optionalSubrangeRange: Range | null,
  isProteinSequence: boolean
) {
  // One protein character accounts for three DNA positions.
  const basesPerChar = isProteinSequence ? 3 : 1;
  const originalSequenceStringLength =
    originalSequenceString.length * basesPerChar;

  let sequenceString = optionalSubrangeRange
    ? (getSequenceWithinRange(
        optionalSubrangeRange,
        originalSequenceString
      ) as string)
    : originalSequenceString;

  const translationRange = optionalSubrangeRange
    ? { start: optionalSubrangeRange.start, end: optionalSubrangeRange.end }
    : { start: 0, end: originalSequenceStringLength - 1 };

  // Measured before any reverse complementing below.
  const sequenceStringLength = sequenceString.length * basesPerChar;

  if (!forward && !isProteinSequence) {
    sequenceString = revComp(sequenceString);
  }

  // TODO: what to do with protein if the subrange carries locations?
  // Exon locations are only honoured for DNA; otherwise the whole
  // translation range is treated as a single exon.
  const subrangeLocations = isProteinSequence
    ? undefined
    : optionalSubrangeRange?.locations;
  const absoluteExonRange = subrangeLocations || [translationRange];

  const exonRange = absoluteExonRange.map(absoluteRange => {
    const relativeRange = translateRange(
      absoluteRange,
      -translationRange.start,
      originalSequenceStringLength
    );
    return forward
      ? relativeRange
      : flipContainedRange(
          relativeRange,
          { start: 0, end: sequenceStringLength - 1 },
          sequenceStringLength
        );
  });

  return {
    sequenceString,
    translationRange,
    sequenceStringLength,
    originalSequenceStringLength,
    exonRange
  };
}
|
|
142
|
-
|
|
143
|
-
/**
 * Converts a position within the CDS (where A in ATG is 0, and T in ATG is 1)
 * into the matching position in the main sequence.
 *
 * The position is shifted by translationRange.start and, for reverse
 * translations, flipped within translationRange.
 *
 * @param {Number} index The position within the CDS
 * @param {boolean} forward Whether the translation is in the forward direction.
 * @param {Object} translationRange The range of the main sequence being translated
 * @param {Number} mainSequenceLength The length of the main sequence
 * @return {Number} The position in the main sequence
 */
function positionInCdsToPositionInMainSequence(
  index: number,
  forward: boolean,
  translationRange: Range,
  mainSequenceLength: number
) {
  const shifted = translateRange(
    { start: index, end: index },
    translationRange.start,
    mainSequenceLength
  );
  const mapped = forward
    ? shifted
    : flipContainedRange(shifted, translationRange, mainSequenceLength);
  return mapped.start;
}
|
|
175
|
-
|
|
176
|
-
/**
 * @private
 * Produces one entry per base of the translated stretch, describing the amino
 * acid that base contributes to and where the base sits inside its codon.
 * Bases that were skipped while collecting a codon (introns) get an entry whose
 * fields are null except for sequenceIndex.
 * @param {String} originalSequenceString The dna sequenceString.
 * @param {boolean} forward Whether to translate forward; when false the result array is reversed before returning
 * @param {Object} optionalSubrangeRange The range to translate, or null for the whole sequence
 * @param {boolean} isProteinSequence We're passing in a sequence of AA chars instead of DNA chars (each char is treated as 3 bases)
 * @return [{
 *   aminoAcid,
 *   positionInCodon,
 *   aminoAcidIndex,
 *   sequenceIndex,
 *   codonRange,
 *   fullCodon
 * }] An empty array when originalSequenceString is empty.
 * @throws {Error} When the number of entries produced differs from the translated length.
 */
export default function getAminoAcidDataForEachBaseOfDna(
  originalSequenceString: string,
  forward: boolean,
  optionalSubrangeRange: Range | null,
  isProteinSequence: boolean
) {
  if (!originalSequenceString) {
    return [];
  }

  // Obtain derived properties, see getTranslatedSequenceProperties
  const derived = getTranslatedSequenceProperties(
    originalSequenceString,
    forward,
    optionalSubrangeRange,
    isProteinSequence
  );
  const {
    sequenceString,
    translationRange,
    sequenceStringLength,
    originalSequenceStringLength,
    exonRange
  } = derived;

  const toMainSequencePosition = (positionInCds: number) =>
    positionInCdsToPositionInMainSequence(
      positionInCds,
      forward,
      translationRange,
      originalSequenceStringLength
    );

  const perBaseData = [];

  // Walk the translated length one codon at a time.
  let index = 0;
  while (index < sequenceStringLength) {
    // NOTE(review): derived from the CDS offset, which skipped intron bases
    // advance as well; confirm this numbering is intended.
    const aminoAcidIndex = Math.floor(index / 3);
    let aminoAcid;
    let codonPositionsInCDS: number[];
    let basesRead: number;

    if (isProteinSequence) {
      basesRead = 3;
      codonPositionsInCDS = [index, index + 1, index + 2];
      aminoAcid =
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        (proteinAlphabet as any)[sequenceString[index / 3].toUpperCase()];
    } else {
      const next = getNextTriplet(index, sequenceString, exonRange);
      basesRead = next.basesRead;
      codonPositionsInCDS = next.codonPositions;
      // An incomplete triplet is translated as the "xxx" placeholder codon.
      aminoAcid = getAA(next.triplet.length === 3 ? next.triplet : "xxx");
    }

    const absoluteCodonPositions = codonPositionsInCDS.map(positionInCds =>
      toMainSequencePosition(positionInCds)
    );

    // What should the codon range be if it comprises intron bases?
    const firstAbsolute = absoluteCodonPositions[0];
    const lastAbsolute = absoluteCodonPositions[codonPositionsInCDS.length - 1];
    const codonRange = forward
      ? { start: firstAbsolute, end: lastAbsolute }
      : { start: lastAbsolute, end: firstAbsolute };
    const fullCodon = codonPositionsInCDS.length === 3;

    // Emit one entry for every position that was scanned for this codon.
    let positionInCodon = 0;
    const scanEnd = index + basesRead;
    for (let posInCds = index; posInCds < scanEnd; posInCds++) {
      const slot = codonPositionsInCDS.indexOf(posInCds);
      if (slot === -1) {
        // push a null object for intron bases
        perBaseData.push({
          aminoAcid: null,
          positionInCodon: null,
          aminoAcidIndex: null,
          sequenceIndex: toMainSequencePosition(posInCds),
          codonRange: null,
          fullCodon: null
        });
      } else {
        perBaseData.push({
          aminoAcid,
          positionInCodon,
          aminoAcidIndex,
          sequenceIndex: absoluteCodonPositions[slot],
          codonRange,
          fullCodon
        });
        positionInCodon++;
      }
    }

    // Advance by a codon plus however many intron bases were read.
    index += basesRead - codonPositionsInCDS.length + 3;
  }

  if (sequenceStringLength !== perBaseData.length) {
    throw new Error("something went wrong!");
  }

  // Reverse the array if we're translating in the reverse direction
  if (!forward) {
    perBaseData.reverse();
  }
  return perBaseData;
}
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import threeLetterSequenceStringToAminoAcidMap from "./threeLetterSequenceStringToAminoAcidMap";
|
|
2
|
-
import proteinAlphabet from "./proteinAlphabet";
|
|
3
|
-
import degenerateDnaToAminoAcidMap from "./degenerateDnaToAminoAcidMap";
|
|
4
|
-
|
|
5
|
-
// TODO(tnr): expand the threeLetterSequenceStringToAminoAcidMap mappings to include RNA characters.
// Currently stop bps aren't all mapped!
/**
 * Looks up the amino acid entry for a three-character codon.
 *
 * The codon is lower-cased and looked up in
 * threeLetterSequenceStringToAminoAcidMap first. If that has no truthy entry,
 * every "x" is replaced by "n" and the result is looked up in
 * degenerateDnaToAminoAcidMap, falling back to the letter "x". The upper-cased
 * letter is then used as the key into proteinAlphabet.
 *
 * @param sequenceString The codon; must be exactly three characters long.
 * @returns The entry found in threeLetterSequenceStringToAminoAcidMap, or
 *   otherwise the proteinAlphabet entry for the resolved letter.
 * @throws Error when sequenceString is not exactly three characters long.
 */
export default function getAminoAcidFromSequenceTriplet(
  sequenceString: string
) {
  const triplet = sequenceString.toLowerCase();
  if (triplet.length !== 3) {
    throw new Error("must pass a string of length 3");
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const aa = (threeLetterSequenceStringToAminoAcidMap as any)[triplet];
  if (aa) {
    return aa;
  }
  // x and n are equivalent dna chars. A global regex is required here:
  // replace() with a string pattern only swaps the first occurrence.
  const degenerateTriplet = triplet.replace(/x/g, "n");
  const letter =
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (degenerateDnaToAminoAcidMap as any)[degenerateTriplet] || "x";

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  return (proteinAlphabet as any)[letter.toUpperCase()];
}
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
|
|
2
|
-
import { AminoAcidData } from "./types";
|
|
3
|
-
|
|
4
|
-
/**
 * Translates a DNA string into its amino acid string.
 *
 * Uses the per-base data from getAminoAcidDataForEachBaseOfDna (forward, whole
 * sequence, not a protein). Bases that are not part of a full codon are
 * ignored. A "*" found among the last three per-base entries is dropped
 * unless options.doNotExcludeAsterisk is set.
 *
 * @param sequenceString The DNA sequence to translate.
 * @param options doNotExcludeAsterisk: keep a "*" at the end of the result.
 * @returns The amino acid values joined into one string.
 */
export default function getAminoAcidStringFromSequenceString(
  sequenceString: string,
  options: { doNotExcludeAsterisk?: boolean } = {}
): string {
  const keepTrailingStop = options.doNotExcludeAsterisk;
  const perBase = getAminoAcidDataForEachBaseOfDna(
    sequenceString,
    true,
    null,
    false
  );
  const trailingCodonStart = perBase.length - 3;
  const residues: string[] = [];

  for (let i = 0; i < perBase.length; i++) {
    const aa: AminoAcidData = perBase[i];
    if (!aa.fullCodon) {
      continue;
    }
    // A stop codon that is the last codon of the sequence is left out by default.
    const isTrailingStop =
      i >= trailingCodonStart && aa.aminoAcid?.value === "*";
    if (isTrailingStop && !keepTrailingStop) {
      continue;
    }
    if (aa.aminoAcidIndex === null || !aa.aminoAcid) {
      continue;
    }
    // Each base of a codon writes the same slot, so a codon ends up as one letter.
    residues[aa.aminoAcidIndex] = aa.aminoAcid.value;
  }

  return residues.join("");
}
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
import { AminoAcidData } from "./types";
|
|
2
|
-
import { Range } from "@teselagen/range-utils";
|
|
3
|
-
|
|
4
|
-
/**
 * Works out the sequence range of the codon that an amino acid sliver belongs to.
 *
 * The sliver's place inside its codon is inferred by checking whether the one
 * or two entries before it in AARepresentationOfTranslation carry the same
 * aminoAcidIndex. The range is then anchored on aminoAcidPositionInSequence:
 *  - two matching predecessors: the sliver is the last base, range is [pos - 2, pos]
 *  - one matching predecessor: range is [pos - 1, pos + 1] for a full codon,
 *    [pos - 1, pos] otherwise
 *  - no matching predecessor: range is [pos, pos + 2] for a full codon,
 *    [pos, pos + 1] otherwise
 *
 * @param aminoAcidPositionInSequence Sequence position of the sliver.
 * @param aminoAcidSliver The per-base amino acid entry being examined.
 * @param AARepresentationOfTranslation The per-base entries of the translation.
 * @param relativeAAPositionInTranslation Index of the sliver within AARepresentationOfTranslation.
 * @returns The start/end of the codon in sequence coordinates.
 */
export default function getCodonRangeForAASliver(
  aminoAcidPositionInSequence: number,
  aminoAcidSliver: AminoAcidData,
  AARepresentationOfTranslation: AminoAcidData[],
  relativeAAPositionInTranslation: number
): Range {
  // True when the entry `stepsBack` places earlier exists and belongs to the same codon.
  const sharesCodonWithEntryBefore = (stepsBack: number): boolean => {
    const neighbour =
      AARepresentationOfTranslation[relativeAAPositionInTranslation - stepsBack];
    return (
      Boolean(neighbour) &&
      neighbour.aminoAcidIndex === aminoAcidSliver.aminoAcidIndex
    );
  };
  const isFullCodon = aminoAcidSliver.fullCodon === true;

  if (!sharesCodonWithEntryBefore(1)) {
    // The sliver opens its codon. A full codon extends two more slivers ahead.
    // NOTE(review): a partial codon always yields pos + 1 here, even when it
    // might consist of a single base. The previous code had an if/else for
    // this case whose branches both returned pos + 1, so that result is kept.
    return {
      start: aminoAcidPositionInSequence,
      end: aminoAcidPositionInSequence + (isFullCodon ? 2 : 1)
    };
  }

  if (sharesCodonWithEntryBefore(2)) {
    // The sliver closes its codon.
    return {
      start: aminoAcidPositionInSequence - 2,
      end: aminoAcidPositionInSequence
    };
  }

  // The sliver is the second base of its codon.
  return {
    start: aminoAcidPositionInSequence - 1,
    end: isFullCodon
      ? aminoAcidPositionInSequence + 1
      : aminoAcidPositionInSequence
  };
}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
|
|
2
|
-
|
|
3
|
-
/**
 * Translates sequenceString with getAminoAcidStringFromSequenceString (keeping
 * a trailing "*") and returns the resulting amino acid string reversed.
 *
 * NOTE(review): no complementing of bases happens in this function itself,
 * despite the name; confirm that is intended.
 *
 * @param sequenceString The DNA sequence to translate.
 * @returns The reversed amino acid string.
 */
export default function getComplementAminoAcidStringFromSequenceString(
  sequenceString: string
): string {
  const translated = getAminoAcidStringFromSequenceString(sequenceString, {
    doNotExcludeAsterisk: true
  });
  let reversed = "";
  for (let i = translated.length - 1; i >= 0; i--) {
    reversed += translated[i];
  }
  return reversed;
}
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import getComplementSequenceString from "./getComplementSequenceString";
|
|
2
|
-
import tidyUpSequenceData from "./tidyUpSequenceData";
|
|
3
|
-
|
|
4
|
-
import getSequenceDataBetweenRange from "./getSequenceDataBetweenRange";
|
|
5
|
-
import { SequenceData, Range } from "./types";
|
|
6
|
-
|
|
7
|
-
/**
 * Returns sequence data whose sequence string is the complement of the input's.
 *
 * The input is first cut down to options.range (when given) and tidied up with
 * the supplied options. Its sequence is then replaced by the complement string
 * and the result is tidied up again, this time with doNotRemoveInvalidChars
 * defaulting to true (the supplied options can override it).
 *
 * @param pSeqObj The sequence data to complement.
 * @param options range: optional range to restrict the data to; all options
 *   are also forwarded to tidyUpSequenceData.
 * @returns The tidied-up, complemented sequence data.
 */
export default function getComplementSequenceAndAnnotations(
  pSeqObj: SequenceData,
  options: { range?: Range; [key: string]: unknown } = {}
): SequenceData {
  const dataInRange = getSequenceDataBetweenRange(
    pSeqObj,
    options.range || null
  );
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const seqObj = tidyUpSequenceData(dataInRange, options as any);

  const complemented = {
    ...seqObj,
    sequence: getComplementSequenceString(seqObj.sequence, seqObj.isRna)
  };

  return tidyUpSequenceData(complemented, {
    doNotRemoveInvalidChars: true,
    ...options
  });
}
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import DNAComplementMap from "./DNAComplementMap";
|
|
2
|
-
import { merge } from "lodash-es";
|
|
3
|
-
|
|
4
|
-
/**
 * Returns the complement of a sequence string, character by character.
 *
 * Each character is looked up in DNAComplementMap, with the entries for "a"
 * and "A" set to "u"/"U" when isRna is truthy and "t"/"T" otherwise.
 * Characters without a mapping are copied through unchanged.
 *
 * @param sequence The sequence to complement.
 * @param isRna When truthy, adenine is complemented with uracil.
 * @returns The complemented string, or "" when sequence is not a string.
 */
export default function getComplementSequenceString(
  sequence: string,
  isRna?: boolean
): string {
  if (typeof sequence !== "string") return "";

  const adenineComplement = isRna ? { a: "u", A: "U" } : { a: "t", A: "T" };
  // Merge into a fresh object: lodash `merge` mutates its first argument, so
  // merging straight into DNAComplementMap rewrote the shared map for every
  // other consumer each time this function ran.
  const complementMap: Record<string, string> = merge(
    {},
    DNAComplementMap,
    adenineComplement
  );

  return sequence
    .split("")
    .map(base => complementMap[base] || base) // unknown chars pass through as-is
    .join("");
}
|
package/src/getCutsiteType.ts
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
import { RestrictionEnzyme } from "./types";
|
|
2
|
-
|
|
3
|
-
/**
 * Classifies the kind of end a restriction enzyme leaves, based on its top and
 * bottom snip offsets.
 *
 * @param restrictionEnzyme The enzyme whose topSnipOffset / bottomSnipOffset are compared.
 * @returns "blunt" when both offsets are strictly equal (including both being
 *   undefined), "5' overhang" when both are defined and the top offset is
 *   smaller, and "3' overhang" in every other case.
 */
export default function getCutsiteType(
  restrictionEnzyme: RestrictionEnzyme
): string {
  const top = restrictionEnzyme.topSnipOffset;
  const bottom = restrictionEnzyme.bottomSnipOffset;

  if (top === bottom) {
    return "blunt";
  }

  const topCutsFirst =
    top !== undefined && bottom !== undefined && top < bottom;
  return topCutsFirst ? "5' overhang" : "3' overhang";
}
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import { flatMap } from "lodash-es";
|
|
2
|
-
import cutSequenceByRestrictionEnzyme from "./cutSequenceByRestrictionEnzyme";
|
|
3
|
-
import { CutSite, RestrictionEnzyme } from "./types";
|
|
4
|
-
|
|
5
|
-
/**
 * Cuts the sequence with every enzyme in contextEnzymes and groups the
 * resulting cutsites by their name.
 *
 * @param sequence The sequence to cut.
 * @param circular Passed through to cutSequenceByRestrictionEnzyme.
 * @param contextEnzymes The enzymes to cut with.
 * @returns A map from cutsite name to the cutsites carrying that name, in the
 *   order they were produced. Cutsites without a name are grouped under "".
 */
export default function getCutsitesFromSequence(
  sequence: string,
  circular: boolean,
  contextEnzymes: RestrictionEnzyme[]
): Record<string, CutSite[]> {
  const allCutsites = flatMap(contextEnzymes, enzyme =>
    cutSequenceByRestrictionEnzyme(sequence, circular, enzyme)
  );

  return allCutsites.reduce<Record<string, CutSite[]>>((byName, cutsite) => {
    const key = cutsite.name || "";
    // Start a bucket the first time a name is seen, then append to it.
    byName[key] = byName[key] || [];
    byName[key].push(cutsite);
    return byName;
  }, {});
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import aminoAcidToDegenerateDnaMap from "./aminoAcidToDegenerateDnaMap";
|
|
2
|
-
|
|
3
|
-
/**
 * Converts an amino acid string into a degenerate DNA string.
 *
 * Every character is lower-cased and looked up in aminoAcidToDegenerateDnaMap;
 * characters without a (truthy) entry contribute "nnn".
 *
 * @param aaString The amino acid string.
 * @returns The concatenated lookup results, one per input character.
 */
export default function getDegenerateDnaStringFromAAString(
  aaString: string
): string {
  const codonByResidue = aminoAcidToDegenerateDnaMap as Record<string, string>;
  let degenerateDna = "";
  for (let i = 0; i < aaString.length; i++) {
    degenerateDna += codonByResidue[aaString[i].toLowerCase()] || "nnn";
  }
  return degenerateDna;
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import aminoAcidToDegenerateRnaMap from "./aminoAcidToDegenerateRnaMap";
|
|
2
|
-
|
|
3
|
-
/**
 * Converts an amino acid string into a degenerate RNA string.
 *
 * Every character is lower-cased and looked up in aminoAcidToDegenerateRnaMap;
 * characters without a (truthy) entry contribute "nnn".
 *
 * @param aaString The amino acid string.
 * @returns The concatenated lookup results, one per input character.
 */
export default function getDegenerateRnaStringFromAAString(
  aaString: string
): string {
  const codonByResidue = aminoAcidToDegenerateRnaMap as Record<string, string>;
  let degenerateRna = "";
  for (let i = 0; i < aaString.length; i++) {
    degenerateRna += codonByResidue[aaString[i].toLowerCase()] || "nnn";
  }
  return degenerateRna;
}
|