@teselagen/sequence-utils 0.3.38-beta.2 → 0.3.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DNAComplementMap.d.ts +1 -1
- package/addGapsToSeqReads.d.ts +3 -16
- package/adjustAnnotationsToInsert.d.ts +1 -2
- package/adjustBpsToReplaceOrInsert.d.ts +1 -2
- package/aliasedEnzymesByName.d.ts +1 -37
- package/aminoAcidToDegenerateDnaMap.d.ts +31 -1
- package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
- package/annotateSingleSeq.d.ts +4 -5
- package/annotationTypes.d.ts +2 -2
- package/autoAnnotate.d.ts +8 -17
- package/bioData.d.ts +58 -10
- package/calculateEndStability.d.ts +1 -1
- package/calculateNebTa.d.ts +1 -6
- package/calculateNebTm.d.ts +4 -6
- package/calculatePercentGC.d.ts +1 -1
- package/calculateSantaLuciaTm.d.ts +114 -28
- package/calculateTm.d.ts +1 -13
- package/computeDigestFragments.d.ts +24 -30
- package/condensePairwiseAlignmentDifferences.d.ts +1 -1
- package/convertAACaretPositionOrRangeToDna.d.ts +1 -2
- package/convertDnaCaretPositionOrRangeToAA.d.ts +1 -2
- package/cutSequenceByRestrictionEnzyme.d.ts +1 -2
- package/defaultEnzymesByName.d.ts +1 -2
- package/degenerateDnaToAminoAcidMap.d.ts +1 -1
- package/degenerateRnaToAminoAcidMap.d.ts +1 -1
- package/deleteSequenceDataAtRange.d.ts +1 -2
- package/diffUtils.d.ts +7 -9
- package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +1 -2
- package/featureTypesAndColors.d.ts +6 -19
- package/filterSequenceString.d.ts +10 -14
- package/findApproxMatches.d.ts +1 -7
- package/findNearestRangeOfSequenceOverlapToPosition.d.ts +1 -2
- package/findOrfsInPlasmid.d.ts +11 -2
- package/findSequenceMatches.d.ts +1 -11
- package/generateAnnotations.d.ts +1 -2
- package/generateSequenceData.d.ts +13 -8
- package/getAllInsertionsInSeqReads.d.ts +1 -11
- package/getAminoAcidDataForEachBaseOfDna.d.ts +5 -6
- package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
- package/getAminoAcidStringFromSequenceString.d.ts +1 -3
- package/getCodonRangeForAASliver.d.ts +4 -3
- package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getComplementSequenceAndAnnotations.d.ts +1 -5
- package/getComplementSequenceString.d.ts +1 -1
- package/getCutsiteType.d.ts +1 -2
- package/getCutsitesFromSequence.d.ts +1 -2
- package/getDegenerateDnaStringFromAAString.d.ts +1 -1
- package/getDegenerateRnaStringFromAAString.d.ts +1 -1
- package/getDigestFragmentsForCutsites.d.ts +1 -4
- package/getDigestFragmentsForRestrictionEnzymes.d.ts +1 -8
- package/getInsertBetweenVals.d.ts +1 -2
- package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +1 -2
- package/getOrfsFromSequence.d.ts +11 -17
- package/getOverlapBetweenTwoSequences.d.ts +1 -2
- package/getPossiblePartsFromSequenceAndEnzymes.d.ts +1 -18
- package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
- package/getReverseComplementAnnotation.d.ts +1 -11
- package/getReverseComplementSequenceAndAnnotations.d.ts +1 -5
- package/getReverseComplementSequenceString.d.ts +1 -1
- package/getReverseSequenceString.d.ts +1 -1
- package/getSequenceDataBetweenRange.d.ts +1 -9
- package/getVirtualDigest.d.ts +10 -11
- package/guessIfSequenceIsDnaAndNotProtein.d.ts +1 -5
- package/index.cjs +491 -728
- package/index.d.ts +5 -8
- package/index.js +491 -728
- package/index.umd.cjs +491 -728
- package/insertGapsIntoRefSeq.d.ts +1 -2
- package/insertSequenceDataAtPositionOrRange.d.ts +1 -10
- package/isEnzymeType2S.d.ts +1 -2
- package/mapAnnotationsToRows.d.ts +1 -9
- package/package.json +9 -12
- package/prepareCircularViewData.d.ts +1 -2
- package/prepareRowData.d.ts +3 -7
- package/proteinAlphabet.d.ts +1 -1
- package/rotateBpsToPosition.d.ts +1 -1
- package/rotateSequenceDataToPosition.d.ts +1 -3
- package/shiftAnnotationsByLen.d.ts +3 -4
- package/src/autoAnnotate.test.js +1 -0
- package/src/getSequenceDataBetweenRange.js +11 -2
- package/src/getSequenceDataBetweenRange.test.js +42 -0
- package/src/prepareRowData_output1.json +0 -1
- package/threeLetterSequenceStringToAminoAcidMap.d.ts +921 -11
- package/tidyUpAnnotation.d.ts +11 -13
- package/tidyUpSequenceData.d.ts +1 -15
- package/src/DNAComplementMap.ts +0 -32
- package/src/addGapsToSeqReads.ts +0 -436
- package/src/adjustAnnotationsToInsert.ts +0 -20
- package/src/adjustBpsToReplaceOrInsert.ts +0 -73
- package/src/aliasedEnzymesByName.ts +0 -7366
- package/src/aminoAcidToDegenerateDnaMap.ts +0 -32
- package/src/aminoAcidToDegenerateRnaMap.ts +0 -32
- package/src/annotateSingleSeq.ts +0 -37
- package/src/annotationTypes.ts +0 -23
- package/src/autoAnnotate.ts +0 -290
- package/src/bioData.ts +0 -65
- package/src/calculateEndStability.ts +0 -91
- package/src/calculateNebTa.ts +0 -46
- package/src/calculateNebTm.ts +0 -132
- package/src/calculatePercentGC.ts +0 -3
- package/src/calculateSantaLuciaTm.ts +0 -184
- package/src/calculateTm.ts +0 -242
- package/src/computeDigestFragments.ts +0 -238
- package/src/condensePairwiseAlignmentDifferences.ts +0 -85
- package/src/convertAACaretPositionOrRangeToDna.ts +0 -28
- package/src/convertDnaCaretPositionOrRangeToAA.ts +0 -28
- package/src/cutSequenceByRestrictionEnzyme.ts +0 -345
- package/src/defaultEnzymesByName.ts +0 -280
- package/src/degenerateDnaToAminoAcidMap.ts +0 -5
- package/src/degenerateRnaToAminoAcidMap.ts +0 -5
- package/src/deleteSequenceDataAtRange.ts +0 -13
- package/src/diffUtils.ts +0 -80
- package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +0 -16
- package/src/featureTypesAndColors.ts +0 -167
- package/src/filterSequenceString.ts +0 -153
- package/src/findApproxMatches.ts +0 -58
- package/src/findNearestRangeOfSequenceOverlapToPosition.ts +0 -43
- package/src/findOrfsInPlasmid.ts +0 -31
- package/src/findSequenceMatches.ts +0 -154
- package/src/generateAnnotations.ts +0 -39
- package/src/generateSequenceData.ts +0 -212
- package/src/getAllInsertionsInSeqReads.ts +0 -100
- package/src/getAminoAcidDataForEachBaseOfDna.ts +0 -305
- package/src/getAminoAcidFromSequenceTriplet.ts +0 -27
- package/src/getAminoAcidStringFromSequenceString.ts +0 -36
- package/src/getCodonRangeForAASliver.ts +0 -73
- package/src/getComplementAminoAcidStringFromSequenceString.ts +0 -10
- package/src/getComplementSequenceAndAnnotations.ts +0 -25
- package/src/getComplementSequenceString.ts +0 -23
- package/src/getCutsiteType.ts +0 -18
- package/src/getCutsitesFromSequence.ts +0 -22
- package/src/getDegenerateDnaStringFromAAString.ts +0 -15
- package/src/getDegenerateRnaStringFromAAString.ts +0 -15
- package/src/getDigestFragmentsForCutsites.ts +0 -126
- package/src/getDigestFragmentsForRestrictionEnzymes.ts +0 -50
- package/src/getInsertBetweenVals.ts +0 -31
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +0 -40
- package/src/getMassOfAaString.ts +0 -29
- package/src/getOrfsFromSequence.ts +0 -132
- package/src/getOverlapBetweenTwoSequences.ts +0 -30
- package/src/getPossiblePartsFromSequenceAndEnzymes.ts +0 -149
- package/src/getReverseAminoAcidStringFromSequenceString.ts +0 -22
- package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +0 -10
- package/src/getReverseComplementAnnotation.ts +0 -33
- package/src/getReverseComplementSequenceAndAnnotations.ts +0 -46
- package/src/getReverseComplementSequenceString.ts +0 -18
- package/src/getReverseSequenceString.ts +0 -12
- package/src/getSequenceDataBetweenRange.ts +0 -154
- package/src/getVirtualDigest.ts +0 -139
- package/src/guessIfSequenceIsDnaAndNotProtein.ts +0 -39
- package/src/index.test.ts +0 -43
- package/src/index.ts +0 -111
- package/src/insertGapsIntoRefSeq.ts +0 -43
- package/src/insertSequenceDataAtPosition.ts +0 -2
- package/src/insertSequenceDataAtPositionOrRange.ts +0 -328
- package/src/isEnzymeType2S.ts +0 -5
- package/src/mapAnnotationsToRows.ts +0 -256
- package/src/prepareCircularViewData.ts +0 -24
- package/src/prepareRowData.ts +0 -61
- package/src/proteinAlphabet.ts +0 -271
- package/src/rotateBpsToPosition.ts +0 -12
- package/src/rotateSequenceDataToPosition.ts +0 -54
- package/src/shiftAnnotationsByLen.ts +0 -24
- package/src/threeLetterSequenceStringToAminoAcidMap.ts +0 -198
- package/src/tidyUpAnnotation.ts +0 -205
- package/src/tidyUpSequenceData.ts +0 -213
- package/src/types.ts +0 -109
- package/types.d.ts +0 -105
|
@@ -1,154 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
modulateRangeBySequenceLength,
|
|
3
|
-
flipContainedRange,
|
|
4
|
-
Range
|
|
5
|
-
} from "@teselagen/range-utils";
|
|
6
|
-
import { reduce, uniqBy } from "lodash-es";
|
|
7
|
-
import escapeStringRegexp from "escape-string-regexp";
|
|
8
|
-
import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
|
|
9
|
-
import { ambiguous_dna_values, extended_protein_values } from "./bioData";
|
|
10
|
-
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
11
|
-
|
|
12
|
-
/**
 * Flags that tune how findSequenceMatches searches a sequence.
 * Every flag is optional and is treated as "off" when omitted.
 */
export interface FindSequenceMatchesOptions {
  /** Also search the reverse complement; those hits are tagged `bottomStrand: true`. */
  searchReverseStrand?: boolean;
  /** Search the sequence concatenated with itself so matches may span the origin. */
  isCircular?: boolean;
  /** Expand each search character through the ambiguity tables before matching. */
  isAmbiguous?: boolean;
  /** With `isAmbiguous`, use the protein ambiguity table instead of the DNA one. */
  isProteinSequence?: boolean;
  /**
   * Translate the sequence in three reading frames and match against the
   * translations; reported ranges are converted back to base-pair coordinates.
   * With `isAmbiguous`, this also selects the protein ambiguity table.
   */
  isProteinSearch?: boolean;
}
|
|
19
|
-
|
|
20
|
-
/**
 * Locates every occurrence of `searchString` within `sequence`.
 *
 * The top strand is always searched. When `options.searchReverseStrand` is
 * set, the reverse complement is searched as well and each of those hits is
 * flipped back into top-strand coordinates and marked `bottomStrand: true`.
 *
 * @param sequence - the sequence to search in
 * @param searchString - the text to look for
 * @param options - see FindSequenceMatchesOptions
 * @returns top-strand matches followed by (optionally) bottom-strand matches
 */
export default function findSequenceMatches(
  sequence: string,
  searchString: string,
  options: FindSequenceMatchesOptions = {}
): (Range & { bottomStrand?: boolean })[] {
  const topStrandMatches = findSequenceMatchesTopStrand(
    sequence,
    searchString,
    options
  );
  if (!options.searchReverseStrand) {
    return topStrandMatches;
  }

  const sequenceLength = sequence.length;
  const bottomStrandMatches = findSequenceMatchesTopStrand(
    getReverseComplementSequenceString(sequence),
    searchString,
    options
  ).map(range => {
    // Hits were found on the reverse complement, so mirror them across the
    // whole sequence to express them in top-strand coordinates.
    const flipped = flipContainedRange(
      range,
      { start: 0, end: sequenceLength - 1 },
      sequenceLength
    );
    return { ...flipped, bottomStrand: true };
  });

  return [...topStrandMatches, ...bottomStrandMatches];
}
|
|
50
|
-
|
|
51
|
-
/**
 * Finds all (possibly overlapping) matches of `searchString` on the given
 * strand of `sequence`.
 *
 * - `isAmbiguous`: the escaped search string is expanded into a regex of
 *   character classes (protein table when `isProteinSearch` or
 *   `isProteinSequence` is set, DNA table otherwise).
 * - `isCircular`: the sequence is doubled so matches can wrap the origin;
 *   ranges are then normalized back onto the original length and de-duplicated.
 * - `isProteinSearch`: the DNA is translated in all three reading frames and
 *   matched amino-acid positions are converted back to base-pair positions.
 *
 * @param sequence - the sequence to search in
 * @param searchString - the text to look for
 * @param options - see FindSequenceMatchesOptions
 * @returns unique ranges (keyed by start/end) of every match
 */
function findSequenceMatchesTopStrand(
  sequence: string,
  searchString: string,
  options: FindSequenceMatchesOptions = {}
): Range[] {
  const { isCircular, isAmbiguous, isProteinSequence, isProteinSearch } =
    options;
  let searchStringToUse = escapeStringRegexp(searchString);
  if (isAmbiguous) {
    // Protein table when either protein flag is set, otherwise we're searching DNA.
    searchStringToUse = convertAmbiguousStringToRegex(
      searchStringToUse,
      Boolean(isProteinSearch || isProteinSequence)
    );
  }
  if (!searchStringToUse) return []; //short circuit if nothing is actually being searched for (eg searching for "%%")

  const sequenceToUse = isCircular ? sequence + sequence : sequence;

  // For a protein search, translate each of the three reading frames; the
  // offset records how many bases were skipped before translation started.
  const sequencesToCheck = isProteinSearch
    ? [0, 1, 2].map(offset => ({
        seqToCheck: getAminoAcidStringFromSequenceString(
          sequenceToUse.slice(offset),
          { doNotExcludeAsterisk: true }
        ),
        offset
      }))
    : [{ seqToCheck: sequenceToUse, offset: 0 }];

  const ranges: Range[] = [];
  sequencesToCheck.forEach(({ seqToCheck, offset }) => {
    const reg = new RegExp(searchStringToUse, "ig");
    let match: RegExpExecArray | null;
    /* eslint-disable no-cond-assign*/
    while ((match = reg.exec(seqToCheck)) !== null) {
      // Use the length of the text that actually matched. The raw search
      // string can be longer than the match when ambiguous mode dropped
      // characters that have no entry in the ambiguity table, and the regex
      // source can be longer because of escapes and character classes.
      const range: Range = {
        start: match.index,
        end: match.index + match[0].length - 1
      };
      if (isProteinSearch) {
        // amino-acid index -> base-pair index within the original sequence
        range.start = range.start * 3 + offset;
        range.end = range.end * 3 + 2 + offset;
      }
      ranges.push(modulateRangeBySequenceLength(range, sequence.length));
      // Step forward by one so overlapping matches are also reported.
      reg.lastIndex = match.index + 1;
    }
    /* eslint-enable no-cond-assign*/
  });

  // Doubling a circular sequence reports most matches twice; keep one of each.
  return uniqBy(ranges, e => {
    return e.start + "-" + e.end;
  });
}
|
|
131
|
-
|
|
132
|
-
/**
 * Builds a regex source string from a search string by expanding each
 * character through an ambiguity table (e.g. N = A or T or C or G,
 * R = A or G etc.).
 *
 * Characters are looked up upper-cased. A character with no table entry is
 * dropped; a one-character expansion is appended as is; a longer expansion is
 * wrapped in a character class.
 *
 * @param string - the search string to expand
 * @param isProtein - use the extended protein table instead of the DNA table
 * @returns the regex source (may be empty when no character is recognised)
 */
function convertAmbiguousStringToRegex(string: string, isProtein: boolean) {
  let pattern = "";
  for (let position = 0; position < string.length; position++) {
    const key = string[position].toUpperCase();
    const expansion = isProtein
      ? (extended_protein_values as Record<string, string>)[key]
      : (ambiguous_dna_values as Record<string, string>)[key];
    if (!expansion) {
      continue;
    }
    pattern += expansion.length === 1 ? expansion : `[${expansion}]`;
  }
  return pattern;
}
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
import { generateRandomRange } from "@teselagen/range-utils";
|
|
2
|
-
import shortid from "shortid";
|
|
3
|
-
import { Annotation } from "./types";
|
|
4
|
-
|
|
5
|
-
/**
 * Produces a list of randomly generated annotations.
 *
 * @param numberOfAnnotationsToGenerate - how many annotations to create
 * @param start - forwarded to generateAnnotation as the lower bound of the random range
 * @param end - forwarded to generateAnnotation as the upper bound of the random range
 * @param maxLength - forwarded to generateAnnotation as the maximum range length
 * @returns the generated annotations, in creation order
 */
function generateAnnotations(
  numberOfAnnotationsToGenerate: number,
  start: number,
  end: number,
  maxLength: number
): Annotation[] {
  const annotations: Annotation[] = [];
  while (annotations.length < numberOfAnnotationsToGenerate) {
    annotations.push(generateAnnotation(start, end, maxLength));
  }
  return annotations;
}
|
|
18
|
-
|
|
19
|
-
/**
 * Creates a single random "misc_feature" annotation.
 *
 * The range comes from generateRandomRange(start, end, maxLength); the name is
 * a random integer rendered as text, the id comes from shortid, and the
 * direction is a coin flip.
 *
 * @param start - passed through to generateRandomRange
 * @param end - passed through to generateRandomRange
 * @param maxLength - passed through to generateRandomRange
 * @returns the new annotation
 */
function generateAnnotation(
  start: number,
  end: number,
  maxLength: number
): Annotation {
  // Keep this evaluation order: range, name, id, direction.
  const randomRange = generateRandomRange(start, end, maxLength);
  const name = getRandomInt(0, 100000).toString();
  const id = shortid();
  const forward = Math.random() > 0.5;
  return {
    ...randomRange,
    name,
    type: "misc_feature",
    id,
    forward,
    notes: {}
  };
}
|
|
34
|
-
|
|
35
|
-
/**
 * Returns a random integer in the half-open interval [min, max)
 * (for integer bounds with min < max).
 *
 * @param min - inclusive lower bound
 * @param max - exclusive upper bound
 */
function getRandomInt(min: number, max: number): number {
  const span = max - min;
  const steps = Math.floor(Math.random() * span);
  return steps + min;
}
|
|
38
|
-
|
|
39
|
-
export default generateAnnotations;
|
|
@@ -1,212 +0,0 @@
|
|
|
1
|
-
import generateAnnotations from "./generateAnnotations";
|
|
2
|
-
import { SequenceData } from "./types";
|
|
3
|
-
|
|
4
|
-
/**
 * Generates random sequence data for demos and tests.
 *
 * For a protein, `proteinSequence` is filled and `sequence` is empty, the
 * result is never circular, and no translations or primers are generated.
 * For DNA, `sequence` is filled and circularity is a coin flip.
 *
 * The annotation counts fall back to their defaults only when omitted
 * (`undefined`/`null`); an explicit `0` yields an empty list.
 *
 * @param isProtein - generate a protein instead of DNA
 * @param sequenceLength - number of residues/bases to generate (default 1000)
 * @param numFeatures - number of features (default 10)
 * @param numParts - number of parts (default 10)
 * @param numPrimers - number of primers, DNA only (default 10)
 * @param numTranslations - number of translations, DNA only (default 5)
 * @returns the generated sequence data
 */
export default function generateSequenceData({
  isProtein,
  sequenceLength = 1000,
  numFeatures,
  numParts,
  numPrimers,
  numTranslations
}: {
  isProtein?: boolean;
  sequenceLength?: number;
  numFeatures?: number;
  numParts?: number;
  numPrimers?: number;
  numTranslations?: number;
} = {}): SequenceData {
  const proteinSequence = isProtein
    ? generateSequence(sequenceLength, true)
    : "";
  const sequence = !isProtein ? generateSequence(sequenceLength) : "";

  // Annotations are placed anywhere on the sequence and, except for primers,
  // are capped at a third of its length.
  const lastIndex = sequenceLength - 1;
  const maxAnnotationLength = sequenceLength / 3;

  return {
    circular: isProtein ? false : Math.random() > 0.5,
    name: "p-" + Math.floor(Math.random() * 100),
    description: "",
    isProtein: !!isProtein,
    sequence,
    proteinSequence,
    // `??` rather than `||` so callers can explicitly ask for zero annotations.
    translations: isProtein
      ? []
      : generateAnnotations(
          numTranslations ?? 5,
          0,
          lastIndex,
          maxAnnotationLength
        ),
    features: generateAnnotations(
      numFeatures ?? 10,
      0,
      lastIndex,
      maxAnnotationLength
    ),
    primers: isProtein
      ? []
      : generateAnnotations(numPrimers ?? 10, 0, lastIndex, 50),
    parts: generateAnnotations(
      numParts ?? 10,
      0,
      lastIndex,
      maxAnnotationLength
    )
  };
}
|
|
56
|
-
|
|
57
|
-
// export default tidyUpSequenceData(exampleData)
|
|
58
|
-
|
|
59
|
-
/**
 * Builds a random sequence string of `m` characters.
 *
 * @param m - number of characters to generate (default 9)
 * @param isProtein - draw from the 20 standard amino-acid letters instead of
 *   the lower-case DNA bases "gatc"
 * @returns the random sequence
 */
function generateSequence(m = 9, isProtein?: boolean): string {
  const alphabet = isProtein ? "ACDEFGHIKLMNPQRSTVWY" : "gatc";
  const picked: string[] = [];
  for (let count = 0; count < m; count += 1) {
    const index = Math.floor(Math.random() * alphabet.length);
    picked.push(alphabet.charAt(index));
  }
  return picked.join("");
}
|
|
67
|
-
|
|
68
|
-
// tnr: this is used to generate a very large, multi-featured sequence
|
|
69
|
-
// var string = "ggggcccccgggggccc";
|
|
70
|
-
// var reallyLongFakeSequence = "";
|
|
71
|
-
// for (var i = 1; i < 100000; i++) {
|
|
72
|
-
// reallyLongFakeSequence += string;
|
|
73
|
-
// if (i % 100 === 0) {
|
|
74
|
-
// reallyLongFakeSequence += 'taafatg';
|
|
75
|
-
// sequenceData.features.push({
|
|
76
|
-
// id: i,
|
|
77
|
-
// start: parseInt(i * 10),
|
|
78
|
-
// end: parseInt(i * 10 + 100),
|
|
79
|
-
// name: 'cooljim',
|
|
80
|
-
// color: 'green',
|
|
81
|
-
// forward: true,
|
|
82
|
-
// annotationType: "feature"
|
|
83
|
-
// });
|
|
84
|
-
// }
|
|
85
|
-
// }
|
|
86
|
-
// sequenceData.sequence += reallyLongFakeSequence;
|
|
87
|
-
//
|
|
88
|
-
// export default function() {
|
|
89
|
-
// var baseSeqData = {
|
|
90
|
-
//
|
|
91
|
-
// }
|
|
92
|
-
// function seqGen() {
|
|
93
|
-
//
|
|
94
|
-
// }
|
|
95
|
-
// }
|
|
96
|
-
// "features" : [
|
|
97
|
-
// {
|
|
98
|
-
// "name" : "1",
|
|
99
|
-
// "type" : "misc_feature",
|
|
100
|
-
// "start" : 1,
|
|
101
|
-
// "end" : 1,
|
|
102
|
-
// "strand" : 1,
|
|
103
|
-
// "notes" : [],
|
|
104
|
-
// "color": 'blue'
|
|
105
|
-
// },
|
|
106
|
-
// {
|
|
107
|
-
// "name" : "2",
|
|
108
|
-
// "type" : "misc_feature",
|
|
109
|
-
// "start" : 1,
|
|
110
|
-
// "end" : 1,
|
|
111
|
-
// "strand" : 1,
|
|
112
|
-
// "notes" : [],
|
|
113
|
-
// "color": 'blue'
|
|
114
|
-
// },
|
|
115
|
-
// {
|
|
116
|
-
// "name" : "3",
|
|
117
|
-
// "type" : "misc_feature",
|
|
118
|
-
// "start" : 1,
|
|
119
|
-
// "end" : 1,
|
|
120
|
-
// "strand" : 1,
|
|
121
|
-
// "notes" : [],
|
|
122
|
-
// "color": 'blue'
|
|
123
|
-
// },
|
|
124
|
-
// {
|
|
125
|
-
// "name" : "4",
|
|
126
|
-
// "type" : "misc_feature",
|
|
127
|
-
// "start" : 1,
|
|
128
|
-
// "end" : 14,
|
|
129
|
-
// "strand" : 1,
|
|
130
|
-
// "notes" : [],
|
|
131
|
-
// "color": 'blue'
|
|
132
|
-
// },
|
|
133
|
-
// {
|
|
134
|
-
// "name" : "5",
|
|
135
|
-
// "type" : "misc_feature",
|
|
136
|
-
// "start" : 1,
|
|
137
|
-
// "end" : 1,
|
|
138
|
-
// "strand" : 1,
|
|
139
|
-
// "notes" : [],
|
|
140
|
-
// "color": 'blue'
|
|
141
|
-
// },
|
|
142
|
-
// {
|
|
143
|
-
// "name" : "6",
|
|
144
|
-
// "type" : "misc_feature",
|
|
145
|
-
// "id" : "5590c1978fafgw979df000a4f02c7a",
|
|
146
|
-
// "start" : 4,
|
|
147
|
-
// "end" : 6,
|
|
148
|
-
// "strand" : 1,
|
|
149
|
-
// "notes" : [],
|
|
150
|
-
// "color": 'orange'
|
|
151
|
-
// },
|
|
152
|
-
// {
|
|
153
|
-
// "name" : "housemouserousepouse",
|
|
154
|
-
// "type" : "misc_feature",
|
|
155
|
-
// "id" : "5590c197897fs9df000a4f02c7a",
|
|
156
|
-
// "start" : 4,
|
|
157
|
-
// "end" : 6,
|
|
158
|
-
// "strand" : 1,
|
|
159
|
-
// "notes" : [],
|
|
160
|
-
// "color": 'orange'
|
|
161
|
-
// },
|
|
162
|
-
// {
|
|
163
|
-
// "name" : "housemouserousepouse",
|
|
164
|
-
// "type" : "misc_feature",
|
|
165
|
-
// "id" : "5590c1978979dasdfaf000a4f02c7a",
|
|
166
|
-
// "start" : 4,
|
|
167
|
-
// "end" : 6,
|
|
168
|
-
// "strand" : 1,
|
|
169
|
-
// "notes" : [],
|
|
170
|
-
// "color": 'orange'
|
|
171
|
-
// },
|
|
172
|
-
// {
|
|
173
|
-
// "name" : "housemouserousepouse",
|
|
174
|
-
// "type" : "misc_feature",
|
|
175
|
-
// "id" : "5590c197faas8979df000a4f02c7a",
|
|
176
|
-
// "start" : 4,
|
|
177
|
-
// "end" : 6,
|
|
178
|
-
// "strand" : 1,
|
|
179
|
-
// "notes" : [],
|
|
180
|
-
// "color": 'orange'
|
|
181
|
-
// },
|
|
182
|
-
// {
|
|
183
|
-
// "name" : "housemouserousepouse",
|
|
184
|
-
// "type" : "misc_feature",
|
|
185
|
-
// "id" : "5590c1978979df000a4f02c7aasd",
|
|
186
|
-
// "start" : 4,
|
|
187
|
-
// "end" : 6,
|
|
188
|
-
// "strand" : 1,
|
|
189
|
-
// "notes" : [],
|
|
190
|
-
// "color": 'orange'
|
|
191
|
-
// },
|
|
192
|
-
// {
|
|
193
|
-
// "name" : "house",
|
|
194
|
-
// "type" : "misc_feature",
|
|
195
|
-
// "id" : "5590c1978979df000a4f02c7b",
|
|
196
|
-
// "start" : 70,
|
|
197
|
-
// "end" : 90,
|
|
198
|
-
// "strand" : 1,
|
|
199
|
-
// "notes" : [],
|
|
200
|
-
// "color": 'green'
|
|
201
|
-
// },
|
|
202
|
-
// {
|
|
203
|
-
// "name" : "weer",
|
|
204
|
-
// "type" : "misc_feature",
|
|
205
|
-
// "id" : "5590c1d88979df000a4f02f5c",
|
|
206
|
-
// "start" : 3,
|
|
207
|
-
// "end" : 69,
|
|
208
|
-
// "strand" : 1,
|
|
209
|
-
// "notes" : [],
|
|
210
|
-
// "color": 'red'
|
|
211
|
-
// }
|
|
212
|
-
// ],
|
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
// seqReads should be an array of objects [{name, seq, pos, cigar}, {name, seq, pos, cigar}, ...]
|
|
2
|
-
|
|
3
|
-
/** One aligned sequencing read, as consumed by getAllInsertionsInSeqReads. */
export interface SeqRead {
  /** Read name. */
  name: string;
  /** Read sequence. */
  seq: string;
  /** Position the read's alignment starts from; insertion positions are offset from it. */
  pos: number;
  /** CIGAR string; its M, D and I components (e.g. "2M3I39M3D") are parsed. */
  cigar: string;
}
|
|
9
|
-
|
|
10
|
-
/** An insertion found in a read's CIGAR string. */
export interface InsertionInfo {
  /** Base-pair position of the insertion: the read's `pos` plus the lengths of the preceding non-insertion components. */
  bpPos: number;
  /** Count taken from the CIGAR "I" component. */
  number: number;
}
|
|
14
|
-
|
|
15
|
-
/**
 * Collects the insertions described by the CIGAR strings of a set of reads.
 *
 * Each CIGAR string is split into its M / D / I components (e.g.
 * ["2M", "3I", "39M", "3D"]). For every "I" component an entry is recorded
 * whose `bpPos` is the read's `pos` plus the lengths of all preceding M and D
 * components (earlier insertions do not advance the position) and whose
 * `number` is the insertion length. Reads whose CIGAR has no such component
 * are skipped.
 *
 * @param seqReads - array of reads [{name, seq, pos, cigar}, ...]
 * @returns insertions sorted by ascending `bpPos`, one per distinct position,
 *   keeping the largest `number` seen at that position
 */
export default function getAllInsertionsInSeqReads(
  seqReads: SeqRead[]
): InsertionInfo[] {
  const collected: InsertionInfo[] = [];

  seqReads.forEach(read => {
    const components = read.cigar.match(/([0-9]*[MDI])/g);
    if (components === null) return;

    // keeping bpPos in the same base as read.pos; advanced by every M/D component
    let position = read.pos;
    components.forEach(component => {
      const count = Number(component.slice(0, -1));
      if (component.slice(-1) === "I") {
        collected.push({ bpPos: position, number: count });
      } else {
        position += count;
      }
    });
  });

  // sort insertions by ascending bp pos
  collected.sort((first, second) => first.bpPos - second.bpPos);

  // combine duplicate insertions: neighbours in the sorted list that share a
  // position collapse to the one with the most inserted bases
  const combined: InsertionInfo[] = [];
  for (const insertion of collected) {
    const lastIndex = combined.length - 1;
    if (lastIndex >= 0 && combined[lastIndex].bpPos === insertion.bpPos) {
      if (insertion.number >= combined[lastIndex].number) {
        combined[lastIndex] = insertion;
      }
    } else {
      combined.push(insertion);
    }
  }

  // array of objects [{bpPos: bp pos of insertion, number: # of insertions}, ...]
  return combined;
}
|
|
73
|
-
|
|
74
|
-
// function getAllInsertionsInSeqReads(seqReads) {
|
|
75
|
-
// let allInsertionBpPosInSeqReads = [];
|
|
76
|
-
// seqReads.forEach(seqRead => {
|
|
77
|
-
// // split cigar string at M, D, or I (match, deletion, or insertion)
|
|
78
|
-
// // ["2M", "3I", "39M", "3D"...]
|
|
79
|
-
// const splitSeqRead = seqRead.cigar.match(/([0-9]*[MDI])/g)
|
|
80
|
-
// splitSeqRead.forEach(component => {
|
|
81
|
-
// // keeping bpPos 1-based
|
|
82
|
-
// let bpPosOfInsertion = seqRead.pos;
|
|
83
|
-
// if (component.slice(-1) === "I") {
|
|
84
|
-
// const numberOfInsertions = Number(component.slice(0, -1));
|
|
85
|
-
// const componentIndex = splitSeqRead.indexOf(component);
|
|
86
|
-
// for (let i = 0; i < componentIndex; i++) {
|
|
87
|
-
// const previousComponentNumber = Number(splitSeqRead[i].slice(0, -1));
|
|
88
|
-
// bpPosOfInsertion += previousComponentNumber;
|
|
89
|
-
// }
|
|
90
|
-
// for (let i = 1; i <= numberOfInsertions; i++) {
|
|
91
|
-
// allInsertionBpPosInSeqReads.push(bpPosOfInsertion - i);
|
|
92
|
-
// }
|
|
93
|
-
// }
|
|
94
|
-
// });
|
|
95
|
-
// });
|
|
96
|
-
// // allInsertionBpPosInSeqReads should be an array of bp pos [6, 15, 9, 2, 23...]
|
|
97
|
-
// // remove duplicates, organize in ascending order
|
|
98
|
-
// const uniqueInsertionBpPos = [...new Set(allInsertionBpPosInSeqReads)].sort(function(a, b) { return a - b });
|
|
99
|
-
// return uniqueInsertionBpPos;
|
|
100
|
-
// }
|