@teselagen/sequence-utils 0.1.22 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12030 -26126
- package/index.mjs +12119 -26124
- package/index.umd.js +24056 -38154
- package/package.json +4 -3
- package/src/DNAComplementMap.js +32 -0
- package/src/addGapsToSeqReads.js +417 -0
- package/src/addGapsToSeqReads.test.js +358 -0
- package/src/adjustAnnotationsToInsert.js +19 -0
- package/src/adjustBpsToReplaceOrInsert.js +50 -0
- package/src/adjustBpsToReplaceOrInsert.test.js +59 -0
- package/src/aliasedEnzymesByName.js +7363 -0
- package/src/aminoAcidToDegenerateDnaMap.js +32 -0
- package/src/aminoAcidToDegenerateRnaMap.js +32 -0
- package/src/aminoAcidToDnaRna.test.js +27 -0
- package/src/annotateSingleSeq.js +29 -0
- package/src/annotateSingleSeq.test.js +64 -0
- package/src/annotationTypes.js +23 -0
- package/src/autoAnnotate.js +242 -0
- package/src/autoAnnotate.test.js +1039 -0
- package/src/bioData.js +431 -0
- package/src/calculateNebTa.js +34 -0
- package/src/calculateNebTa.test.js +57 -0
- package/src/calculateNebTm.js +127 -0
- package/src/calculateNebTm.test.js +32 -0
- package/src/calculatePercentGC.js +3 -0
- package/src/calculatePercentGC.test.js +14 -0
- package/src/calculateTm.js +297 -0
- package/src/calculateTm.test.js +7 -0
- package/src/computeDigestFragments.js +179 -0
- package/src/computeDigestFragments.test.js +73 -0
- package/src/condensePairwiseAlignmentDifferences.js +85 -0
- package/src/condensePairwiseAlignmentDifferences.test.js +66 -0
- package/src/convertAACaretPositionOrRangeToDna.js +24 -0
- package/src/convertAACaretPositionOrRangeToDna.test.js +34 -0
- package/src/convertDnaCaretPositionOrRangeToAA.js +24 -0
- package/src/convertDnaCaretPositionOrRangeToAA.test.js +37 -0
- package/src/cutSequenceByRestrictionEnzyme.js +301 -0
- package/src/cutSequenceByRestrictionEnzyme.test.js +296 -0
- package/src/defaultEnzymesByName.js +278 -0
- package/src/degenerateDnaToAminoAcidMap.js +5 -0
- package/src/degenerateRnaToAminoAcidMap.js +5 -0
- package/src/deleteSequenceDataAtRange.js +5 -0
- package/src/deleteSequenceDataAtRange.test.js +146 -0
- package/src/diffUtils.js +64 -0
- package/src/diffUtils.test.js +74 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.js +10 -0
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +41 -0
- package/src/featureTypesAndColors.js +152 -0
- package/src/featureTypesAndColors.test.js +52 -0
- package/src/filterAminoAcidSequenceString.js +13 -0
- package/src/filterAminoAcidSequenceString.test.js +22 -0
- package/src/filterSequenceString.js +22 -0
- package/src/filterSequenceString.test.js +13 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.js +39 -0
- package/src/findNearestRangeOfSequenceOverlapToPosition.test.js +31 -0
- package/src/findOrfsInPlasmid.js +26 -0
- package/src/findSequenceMatches.js +133 -0
- package/src/findSequenceMatches.test.js +286 -0
- package/src/generateAnnotations.js +34 -0
- package/src/generateSequenceData.js +206 -0
- package/src/generateSequenceData.test.js +22 -0
- package/src/getAllInsertionsInSeqReads.js +83 -0
- package/src/getAllInsertionsInSeqReads.test.js +26 -0
- package/src/getAminoAcidDataForEachBaseOfDna.js +163 -0
- package/src/getAminoAcidDataForEachBaseOfDna.test.js +424 -0
- package/src/getAminoAcidFromSequenceTriplet.js +22 -0
- package/src/getAminoAcidStringFromSequenceString.js +18 -0
- package/src/getAminoAcidStringFromSequenceString.test.js +18 -0
- package/src/getCodonRangeForAASliver.js +63 -0
- package/src/getComplementAminoAcidStringFromSequenceString.js +11 -0
- package/src/getComplementSequenceAndAnnotations.js +20 -0
- package/src/getComplementSequenceString.js +19 -0
- package/src/getComplementSequenceString.test.js +13 -0
- package/src/getCutsiteType.js +10 -0
- package/src/getCutsitesFromSequence.js +17 -0
- package/src/getDegenerateDnaStringFromAAString.js +8 -0
- package/src/getDegenerateRnaStringFromAAString.js +8 -0
- package/src/getDigestFragmentsForCutsites.js +105 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.js +27 -0
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +228 -0
- package/src/getInsertBetweenVals.js +28 -0
- package/src/getInsertBetweenVals.test.js +33 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.js +39 -0
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +80 -0
- package/src/getMassOfAaString.js +24 -0
- package/src/getMassofAaString.test.js +18 -0
- package/src/getOrfsFromSequence.js +124 -0
- package/src/getOrfsFromSequence.test.js +210 -0
- package/src/getOverlapBetweenTwoSequences.js +30 -0
- package/src/getOverlapBetweenTwoSequences.test.js +23 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.js +121 -0
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +208 -0
- package/src/getReverseAminoAcidStringFromSequenceString.js +20 -0
- package/src/getReverseAminoAcidStringFromSequenceString.test.js +11 -0
- package/src/getReverseComplementAminoAcidStringFromSequenceString.js +7 -0
- package/src/getReverseComplementAnnotation.js +23 -0
- package/src/getReverseComplementAnnotation.test.js +44 -0
- package/src/getReverseComplementSequenceAndAnnotations.js +38 -0
- package/src/getReverseComplementSequenceAndAnnotations.test.js +105 -0
- package/src/getReverseComplementSequenceString.js +17 -0
- package/src/getReverseComplementSequenceString.test.js +11 -0
- package/src/getReverseSequenceString.js +12 -0
- package/src/getReverseSequenceString.test.js +9 -0
- package/src/getSequenceDataBetweenRange.js +131 -0
- package/src/getSequenceDataBetweenRange.test.js +474 -0
- package/src/getVirtualDigest.js +125 -0
- package/src/getVirtualDigest.test.js +134 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.js +33 -0
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +34 -0
- package/src/index.js +106 -0
- package/src/index.test.js +38 -0
- package/src/insertGapsIntoRefSeq.js +38 -0
- package/src/insertGapsIntoRefSeq.test.js +20 -0
- package/src/insertSequenceDataAtPosition.js +2 -0
- package/src/insertSequenceDataAtPosition.test.js +75 -0
- package/src/insertSequenceDataAtPositionOrRange.js +249 -0
- package/src/insertSequenceDataAtPositionOrRange.test.js +547 -0
- package/src/isEnzymeType2S.js +3 -0
- package/src/mapAnnotationsToRows.js +174 -0
- package/src/mapAnnotationsToRows.test.js +425 -0
- package/src/prepareCircularViewData.js +17 -0
- package/src/prepareCircularViewData.test.js +196 -0
- package/src/prepareRowData.js +41 -0
- package/src/prepareRowData.test.js +36 -0
- package/src/prepareRowData_output1.json +391 -0
- package/src/proteinAlphabet.js +257 -0
- package/src/rotateBpsToPosition.js +13 -0
- package/src/rotateBpsToPosition.test.js +6 -0
- package/src/rotateSequenceDataToPosition.js +48 -0
- package/src/rotateSequenceDataToPosition.test.js +71 -0
- package/src/shiftAnnotationsByLen.js +17 -0
- package/src/threeLetterSequenceStringToAminoAcidMap.js +106 -0
- package/src/tidyUpAnnotation.js +182 -0
- package/src/tidyUpSequenceData.js +169 -0
- package/src/tidyUpSequenceData.test.js +332 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
 * Lookup table from a lower-case, one-letter amino-acid symbol to a
 * three-letter degenerate DNA codon written with IUPAC ambiguity letters.
 *
 * Several entries are deliberately broad: the single three-letter code covers
 * every real codon of the residue, but may also cover a few codons that belong
 * to other residues (see the per-entry notes).
 */
const aminoAcidToDegenerateDnaMap = {
  // Gap / placeholder symbols are simply tripled.
  "-": "---",
  ".": "...",
  "*": "trr",
  "a": "gcn",
  "b": "ray", // D or N: aay + gay => ray
  "c": "tgy",
  "d": "gay",
  "e": "gar",
  "f": "tty",
  "g": "ggn",
  "h": "cay",
  "i": "ath",
  "j": "htn", // L or I: ytn + ath => htn
  "k": "aar",
  "l": "ytn", // YTR and CTN, where Y = C/T and N = A/G/C/T
  "m": "atg",
  "n": "aay",
  "o": "tag", // Pyrrolysine, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2933860/
  "p": "ccn",
  "q": "car",
  "r": "mgn", // CGN and MGR, where M = A/C and N = A/G/C/T
  "s": "wsn", // TCN and AGY, where W = A/T, S = C/G and N = A/G/C/T
  "t": "acn",
  "u": "tga", // Selenocysteine, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2933860/
  "v": "gtn",
  "w": "tgg",
  "x": "nnn", // unknown amino acid
  "y": "tay",
  "z": "sar" // E or Q: gar + car => sar
};
|
|
32
|
+
export default aminoAcidToDegenerateDnaMap;
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
 * Lookup table from a lower-case, one-letter amino-acid symbol to a
 * three-letter degenerate RNA codon written with IUPAC ambiguity letters.
 *
 * Every codon uses "u" where a DNA codon would use "t"; no value in this
 * table may contain "t". (The "*" entry previously read "trr", which leaked a
 * DNA letter into RNA output whenever the input contained "*".)
 */
const aminoAcidToDegenerateRnaMap = {
  // Gap / placeholder symbols are simply tripled.
  "-": "---",
  ".": "...",
  "*": "urr",
  a: "gcn",
  b: "ray", // aay + gay => ray
  c: "ugy",
  d: "gay",
  e: "gar",
  f: "uuy",
  g: "ggn",
  h: "cay",
  i: "auh",
  j: "hun", // yun + auh => hun
  k: "aar",
  l: "yun",
  m: "aug",
  n: "aay",
  o: "uag",
  p: "ccn",
  q: "car",
  r: "mgn",
  s: "wsn",
  t: "acn",
  u: "uga",
  v: "gun",
  w: "ugg",
  x: "nnn",
  y: "uay",
  z: "sar" // gar + car => sar
};
|
|
32
|
+
export default aminoAcidToDegenerateRnaMap;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import {expect} from "chai";
|
|
2
|
+
import getDegenerateDnaStringFromAAString from "./getDegenerateDnaStringFromAAString";
|
|
3
|
+
import getDegenerateRnaStringFromAAString from "./getDegenerateRnaStringFromAAString";
|
|
4
|
+
|
|
5
|
+
// Checks that the DNA and RNA back-translations of the same amino-acid string
// have the same length and differ only by t <-> u.
describe("amino acid to RNA or DNA should be correct", () => {
  it('should return a string with no "u/U" in it when parse AA sequence to DNA sequence.', () => {
    const aaStr = "AQRSTFFVCL";
    const DNASeq = getDegenerateDnaStringFromAAString(aaStr);
    const RNASeq = getDegenerateRnaStringFromAAString(aaStr);
    const lowerDna = DNASeq.toLowerCase();
    const lowerRna = RNASeq.toLowerCase();

    // Three bases per residue, for both alphabets.
    expect(DNASeq.length).equal(RNASeq.length);
    expect(DNASeq.length).equal(aaStr.length * 3);

    // DNA uses t and never u.
    expect(lowerDna.includes("u")).equal(false);
    expect(lowerDna.includes("t")).equal(true);

    // RNA uses u and never t.
    expect(lowerRna.includes("t")).equal(false);
    expect(lowerRna.includes("u")).equal(true);

    // Swapping the letter converts one result into the other.
    expect(lowerRna.replace(/u/gi, "t")).equal(lowerDna);
    expect(lowerDna.replace(/t/gi, "u")).equal(lowerRna);
  });
});
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import {autoAnnotate} from "./autoAnnotate";
|
|
2
|
+
|
|
3
|
+
/**
 * Convenience wrapper around autoAnnotate for one sequence and one search
 * pattern.
 *
 * @param {Object} args
 * @param {Object} args.fullSeq - sequence object to search in; its `id` is
 *   used when truthy, otherwise "fullSeqId" is substituted.
 * @param {Object} args.searchSeq - annotation/pattern object to search for;
 *   its `id` is used when truthy, otherwise "searchSeqId" is substituted.
 * @returns {{matches: Array}} whatever autoAnnotate reported for the full
 *   sequence, or an empty list when it reported nothing.
 */
function annotateSingleSeq({ fullSeq, searchSeq }) {
  const targetId = fullSeq.id || "fullSeqId";
  const patternId = searchSeq.id || "searchSeqId";

  const seqsToAnnotateById = {
    [targetId]: { ...fullSeq, id: targetId }
  };
  const annotationsToCheckById = {
    [patternId]: { ...searchSeq, id: patternId }
  };

  const found = autoAnnotate({
    seqsToAnnotateById,
    annotationsToCheckById,
    compareName: false
  });

  const hasMatches = Boolean(found && found[targetId]);
  return { matches: hasMatches ? found[targetId] : [] };
}
|
|
29
|
+
export default annotateSingleSeq;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import annotateSingleSeq from "./annotateSingleSeq";
|
|
2
|
+
import {expect} from "chai";
|
|
3
|
+
|
|
4
|
+
// Exercises annotateSingleSeq with a regex pattern, a repeated literal match
// and a pattern that does not occur.
describe("annotateSingleSeq", () => {
  const repeatedSeq = "AAAATTTTGGGGGCCCCCAAGTAAAATTTTGGGGGCCCCCAAGT";

  it(`regexes work - correctly annotates a single seq with a regex annotation`, () => {
    const results = annotateSingleSeq({
      fullSeq: { sequence: "AAAATTTTGGGGGCCCCCAAGT" },
      searchSeq: { sequence: "TTTT.*CCC" }
    });
    // eslint-disable-next-line no-unused-expressions
    expect(results).to.not.be.undefined;
    // No id was supplied for the search sequence, so the default id is reported.
    const expected = {
      matches: [{ start: 4, end: 17, strand: 1, id: "searchSeqId" }]
    };
    expect(results).to.deep.eq(expected);
  });

  it(`correctly annotates a single seq with multiple matches`, () => {
    const results = annotateSingleSeq({
      fullSeq: { sequence: repeatedSeq },
      searchSeq: { sequence: "AAAATTTTGGGGGCCCCCAAGT", id: 2 }
    });
    // eslint-disable-next-line no-unused-expressions
    expect(results).to.not.be.undefined;
    // One match per copy of the pattern, both carrying the supplied id.
    const expected = {
      matches: [
        { start: 0, end: 21, strand: 1, id: 2 },
        { start: 22, end: 43, strand: 1, id: 2 }
      ]
    };
    expect(results).to.deep.eq(expected);
  });

  it(`correctly finds no matches when there are none`, () => {
    const results = annotateSingleSeq({
      fullSeq: { sequence: repeatedSeq },
      searchSeq: { sequence: "AAAATTTTGGGGGGGGGGCCCCCAAGT" }
    });
    // eslint-disable-next-line no-unused-expressions
    expect(results).to.not.be.undefined;
    // The result object is still returned, with an empty match list.
    expect(results).to.deep.eq({ matches: [] });
  });
});
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Names of every annotation collection listed by this module, in the order
 * they are declared here.
 */
export const annotationTypes = [
  "features",
  "warnings",
  "assemblyPieces",
  "lineageAnnotations",
  "parts",
  // The next two do not appear in the modifiable list exported by this file.
  "cutsites",
  "orfs",
  "translations",
  "primers",
  "guides"
];
|
|
13
|
+
|
|
14
|
+
/**
 * Names of the annotation collections treated as modifiable. The list holds
 * eight collection names and contains neither "cutsites" nor "orfs".
 */
export const modifiableTypes = [
  "features",
  "assemblyPieces",
  "lineageAnnotations",
  "warnings",
  "parts",
  "translations",
  "primers",
  "guides"
];
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/* Copyright (C) 2018 TeselaGen Biotechnology, Inc. */
|
|
2
|
+
import {forEach, omitBy} from "lodash";
|
|
3
|
+
import { ambiguous_dna_values } from "./bioData";
|
|
4
|
+
import aminoAcidToDegenerateDnaMap from "./aminoAcidToDegenerateDnaMap";
|
|
5
|
+
import {normalizePositionByRangeLength, reversePositionInRange} from "@teselagen/range-utils";
|
|
6
|
+
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
7
|
+
|
|
8
|
+
/**
 * Searches every sequence for every annotation pattern, on both strands, and
 * returns the matches that are not already annotated on the sequence.
 *
 * @param {Object} options
 * @param {Object} options.seqsToAnnotateById - sequences keyed by id. Each
 *   entry must have a `sequence` string and may have `circular` and
 *   `annotations`. Entries whose sequence is empty are skipped.
 * @param {Object} options.annotationsToCheckById - annotations to look for.
 *   Each entry's `sequence` is compiled as a global, case-insensitive regular
 *   expression and its `id` is copied onto every match.
 * @param {boolean} [options.compareName] - forwarded to getStartEndStr when
 *   building the key used to detect already-existing annotations.
 * @param {number} [options.warnIfMoreThan] - when truthy, annotation ids that
 *   produced more than this many new matches on one sequence are listed under
 *   `__more_than_warnings[seqId]` in the result.
 * @returns {Object} new annotations (`{start, end, strand, id}`) keyed by
 *   sequence id and sorted by start, plus the optional `__more_than_warnings`
 *   entry. Sequences without new annotations are omitted.
 */
function autoAnnotate({
  seqsToAnnotateById,
  annotationsToCheckById,
  compareName,
  warnIfMoreThan
}) {
  const annotationsToAddBySeqId = {};

  forEach(annotationsToCheckById, ann => {
    const reg = new RegExp(ann.sequence, "gi");
    forEach(
      omitBy(seqsToAnnotateById, s => !s.sequence.length),
      ({ circular, sequence }, id) => {
        function getMatches({ seqToMatchAgainst, isReverse, seqLen }) {
          // `reg` is a stateful global regex shared by every scan of this
          // annotation. A previous scan may have exited early (see the
          // `matchStart >= seqLen` return below, or a caught error) and left
          // lastIndex pointing into the middle of the string, which would make
          // this scan skip leading matches. Always start from the beginning.
          reg.lastIndex = 0;
          let match;
          let lastMatch;
          try {
            while ((match = reg.exec(seqToMatchAgainst))) {
              const { index: matchStart, 0: matchSeq } = match;
              // Circular sequences are searched doubled; anything starting in
              // the second copy is a repeat of an earlier match.
              if (matchStart >= seqLen) return;
              const matchEnd = matchStart + matchSeq.length;
              if (
                lastMatch &&
                matchStart > lastMatch.start &&
                matchEnd <= lastMatch.end
              ) {
                // Fully contained in the previous match: skip it, but keep
                // scanning from the next position.
                reg.lastIndex = match.index + 1;
                continue;
              }
              lastMatch = {
                start: matchStart,
                end: matchEnd
              };
              const range = {
                start: matchStart,
                end: normalizePositionByRangeLength(matchEnd - 1, seqLen)
              };
              if (!annotationsToAddBySeqId[id]) {
                annotationsToAddBySeqId[id] = [];
              }
              annotationsToAddBySeqId[id].push({
                // Matches found on the reverse complement are mapped back to
                // forward-strand coordinates.
                ...(isReverse
                  ? {
                      start: reversePositionInRange(range.end, seqLen),
                      end: reversePositionInRange(range.start, seqLen)
                    }
                  : range),
                strand: isReverse ? -1 : 1,
                id: ann.id
              });

              // Advance by one base (not by the match length) so overlapping
              // matches are also found.
              reg.lastIndex = match.index + 1;
            }
          } catch (error) {
            console.error(`error:`, error);
          }
        }
        const seqLen = sequence.length;

        const revSeq = getReverseComplementSequenceString(sequence);
        getMatches({
          seqLen,
          seqToMatchAgainst: circular ? sequence + sequence : sequence
        });
        getMatches({
          seqLen,
          isReverse: true,
          seqToMatchAgainst: circular ? revSeq + revSeq : revSeq
        });
      }
    );
  });

  // Drop matches that duplicate an annotation already on the sequence, then
  // collect the per-sequence results and the optional "too many" warnings.
  const toReturn = {};

  forEach(annotationsToAddBySeqId, (anns, id) => {
    const origSeq = seqsToAnnotateById[id];
    const alreadyExistingAnnsByStartEnd = {};
    forEach(origSeq.annotations, ann => {
      alreadyExistingAnnsByStartEnd[getStartEndStr(ann, { compareName })] = ann;
    });
    const warningCounter = {};
    const toAdd = anns
      .filter(ann => {
        const alreadyExistingAnn =
          alreadyExistingAnnsByStartEnd[getStartEndStr(ann, { compareName })];
        if (alreadyExistingAnn) return false;
        if (warnIfMoreThan) {
          warningCounter[ann.id] = (warningCounter[ann.id] || 0) + 1;
        }
        return true;
      })
      .sort((a, b) => a.start - b.start);
    if (toAdd.length) {
      toReturn[id] = toAdd;
    }
    if (warnIfMoreThan) {
      forEach(warningCounter, (num, annId) => {
        if (num > warnIfMoreThan) {
          toReturn.__more_than_warnings = toReturn.__more_than_warnings || {};
          toReturn.__more_than_warnings[id] =
            toReturn.__more_than_warnings[id] || [];
          toReturn.__more_than_warnings[id].push(annId);
        }
      });
    }
  });
  return toReturn;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Builds the string key used to decide whether two annotations occupy the
 * same place: "<start>-<end>-<rev|for>-<name or empty>".
 *
 * @param {Object} annotation - read for `start`, `end`, `name`, `strand` and
 *   `forward`. The annotation counts as reverse when `strand` is -1 or
 *   `forward` is exactly false.
 * @param {Object} options
 * @param {boolean} [options.compareName] - when truthy the annotation name is
 *   appended to the key; otherwise the last segment is left empty.
 * @returns {string} the comparison key.
 */
function getStartEndStr(annotation, options) {
  const { start, end, name, strand, forward } = annotation;
  const { compareName } = options;

  let direction = "for";
  if (strand === -1 || forward === false) {
    direction = "rev";
  }
  const nameSegment = compareName ? name : "";

  return `${start}-${end}-${direction}-${nameSegment}`;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Converts an ApE-style search pattern into a JavaScript regular-expression
 * source string.
 *
 * Handling, character by character:
 *  - "(" and ")" are removed (all of them, not only the first of each).
 *  - A character whose `ambiguous_dna_values` expansion has more than one
 *    letter becomes "." when the expansion has four letters, otherwise a
 *    character class of the expansion (e.g. "[AG]").
 *  - "#" becomes a lazy run of anything except the previous literal
 *    character ("[^X]*?"), or ".*?" when there is no previous literal.
 *  - "<" turns everything converted so far into an optional group that
 *    matches any suffix of it, e.g. "ABC<" -> "(ABC|BC|C)?".
 *  - ">" turns everything after it into an optional group that matches any
 *    prefix of it, e.g. ">ABC" -> "(ABC|AB|A)?".
 *  - Any other character is copied through unchanged.
 *
 * @param {string} [regString=""] - the ApE-like pattern.
 * @returns {string} regular-expression source.
 * @throws {Error} when "<" appears twice, when "<", ">" or "#" appears after
 *   a ">".
 */
function convertApELikeRegexToRegex(regString = "") {
  let newstr = "";
  let rightOfCaretHolder = "";
  let afterRightCaretHolder = "";
  let beforeRightCaret = "";
  let prevBp;
  let hitLeftCaret;
  let hitRightCaret;

  // Once ">" has been seen, every appended piece extends the list of
  // prefixes (longest first) that will form the trailing optional group.
  const maybeHandleRightCaret = justAdded => {
    if (hitRightCaret) {
      rightOfCaretHolder += justAdded;
      afterRightCaretHolder = `${rightOfCaretHolder}${
        afterRightCaretHolder.length ? "|" : ""
      }${afterRightCaretHolder}`;
    }
  };

  // A string pattern passed to replace() only removes the first occurrence,
  // so a global regex is used to strip every parenthesis.
  for (const bp of regString.replace(/[()]/g, "")) {
    const ambigVal = ambiguous_dna_values[bp.toUpperCase()];
    if (ambigVal && ambigVal.length > 1) {
      let valToUse;
      if (ambigVal.length === 4) {
        valToUse = ".";
      } else {
        valToUse = `[${ambigVal}]`;
      }
      newstr += valToUse;
      maybeHandleRightCaret(valToUse);
      continue;
    }
    if (bp === "#") {
      if (hitRightCaret) throw new Error("Error converting regex");
      const valToUse = prevBp ? `[^${prevBp}]*?` : `.*?`;
      newstr += valToUse;
      maybeHandleRightCaret(valToUse);
      continue;
    }
    if (bp === "<") {
      if (hitRightCaret) throw new Error("Error converting to regex");
      if (hitLeftCaret) throw new Error("Error converting to regex");
      let holder = "";
      let stringToAdd = "";
      let isGroupClosed = true;
      let closingBraceHit;
      const groups = [];
      // Split what has been converted so far into units: a single character,
      // or a bracket expression together with whatever follows it up to the
      // next base letter / next bracket expression.
      for (let index = 0; index < newstr.length; index++) {
        const char = newstr[index];
        const nextChar = newstr[index + 1];
        if (char === "[") {
          isGroupClosed = false;
        } else if (char === "]" || closingBraceHit) {
          closingBraceHit = true;
          if (ambiguous_dna_values[nextChar] || nextChar === "[") {
            isGroupClosed = true;
            closingBraceHit = false;
          }
        }
        holder += char;
        if (isGroupClosed) {
          groups.push(holder);
          holder = "";
        }
      }
      // A bracket expression at the very end never sees a "next" character
      // that closes it; flush it so it is not dropped from the group.
      if (holder.length) {
        groups.push(holder);
      }
      // Build the alternation of all suffixes, longest first.
      let concattedEls = "";
      groups.reverse();
      groups.forEach(g => {
        concattedEls = g + concattedEls;
        stringToAdd = `${concattedEls}${
          stringToAdd.length ? "|" : ""
        }${stringToAdd}`;
      });
      newstr = `(${stringToAdd})?`;
      hitLeftCaret = true;
      continue;
    }
    if (bp === ">") {
      if (hitRightCaret) throw new Error("Error converting regex");
      hitRightCaret = true;
      beforeRightCaret = newstr;
      continue;
    }
    newstr += bp;
    maybeHandleRightCaret(bp);
    prevBp = bp;
  }
  if (hitRightCaret) {
    newstr = `${beforeRightCaret}(${afterRightCaretHolder})?`;
  }
  return newstr;
}
|
|
223
|
+
/**
 * Back-translates a protein string into degenerate DNA by replacing each
 * character with its entry in aminoAcidToDegenerateDnaMap (looked up in lower
 * case). Characters without a (non-empty) entry are copied through unchanged.
 *
 * @param {string} sequence - amino-acid string to convert.
 * @returns {string} the concatenated codons / passthrough characters.
 */
function convertProteinSeqToDNAIupac(sequence) {
  let dna = "";
  for (const residue of sequence) {
    const codon = aminoAcidToDegenerateDnaMap[residue.toLowerCase()];
    dna += codon ? codon : residue;
  }
  return dna;
}
|
|
237
|
+
|
|
238
|
+
export {
|
|
239
|
+
convertProteinSeqToDNAIupac,
|
|
240
|
+
convertApELikeRegexToRegex,
|
|
241
|
+
autoAnnotate
|
|
242
|
+
};
|