@teselagen/sequence-utils 0.3.31 → 0.3.32-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/findApproxMatches.d.ts +10 -0
- package/findApproxMatches.test.d.ts +1 -0
- package/findOrfsInPlasmid.d.ts +1 -1
- package/getOrfsFromSequence.d.ts +1 -1
- package/index.cjs +75 -331
- package/index.d.ts +1 -0
- package/index.js +75 -331
- package/index.umd.cjs +75 -331
- package/package.json +8 -4
- package/src/computeDigestFragments.js +2 -2
- package/src/cutSequenceByRestrictionEnzyme.js +2 -2
- package/src/findApproxMatches.js +50 -0
- package/src/findApproxMatches.test.js +126 -0
- package/src/generateAnnotations.js +2 -2
- package/src/getOrfsFromSequence.js +2 -2
- package/src/index.js +1 -0
- package/src/tidyUpAnnotation.js +3 -3
- package/src/tidyUpSequenceData.js +2 -2
package/src/findApproxMatches.test.js
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import findApproxMatches from "./findApproxMatches";
|
|
2
|
+
|
|
3
|
+
describe("findApproxMatches", () => {
|
|
4
|
+
it("returns exact matches with maxMismatches=0", () => {
|
|
5
|
+
// Test with DNA sequence
|
|
6
|
+
expect(findApproxMatches("ATG", "GATGC", 0)).toEqual([
|
|
7
|
+
{ index: 1, match: "ATG", mismatchPositions: [], numMismatches: 0 }
|
|
8
|
+
]);
|
|
9
|
+
|
|
10
|
+
// Test with no matches
|
|
11
|
+
expect(findApproxMatches("ATG", "GCCTA", 0)).toEqual([]);
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
it("finds matches with allowed mismatches", () => {
|
|
15
|
+
// One mismatch allowed, one actual mismatch
|
|
16
|
+
expect(findApproxMatches("ATG", "ACTG", 1)).toEqual([
|
|
17
|
+
{ index: 1, match: "CTG", mismatchPositions: [0], numMismatches: 1 }
|
|
18
|
+
]);
|
|
19
|
+
|
|
20
|
+
// Two mismatches allowed, two actual mismatches
|
|
21
|
+
expect(findApproxMatches("ATGC", "ACGA", 2)).toEqual([
|
|
22
|
+
{ index: 0, match: "ACGA", mismatchPositions: [1, 3], numMismatches: 2 }
|
|
23
|
+
]);
|
|
24
|
+
|
|
25
|
+
// Multiple matches with mismatches
|
|
26
|
+
expect(findApproxMatches("AGT", "AGTCAATAGTAAGTG", 1)).toEqual([
|
|
27
|
+
{ index: 0, match: "AGT", mismatchPositions: [], numMismatches: 0 },
|
|
28
|
+
{ index: 4, match: "AAT", mismatchPositions: [1], numMismatches: 1 },
|
|
29
|
+
{ index: 7, match: "AGT", mismatchPositions: [], numMismatches: 0 },
|
|
30
|
+
{ index: 11, match: "AGT", mismatchPositions: [], numMismatches: 0 }
|
|
31
|
+
]);
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it("respects the maximum mismatch threshold", () => {
|
|
35
|
+
// Three mismatches are too many when max is 2
|
|
36
|
+
expect(findApproxMatches("ATGC", "ACAA", 2)).toEqual([]);
|
|
37
|
+
|
|
38
|
+
// Three mismatches are allowed when max is 3
|
|
39
|
+
expect(findApproxMatches("ATGC", "ACAA", 3)).toEqual([
|
|
40
|
+
{
|
|
41
|
+
index: 0,
|
|
42
|
+
match: "ACAA",
|
|
43
|
+
mismatchPositions: [1, 2, 3],
|
|
44
|
+
numMismatches: 3
|
|
45
|
+
}
|
|
46
|
+
]);
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it("handles circular sequences correctly", () => {
|
|
50
|
+
// Non-circular sequence
|
|
51
|
+
expect(findApproxMatches("ATG", "TGA", 0, false)).toEqual([]);
|
|
52
|
+
|
|
53
|
+
// Circular sequence - match wraps around the end
|
|
54
|
+
expect(findApproxMatches("ATG", "TGA", 0, true)).toEqual([
|
|
55
|
+
{ index: 2, match: "ATG", mismatchPositions: [], numMismatches: 0 }
|
|
56
|
+
]);
|
|
57
|
+
|
|
58
|
+
// Circular sequence with mismatches
|
|
59
|
+
expect(findApproxMatches("ATG", "TGC", 1, true)).toEqual([
|
|
60
|
+
{ index: 2, match: "CTG", mismatchPositions: [0], numMismatches: 1 }
|
|
61
|
+
]);
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
it("handles edge cases", () => {
|
|
65
|
+
// Empty search sequence - returns matches at every position
|
|
66
|
+
expect(findApproxMatches("", "ATGC", 0)).toEqual([
|
|
67
|
+
{ index: 0, match: "", mismatchPositions: [], numMismatches: 0 },
|
|
68
|
+
{ index: 1, match: "", mismatchPositions: [], numMismatches: 0 },
|
|
69
|
+
{ index: 2, match: "", mismatchPositions: [], numMismatches: 0 },
|
|
70
|
+
{ index: 3, match: "", mismatchPositions: [], numMismatches: 0 },
|
|
71
|
+
{ index: 4, match: "", mismatchPositions: [], numMismatches: 0 }
|
|
72
|
+
]);
|
|
73
|
+
|
|
74
|
+
// Empty target sequence
|
|
75
|
+
expect(findApproxMatches("ATG", "", 0)).toEqual([]);
|
|
76
|
+
|
|
77
|
+
// Search sequence longer than target
|
|
78
|
+
expect(findApproxMatches("ATGCG", "ATGC", 0)).toEqual([]);
|
|
79
|
+
|
|
80
|
+
// Exactly matching length sequences
|
|
81
|
+
expect(findApproxMatches("ATGC", "ATGC", 0)).toEqual([
|
|
82
|
+
{ index: 0, match: "ATGC", mismatchPositions: [], numMismatches: 0 }
|
|
83
|
+
]);
|
|
84
|
+
|
|
85
|
+
// Sequences with special characters
|
|
86
|
+
expect(findApproxMatches("AT-G", "AT-GC", 0)).toEqual([
|
|
87
|
+
{ index: 0, match: "AT-G", mismatchPositions: [], numMismatches: 0 }
|
|
88
|
+
]);
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
it("handles larger sequences efficiently", () => {
|
|
92
|
+
const longTarget = "ATGCGATCGATCGATCGATCGATCGATCGATCG";
|
|
93
|
+
const longSearch = "ATCGATCG";
|
|
94
|
+
|
|
95
|
+
// The actual positions where the pattern appears in the sequence
|
|
96
|
+
const expected = [
|
|
97
|
+
{ index: 5, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
|
|
98
|
+
{ index: 9, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
|
|
99
|
+
{ index: 13, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
|
|
100
|
+
{ index: 17, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
|
|
101
|
+
{ index: 21, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
|
|
102
|
+
{ index: 25, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 }
|
|
103
|
+
];
|
|
104
|
+
|
|
105
|
+
// Only include indices where we have full matches (length of search string)
|
|
106
|
+
const actual = findApproxMatches(longSearch, longTarget, 0).filter(
|
|
107
|
+
m => m.match.length === longSearch.length
|
|
108
|
+
);
|
|
109
|
+
|
|
110
|
+
expect(actual).toEqual(expected);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
it("tracks exact positions of mismatches", () => {
|
|
114
|
+
// Test specific positions of mismatches
|
|
115
|
+
const result = findApproxMatches("ATGCTA", "ATCCAA", 2);
|
|
116
|
+
|
|
117
|
+
expect(result).toEqual([
|
|
118
|
+
{
|
|
119
|
+
index: 0,
|
|
120
|
+
match: "ATCCAA",
|
|
121
|
+
mismatchPositions: [2, 4],
|
|
122
|
+
numMismatches: 2
|
|
123
|
+
}
|
|
124
|
+
]);
|
|
125
|
+
});
|
|
126
|
+
});
|
package/src/generateAnnotations.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { generateRandomRange } from "@teselagen/range-utils";
|
|
2
|
-
import
|
|
2
|
+
import { nanoid } from "nanoid";
|
|
3
3
|
|
|
4
4
|
function generateAnnotations(
|
|
5
5
|
numberOfAnnotationsToGenerate,
|
|
@@ -21,7 +21,7 @@ function generateAnnotation(start, end, maxLength) {
|
|
|
21
21
|
...range,
|
|
22
22
|
name: getRandomInt(0, 100000).toString(),
|
|
23
23
|
type: "misc_feature",
|
|
24
|
-
id:
|
|
24
|
+
id: nanoid(),
|
|
25
25
|
forward: Math.random() > 0.5,
|
|
26
26
|
notes: {}
|
|
27
27
|
};
|
package/src/getOrfsFromSequence.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { nanoid } from "nanoid";
|
|
2
2
|
import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
|
|
3
3
|
|
|
4
4
|
/**
|
|
@@ -61,7 +61,7 @@ export default function getOrfsFromSequence(options) {
|
|
|
61
61
|
forward: forward,
|
|
62
62
|
annotationTypePlural: "orfs",
|
|
63
63
|
isOrf: true,
|
|
64
|
-
id:
|
|
64
|
+
id: nanoid()
|
|
65
65
|
});
|
|
66
66
|
}
|
|
67
67
|
}
|
package/src/index.js
CHANGED
|
@@ -40,6 +40,7 @@ export { default as getDegenerateRnaStringFromAAString } from "./getDegenerateRn
|
|
|
40
40
|
export { default as getVirtualDigest } from "./getVirtualDigest";
|
|
41
41
|
export { default as isEnzymeType2S } from "./isEnzymeType2S";
|
|
42
42
|
export { default as insertGapsIntoRefSeq } from "./insertGapsIntoRefSeq";
|
|
43
|
+
export { default as findApproxMatches } from "./findApproxMatches";
|
|
43
44
|
export { default as adjustBpsToReplaceOrInsert } from "./adjustBpsToReplaceOrInsert";
|
|
44
45
|
export { default as calculatePercentGC } from "./calculatePercentGC";
|
|
45
46
|
export { default as calculateTm } from "./calculateTm";
|
package/src/tidyUpAnnotation.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { cloneDeep, get, some } from "lodash-es";
|
|
2
2
|
import { getFeatureToColorMap, getFeatureTypes } from "./featureTypesAndColors";
|
|
3
|
-
import
|
|
3
|
+
import { nanoid } from "nanoid";
|
|
4
4
|
|
|
5
5
|
export default function tidyUpAnnotation(
|
|
6
6
|
_annotation,
|
|
@@ -34,10 +34,10 @@ export default function tidyUpAnnotation(
|
|
|
34
34
|
annotation.name = "Untitled annotation";
|
|
35
35
|
}
|
|
36
36
|
if (provideNewIdsForAnnotations) {
|
|
37
|
-
annotation.id =
|
|
37
|
+
annotation.id = nanoid();
|
|
38
38
|
}
|
|
39
39
|
if (!annotation.id && annotation.id !== 0 && !doNotProvideIdsForAnnotations) {
|
|
40
|
-
annotation.id =
|
|
40
|
+
annotation.id = nanoid();
|
|
41
41
|
messages.push(
|
|
42
42
|
"Unable to detect valid ID for annotation, setting ID to " + annotation.id
|
|
43
43
|
);
|
package/src/tidyUpSequenceData.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// tnrtodo: figure out where to insert this validation exactly..
|
|
2
|
-
import
|
|
2
|
+
import { nanoid } from "nanoid";
|
|
3
3
|
|
|
4
4
|
import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
|
|
5
5
|
import { cloneDeep, flatMap } from "lodash-es";
|
|
@@ -155,7 +155,7 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
155
155
|
if (item.id || item.id === 0) {
|
|
156
156
|
itemId = item.id;
|
|
157
157
|
} else {
|
|
158
|
-
itemId =
|
|
158
|
+
itemId = nanoid();
|
|
159
159
|
if (!doNotProvideIdsForAnnotations) {
|
|
160
160
|
item.id = itemId; //assign the newly created id to the item
|
|
161
161
|
}
|