@teselagen/sequence-utils 0.3.30 → 0.3.32-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/calculateNebTa.d.ts +1 -4
- package/calculateTm.d.ts +1 -7
- package/diffUtils.d.ts +2 -2
- package/featureTypesAndColors.d.ts +2 -6
- package/filterSequenceString.d.ts +2 -17
- package/findApproxMatches.d.ts +10 -0
- package/findApproxMatches.test.d.ts +1 -0
- package/generateSequenceData.d.ts +0 -5
- package/getAminoAcidStringFromSequenceString.d.ts +1 -3
- package/index.cjs +4013 -3929
- package/index.d.ts +1 -0
- package/index.js +4013 -3929
- package/index.umd.cjs +4013 -3929
- package/mapAnnotationsToRows.d.ts +1 -3
- package/package.json +1 -1
- package/src/adjustBpsToReplaceOrInsert.test.js +1 -1
- package/src/cutSequenceByRestrictionEnzyme.test.js +1 -1
- package/src/deleteSequenceDataAtRange.test.js +1 -1
- package/src/diffUtils.test.js +1 -1
- package/src/doesEnzymeChopOutsideOfRecognitionSite.test.js +1 -1
- package/src/filterSequenceString.test.js +0 -1
- package/src/findApproxMatches.js +50 -0
- package/src/findApproxMatches.test.js +126 -0
- package/src/generateSequenceData.test.js +1 -1
- package/src/getDigestFragmentsForRestrictionEnzymes.test.js +1 -1
- package/src/getLeftAndRightOfSequenceInRangeGivenPosition.test.js +1 -1
- package/src/getPossiblePartsFromSequenceAndEnzymes.test.js +1 -1
- package/src/getReverseComplementAnnotation.test.js +1 -1
- package/src/getReverseComplementSequenceAndAnnotations.test.js +1 -1
- package/src/getReverseComplementSequenceString.test.js +1 -1
- package/src/getReverseSequenceString.test.js +1 -1
- package/src/getSequenceDataBetweenRange.test.js +1 -1
- package/src/getVirtualDigest.test.js +1 -1
- package/src/guessIfSequenceIsDnaAndNotProtein.test.js +1 -1
- package/src/index.js +1 -0
- package/src/insertSequenceDataAtPosition.test.js +1 -1
- package/src/insertSequenceDataAtPositionOrRange.test.js +1 -1
- package/src/mapAnnotationsToRows.test.js +1 -1
- package/src/tidyUpSequenceData.test.js +1 -1
- package/tidyUpAnnotation.d.ts +1 -1
|
@@ -1,3 +1 @@
|
|
|
1
|
-
export default function mapAnnotationsToRows(annotations: any, sequenceLength: any, bpsPerRow: any, { splitForwardReverse }?: {
|
|
2
|
-
splitForwardReverse: any;
|
|
3
|
-
}): {};
|
|
1
|
+
export default function mapAnnotationsToRows(annotations: any, sequenceLength: any, bpsPerRow: any, { splitForwardReverse }?: {}): {};
|
package/package.json
CHANGED
package/src/diffUtils.test.js
CHANGED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
 * Find approximate matches of a search sequence within a target sequence.
 *
 * A window of `searchSeq.length` characters is slid across `targetSeq`; every
 * start index whose window differs from `searchSeq` in at most
 * `maxMismatches` positions is reported. Only substitutions are considered
 * (no insertions or deletions) and characters are compared with strict,
 * case-sensitive equality.
 *
 * @param {string} searchSeq - The sequence to search for
 * @param {string} targetSeq - The sequence to search within
 * @param {number} [maxMismatches=0] - Maximum number of mismatches allowed
 * @param {boolean} [circular=false] - Whether to treat the target sequence as circular
 * @returns {Array<{index: number, match: string, mismatchPositions: number[], numMismatches: number}>}
 *   One entry per accepted window: `index` is the start position in
 *   `targetSeq`, `match` is the window text, `mismatchPositions` are the
 *   offsets within `searchSeq` that differ, and `numMismatches` is their count.
 */
export default function findApproxMatches(
  searchSeq,
  targetSeq,
  maxMismatches = 0,
  circular = false
) {
  const matches = [];
  const lenA = searchSeq.length;
  const lenB = targetSeq.length;

  // Extend targetSeq to simulate circularity. A window starting at the last
  // base needs lenA - 1 extra characters; when that overhang is longer than
  // the target itself the target has to be repeated more than once, otherwise
  // the trailing windows would be truncated and padded with phantom mismatches.
  let targetSeqExtended = targetSeq;
  const overhang = lenA - 1;
  if (circular && lenB > 0 && overhang > 0) {
    const copies = Math.ceil(overhang / lenB);
    targetSeqExtended = targetSeq + targetSeq.repeat(copies).slice(0, overhang);
  }

  // Circular: every position of the target is a valid start.
  // Linear: only positions where a full-length window still fits.
  const limit = circular ? lenB : lenB - lenA + 1;

  for (let i = 0; i < limit; i++) {
    const candidate = targetSeqExtended.slice(i, i + lenA);
    const mismatchPositions = [];

    for (let j = 0; j < lenA; j++) {
      if (searchSeq[j] !== candidate[j]) {
        mismatchPositions.push(j);
        // Stop scanning this window as soon as it can no longer qualify.
        if (mismatchPositions.length > maxMismatches) break;
      }
    }

    if (mismatchPositions.length <= maxMismatches) {
      matches.push({
        index: i,
        match: candidate,
        mismatchPositions,
        numMismatches: mismatchPositions.length // Keep for backwards compatibility
      });
    }
  }

  return matches;
}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import findApproxMatches from "./findApproxMatches";
|
|
2
|
+
|
|
3
|
+
describe("findApproxMatches", () => {
  // Builds one expected result entry; numMismatches always mirrors the
  // number of recorded mismatch positions.
  const hit = (index, match, mismatchPositions = []) => ({
    index,
    match,
    mismatchPositions,
    numMismatches: mismatchPositions.length
  });

  it("returns exact matches with maxMismatches=0", () => {
    // A DNA sequence containing the query exactly once
    const found = findApproxMatches("ATG", "GATGC", 0);
    expect(found).toEqual([hit(1, "ATG")]);

    // A target that does not contain the query at all
    const none = findApproxMatches("ATG", "GCCTA", 0);
    expect(none).toEqual([]);
  });

  it("finds matches with allowed mismatches", () => {
    // Budget of one mismatch, exactly one used
    expect(findApproxMatches("ATG", "ACTG", 1)).toEqual([hit(1, "CTG", [0])]);

    // Budget of two mismatches, exactly two used
    expect(findApproxMatches("ATGC", "ACGA", 2)).toEqual([
      hit(0, "ACGA", [1, 3])
    ]);

    // Several hits, some exact and some approximate
    expect(findApproxMatches("AGT", "AGTCAATAGTAAGTG", 1)).toEqual([
      hit(0, "AGT"),
      hit(4, "AAT", [1]),
      hit(7, "AGT"),
      hit(11, "AGT")
    ]);
  });

  it("respects the maximum mismatch threshold", () => {
    // Three differences exceed a budget of two
    expect(findApproxMatches("ATGC", "ACAA", 2)).toEqual([]);

    // The same three differences fit a budget of three
    expect(findApproxMatches("ATGC", "ACAA", 3)).toEqual([
      hit(0, "ACAA", [1, 2, 3])
    ]);
  });

  it("handles circular sequences correctly", () => {
    // Linear target: the wrapped occurrence must not be reported
    expect(findApproxMatches("ATG", "TGA", 0, false)).toEqual([]);

    // Circular target: the occurrence spanning the origin is reported
    expect(findApproxMatches("ATG", "TGA", 0, true)).toEqual([hit(2, "ATG")]);

    // Circular target combined with a mismatch budget
    expect(findApproxMatches("ATG", "TGC", 1, true)).toEqual([
      hit(2, "CTG", [0])
    ]);
  });

  it("handles edge cases", () => {
    // An empty query matches (as an empty string) at every position
    const emptyQueryHits = [0, 1, 2, 3, 4].map(position => hit(position, ""));
    expect(findApproxMatches("", "ATGC", 0)).toEqual(emptyQueryHits);

    // An empty target yields nothing
    expect(findApproxMatches("ATG", "", 0)).toEqual([]);

    // A query longer than the target yields nothing
    expect(findApproxMatches("ATGCG", "ATGC", 0)).toEqual([]);

    // Query and target of identical length
    expect(findApproxMatches("ATGC", "ATGC", 0)).toEqual([hit(0, "ATGC")]);

    // Non-letter characters are compared like any other character
    expect(findApproxMatches("AT-G", "AT-GC", 0)).toEqual([hit(0, "AT-G")]);
  });

  it("handles larger sequences efficiently", () => {
    const longTarget = "ATGCGATCGATCGATCGATCGATCGATCGATCG";
    const longSearch = "ATCGATCG";

    // Start positions at which the pattern occurs in the target
    const expected = [5, 9, 13, 17, 21, 25].map(position =>
      hit(position, "ATCGATCG")
    );

    // Keep only results whose window has the full query length
    const allResults = findApproxMatches(longSearch, longTarget, 0);
    const actual = allResults.filter(
      result => result.match.length === longSearch.length
    );

    expect(actual).toEqual(expected);
  });

  it("tracks exact positions of mismatches", () => {
    // The reported offsets must identify exactly the differing characters
    const result = findApproxMatches("ATGCTA", "ATCCAA", 2);

    expect(result).toEqual([hit(0, "ATCCAA", [2, 4])]);
  });
});
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import chai from "chai";
|
|
1
|
+
import * as chai from "chai";
|
|
2
2
|
import cutSequenceByRestrictionEnzyme from "./cutSequenceByRestrictionEnzyme.js";
|
|
3
3
|
import enzymeList from "./aliasedEnzymesByName";
|
|
4
4
|
import getLeftAndRightOfSequenceInRangeGivenPosition from "./getLeftAndRightOfSequenceInRangeGivenPosition";
|
package/src/index.js
CHANGED
|
@@ -40,6 +40,7 @@ export { default as getDegenerateRnaStringFromAAString } from "./getDegenerateRn
|
|
|
40
40
|
export { default as getVirtualDigest } from "./getVirtualDigest";
|
|
41
41
|
export { default as isEnzymeType2S } from "./isEnzymeType2S";
|
|
42
42
|
export { default as insertGapsIntoRefSeq } from "./insertGapsIntoRefSeq";
|
|
43
|
+
export { default as findApproxMatches } from "./findApproxMatches";
|
|
43
44
|
export { default as adjustBpsToReplaceOrInsert } from "./adjustBpsToReplaceOrInsert";
|
|
44
45
|
export { default as calculatePercentGC } from "./calculatePercentGC";
|
|
45
46
|
export { default as calculateTm } from "./calculateTm";
|
package/tidyUpAnnotation.d.ts
CHANGED
|
@@ -4,7 +4,7 @@ export default function tidyUpAnnotation(_annotation: any, { sequenceData, conve
|
|
|
4
4
|
annotationType: any;
|
|
5
5
|
provideNewIdsForAnnotations: any;
|
|
6
6
|
doNotProvideIdsForAnnotations: any;
|
|
7
|
-
messages?:
|
|
7
|
+
messages?: never[] | undefined;
|
|
8
8
|
mutative: any;
|
|
9
9
|
allowNonStandardGenbankTypes: any;
|
|
10
10
|
featureTypes: any;
|