@teselagen/sequence-utils 0.3.31 → 0.3.32-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/findApproxMatches.d.ts +10 -0
- package/findApproxMatches.test.d.ts +1 -0
- package/index.cjs +55 -23
- package/index.d.ts +1 -0
- package/index.js +55 -23
- package/index.umd.cjs +55 -23
- package/package.json +1 -1
- package/src/findApproxMatches.js +50 -0
- package/src/findApproxMatches.test.js +126 -0
- package/src/index.js +1 -0
|
package/findApproxMatches.d.ts
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Find approximate matches of a search sequence within a target sequence.
 *
 * Every window of `searchSeq.length` characters in `targetSeq` is compared
 * with `searchSeq` position by position; windows that differ in at most
 * `maxMismatches` positions are returned.
 *
 * @param searchSeq - The sequence to search for
 * @param targetSeq - The sequence to search within
 * @param maxMismatches - Maximum number of mismatches allowed
 * @param circular - Whether to treat the target sequence as circular (default: false)
 * @returns One entry per accepted window: `index` is the window's start in
 *   `targetSeq`, `match` is the window text, `mismatchPositions` holds the
 *   0-based offsets (relative to `searchSeq`) that differ, and
 *   `numMismatches` equals `mismatchPositions.length`.
 */
export default function findApproxMatches(
  searchSeq: string,
  targetSeq: string,
  maxMismatches: number,
  circular?: boolean
): Array<{
  index: number;
  match: string;
  mismatchPositions: number[];
  numMismatches: number;
}>;
|
|
package/findApproxMatches.test.d.ts
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/index.cjs
CHANGED
|
@@ -2736,30 +2736,31 @@ function adjustRangeToDeletionOfAnotherRange(rangeToBeAdjusted, anotherRange, ma
|
|
|
2736
2736
|
anotherRange,
|
|
2737
2737
|
maxLength
|
|
2738
2738
|
);
|
|
2739
|
-
if (trimmedRange) {
|
|
2740
|
-
|
|
2741
|
-
anotherRange,
|
|
2742
|
-
maxLength
|
|
2743
|
-
);
|
|
2744
|
-
nonCircularDeletionRanges.forEach(function(nonCircularDeletionRange) {
|
|
2745
|
-
const deletionLength = nonCircularDeletionRange.end - nonCircularDeletionRange.start + 1;
|
|
2746
|
-
if (trimmedRange.start > trimmedRange.end) {
|
|
2747
|
-
if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2748
|
-
trimmedRange.start -= deletionLength;
|
|
2749
|
-
trimmedRange.end -= deletionLength;
|
|
2750
|
-
} else if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2751
|
-
trimmedRange.start -= deletionLength;
|
|
2752
|
-
} else ;
|
|
2753
|
-
} else {
|
|
2754
|
-
if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2755
|
-
trimmedRange.start -= deletionLength;
|
|
2756
|
-
trimmedRange.end -= deletionLength;
|
|
2757
|
-
} else if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2758
|
-
trimmedRange.end -= deletionLength;
|
|
2759
|
-
} else ;
|
|
2760
|
-
}
|
|
2761
|
-
});
|
|
2739
|
+
if (!trimmedRange) {
|
|
2740
|
+
return null;
|
|
2762
2741
|
}
|
|
2742
|
+
const nonCircularDeletionRanges = splitRangeIntoTwoPartsIfItIsCircular(
|
|
2743
|
+
anotherRange,
|
|
2744
|
+
maxLength
|
|
2745
|
+
);
|
|
2746
|
+
nonCircularDeletionRanges.forEach(function(nonCircularDeletionRange) {
|
|
2747
|
+
const deletionLength = nonCircularDeletionRange.end - nonCircularDeletionRange.start + 1;
|
|
2748
|
+
if (trimmedRange.start > trimmedRange.end) {
|
|
2749
|
+
if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2750
|
+
trimmedRange.start -= deletionLength;
|
|
2751
|
+
trimmedRange.end -= deletionLength;
|
|
2752
|
+
} else if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2753
|
+
trimmedRange.start -= deletionLength;
|
|
2754
|
+
} else ;
|
|
2755
|
+
} else {
|
|
2756
|
+
if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2757
|
+
trimmedRange.start -= deletionLength;
|
|
2758
|
+
trimmedRange.end -= deletionLength;
|
|
2759
|
+
} else if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2760
|
+
trimmedRange.end -= deletionLength;
|
|
2761
|
+
} else ;
|
|
2762
|
+
}
|
|
2763
|
+
});
|
|
2763
2764
|
return trimmedRange;
|
|
2764
2765
|
}
|
|
2765
2766
|
__name(adjustRangeToDeletionOfAnotherRange, "adjustRangeToDeletionOfAnotherRange");
|
|
@@ -9514,6 +9515,36 @@ function insertGapsIntoRefSeq(refSeq, seqReads) {
|
|
|
9514
9515
|
return refSeqWithGaps.join("");
|
|
9515
9516
|
}
|
|
9516
9517
|
__name(insertGapsIntoRefSeq, "insertGapsIntoRefSeq");
|
|
9518
|
+
/**
 * Find approximate matches of a search sequence within a target sequence.
 *
 * Every window of `searchSeq.length` characters is compared with `searchSeq`
 * using strict character equality; a window is reported when it differs in at
 * most `maxMismatches` positions.
 *
 * @param {string} searchSeq - The sequence to search for
 * @param {string} targetSeq - The sequence to search within
 * @param {number} maxMismatches - Maximum number of mismatches allowed
 * @param {boolean} circular - Whether to treat the target sequence as circular (default: false)
 * @returns {Array<{index: number, match: string, mismatchPositions: number[], numMismatches: number}>}
 *   One entry per accepted window, ordered by ascending `index`.
 */
function findApproxMatches(searchSeq, targetSeq, maxMismatches, circular = false) {
  const matches = [];
  const lenA = searchSeq.length;
  const lenB = targetSeq.length;
  // Number of characters a window starting at the last target position needs
  // from the wrapped-around start. Repeating the target keeps windows full
  // length even when the search sequence is longer than the target itself.
  const overhang = circular && lenB > 0 ? Math.max(lenA - 1, 0) : 0;
  const targetSeqExtended = overhang > 0
    ? targetSeq + targetSeq.repeat(Math.ceil(overhang / lenB)).slice(0, overhang)
    : targetSeq;
  // Circular: every position is a valid start. Linear: only full windows.
  const limit = circular ? lenB : lenB - lenA + 1;
  for (let i = 0; i < limit; i++) {
    const window2 = targetSeqExtended.slice(i, i + lenA);
    const mismatchPositions = [];
    for (let j = 0; j < lenA; j++) {
      if (searchSeq[j] !== window2[j]) {
        mismatchPositions.push(j);
        // Stop scanning as soon as the window can no longer qualify.
        if (mismatchPositions.length > maxMismatches) break;
      }
    }
    if (mismatchPositions.length <= maxMismatches) {
      matches.push({
        index: i,
        match: window2,
        mismatchPositions,
        // Convenience count; always equals mismatchPositions.length.
        numMismatches: mismatchPositions.length
      });
    }
  }
  return matches;
}
|
|
9547
|
+
__name(findApproxMatches, "findApproxMatches");
|
|
9517
9548
|
var spliceString$1;
|
|
9518
9549
|
var hasRequiredSpliceString;
|
|
9519
9550
|
function requireSpliceString() {
|
|
@@ -19322,6 +19353,7 @@ exports.doesEnzymeChopOutsideOfRecognitionSite = doesEnzymeChopOutsideOfRecognit
|
|
|
19322
19353
|
exports.featureColors = featureColors;
|
|
19323
19354
|
exports.filterRnaString = filterRnaString;
|
|
19324
19355
|
exports.filterSequenceString = filterSequenceString;
|
|
19356
|
+
exports.findApproxMatches = findApproxMatches;
|
|
19325
19357
|
exports.findNearestRangeOfSequenceOverlapToPosition = findNearestRangeOfSequenceOverlapToPosition;
|
|
19326
19358
|
exports.findOrfsInPlasmid = findOrfsInPlasmid;
|
|
19327
19359
|
exports.findSequenceMatches = findSequenceMatches;
|
package/index.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ export { default as getDegenerateRnaStringFromAAString } from './getDegenerateRn
|
|
|
11
11
|
export { default as getVirtualDigest } from './getVirtualDigest';
|
|
12
12
|
export { default as isEnzymeType2S } from './isEnzymeType2S';
|
|
13
13
|
export { default as insertGapsIntoRefSeq } from './insertGapsIntoRefSeq';
|
|
14
|
+
export { default as findApproxMatches } from './findApproxMatches';
|
|
14
15
|
export { default as adjustBpsToReplaceOrInsert } from './adjustBpsToReplaceOrInsert';
|
|
15
16
|
export { default as calculatePercentGC } from './calculatePercentGC';
|
|
16
17
|
export { default as calculateTm } from './calculateTm';
|
package/index.js
CHANGED
|
@@ -2734,30 +2734,31 @@ function adjustRangeToDeletionOfAnotherRange(rangeToBeAdjusted, anotherRange, ma
|
|
|
2734
2734
|
anotherRange,
|
|
2735
2735
|
maxLength
|
|
2736
2736
|
);
|
|
2737
|
-
if (trimmedRange) {
|
|
2738
|
-
|
|
2739
|
-
anotherRange,
|
|
2740
|
-
maxLength
|
|
2741
|
-
);
|
|
2742
|
-
nonCircularDeletionRanges.forEach(function(nonCircularDeletionRange) {
|
|
2743
|
-
const deletionLength = nonCircularDeletionRange.end - nonCircularDeletionRange.start + 1;
|
|
2744
|
-
if (trimmedRange.start > trimmedRange.end) {
|
|
2745
|
-
if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2746
|
-
trimmedRange.start -= deletionLength;
|
|
2747
|
-
trimmedRange.end -= deletionLength;
|
|
2748
|
-
} else if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2749
|
-
trimmedRange.start -= deletionLength;
|
|
2750
|
-
} else ;
|
|
2751
|
-
} else {
|
|
2752
|
-
if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2753
|
-
trimmedRange.start -= deletionLength;
|
|
2754
|
-
trimmedRange.end -= deletionLength;
|
|
2755
|
-
} else if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2756
|
-
trimmedRange.end -= deletionLength;
|
|
2757
|
-
} else ;
|
|
2758
|
-
}
|
|
2759
|
-
});
|
|
2737
|
+
if (!trimmedRange) {
|
|
2738
|
+
return null;
|
|
2760
2739
|
}
|
|
2740
|
+
const nonCircularDeletionRanges = splitRangeIntoTwoPartsIfItIsCircular(
|
|
2741
|
+
anotherRange,
|
|
2742
|
+
maxLength
|
|
2743
|
+
);
|
|
2744
|
+
nonCircularDeletionRanges.forEach(function(nonCircularDeletionRange) {
|
|
2745
|
+
const deletionLength = nonCircularDeletionRange.end - nonCircularDeletionRange.start + 1;
|
|
2746
|
+
if (trimmedRange.start > trimmedRange.end) {
|
|
2747
|
+
if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2748
|
+
trimmedRange.start -= deletionLength;
|
|
2749
|
+
trimmedRange.end -= deletionLength;
|
|
2750
|
+
} else if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2751
|
+
trimmedRange.start -= deletionLength;
|
|
2752
|
+
} else ;
|
|
2753
|
+
} else {
|
|
2754
|
+
if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2755
|
+
trimmedRange.start -= deletionLength;
|
|
2756
|
+
trimmedRange.end -= deletionLength;
|
|
2757
|
+
} else if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2758
|
+
trimmedRange.end -= deletionLength;
|
|
2759
|
+
} else ;
|
|
2760
|
+
}
|
|
2761
|
+
});
|
|
2761
2762
|
return trimmedRange;
|
|
2762
2763
|
}
|
|
2763
2764
|
__name(adjustRangeToDeletionOfAnotherRange, "adjustRangeToDeletionOfAnotherRange");
|
|
@@ -9512,6 +9513,36 @@ function insertGapsIntoRefSeq(refSeq, seqReads) {
|
|
|
9512
9513
|
return refSeqWithGaps.join("");
|
|
9513
9514
|
}
|
|
9514
9515
|
__name(insertGapsIntoRefSeq, "insertGapsIntoRefSeq");
|
|
9516
|
+
/**
 * Find approximate matches of a search sequence within a target sequence.
 *
 * Every window of `searchSeq.length` characters is compared with `searchSeq`
 * using strict character equality; a window is reported when it differs in at
 * most `maxMismatches` positions.
 *
 * @param {string} searchSeq - The sequence to search for
 * @param {string} targetSeq - The sequence to search within
 * @param {number} maxMismatches - Maximum number of mismatches allowed
 * @param {boolean} circular - Whether to treat the target sequence as circular (default: false)
 * @returns {Array<{index: number, match: string, mismatchPositions: number[], numMismatches: number}>}
 *   One entry per accepted window, ordered by ascending `index`.
 */
function findApproxMatches(searchSeq, targetSeq, maxMismatches, circular = false) {
  const matches = [];
  const lenA = searchSeq.length;
  const lenB = targetSeq.length;
  // Number of characters a window starting at the last target position needs
  // from the wrapped-around start. Repeating the target keeps windows full
  // length even when the search sequence is longer than the target itself.
  const overhang = circular && lenB > 0 ? Math.max(lenA - 1, 0) : 0;
  const targetSeqExtended = overhang > 0
    ? targetSeq + targetSeq.repeat(Math.ceil(overhang / lenB)).slice(0, overhang)
    : targetSeq;
  // Circular: every position is a valid start. Linear: only full windows.
  const limit = circular ? lenB : lenB - lenA + 1;
  for (let i = 0; i < limit; i++) {
    const window2 = targetSeqExtended.slice(i, i + lenA);
    const mismatchPositions = [];
    for (let j = 0; j < lenA; j++) {
      if (searchSeq[j] !== window2[j]) {
        mismatchPositions.push(j);
        // Stop scanning as soon as the window can no longer qualify.
        if (mismatchPositions.length > maxMismatches) break;
      }
    }
    if (mismatchPositions.length <= maxMismatches) {
      matches.push({
        index: i,
        match: window2,
        mismatchPositions,
        // Convenience count; always equals mismatchPositions.length.
        numMismatches: mismatchPositions.length
      });
    }
  }
  return matches;
}
|
|
9545
|
+
__name(findApproxMatches, "findApproxMatches");
|
|
9515
9546
|
var spliceString$1;
|
|
9516
9547
|
var hasRequiredSpliceString;
|
|
9517
9548
|
function requireSpliceString() {
|
|
@@ -19321,6 +19352,7 @@ export {
|
|
|
19321
19352
|
featureColors,
|
|
19322
19353
|
filterRnaString,
|
|
19323
19354
|
filterSequenceString,
|
|
19355
|
+
findApproxMatches,
|
|
19324
19356
|
findNearestRangeOfSequenceOverlapToPosition,
|
|
19325
19357
|
findOrfsInPlasmid,
|
|
19326
19358
|
findSequenceMatches,
|
package/index.umd.cjs
CHANGED
|
@@ -2738,30 +2738,31 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
2738
2738
|
anotherRange,
|
|
2739
2739
|
maxLength
|
|
2740
2740
|
);
|
|
2741
|
-
if (trimmedRange) {
|
|
2742
|
-
|
|
2743
|
-
anotherRange,
|
|
2744
|
-
maxLength
|
|
2745
|
-
);
|
|
2746
|
-
nonCircularDeletionRanges.forEach(function(nonCircularDeletionRange) {
|
|
2747
|
-
const deletionLength = nonCircularDeletionRange.end - nonCircularDeletionRange.start + 1;
|
|
2748
|
-
if (trimmedRange.start > trimmedRange.end) {
|
|
2749
|
-
if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2750
|
-
trimmedRange.start -= deletionLength;
|
|
2751
|
-
trimmedRange.end -= deletionLength;
|
|
2752
|
-
} else if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2753
|
-
trimmedRange.start -= deletionLength;
|
|
2754
|
-
} else ;
|
|
2755
|
-
} else {
|
|
2756
|
-
if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2757
|
-
trimmedRange.start -= deletionLength;
|
|
2758
|
-
trimmedRange.end -= deletionLength;
|
|
2759
|
-
} else if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2760
|
-
trimmedRange.end -= deletionLength;
|
|
2761
|
-
} else ;
|
|
2762
|
-
}
|
|
2763
|
-
});
|
|
2741
|
+
if (!trimmedRange) {
|
|
2742
|
+
return null;
|
|
2764
2743
|
}
|
|
2744
|
+
const nonCircularDeletionRanges = splitRangeIntoTwoPartsIfItIsCircular(
|
|
2745
|
+
anotherRange,
|
|
2746
|
+
maxLength
|
|
2747
|
+
);
|
|
2748
|
+
nonCircularDeletionRanges.forEach(function(nonCircularDeletionRange) {
|
|
2749
|
+
const deletionLength = nonCircularDeletionRange.end - nonCircularDeletionRange.start + 1;
|
|
2750
|
+
if (trimmedRange.start > trimmedRange.end) {
|
|
2751
|
+
if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2752
|
+
trimmedRange.start -= deletionLength;
|
|
2753
|
+
trimmedRange.end -= deletionLength;
|
|
2754
|
+
} else if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2755
|
+
trimmedRange.start -= deletionLength;
|
|
2756
|
+
} else ;
|
|
2757
|
+
} else {
|
|
2758
|
+
if (nonCircularDeletionRange.start < trimmedRange.start) {
|
|
2759
|
+
trimmedRange.start -= deletionLength;
|
|
2760
|
+
trimmedRange.end -= deletionLength;
|
|
2761
|
+
} else if (nonCircularDeletionRange.start < trimmedRange.end) {
|
|
2762
|
+
trimmedRange.end -= deletionLength;
|
|
2763
|
+
} else ;
|
|
2764
|
+
}
|
|
2765
|
+
});
|
|
2765
2766
|
return trimmedRange;
|
|
2766
2767
|
}
|
|
2767
2768
|
__name(adjustRangeToDeletionOfAnotherRange, "adjustRangeToDeletionOfAnotherRange");
|
|
@@ -9516,6 +9517,36 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
9516
9517
|
return refSeqWithGaps.join("");
|
|
9517
9518
|
}
|
|
9518
9519
|
__name(insertGapsIntoRefSeq, "insertGapsIntoRefSeq");
|
|
9520
|
+
/**
 * Find approximate matches of a search sequence within a target sequence.
 *
 * Every window of `searchSeq.length` characters is compared with `searchSeq`
 * using strict character equality; a window is reported when it differs in at
 * most `maxMismatches` positions.
 *
 * @param {string} searchSeq - The sequence to search for
 * @param {string} targetSeq - The sequence to search within
 * @param {number} maxMismatches - Maximum number of mismatches allowed
 * @param {boolean} circular - Whether to treat the target sequence as circular (default: false)
 * @returns {Array<{index: number, match: string, mismatchPositions: number[], numMismatches: number}>}
 *   One entry per accepted window, ordered by ascending `index`.
 */
function findApproxMatches(searchSeq, targetSeq, maxMismatches, circular = false) {
  const matches = [];
  const lenA = searchSeq.length;
  const lenB = targetSeq.length;
  // Number of characters a window starting at the last target position needs
  // from the wrapped-around start. Repeating the target keeps windows full
  // length even when the search sequence is longer than the target itself.
  const overhang = circular && lenB > 0 ? Math.max(lenA - 1, 0) : 0;
  const targetSeqExtended = overhang > 0
    ? targetSeq + targetSeq.repeat(Math.ceil(overhang / lenB)).slice(0, overhang)
    : targetSeq;
  // Circular: every position is a valid start. Linear: only full windows.
  const limit = circular ? lenB : lenB - lenA + 1;
  for (let i = 0; i < limit; i++) {
    const window2 = targetSeqExtended.slice(i, i + lenA);
    const mismatchPositions = [];
    for (let j = 0; j < lenA; j++) {
      if (searchSeq[j] !== window2[j]) {
        mismatchPositions.push(j);
        // Stop scanning as soon as the window can no longer qualify.
        if (mismatchPositions.length > maxMismatches) break;
      }
    }
    if (mismatchPositions.length <= maxMismatches) {
      matches.push({
        index: i,
        match: window2,
        mismatchPositions,
        // Convenience count; always equals mismatchPositions.length.
        numMismatches: mismatchPositions.length
      });
    }
  }
  return matches;
}
|
|
9549
|
+
__name(findApproxMatches, "findApproxMatches");
|
|
9519
9550
|
var spliceString$1;
|
|
9520
9551
|
var hasRequiredSpliceString;
|
|
9521
9552
|
function requireSpliceString() {
|
|
@@ -19324,6 +19355,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
19324
19355
|
exports2.featureColors = featureColors;
|
|
19325
19356
|
exports2.filterRnaString = filterRnaString;
|
|
19326
19357
|
exports2.filterSequenceString = filterSequenceString;
|
|
19358
|
+
exports2.findApproxMatches = findApproxMatches;
|
|
19327
19359
|
exports2.findNearestRangeOfSequenceOverlapToPosition = findNearestRangeOfSequenceOverlapToPosition;
|
|
19328
19360
|
exports2.findOrfsInPlasmid = findOrfsInPlasmid;
|
|
19329
19361
|
exports2.findSequenceMatches = findSequenceMatches;
|
package/package.json
CHANGED
|
package/src/findApproxMatches.js
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
 * Find approximate matches of a search sequence within a target sequence.
 *
 * Every window of `searchSeq.length` characters is compared with `searchSeq`
 * using strict character equality; a window is reported when it differs in at
 * most `maxMismatches` positions.
 *
 * @param {string} searchSeq - The sequence to search for
 * @param {string} targetSeq - The sequence to search within
 * @param {number} maxMismatches - Maximum number of mismatches allowed
 * @param {boolean} circular - Whether to treat the target sequence as circular (default: false)
 * @returns {Array<{index: number, match: string, mismatchPositions: number[], numMismatches: number}>}
 *   One entry per accepted window, ordered by ascending `index`.
 *   `mismatchPositions` holds 0-based offsets relative to `searchSeq`, and
 *   `numMismatches` always equals `mismatchPositions.length`.
 */
export default function findApproxMatches(
  searchSeq,
  targetSeq,
  maxMismatches,
  circular = false
) {
  const matches = [];
  const lenA = searchSeq.length;
  const lenB = targetSeq.length;

  // Extend targetSeq to simulate circularity. `overhang` is how many
  // characters a window starting at the last position borrows from the start;
  // repeating the target keeps every window full length even when the search
  // sequence is longer than the target itself.
  const overhang = circular && lenB > 0 ? Math.max(lenA - 1, 0) : 0;
  const targetSeqExtended =
    overhang > 0
      ? targetSeq +
        targetSeq.repeat(Math.ceil(overhang / lenB)).slice(0, overhang)
      : targetSeq;
  // Circular: every position is a valid start. Linear: only full windows.
  const limit = circular ? lenB : lenB - lenA + 1;

  for (let i = 0; i < limit; i++) {
    const candidate = targetSeqExtended.slice(i, i + lenA);
    const mismatchPositions = [];

    for (let j = 0; j < lenA; j++) {
      if (searchSeq[j] !== candidate[j]) {
        mismatchPositions.push(j);
        // Stop scanning as soon as the window can no longer qualify.
        if (mismatchPositions.length > maxMismatches) break;
      }
    }

    if (mismatchPositions.length <= maxMismatches) {
      matches.push({
        index: i,
        match: candidate,
        mismatchPositions,
        // Convenience count; always equals mismatchPositions.length.
        numMismatches: mismatchPositions.length
      });
    }
  }

  return matches;
}
|
|
package/src/findApproxMatches.test.js
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import findApproxMatches from "./findApproxMatches";
|
|
2
|
+
|
|
3
|
+
// Jest suite for findApproxMatches: exact and approximate matching, the
// mismatch threshold, circular wrap-around, and degenerate inputs.
describe("findApproxMatches", () => {
  it("returns exact matches with maxMismatches=0", () => {
    // Test with DNA sequence
    expect(findApproxMatches("ATG", "GATGC", 0)).toEqual([
      { index: 1, match: "ATG", mismatchPositions: [], numMismatches: 0 }
    ]);

    // Test with no matches
    expect(findApproxMatches("ATG", "GCCTA", 0)).toEqual([]);
  });

  it("finds matches with allowed mismatches", () => {
    // One mismatch allowed, one actual mismatch
    expect(findApproxMatches("ATG", "ACTG", 1)).toEqual([
      { index: 1, match: "CTG", mismatchPositions: [0], numMismatches: 1 }
    ]);

    // Two mismatches allowed, two actual mismatches
    expect(findApproxMatches("ATGC", "ACGA", 2)).toEqual([
      { index: 0, match: "ACGA", mismatchPositions: [1, 3], numMismatches: 2 }
    ]);

    // Multiple matches with mismatches
    expect(findApproxMatches("AGT", "AGTCAATAGTAAGTG", 1)).toEqual([
      { index: 0, match: "AGT", mismatchPositions: [], numMismatches: 0 },
      { index: 4, match: "AAT", mismatchPositions: [1], numMismatches: 1 },
      { index: 7, match: "AGT", mismatchPositions: [], numMismatches: 0 },
      { index: 11, match: "AGT", mismatchPositions: [], numMismatches: 0 }
    ]);
  });

  it("respects the maximum mismatch threshold", () => {
    // Three mismatches are too many when max is 2
    expect(findApproxMatches("ATGC", "ACAA", 2)).toEqual([]);

    // Three mismatches are allowed when max is 3
    expect(findApproxMatches("ATGC", "ACAA", 3)).toEqual([
      {
        index: 0,
        match: "ACAA",
        mismatchPositions: [1, 2, 3],
        numMismatches: 3
      }
    ]);
  });

  it("handles circular sequences correctly", () => {
    // Non-circular sequence
    expect(findApproxMatches("ATG", "TGA", 0, false)).toEqual([]);

    // Circular sequence - match wraps around the end
    expect(findApproxMatches("ATG", "TGA", 0, true)).toEqual([
      { index: 2, match: "ATG", mismatchPositions: [], numMismatches: 0 }
    ]);

    // Circular sequence with mismatches
    expect(findApproxMatches("ATG", "TGC", 1, true)).toEqual([
      { index: 2, match: "CTG", mismatchPositions: [0], numMismatches: 1 }
    ]);
  });

  it("handles edge cases", () => {
    // Empty search sequence - returns matches at every position
    // (including the position just past the last character)
    expect(findApproxMatches("", "ATGC", 0)).toEqual([
      { index: 0, match: "", mismatchPositions: [], numMismatches: 0 },
      { index: 1, match: "", mismatchPositions: [], numMismatches: 0 },
      { index: 2, match: "", mismatchPositions: [], numMismatches: 0 },
      { index: 3, match: "", mismatchPositions: [], numMismatches: 0 },
      { index: 4, match: "", mismatchPositions: [], numMismatches: 0 }
    ]);

    // Empty target sequence
    expect(findApproxMatches("ATG", "", 0)).toEqual([]);

    // Search sequence longer than target
    expect(findApproxMatches("ATGCG", "ATGC", 0)).toEqual([]);

    // Exactly matching length sequences
    expect(findApproxMatches("ATGC", "ATGC", 0)).toEqual([
      { index: 0, match: "ATGC", mismatchPositions: [], numMismatches: 0 }
    ]);

    // Sequences with special characters
    expect(findApproxMatches("AT-G", "AT-GC", 0)).toEqual([
      { index: 0, match: "AT-G", mismatchPositions: [], numMismatches: 0 }
    ]);
  });

  it("handles larger sequences efficiently", () => {
    const longTarget = "ATGCGATCGATCGATCGATCGATCGATCGATCG";
    const longSearch = "ATCGATCG";

    // The actual positions where the pattern appears in the sequence
    // (overlapping occurrences, one every 4 bases)
    const expected = [
      { index: 5, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
      { index: 9, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
      { index: 13, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
      { index: 17, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
      { index: 21, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
      { index: 25, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 }
    ];

    // Compare the raw result: filtering by match length here would hide a
    // regression that returned truncated windows.
    const actual = findApproxMatches(longSearch, longTarget, 0);

    expect(actual).toEqual(expected);
  });

  it("tracks exact positions of mismatches", () => {
    // Test specific positions of mismatches
    const result = findApproxMatches("ATGCTA", "ATCCAA", 2);

    expect(result).toEqual([
      {
        index: 0,
        match: "ATCCAA",
        mismatchPositions: [2, 4],
        numMismatches: 2
      }
    ]);
  });
});
|
package/src/index.js
CHANGED
|
@@ -40,6 +40,7 @@ export { default as getDegenerateRnaStringFromAAString } from "./getDegenerateRn
|
|
|
40
40
|
export { default as getVirtualDigest } from "./getVirtualDigest";
|
|
41
41
|
export { default as isEnzymeType2S } from "./isEnzymeType2S";
|
|
42
42
|
export { default as insertGapsIntoRefSeq } from "./insertGapsIntoRefSeq";
|
|
43
|
+
export { default as findApproxMatches } from "./findApproxMatches";
|
|
43
44
|
export { default as adjustBpsToReplaceOrInsert } from "./adjustBpsToReplaceOrInsert";
|
|
44
45
|
export { default as calculatePercentGC } from "./calculatePercentGC";
|
|
45
46
|
export { default as calculateTm } from "./calculateTm";
|