@teselagen/sequence-utils 0.3.31 → 0.3.32-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,126 @@
1
+ import findApproxMatches from "./findApproxMatches";
2
+
3
+ describe("findApproxMatches", () => {
4
+ it("returns exact matches with maxMismatches=0", () => {
5
+ // Exact match: ATG occurs once in GATGC, starting at 0-based index 1
6
+ expect(findApproxMatches("ATG", "GATGC", 0)).toEqual([
7
+ { index: 1, match: "ATG", mismatchPositions: [], numMismatches: 0 }
8
+ ]);
9
+
10
+ // No exact occurrence of ATG in GCCTA, so the result is an empty array
11
+ expect(findApproxMatches("ATG", "GCCTA", 0)).toEqual([]);
12
+ });
13
+
14
+ it("finds matches with allowed mismatches", () => {
15
+ // One mismatch allowed: the window CTG at index 1 differs from ATG only at position 0
16
+ expect(findApproxMatches("ATG", "ACTG", 1)).toEqual([
17
+ { index: 1, match: "CTG", mismatchPositions: [0], numMismatches: 1 }
18
+ ]);
19
+
20
+ // Two mismatches allowed: ACGA differs from ATGC at positions 1 and 3
21
+ expect(findApproxMatches("ATGC", "ACGA", 2)).toEqual([
22
+ { index: 0, match: "ACGA", mismatchPositions: [1, 3], numMismatches: 2 }
23
+ ]);
24
+
25
+ // Several hits in one target: three exact matches plus one single-mismatch hit, expected in ascending index order
26
+ expect(findApproxMatches("AGT", "AGTCAATAGTAAGTG", 1)).toEqual([
27
+ { index: 0, match: "AGT", mismatchPositions: [], numMismatches: 0 },
28
+ { index: 4, match: "AAT", mismatchPositions: [1], numMismatches: 1 },
29
+ { index: 7, match: "AGT", mismatchPositions: [], numMismatches: 0 },
30
+ { index: 11, match: "AGT", mismatchPositions: [], numMismatches: 0 }
31
+ ]);
32
+ });
33
+
34
+ it("respects the maximum mismatch threshold", () => {
35
+ // ACAA differs from ATGC at three positions, which exceeds a maximum of 2
36
+ expect(findApproxMatches("ATGC", "ACAA", 2)).toEqual([]);
37
+
38
+ // With the maximum raised to 3 the same window is reported, listing all three mismatch positions
39
+ expect(findApproxMatches("ATGC", "ACAA", 3)).toEqual([
40
+ {
41
+ index: 0,
42
+ match: "ACAA",
43
+ mismatchPositions: [1, 2, 3],
44
+ numMismatches: 3
45
+ }
46
+ ]);
47
+ });
48
+
49
+ it("handles circular sequences correctly", () => {
50
+ // Fourth argument false (non-circular): ATG has no exact occurrence in TGA
51
+ expect(findApproxMatches("ATG", "TGA", 0, false)).toEqual([]);
52
+
53
+ // Fourth argument true (circular): the match starts at index 2 and wraps around to the start
54
+ expect(findApproxMatches("ATG", "TGA", 0, true)).toEqual([
55
+ { index: 2, match: "ATG", mismatchPositions: [], numMismatches: 0 }
56
+ ]);
57
+
58
+ // Circular with one mismatch allowed: the wrapped window CTG starts at index 2, mismatch at position 0
59
+ expect(findApproxMatches("ATG", "TGC", 1, true)).toEqual([
60
+ { index: 2, match: "CTG", mismatchPositions: [0], numMismatches: 1 }
61
+ ]);
62
+ });
63
+
64
+ it("handles edge cases", () => {
65
+ // Empty search sequence - expected to match at every index from 0 through the target length (4) inclusive
66
+ expect(findApproxMatches("", "ATGC", 0)).toEqual([
67
+ { index: 0, match: "", mismatchPositions: [], numMismatches: 0 },
68
+ { index: 1, match: "", mismatchPositions: [], numMismatches: 0 },
69
+ { index: 2, match: "", mismatchPositions: [], numMismatches: 0 },
70
+ { index: 3, match: "", mismatchPositions: [], numMismatches: 0 },
71
+ { index: 4, match: "", mismatchPositions: [], numMismatches: 0 }
72
+ ]);
73
+
74
+ // Empty target sequence - expected to produce no matches
75
+ expect(findApproxMatches("ATG", "", 0)).toEqual([]);
76
+
77
+ // Search sequence longer than target (fourth argument omitted) - expected to produce no matches
78
+ expect(findApproxMatches("ATGCG", "ATGC", 0)).toEqual([]);
79
+
80
+ // Search and target of equal length - a single match at index 0
81
+ expect(findApproxMatches("ATGC", "ATGC", 0)).toEqual([
82
+ { index: 0, match: "ATGC", mismatchPositions: [], numMismatches: 0 }
83
+ ]);
84
+
85
+ // Non-base characters such as a dash are expected to be matched like any other character
86
+ expect(findApproxMatches("AT-G", "AT-GC", 0)).toEqual([
87
+ { index: 0, match: "AT-G", mismatchPositions: [], numMismatches: 0 }
88
+ ]);
89
+ });
90
+
91
+ it("handles larger sequences efficiently", () => {
92
+ const longTarget = "ATGCGATCGATCGATCGATCGATCGATCGATCG";
93
+ const longSearch = "ATCGATCG";
94
+
95
+ // Every index at which longSearch occurs in longTarget; the occurrences overlap, one every 4 bases from index 5
96
+ const expected = [
97
+ { index: 5, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
98
+ { index: 9, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
99
+ { index: 13, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
100
+ { index: 17, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
101
+ { index: 21, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 },
102
+ { index: 25, match: "ATCGATCG", mismatchPositions: [], numMismatches: 0 }
103
+ ];
104
+
105
+ // Keep only results whose match has the full search length; note that no timing is asserted in this test
106
+ const actual = findApproxMatches(longSearch, longTarget, 0).filter(
107
+ m => m.match.length === longSearch.length
108
+ );
109
+
110
+ expect(actual).toEqual(expected);
111
+ });
112
+
113
+ it("tracks exact positions of mismatches", () => {
114
+ // ATCCAA differs from ATGCTA at 0-based positions 2 and 4, exactly the allowed maximum of 2
115
+ const result = findApproxMatches("ATGCTA", "ATCCAA", 2);
116
+
117
+ expect(result).toEqual([
118
+ {
119
+ index: 0,
120
+ match: "ATCCAA",
121
+ mismatchPositions: [2, 4],
122
+ numMismatches: 2
123
+ }
124
+ ]);
125
+ });
126
+ });
@@ -1,5 +1,5 @@
1
1
  import { generateRandomRange } from "@teselagen/range-utils";
2
- import shortid from "shortid";
2
+ import { nanoid } from "nanoid";
3
3
 
4
4
  function generateAnnotations(
5
5
  numberOfAnnotationsToGenerate,
@@ -21,7 +21,7 @@ function generateAnnotation(start, end, maxLength) {
21
21
  ...range,
22
22
  name: getRandomInt(0, 100000).toString(),
23
23
  type: "misc_feature",
24
- id: shortid(),
24
+ id: nanoid(),
25
25
  forward: Math.random() > 0.5,
26
26
  notes: {}
27
27
  };
@@ -1,4 +1,4 @@
1
- import shortid from "shortid";
1
+ import { nanoid } from "nanoid";
2
2
  import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
3
3
 
4
4
  /**
@@ -61,7 +61,7 @@ export default function getOrfsFromSequence(options) {
61
61
  forward: forward,
62
62
  annotationTypePlural: "orfs",
63
63
  isOrf: true,
64
- id: shortid()
64
+ id: nanoid()
65
65
  });
66
66
  }
67
67
  }
package/src/index.js CHANGED
@@ -40,6 +40,7 @@ export { default as getDegenerateRnaStringFromAAString } from "./getDegenerateRn
40
40
  export { default as getVirtualDigest } from "./getVirtualDigest";
41
41
  export { default as isEnzymeType2S } from "./isEnzymeType2S";
42
42
  export { default as insertGapsIntoRefSeq } from "./insertGapsIntoRefSeq";
43
+ export { default as findApproxMatches } from "./findApproxMatches";
43
44
  export { default as adjustBpsToReplaceOrInsert } from "./adjustBpsToReplaceOrInsert";
44
45
  export { default as calculatePercentGC } from "./calculatePercentGC";
45
46
  export { default as calculateTm } from "./calculateTm";
@@ -1,6 +1,6 @@
1
1
  import { cloneDeep, get, some } from "lodash-es";
2
2
  import { getFeatureToColorMap, getFeatureTypes } from "./featureTypesAndColors";
3
- import shortid from "shortid";
3
+ import { nanoid } from "nanoid";
4
4
 
5
5
  export default function tidyUpAnnotation(
6
6
  _annotation,
@@ -34,10 +34,10 @@ export default function tidyUpAnnotation(
34
34
  annotation.name = "Untitled annotation";
35
35
  }
36
36
  if (provideNewIdsForAnnotations) {
37
- annotation.id = shortid();
37
+ annotation.id = nanoid();
38
38
  }
39
39
  if (!annotation.id && annotation.id !== 0 && !doNotProvideIdsForAnnotations) {
40
- annotation.id = shortid();
40
+ annotation.id = nanoid();
41
41
  messages.push(
42
42
  "Unable to detect valid ID for annotation, setting ID to " + annotation.id
43
43
  );
@@ -1,5 +1,5 @@
1
1
  // tnrtodo: figure out where to insert this validation exactly..
2
- import shortid from "shortid";
2
+ import { nanoid } from "nanoid";
3
3
 
4
4
  import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
5
5
  import { cloneDeep, flatMap } from "lodash-es";
@@ -155,7 +155,7 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
155
155
  if (item.id || item.id === 0) {
156
156
  itemId = item.id;
157
157
  } else {
158
- itemId = shortid();
158
+ itemId = nanoid();
159
159
  if (!doNotProvideIdsForAnnotations) {
160
160
  item.id = itemId; //assign the newly created id to the item
161
161
  }