uilint-duplicates 0.2.122 → 0.2.123

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1670,13 +1670,189 @@ function sortDuplicateGroups(groups) {
1670
1670
  });
1671
1671
  }
1672
1672
 
1673
+ // src/detection/structural-scorer.ts
1674
/**
 * Default weights applied to each structural signal when they are blended
 * into a single score. The four weights add up to 1.
 */
var DEFAULT_STRUCTURAL_WEIGHTS = {
  props: 0.25, // overlap of prop names
  jsx: 0.35, // overlap of JSX element names
  hooks: 0.25, // overlap of hook names
  size: 0.15 // ratio of line counts
};
1680
/**
 * Case-insensitive Jaccard index of two string lists.
 *
 * @param {string[]} a - First list of names.
 * @param {string[]} b - Second list of names.
 * @returns {number} 1 when both lists are empty, 0 when exactly one is empty,
 *   otherwise |A ∩ B| / |A ∪ B| over the lower-cased, de-duplicated entries.
 */
function jaccard(a, b) {
  const bothEmpty = a.length === 0 && b.length === 0;
  if (bothEmpty) return 1;
  if (a.length === 0 || b.length === 0) return 0;

  const left = new Set(a.map((item) => item.toLowerCase()));
  const right = new Set(b.map((item) => item.toLowerCase()));

  let shared = 0;
  for (const item of left) {
    if (right.has(item)) shared += 1;
  }
  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const unionSize = left.size + right.size - shared;
  return shared / unionSize;
}
1689
/**
 * Ratio of the smaller line count to the larger one.
 *
 * @param {number} linesA - Line count of the first chunk.
 * @param {number} linesB - Line count of the second chunk.
 * @returns {number} 1 when both counts are 0, 0 when exactly one is 0,
 *   otherwise smaller / larger.
 */
function calculateSizeRatio2(linesA, linesB) {
  if (linesA === 0) return linesB === 0 ? 1 : 0;
  if (linesB === 0) return 0;
  return linesA < linesB ? linesA / linesB : linesB / linesA;
}
1696
/**
 * Pull the structural features used for comparison out of a stored chunk record.
 *
 * @param {object} storedMeta - Stored record; `metadata.props`,
 *   `metadata.jsxElements`, `metadata.hooks`, `startLine` and `endLine` are
 *   read, and each may be absent.
 * @returns {{props: string[], jsxElements: string[], hooks: string[], lines: number}}
 *   Feature lists (empty when missing) and the inclusive line span.
 */
function extractFeatures(storedMeta) {
  const meta = storedMeta.metadata ?? {};
  const span = (storedMeta.endLine ?? 0) - (storedMeta.startLine ?? 0) + 1;
  return {
    props: meta.props ?? [],
    jsxElements: meta.jsxElements ?? [],
    hooks: meta.hooks ?? [],
    // A missing or inconsistent endLine would otherwise yield a negative span,
    // which turns the size ratio (and its weighted contribution) negative.
    lines: Math.max(0, span)
  };
}
1705
/**
 * Compare two stored chunk records on props, JSX elements, hooks and size.
 *
 * @param {object} a - First stored record.
 * @param {object} b - Second stored record.
 * @param {{props: number, jsx: number, hooks: number, size: number}} [weights]
 *   Weight of each signal in the combined score.
 * @returns {{propsOverlap: number, jsxOverlap: number, hooksOverlap: number, sizeRatio: number, combined: number}}
 *   The individual signals and their weighted sum.
 */
function calculateStructuralSimilarity(a, b, weights = DEFAULT_STRUCTURAL_WEIGHTS) {
  const left = extractFeatures(a);
  const right = extractFeatures(b);

  const result = {
    propsOverlap: jaccard(left.props, right.props),
    jsxOverlap: jaccard(left.jsxElements, right.jsxElements),
    hooksOverlap: jaccard(left.hooks, right.hooks),
    sizeRatio: calculateSizeRatio2(left.lines, right.lines)
  };
  result.combined =
    result.propsOverlap * weights.props +
    result.jsxOverlap * weights.jsx +
    result.hooksOverlap * weights.hooks +
    result.sizeRatio * weights.size;
  return result;
}
1721
/**
 * Check whether two records reach a structural similarity threshold.
 *
 * @param {object} a - First stored record.
 * @param {object} b - Second stored record.
 * @param {number} [threshold=0.5] - Minimum combined structural score.
 * @returns {boolean} True when the combined score is at least `threshold`.
 */
function hasHighStructuralSimilarity(a, b, threshold = 0.5) {
  const { combined } = calculateStructuralSimilarity(a, b);
  return combined >= threshold;
}
1725
/**
 * Rank candidates by structural similarity to a target record.
 *
 * @param {object} target - Record to compare against.
 * @param {Iterable<object>} candidates - Records to score.
 * @param {number} [threshold=0.5] - Minimum combined score to keep a candidate.
 * @param {number} [limit=10] - Maximum number of results returned.
 * @returns {{metadata: object, score: object}[]} Matches sorted by combined
 *   score, highest first, truncated to `limit`.
 */
function findStructurallySimilar(target, candidates, threshold = 0.5, limit = 10) {
  const matches = [];
  for (const candidate of candidates) {
    // A candidate with the same file and start line is the target itself.
    const isTarget = candidate.filePath === target.filePath && candidate.startLine === target.startLine;
    if (isTarget) continue;

    const score = calculateStructuralSimilarity(target, candidate);
    if (!(score.combined >= threshold)) continue;
    matches.push({ metadata: candidate, score });
  }
  matches.sort((first, second) => second.score.combined - first.score.combined);
  return matches.slice(0, limit);
}
1739
+
1740
+ // src/detection/confidence.ts
1741
/**
 * Default score cut-offs for the "high", "medium" and "low" confidence levels.
 */
var DEFAULT_CONFIDENCE_CONFIG = {
  highThreshold: 0.9, // scores at or above this are "high"
  mediumThreshold: 0.75, // scores at or above this are "medium"
  lowThreshold: 0.6 // minimum score considered reportable
};
1746
/**
 * Bucket a similarity score into a confidence level.
 *
 * @param {number} score - Similarity score.
 * @param {{highThreshold: number, mediumThreshold: number}} [config] - Cut-offs.
 * @returns {"high"|"medium"|"low"} "low" covers every score below the medium
 *   cut-off; `lowThreshold` is not consulted here.
 */
function getConfidenceLevel(score, config = DEFAULT_CONFIDENCE_CONFIG) {
  return score >= config.highThreshold
    ? "high"
    : score >= config.mediumThreshold
      ? "medium"
      : "low";
}
1751
/**
 * Check whether a score reaches the lowest reportable cut-off.
 *
 * @param {number} score - Similarity score.
 * @param {{lowThreshold: number}} [config] - Cut-offs.
 * @returns {boolean} True when `score` is at least `config.lowThreshold`.
 */
function meetsMinimumThreshold(score, config = DEFAULT_CONFIDENCE_CONFIG) {
  const { lowThreshold } = config;
  return score >= lowThreshold;
}
1754
/**
 * Build a descriptive confidence record for a similarity score.
 *
 * @param {number} score - Similarity score.
 * @param {object} [config] - Cut-offs forwarded to `getConfidenceLevel`.
 * @returns {{level: string, score: number, description: string, action: string, color: string}|undefined}
 *   The level with human-readable guidance, or undefined if the level is not
 *   one of "high", "medium" or "low".
 */
function getConfidenceResult(score, config = DEFAULT_CONFIDENCE_CONFIG) {
  const level = getConfidenceLevel(score, config);
  const guidanceByLevel = new Map([
    ["high", {
      description: "High confidence duplicate - likely copy-paste or near-identical implementation",
      action: "Strongly recommend consolidating into a single reusable component/function",
      color: "red"
    }],
    ["medium", {
      description: "Medium confidence - semantically similar code with different implementation details",
      action: "Review for potential consolidation or shared abstraction",
      color: "yellow"
    }],
    ["low", {
      description: "Low confidence - possibly related patterns or partial structural overlap",
      action: "Optional review - differences may be intentional",
      color: "green"
    }]
  ]);
  const guidance = guidanceByLevel.get(level);
  if (guidance === undefined) return undefined;
  return { level, score, ...guidance };
}
1783
/**
 * Map a confidence level to a coloured-circle emoji.
 *
 * @param {"high"|"medium"|"low"} level - Confidence level.
 * @returns {string|undefined} Red, yellow or green circle; undefined for any
 *   other input.
 */
function getConfidenceEmoji(level) {
  if (level === "high") return "\u{1F534}";
  if (level === "medium") return "\u{1F7E1}";
  if (level === "low") return "\u{1F7E2}";
  return undefined;
}
1793
/**
 * Map a confidence level to an ANSI foreground colour escape sequence.
 *
 * @param {"high"|"medium"|"low"} level - Confidence level.
 * @returns {string|undefined} Escape code for red, yellow or green; undefined
 *   for any other input.
 */
function getConfidenceAnsiColor(level) {
  if (level === "high") return "\x1B[31m"; // red
  if (level === "medium") return "\x1B[33m"; // yellow
  if (level === "low") return "\x1B[32m"; // green
  return undefined;
}
1805
/**
 * Render a confidence result as a one-line summary.
 *
 * @param {{score: number, level: string}} result - Confidence result.
 * @param {boolean} [useEmoji=true] - Prefix the line with the level's emoji.
 * @param {boolean} [useColor=false] - Wrap the text in ANSI colour codes.
 * @returns {string} e.g. "88% similarity (medium confidence)".
 */
function formatConfidence(result, useEmoji = true, useColor = false) {
  const prefix = useEmoji ? `${getConfidenceEmoji(result.level)} ` : "";
  let text = `${Math.round(result.score * 100)}% similarity (${result.level} confidence)`;
  if (useColor) {
    // The emoji stays outside the coloured span; the reset code ends the line.
    text = `${getConfidenceAnsiColor(result.level)}${text}\x1B[0m`;
  }
  return prefix + text;
}
1812
/**
 * Render a confidence result as three lines: summary, description and
 * recommended action.
 *
 * @param {{score: number, level: string, description: string, action: string}} result
 *   Confidence result.
 * @returns {string} The lines joined with "\n".
 */
function formatConfidenceVerbose(result) {
  const headline = formatConfidence(result);
  return `${headline}\n ${result.description}\n \u2192 ${result.action}`;
}
1820
/**
 * Comparator for confidence levels, ordered low < medium < high.
 *
 * @param {"high"|"medium"|"low"} a - First level.
 * @param {"high"|"medium"|"low"} b - Second level.
 * @returns {number} Positive when `a` ranks above `b`, negative when below,
 *   0 when equal.
 */
function compareConfidenceLevels(a, b) {
  const rank = { low: 1, medium: 2, high: 3 };
  return rank[a] - rank[b];
}
1824
/**
 * Keep only results whose score reaches the cut-off of a minimum level.
 *
 * @param {{score: number}[]} results - Scored results.
 * @param {string} minLevel - "high" or "medium"; any other value selects the
 *   low cut-off.
 * @param {{highThreshold: number, mediumThreshold: number, lowThreshold: number}} [config]
 *   Cut-offs.
 * @returns {{score: number}[]} The results at or above the selected cut-off.
 */
function filterByConfidence(results, minLevel, config = DEFAULT_CONFIDENCE_CONFIG) {
  let cutoff;
  switch (minLevel) {
    case "high":
      cutoff = config.highThreshold;
      break;
    case "medium":
      cutoff = config.mediumThreshold;
      break;
    default:
      cutoff = config.lowThreshold;
  }
  return results.filter((entry) => entry.score >= cutoff);
}
1828
+
1673
1829
  // src/detection/duplicate-finder.ts
1830
/**
 * Blend a semantic score with the structural similarity of two records.
 *
 * @param {number} semanticScore - Semantic similarity score.
 * @param {object} metadataA - First stored record.
 * @param {object} metadataB - Second stored record.
 * @returns {{combined: number, structural: number}} The structural score and
 *   the blend of 60% semantic and 40% structural.
 */
function calculateCombinedSimilarity(semanticScore, metadataA, metadataB) {
  const { combined: structural } = calculateStructuralSimilarity(metadataA, metadataB);
  return {
    combined: semanticScore * 0.6 + structural * 0.4,
    structural
  };
}
1836
/**
 * Map a confidence level name to its numeric score cut-off.
 *
 * @param {"high"|"medium"|"low"} level - Confidence level.
 * @returns {number|undefined} 0.9, 0.75 or 0.6; undefined for any other input.
 */
function getConfidenceThreshold(level) {
  if (level === "high") return 0.9;
  if (level === "medium") return 0.75;
  if (level === "low") return 0.6;
  return undefined;
}
1674
1846
  function findDuplicateGroups(vectorStore, metadataStore, options = {}) {
1675
1847
  const {
1676
- threshold = 0.85,
1848
+ threshold = 0.75,
1849
+ // Lowered from 0.85
1677
1850
  minGroupSize = 2,
1678
1851
  kind,
1679
- excludePaths = []
1852
+ excludePaths = [],
1853
+ includeSameFile = true,
1854
+ useStructuralBoost = true,
1855
+ confidenceFilter
1680
1856
  } = options;
1681
1857
  const groups = [];
1682
1858
  const processed = /* @__PURE__ */ new Set();
@@ -1693,7 +1869,8 @@ function findDuplicateGroups(vectorStore, metadataStore, options = {}) {
1693
1869
  if (processed.has(id)) continue;
1694
1870
  const vector = vectorStore.get(id);
1695
1871
  if (!vector) continue;
1696
- const similar = vectorStore.findSimilar(vector, 50, threshold);
1872
+ const searchThreshold = useStructuralBoost ? threshold * 0.8 : threshold;
1873
+ const similar = vectorStore.findSimilar(vector, 50, searchThreshold);
1697
1874
  let candidates = similar.filter((s) => {
1698
1875
  if (s.id === id) return false;
1699
1876
  if (processed.has(s.id)) return false;
@@ -1702,8 +1879,22 @@ function findDuplicateGroups(vectorStore, metadataStore, options = {}) {
1702
1879
  if (kind && candidateMeta.kind !== kind) return false;
1703
1880
  if (excludePaths.some((p) => candidateMeta.filePath.includes(p)))
1704
1881
  return false;
1882
+ if (!includeSameFile && candidateMeta.filePath === metadata.filePath)
1883
+ return false;
1705
1884
  return true;
1706
1885
  });
1886
+ if (useStructuralBoost) {
1887
+ candidates = candidates.map((c) => {
1888
+ const candidateMeta = metadataStore.get(c.id);
1889
+ if (!candidateMeta) return { ...c, combinedScore: c.score };
1890
+ const { combined, structural } = calculateCombinedSimilarity(
1891
+ c.score,
1892
+ metadata,
1893
+ candidateMeta
1894
+ );
1895
+ return { ...c, combinedScore: combined, structuralScore: structural };
1896
+ }).filter((c) => (c.combinedScore ?? c.score) >= threshold).sort((a, b) => (b.combinedScore ?? b.score) - (a.combinedScore ?? a.score));
1897
+ }
1707
1898
  if (!kind && candidates.length > 0) {
1708
1899
  const sameKindCandidates = candidates.filter((c) => {
1709
1900
  const meta = metadataStore.get(c.id);
@@ -1715,31 +1906,50 @@ function findDuplicateGroups(vectorStore, metadataStore, options = {}) {
1715
1906
  }
1716
1907
  if (candidates.length >= minGroupSize - 1) {
1717
1908
  const members = [
1718
- { id, metadata, score: 1 }
1719
- // First member (reference)
1909
+ {
1910
+ id,
1911
+ metadata,
1912
+ score: 1,
1913
+ combinedScore: 1,
1914
+ confidence: "high"
1915
+ }
1720
1916
  ];
1721
1917
  const similarities = [];
1722
1918
  for (const candidate of candidates) {
1723
1919
  const candidateMeta = metadataStore.get(candidate.id);
1724
1920
  if (candidateMeta) {
1921
+ const combinedScore = candidate.combinedScore ?? candidate.score;
1922
+ const structuralScore = candidate.structuralScore;
1923
+ const confidence = getConfidenceLevel(combinedScore);
1725
1924
  members.push({
1726
1925
  id: candidate.id,
1727
1926
  metadata: candidateMeta,
1728
- score: candidate.score
1927
+ score: candidate.score,
1928
+ combinedScore,
1929
+ structuralScore,
1930
+ confidence
1729
1931
  });
1730
- similarities.push(candidate.score);
1932
+ similarities.push(combinedScore);
1731
1933
  processed.add(candidate.id);
1732
1934
  }
1733
1935
  }
1734
1936
  processed.add(id);
1937
+ const avgSimilarity = calculateGroupAverageSimilarity(similarities);
1938
+ const groupConfidence = getConfidenceLevel(avgSimilarity);
1735
1939
  groups.push({
1736
1940
  members,
1737
- avgSimilarity: calculateGroupAverageSimilarity(similarities),
1738
- kind: metadata.kind
1941
+ avgSimilarity,
1942
+ kind: metadata.kind,
1943
+ confidence: groupConfidence
1739
1944
  });
1740
1945
  }
1741
1946
  }
1742
- return sortDuplicateGroups(groups);
1947
+ let result = groups;
1948
+ if (confidenceFilter) {
1949
+ const minThreshold = getConfidenceThreshold(confidenceFilter);
1950
+ result = groups.filter((g) => g.avgSimilarity >= minThreshold);
1951
+ }
1952
+ return sortDuplicateGroups(result);
1743
1953
  }
1744
1954
  function findSimilarToLocation(vectorStore, metadataStore, filePath, line, options = {}) {
1745
1955
  const { top = 10, threshold = 0.5 } = options;
@@ -1809,9 +2019,12 @@ async function findDuplicates(options = {}) {
1809
2019
  const vectorStore = indexer.getVectorStore();
1810
2020
  const metadataStore = indexer.getMetadataStore();
1811
2021
  const groups = findDuplicateGroups(vectorStore, metadataStore, {
1812
- threshold: options.threshold,
2022
+ threshold: options.threshold ?? 0.75,
1813
2023
  minGroupSize: options.minGroupSize,
1814
- kind: options.kind
2024
+ kind: options.kind,
2025
+ confidenceFilter: options.confidenceLevel,
2026
+ useStructuralBoost: options.useStructuralBoost ?? true,
2027
+ includeSameFile: options.includeSameFile ?? true
1815
2028
  });
1816
2029
  return groups.map((group) => ({
1817
2030
  members: group.members.map((m) => ({
@@ -1823,7 +2036,8 @@ async function findDuplicates(options = {}) {
1823
2036
  score: m.score
1824
2037
  })),
1825
2038
  avgSimilarity: group.avgSimilarity,
1826
- kind: group.kind
2039
+ kind: group.kind,
2040
+ confidence: group.confidence
1827
2041
  }));
1828
2042
  }
1829
2043
  async function searchSimilar(query, options = {}) {
@@ -1912,6 +2126,373 @@ async function getIndexStats(path = process.cwd()) {
1912
2126
  };
1913
2127
  }
1914
2128
 
2129
+ // src/detection/normalizer.ts
2130
/**
 * Default switches for `normalizeCode`: everything on except identifier
 * renaming.
 */
var DEFAULT_NORMALIZATION_OPTIONS = {
  normalizeIdentifiers: false, // rename identifiers to positional placeholders
  normalizeSemantics: true, // collapse equivalent names into shared tokens
  stripComments: true, // remove line and block comments
  normalizeWhitespace: true // collapse runs of whitespace
};
2136
/**
 * Ordered [pattern, placeholder] pairs. Names matched by a pattern are
 * replaced with the shared placeholder. Order matters: an earlier pattern
 * consumes a word before later patterns see it.
 */
var SEMANTIC_EQUIVALENTS = [
  [/\b(size|dimension|scale|magnitude)\b/gi, "__SIZE__"], // size-related props
  [/\b(variant|type|kind|style|mode)\b/gi, "__VARIANT__"], // variant-related props
  [/\b(onClick|onPress|onTap|handleClick|handlePress|onSelect)\b/g, "__CLICK_HANDLER__"], // click/press handlers
  [/\b(onChange|onInput|handleChange|handleInput)\b/g, "__CHANGE_HANDLER__"], // change handlers
  [/\b(onSubmit|handleSubmit)\b/g, "__SUBMIT_HANDLER__"], // submit handlers
  [/\b(loading|isLoading|pending|isFetching|isBusy)\b/gi, "__LOADING__"], // loading states
  [/\b(error|err|errorMsg|errorMessage|failure)\b/gi, "__ERROR__"], // error states
  [/\b(success|isSuccess|succeeded|done)\b/gi, "__SUCCESS__"], // success states
  [/\b(data|items|list|results|entries|records)\b/gi, "__DATA__"], // data/item collections
  [/\b(children|content|body|slot|inner)\b/gi, "__CHILDREN__"], // children/content slots
  [/\b(label|title|heading|name|text)\b/gi, "__LABEL__"], // label/title text
  [/\b(description|message|subtitle|detail|info)\b/gi, "__DESCRIPTION__"], // description/message
  [/\b(value|amount|total|count|number)\b/gi, "__VALUE__"], // value/amount
  [/\b(disabled|isDisabled|readonly|readOnly)\b/gi, "__DISABLED__"], // disabled state
  [/\b(visible|isVisible|shown|isShown|open|isOpen)\b/gi, "__VISIBLE__"], // visible/shown state
  // Class names. NOTE(review): `style` is already rewritten to __VARIANT__ by
  // the variant rule above, so the `style` alternative here never matches.
  [/\b(className|classes|style|styles)\b/gi, "__CLASSNAME__"]
];
2170
/**
 * Remove `//` line comments and block comments from source text.
 *
 * Quoted and template string literals are matched first and copied through
 * unchanged, so comment markers inside them (for example "http://host") are
 * not treated as comments. Comments are removed in a single left-to-right
 * pass, so a block comment that contains `//` is removed as a whole.
 *
 * This is a lexical heuristic, not a parser: regex literals and JSX text that
 * contain quote characters can still confuse it.
 *
 * @param {string} code - Source text.
 * @returns {string} The text with comments removed; line breaks are kept.
 */
function stripComments(code) {
  // Group 1 captures a string literal; the other alternatives are comments.
  const stringOrComment =
    /("(?:\\[\s\S]|[^"\\\n])*"|'(?:\\[\s\S]|[^'\\\n])*'|`(?:\\[\s\S]|[^`\\])*`)|\/\/.*$|\/\*[\s\S]*?\*\//gm;
  return code.replace(stringOrComment, (match, literal) => literal ?? "");
}
2175
/**
 * Collapse whitespace so that formatting differences do not affect comparison.
 *
 * @param {string} code - Source text.
 * @returns {string} Text with whitespace runs reduced to one space, whitespace
 *   around brackets and `; , : < >` removed, and both ends trimmed.
 */
function normalizeWhitespace(code) {
  const collapsed = code.replace(/\s+/g, " ");
  const tightened = collapsed.replace(/\s*([{}\[\]();,:<>])\s*/g, "$1");
  return tightened.trim();
}
2178
/**
 * Apply every rule in SEMANTIC_EQUIVALENTS to the text, in order.
 *
 * @param {string} code - Source text.
 * @returns {string} Text with matching names replaced by their placeholders.
 */
function applySemanticNormalization(code) {
  return SEMANTIC_EQUIVALENTS.reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    code
  );
}
2185
/**
 * Replace identifiers with positional placeholders (`_ID0_`, `_ID1_`, ...).
 *
 * Words in the preserve list are left alone, matched either exactly or by
 * their lower-cased form. Every other identifier is keyed by its lower-cased
 * spelling, so `Foo` and `foo` share a placeholder. Placeholders are numbered
 * in order of first appearance.
 *
 * @param {string} code - Source text.
 * @returns {string} Text with non-preserved identifiers replaced.
 */
function normalizeIdentifiersSimple(code) {
  const preserved = new Set([
    // JavaScript keywords
    "const", "let", "var", "function", "return", "if", "else", "for", "while",
    "do", "switch", "case", "break", "continue", "default", "try", "catch",
    "finally", "throw", "new", "this", "class", "extends", "super", "import",
    "export", "from", "as", "async", "await", "yield", "typeof", "instanceof",
    "in", "of", "void", "delete", "true", "false", "null", "undefined",
    // TypeScript keywords
    "interface", "type", "enum", "implements", "private", "public",
    "protected", "readonly", "abstract", "declare", "namespace", "module",
    // React hooks
    "useState", "useEffect", "useCallback", "useMemo", "useRef", "useContext",
    "useReducer", "useLayoutEffect", "useImperativeHandle", "useDebugValue",
    "useDeferredValue", "useTransition", "useId", "useSyncExternalStore",
    "useInsertionEffect",
    // React
    "React", "Component", "Fragment", "Suspense", "memo", "forwardRef",
    "createContext", "createElement",
    // Common globals
    "console", "window", "document", "Math", "Date", "JSON", "Object", "Array",
    "String", "Number", "Boolean", "Promise", "Set", "Map", "WeakSet",
    "WeakMap", "Symbol", "Error", "RegExp", "Intl", "setTimeout",
    "setInterval", "clearTimeout", "clearInterval", "fetch", "require",
    // Common type names
    "string", "number", "boolean", "object", "any", "unknown", "never",
    // Placeholders produced by semantic normalization
    "__SIZE__", "__VARIANT__", "__CLICK_HANDLER__", "__CHANGE_HANDLER__",
    "__SUBMIT_HANDLER__", "__LOADING__", "__ERROR__", "__SUCCESS__",
    "__DATA__", "__CHILDREN__", "__LABEL__", "__DESCRIPTION__", "__VALUE__",
    "__DISABLED__", "__VISIBLE__", "__CLASSNAME__"
  ]);
  const placeholders = new Map();

  return code.replace(/\b([a-z_$][a-z0-9_$]*)\b/gi, (identifier) => {
    const lowered = identifier.toLowerCase();
    if (preserved.has(identifier) || preserved.has(lowered)) {
      return identifier;
    }
    let placeholder = placeholders.get(lowered);
    if (placeholder === undefined) {
      // The map size equals the number of placeholders handed out so far.
      placeholder = `_ID${placeholders.size}_`;
      placeholders.set(lowered, placeholder);
    }
    return placeholder;
  });
}
2335
/**
 * Run the enabled normalization passes over source text, in a fixed order:
 * comment stripping, semantic normalization, identifier renaming, whitespace
 * normalization.
 *
 * Options are merged over DEFAULT_NORMALIZATION_OPTIONS, so a partial object
 * overrides only the keys it names. Without the merge, a caller passing only
 * `{ normalizeIdentifiers: true, normalizeSemantics: true }` silently turned
 * off comment stripping and whitespace normalization. Pass `false` explicitly
 * to disable a pass.
 *
 * @param {string} code - Source text.
 * @param {{normalizeIdentifiers?: boolean, normalizeSemantics?: boolean, stripComments?: boolean, normalizeWhitespace?: boolean}} [options]
 *   Pass switches; omitted keys fall back to the defaults.
 * @returns {string} The normalized text.
 */
function normalizeCode(code, options = DEFAULT_NORMALIZATION_OPTIONS) {
  const settings = { ...DEFAULT_NORMALIZATION_OPTIONS, ...options };
  let result = code;
  if (settings.stripComments) {
    result = stripComments(result);
  }
  if (settings.normalizeSemantics) {
    result = applySemanticNormalization(result);
  }
  if (settings.normalizeIdentifiers) {
    result = normalizeIdentifiersSimple(result);
  }
  if (settings.normalizeWhitespace) {
    result = normalizeWhitespace(result);
  }
  return result;
}
2351
/**
 * Levenshtein edit distance between two strings, computed with two rolling
 * rows instead of a full matrix.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Minimum number of single-unit insertions, deletions and
 *   substitutions that turn `a` into `b`.
 */
function levenshteinDistance(a, b) {
  if (a === b) return 0;
  const rows = a.length;
  const cols = b.length;
  if (rows === 0) return cols;
  if (cols === 0) return rows;

  // Row 0: turning the empty prefix of `a` into b[0..j] costs j insertions.
  let above = Array.from({ length: cols + 1 }, (_, j) => j);
  let current = new Array(cols + 1);

  for (let i = 1; i <= rows; i++) {
    current[0] = i;
    const unitA = a[i - 1];
    for (let j = 1; j <= cols; j++) {
      const substitution = above[j - 1] + (unitA === b[j - 1] ? 0 : 1);
      const deletion = above[j] + 1;
      const insertion = current[j - 1] + 1;
      current[j] = Math.min(deletion, insertion, substitution);
    }
    // Reuse the old row as scratch space for the next iteration.
    const spare = above;
    above = current;
    current = spare;
  }
  return above[cols];
}
2377
/**
 * Textual similarity of two code snippets after normalization.
 *
 * @param {string} codeA - First snippet.
 * @param {string} codeB - Second snippet.
 * @param {object} [options] - Options forwarded to `normalizeCode`; by default
 *   identifier and semantic normalization are requested.
 * @returns {number} 1 when the normalized texts are equal, otherwise
 *   1 - (edit distance / length of the longer normalized text).
 */
function calculateNormalizedSimilarity(codeA, codeB, options = { normalizeIdentifiers: true, normalizeSemantics: true }) {
  const left = normalizeCode(codeA, options);
  const right = normalizeCode(codeB, options);
  if (left === right) return 1;

  const edits = levenshteinDistance(left, right);
  const longest = Math.max(left.length, right.length);
  return longest === 0 ? 1 : 1 - edits / longest;
}
2388
/**
 * Check whether two snippets are near-identical after normalization.
 *
 * @param {string} codeA - First snippet.
 * @param {string} codeB - Second snippet.
 * @param {number} [threshold=0.95] - Minimum normalized similarity.
 * @returns {boolean} True when the normalized similarity is at least `threshold`.
 */
function isNearIdentical(codeA, codeB, threshold = 0.95) {
  const similarity = calculateNormalizedSimilarity(codeA, codeB);
  return similarity >= threshold;
}
2391
/**
 * Normalize code before it is embedded: comments are stripped and semantic
 * placeholders applied, while identifiers and whitespace are left untouched
 * so the original structure is preserved.
 *
 * @param {string} code - Source text.
 * @returns {string} The normalized text.
 */
function prepareForEmbedding(code) {
  const embeddingOptions = {
    normalizeIdentifiers: false,
    normalizeSemantics: true,
    stripComments: true,
    normalizeWhitespace: false
  };
  return normalizeCode(code, embeddingOptions);
}
2400
+
2401
+ // src/detection/combined-scorer.ts
2402
/**
 * Default weights for `calculateCombinedScore`. The normalized-text signal is
 * off by default; its weight only applies when `includeNormalized` is true.
 */
var DEFAULT_COMBINED_SCORER_OPTIONS = {
  semanticWeight: 0.5, // weight of the semantic score
  structuralWeight: 0.3, // weight of the structural score
  normalizedWeight: 0.2, // weight of the normalized-text score
  includeNormalized: false // whether to compute the normalized-text score
};
2408
/**
 * Combine semantic, structural and (optionally) normalized-text similarity
 * into one score with a confidence rating.
 *
 * @param {number} semanticScore - Semantic similarity score.
 * @param {object} metadataA - First stored record.
 * @param {object} metadataB - Second stored record.
 * @param {string} [codeA] - Source of the first chunk; needed for the
 *   normalized-text signal.
 * @param {string} [codeB] - Source of the second chunk.
 * @param {object} [options] - Overrides for DEFAULT_COMBINED_SCORER_OPTIONS.
 * @returns {{final: number, semantic: number, structural: number, structuralDetails: object, normalized: (number|undefined), confidence: string, confidenceDetails: object}}
 *   The final score (boosted, then clamped to [0, 1]) and its components.
 */
function calculateCombinedScore(semanticScore, metadataA, metadataB, codeA, codeB, options = {}) {
  const { semanticWeight, structuralWeight, normalizedWeight, includeNormalized } = {
    ...DEFAULT_COMBINED_SCORER_OPTIONS,
    ...options
  };
  const structuralDetails = calculateStructuralSimilarity(metadataA, metadataB);
  const structural = structuralDetails.combined;

  // The text comparison only runs when requested and both sources are present.
  const normalized = includeNormalized && codeA && codeB
    ? calculateNormalizedSimilarity(codeA, codeB)
    : undefined;

  let blended;
  if (normalized === undefined) {
    // Rescale the two remaining weights so they sum to 1.
    const weightSum = semanticWeight + structuralWeight;
    blended = semanticScore * (semanticWeight / weightSum) + structural * (structuralWeight / weightSum);
  } else {
    blended = semanticScore * semanticWeight + structural * structuralWeight + normalized * normalizedWeight;
  }

  const boosted = applyBoostForHighSignals(blended, semanticScore, structural, normalized);
  const final = Math.max(0, Math.min(1, boosted));
  const confidence = getConfidenceLevel(final);
  const confidenceDetails = getConfidenceResult(final);
  return {
    final,
    semantic: semanticScore,
    structural,
    structuralDetails,
    normalized,
    confidence,
    confidenceDetails
  };
}
2439
/**
 * Add a small bonus to a score when any single signal is at or above 0.95.
 * Only the largest bonus among the signals is applied.
 *
 * @param {number} baseScore - Score before boosting.
 * @param {number} semantic - Semantic similarity.
 * @param {number} structural - Structural similarity.
 * @param {number} [normalized] - Normalized-text similarity, if computed.
 * @returns {number} `baseScore` plus the largest bonus scaled by 0.1.
 */
function applyBoostForHighSignals(baseScore, semantic, structural, normalized) {
  const HIGH_SIGNAL_THRESHOLD = 0.95;
  const BOOST_FACTOR = 0.1;

  const bonuses = [0];
  if (normalized !== undefined && normalized >= HIGH_SIGNAL_THRESHOLD) {
    bonuses.push((normalized - HIGH_SIGNAL_THRESHOLD) * 2);
  }
  if (semantic >= HIGH_SIGNAL_THRESHOLD) {
    bonuses.push((semantic - HIGH_SIGNAL_THRESHOLD) * 1.5);
  }
  if (structural >= HIGH_SIGNAL_THRESHOLD) {
    bonuses.push((structural - HIGH_SIGNAL_THRESHOLD) * 1.5);
  }
  return baseScore + Math.max(...bonuses) * BOOST_FACTOR;
}
2454
/**
 * Structural pre-filter for duplicate candidates.
 *
 * @param {object} metadataA - First stored record.
 * @param {object} metadataB - Second stored record.
 * @param {number} [threshold=0.3] - Minimum combined structural score.
 * @returns {boolean} True when the structural score is at least `threshold`.
 */
function isPotentialDuplicate(metadataA, metadataB, threshold = 0.3) {
  const { combined } = calculateStructuralSimilarity(metadataA, metadataB);
  return combined >= threshold;
}
2458
/**
 * Score two records on structural similarity alone.
 *
 * @param {object} metadataA - First stored record.
 * @param {object} metadataB - Second stored record.
 * @returns {{score: number, confidence: string}} The combined structural score
 *   and its confidence level.
 */
function calculateQuickScore(metadataA, metadataB) {
  const { combined: score } = calculateStructuralSimilarity(metadataA, metadataB);
  return { score, confidence: getConfidenceLevel(score) };
}
2463
/**
 * Pick a human-readable recommendation for a combined score. The rules are
 * checked in order and the first match wins.
 *
 * @param {{final: number, semantic: number, structural: number, normalized?: number}} score
 *   Combined score record.
 * @returns {string} The recommendation text.
 */
function getRecommendedAction(score) {
  const { normalized, final, structural, semantic } = score;

  const isNearIdenticalText = normalized !== undefined && normalized >= 0.95;
  if (isNearIdenticalText) {
    return "These appear to be near-identical. Consolidate into a single implementation.";
  }
  if (final >= 0.9) {
    return "High similarity detected. Strongly consider consolidating into a shared component/function.";
  }
  if (final >= 0.75) {
    return "Moderate similarity. Review for potential abstraction into a shared utility.";
  }
  // Below the moderate cut-off: explain a mismatch between the two signals.
  if (structural >= 0.8 && semantic < 0.7) {
    return "Similar structure but different semantics. May be intentionally different implementations.";
  }
  if (semantic >= 0.8 && structural < 0.5) {
    return "Similar purpose but different structure. Consider if a common abstraction makes sense.";
  }
  return "Low similarity. Likely different implementations that happen to share some patterns.";
}
2481
/**
 * Render a combined score as text.
 *
 * @param {{final: number, semantic: number, structural: number, normalized?: number, confidence: string}} score
 *   Combined score record.
 * @param {boolean} [verbose=false] - Also list the component scores and the
 *   recommended action.
 * @returns {string} One summary line, or several lines joined with "\n" when
 *   `verbose` is set.
 */
function formatCombinedScore(score, verbose = false) {
  const asPercent = (value) => Math.round(value * 100);
  const summary = `${asPercent(score.final)}% similar (${score.confidence} confidence)`;
  if (!verbose) return summary;

  const details = [
    summary,
    ` Semantic: ${asPercent(score.semantic)}%`,
    ` Structural: ${asPercent(score.structural)}%`
  ];
  if (score.normalized !== undefined) {
    details.push(` Normalized: ${asPercent(score.normalized)}%`);
  }
  details.push(` \u2192 ${getRecommendedAction(score)}`);
  return details.join("\n");
}
2495
+
1915
2496
  export {
1916
2497
  chunkFile,
1917
2498
  prepareEmbeddingInput,
@@ -1928,6 +2509,21 @@ export {
1928
2509
  calculateDuplicateScore,
1929
2510
  calculateGroupAverageSimilarity,
1930
2511
  sortDuplicateGroups,
2512
+ jaccard,
2513
+ calculateSizeRatio2,
2514
+ calculateStructuralSimilarity,
2515
+ hasHighStructuralSimilarity,
2516
+ findStructurallySimilar,
2517
+ DEFAULT_CONFIDENCE_CONFIG,
2518
+ getConfidenceLevel,
2519
+ meetsMinimumThreshold,
2520
+ getConfidenceResult,
2521
+ getConfidenceEmoji,
2522
+ getConfidenceAnsiColor,
2523
+ formatConfidence,
2524
+ formatConfidenceVerbose,
2525
+ compareConfidenceLevels,
2526
+ filterByConfidence,
1931
2527
  findDuplicateGroups,
1932
2528
  findSimilarToLocation,
1933
2529
  findSimilarToQuery,
@@ -1937,6 +2533,17 @@ export {
1937
2533
  searchSimilar,
1938
2534
  findSimilarAtLocation,
1939
2535
  hasIndex,
1940
- getIndexStats
2536
+ getIndexStats,
2537
+ normalizeCode,
2538
+ levenshteinDistance,
2539
+ calculateNormalizedSimilarity,
2540
+ isNearIdentical,
2541
+ prepareForEmbedding,
2542
+ DEFAULT_COMBINED_SCORER_OPTIONS,
2543
+ calculateCombinedScore,
2544
+ isPotentialDuplicate,
2545
+ calculateQuickScore,
2546
+ getRecommendedAction,
2547
+ formatCombinedScore
1941
2548
  };
1942
- //# sourceMappingURL=chunk-54SLRAFO.js.map
2549
+ //# sourceMappingURL=chunk-BAZQUJDS.js.map