uilint-duplicates 0.2.122 → 0.2.123
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-54SLRAFO.js → chunk-BAZQUJDS.js} +622 -15
- package/dist/chunk-BAZQUJDS.js.map +1 -0
- package/dist/index.d.ts +344 -8
- package/dist/index.js +53 -1
- package/dist/node.d.ts +1 -1
- package/dist/node.js +53 -1
- package/package.json +2 -2
- package/dist/chunk-54SLRAFO.js.map +0 -1
|
@@ -1670,13 +1670,189 @@ function sortDuplicateGroups(groups) {
|
|
|
1670
1670
|
});
|
|
1671
1671
|
}
|
|
1672
1672
|
|
|
1673
|
+
// src/detection/structural-scorer.ts
|
|
1674
|
+
/**
 * Default weights applied to each structural sub-score (props overlap,
 * JSX element overlap, hooks overlap, size ratio) when they are blended
 * into a single structural similarity value.
 */
var DEFAULT_STRUCTURAL_WEIGHTS = {
  props: 0.25, // weight of the props overlap
  jsx: 0.35, // weight of the JSX element overlap
  hooks: 0.25, // weight of the hooks overlap
  size: 0.15 // weight of the line-count ratio
};
|
|
1680
|
+
/**
 * Case-insensitive Jaccard index of two string lists.
 *
 * @param {string[]} a - First list of names.
 * @param {string[]} b - Second list of names.
 * @returns {number} 1 when both lists are empty, 0 when exactly one is
 *   empty, otherwise |A ∩ B| / |A ∪ B| over the lower-cased, de-duplicated
 *   entries.
 */
function jaccard(a, b) {
  const aIsEmpty = a.length === 0;
  const bIsEmpty = b.length === 0;
  if (aIsEmpty && bIsEmpty) return 1;
  if (aIsEmpty || bIsEmpty) return 0;

  const left = new Set(a.map((name) => name.toLowerCase()));
  const right = new Set(b.map((name) => name.toLowerCase()));

  let shared = 0;
  for (const name of left) {
    if (right.has(name)) shared += 1;
  }
  // |A ∪ B| = |A| + |B| - |A ∩ B|
  return shared / (left.size + right.size - shared);
}
|
|
1689
|
+
/**
 * Ratio of the smaller line count to the larger one.
 *
 * @param {number} linesA - Line count of the first chunk.
 * @param {number} linesB - Line count of the second chunk.
 * @returns {number} 1 when both counts are 0, 0 when exactly one is 0,
 *   otherwise smaller / larger.
 */
function calculateSizeRatio2(linesA, linesB) {
  if (linesA === 0 || linesB === 0) {
    // Two empty chunks count as identical in size; one empty chunk never matches.
    return linesA === linesB ? 1 : 0;
  }
  return linesA <= linesB ? linesA / linesB : linesB / linesA;
}
|
|
1696
|
+
/**
 * Pulls the structural features used for scoring out of a stored chunk
 * record.
 *
 * @param {object} storedMeta - Stored record; `metadata.props`,
 *   `metadata.jsxElements`, `metadata.hooks`, `startLine` and `endLine` are
 *   read and each may be absent.
 * @returns {{props: Array, jsxElements: Array, hooks: Array, lines: number}}
 *   Missing lists default to `[]`. `lines` is the inclusive line span
 *   (`endLine - startLine + 1`, with missing bounds treated as 0), clamped so
 *   it is never negative.
 */
function extractFeatures(storedMeta) {
  const meta = storedMeta.metadata ?? {};
  const startLine = storedMeta.startLine ?? 0;
  const endLine = storedMeta.endLine ?? 0;
  return {
    props: meta.props ?? [],
    jsxElements: meta.jsxElements ?? [],
    hooks: meta.hooks ?? [],
    // A missing or inverted end line would otherwise give a negative span,
    // which turns the size ratio negative and skews the combined score.
    lines: Math.max(0, endLine - startLine + 1)
  };
}
|
|
1705
|
+
/**
 * Scores how structurally alike two stored chunks are.
 *
 * Computes the Jaccard overlap of props, JSX elements and hooks plus the
 * line-count ratio, then blends them with the given weights.
 *
 * @param {object} a - First stored chunk record.
 * @param {object} b - Second stored chunk record.
 * @param {object} [weights=DEFAULT_STRUCTURAL_WEIGHTS] - Weights keyed
 *   `props`, `jsx`, `hooks`, `size`. Any key left out falls back to its
 *   default so a partial object cannot produce NaN.
 * @returns {{propsOverlap: number, jsxOverlap: number, hooksOverlap: number,
 *   sizeRatio: number, combined: number}} The sub-scores and their weighted sum.
 */
function calculateStructuralSimilarity(a, b, weights = DEFAULT_STRUCTURAL_WEIGHTS) {
  // Fill in any weight the caller omitted; `undefined * number` is NaN.
  const effectiveWeights = { ...DEFAULT_STRUCTURAL_WEIGHTS, ...weights };
  const featuresA = extractFeatures(a);
  const featuresB = extractFeatures(b);

  const propsOverlap = jaccard(featuresA.props, featuresB.props);
  const jsxOverlap = jaccard(featuresA.jsxElements, featuresB.jsxElements);
  const hooksOverlap = jaccard(featuresA.hooks, featuresB.hooks);
  const sizeRatio = calculateSizeRatio2(featuresA.lines, featuresB.lines);

  const combined =
    propsOverlap * effectiveWeights.props +
    jsxOverlap * effectiveWeights.jsx +
    hooksOverlap * effectiveWeights.hooks +
    sizeRatio * effectiveWeights.size;

  return { propsOverlap, jsxOverlap, hooksOverlap, sizeRatio, combined };
}
|
|
1721
|
+
/**
 * Tells whether two stored chunks reach a structural similarity threshold.
 *
 * @param {object} a - First stored chunk record.
 * @param {object} b - Second stored chunk record.
 * @param {number} [threshold=0.5] - Minimum combined structural score.
 * @returns {boolean} True when the combined score is at least `threshold`.
 */
function hasHighStructuralSimilarity(a, b, threshold = 0.5) {
  const { combined } = calculateStructuralSimilarity(a, b);
  return combined >= threshold;
}
|
|
1725
|
+
/**
 * Ranks candidates by structural similarity to a target chunk.
 *
 * @param {object} target - Chunk record to compare against.
 * @param {Iterable<object>} candidates - Chunk records to score.
 * @param {number} [threshold=0.5] - Minimum combined structural score to keep.
 * @param {number} [limit=10] - Maximum number of results returned.
 * @returns {Array<{metadata: object, score: object}>} Matches sorted by
 *   descending combined score. A candidate with the same `filePath` and
 *   `startLine` as the target is skipped.
 */
function findStructurallySimilar(target, candidates, threshold = 0.5, limit = 10) {
  const isTargetItself = (candidate) =>
    candidate.filePath === target.filePath && candidate.startLine === target.startLine;

  const matches = [...candidates]
    .filter((candidate) => !isTargetItself(candidate))
    .map((candidate) => ({
      metadata: candidate,
      score: calculateStructuralSimilarity(target, candidate)
    }))
    .filter((entry) => entry.score.combined >= threshold);

  matches.sort((first, second) => second.score.combined - first.score.combined);
  return matches.slice(0, limit);
}
|
|
1739
|
+
|
|
1740
|
+
// src/detection/confidence.ts
|
|
1741
|
+
/**
 * Default score cut-offs for the three confidence levels. A score at or
 * above a threshold qualifies for that level.
 */
var DEFAULT_CONFIDENCE_CONFIG = {
  highThreshold: 0.9, // score >= 0.9 is "high"
  mediumThreshold: 0.75, // score >= 0.75 is "medium"
  lowThreshold: 0.6 // minimum score accepted by meetsMinimumThreshold
};
|
|
1746
|
+
/**
 * Maps a similarity score to a confidence level.
 *
 * @param {number} score - Similarity score.
 * @param {object} [config=DEFAULT_CONFIDENCE_CONFIG] - Supplies
 *   `highThreshold` and `mediumThreshold`.
 * @returns {"high"|"medium"|"low"} "high" at or above the high threshold,
 *   "medium" at or above the medium threshold, otherwise "low".
 */
function getConfidenceLevel(score, config = DEFAULT_CONFIDENCE_CONFIG) {
  return score >= config.highThreshold
    ? "high"
    : score >= config.mediumThreshold
      ? "medium"
      : "low";
}
|
|
1751
|
+
/**
 * Checks a score against the lowest accepted threshold.
 *
 * @param {number} score - Similarity score.
 * @param {object} [config=DEFAULT_CONFIDENCE_CONFIG] - Supplies `lowThreshold`.
 * @returns {boolean} True when `score` is at least `config.lowThreshold`.
 */
function meetsMinimumThreshold(score, config = DEFAULT_CONFIDENCE_CONFIG) {
  const { lowThreshold } = config;
  return score >= lowThreshold;
}
|
|
1754
|
+
/**
 * Builds a presentation record for a similarity score.
 *
 * @param {number} score - Similarity score.
 * @param {object} [config=DEFAULT_CONFIDENCE_CONFIG] - Thresholds passed on
 *   to getConfidenceLevel.
 * @returns {{level: string, score: number, description: string,
 *   action: string, color: string}|undefined} The level, the score and that
 *   level's description, suggested action and colour name.
 */
function getConfidenceResult(score, config = DEFAULT_CONFIDENCE_CONFIG) {
  const level = getConfidenceLevel(score, config);

  // Per-level text; a Map keeps the lookup limited to the three known keys.
  const presentationByLevel = new Map([
    [
      "high",
      {
        description: "High confidence duplicate - likely copy-paste or near-identical implementation",
        action: "Strongly recommend consolidating into a single reusable component/function",
        color: "red"
      }
    ],
    [
      "medium",
      {
        description: "Medium confidence - semantically similar code with different implementation details",
        action: "Review for potential consolidation or shared abstraction",
        color: "yellow"
      }
    ],
    [
      "low",
      {
        description: "Low confidence - possibly related patterns or partial structural overlap",
        action: "Optional review - differences may be intentional",
        color: "green"
      }
    ]
  ]);

  const presentation = presentationByLevel.get(level);
  if (presentation === void 0) return void 0;
  return { level, score, ...presentation };
}
|
|
1783
|
+
/**
 * Returns the coloured-circle emoji for a confidence level.
 *
 * @param {string} level - "high", "medium" or "low".
 * @returns {string|undefined} Red, yellow or green circle; undefined for any
 *   other value.
 */
function getConfidenceEmoji(level) {
  if (level === "high") return "\u{1F534}";
  if (level === "medium") return "\u{1F7E1}";
  if (level === "low") return "\u{1F7E2}";
  return void 0;
}
|
|
1793
|
+
/**
 * Returns the ANSI escape sequence that starts the colour for a level.
 *
 * @param {string} level - "high", "medium" or "low".
 * @returns {string|undefined} Red, yellow or green escape code; undefined
 *   for any other value.
 */
function getConfidenceAnsiColor(level) {
  const escapeByLevel = new Map([
    ["high", "\x1B[31m"], // Red
    ["medium", "\x1B[33m"], // Yellow
    ["low", "\x1B[32m"] // Green
  ]);
  return escapeByLevel.get(level);
}
|
|
1805
|
+
/**
 * Formats a confidence result as a single line.
 *
 * @param {{score: number, level: string}} result - Score and its level.
 * @param {boolean} [useEmoji=true] - Prefix the line with the level's emoji.
 * @param {boolean} [useColor=false] - Wrap the text in the level's ANSI
 *   colour and a reset sequence.
 * @returns {string} e.g. "88% similarity (medium confidence)" plus the
 *   optional emoji prefix and colour codes.
 */
function formatConfidence(result, useEmoji = true, useColor = false) {
  const { score, level } = result;
  const label = `${Math.round(score * 100)}% similarity (${level} confidence)`;

  let prefix = "";
  if (useEmoji) {
    prefix = `${getConfidenceEmoji(level)} `;
  }
  if (!useColor) {
    return `${prefix}${label}`;
  }
  return `${prefix}${getConfidenceAnsiColor(level)}${label}\x1B[0m`;
}
|
|
1812
|
+
/**
 * Formats a confidence result as three lines: the one-line summary, the
 * description, and the suggested action behind an arrow.
 *
 * @param {{score: number, level: string, description: string,
 *   action: string}} result - Confidence result to render.
 * @returns {string} The three lines joined with "\n".
 */
function formatConfidenceVerbose(result) {
  const headline = formatConfidence(result);
  return `${headline}\n ${result.description}\n \u2192 ${result.action}`;
}
|
|
1820
|
+
/**
 * Comparator for confidence levels (high > medium > low).
 *
 * @param {string} a - First level.
 * @param {string} b - Second level.
 * @returns {number} Positive when `a` ranks above `b`, negative when below,
 *   0 when equal.
 */
function compareConfidenceLevels(a, b) {
  const rankOf = (level) => ({ high: 3, medium: 2, low: 1 })[level];
  return rankOf(a) - rankOf(b);
}
|
|
1824
|
+
/**
 * Keeps the results whose score reaches the threshold of a minimum level.
 *
 * @param {Array<{score: number}>} results - Scored results.
 * @param {string} minLevel - "high" or "medium"; any other value selects the
 *   low threshold.
 * @param {object} [config=DEFAULT_CONFIDENCE_CONFIG] - Threshold values.
 * @returns {Array<{score: number}>} A new array of the qualifying results.
 */
function filterByConfidence(results, minLevel, config = DEFAULT_CONFIDENCE_CONFIG) {
  let cutoff;
  switch (minLevel) {
    case "high":
      cutoff = config.highThreshold;
      break;
    case "medium":
      cutoff = config.mediumThreshold;
      break;
    default:
      cutoff = config.lowThreshold;
  }
  return results.filter((entry) => entry.score >= cutoff);
}
|
|
1828
|
+
|
|
1673
1829
|
// src/detection/duplicate-finder.ts
|
|
1830
|
+
/**
 * Blends a semantic score with the structural similarity of two chunks.
 *
 * @param {number} semanticScore - Semantic similarity score.
 * @param {object} metadataA - First stored chunk record.
 * @param {object} metadataB - Second stored chunk record.
 * @returns {{combined: number, structural: number}} `structural` is the
 *   combined structural score; `combined` is 60% semantic plus 40% structural.
 */
function calculateCombinedSimilarity(semanticScore, metadataA, metadataB) {
  const { combined: structural } = calculateStructuralSimilarity(metadataA, metadataB);
  return {
    combined: semanticScore * 0.6 + structural * 0.4,
    structural
  };
}
|
|
1836
|
+
/**
 * Returns the minimum score associated with a confidence level.
 *
 * The values come from DEFAULT_CONFIDENCE_CONFIG, so they stay in step with
 * getConfidenceLevel instead of repeating the numbers here.
 *
 * @param {string} level - "high" or "medium"; any other value selects the
 *   low threshold, as filterByConfidence does. An unrecognized level
 *   therefore no longer yields `undefined`, which made a `>=` comparison
 *   reject every score.
 * @returns {number} The threshold for the level.
 */
function getConfidenceThreshold(level) {
  switch (level) {
    case "high":
      return DEFAULT_CONFIDENCE_CONFIG.highThreshold;
    case "medium":
      return DEFAULT_CONFIDENCE_CONFIG.mediumThreshold;
    default:
      return DEFAULT_CONFIDENCE_CONFIG.lowThreshold;
  }
}
|
|
1674
1846
|
function findDuplicateGroups(vectorStore, metadataStore, options = {}) {
|
|
1675
1847
|
const {
|
|
1676
|
-
threshold = 0.
|
|
1848
|
+
threshold = 0.75,
|
|
1849
|
+
// Lowered from 0.85
|
|
1677
1850
|
minGroupSize = 2,
|
|
1678
1851
|
kind,
|
|
1679
|
-
excludePaths = []
|
|
1852
|
+
excludePaths = [],
|
|
1853
|
+
includeSameFile = true,
|
|
1854
|
+
useStructuralBoost = true,
|
|
1855
|
+
confidenceFilter
|
|
1680
1856
|
} = options;
|
|
1681
1857
|
const groups = [];
|
|
1682
1858
|
const processed = /* @__PURE__ */ new Set();
|
|
@@ -1693,7 +1869,8 @@ function findDuplicateGroups(vectorStore, metadataStore, options = {}) {
|
|
|
1693
1869
|
if (processed.has(id)) continue;
|
|
1694
1870
|
const vector = vectorStore.get(id);
|
|
1695
1871
|
if (!vector) continue;
|
|
1696
|
-
const
|
|
1872
|
+
const searchThreshold = useStructuralBoost ? threshold * 0.8 : threshold;
|
|
1873
|
+
const similar = vectorStore.findSimilar(vector, 50, searchThreshold);
|
|
1697
1874
|
let candidates = similar.filter((s) => {
|
|
1698
1875
|
if (s.id === id) return false;
|
|
1699
1876
|
if (processed.has(s.id)) return false;
|
|
@@ -1702,8 +1879,22 @@ function findDuplicateGroups(vectorStore, metadataStore, options = {}) {
|
|
|
1702
1879
|
if (kind && candidateMeta.kind !== kind) return false;
|
|
1703
1880
|
if (excludePaths.some((p) => candidateMeta.filePath.includes(p)))
|
|
1704
1881
|
return false;
|
|
1882
|
+
if (!includeSameFile && candidateMeta.filePath === metadata.filePath)
|
|
1883
|
+
return false;
|
|
1705
1884
|
return true;
|
|
1706
1885
|
});
|
|
1886
|
+
if (useStructuralBoost) {
|
|
1887
|
+
candidates = candidates.map((c) => {
|
|
1888
|
+
const candidateMeta = metadataStore.get(c.id);
|
|
1889
|
+
if (!candidateMeta) return { ...c, combinedScore: c.score };
|
|
1890
|
+
const { combined, structural } = calculateCombinedSimilarity(
|
|
1891
|
+
c.score,
|
|
1892
|
+
metadata,
|
|
1893
|
+
candidateMeta
|
|
1894
|
+
);
|
|
1895
|
+
return { ...c, combinedScore: combined, structuralScore: structural };
|
|
1896
|
+
}).filter((c) => (c.combinedScore ?? c.score) >= threshold).sort((a, b) => (b.combinedScore ?? b.score) - (a.combinedScore ?? a.score));
|
|
1897
|
+
}
|
|
1707
1898
|
if (!kind && candidates.length > 0) {
|
|
1708
1899
|
const sameKindCandidates = candidates.filter((c) => {
|
|
1709
1900
|
const meta = metadataStore.get(c.id);
|
|
@@ -1715,31 +1906,50 @@ function findDuplicateGroups(vectorStore, metadataStore, options = {}) {
|
|
|
1715
1906
|
}
|
|
1716
1907
|
if (candidates.length >= minGroupSize - 1) {
|
|
1717
1908
|
const members = [
|
|
1718
|
-
{
|
|
1719
|
-
|
|
1909
|
+
{
|
|
1910
|
+
id,
|
|
1911
|
+
metadata,
|
|
1912
|
+
score: 1,
|
|
1913
|
+
combinedScore: 1,
|
|
1914
|
+
confidence: "high"
|
|
1915
|
+
}
|
|
1720
1916
|
];
|
|
1721
1917
|
const similarities = [];
|
|
1722
1918
|
for (const candidate of candidates) {
|
|
1723
1919
|
const candidateMeta = metadataStore.get(candidate.id);
|
|
1724
1920
|
if (candidateMeta) {
|
|
1921
|
+
const combinedScore = candidate.combinedScore ?? candidate.score;
|
|
1922
|
+
const structuralScore = candidate.structuralScore;
|
|
1923
|
+
const confidence = getConfidenceLevel(combinedScore);
|
|
1725
1924
|
members.push({
|
|
1726
1925
|
id: candidate.id,
|
|
1727
1926
|
metadata: candidateMeta,
|
|
1728
|
-
score: candidate.score
|
|
1927
|
+
score: candidate.score,
|
|
1928
|
+
combinedScore,
|
|
1929
|
+
structuralScore,
|
|
1930
|
+
confidence
|
|
1729
1931
|
});
|
|
1730
|
-
similarities.push(
|
|
1932
|
+
similarities.push(combinedScore);
|
|
1731
1933
|
processed.add(candidate.id);
|
|
1732
1934
|
}
|
|
1733
1935
|
}
|
|
1734
1936
|
processed.add(id);
|
|
1937
|
+
const avgSimilarity = calculateGroupAverageSimilarity(similarities);
|
|
1938
|
+
const groupConfidence = getConfidenceLevel(avgSimilarity);
|
|
1735
1939
|
groups.push({
|
|
1736
1940
|
members,
|
|
1737
|
-
avgSimilarity
|
|
1738
|
-
kind: metadata.kind
|
|
1941
|
+
avgSimilarity,
|
|
1942
|
+
kind: metadata.kind,
|
|
1943
|
+
confidence: groupConfidence
|
|
1739
1944
|
});
|
|
1740
1945
|
}
|
|
1741
1946
|
}
|
|
1742
|
-
|
|
1947
|
+
let result = groups;
|
|
1948
|
+
if (confidenceFilter) {
|
|
1949
|
+
const minThreshold = getConfidenceThreshold(confidenceFilter);
|
|
1950
|
+
result = groups.filter((g) => g.avgSimilarity >= minThreshold);
|
|
1951
|
+
}
|
|
1952
|
+
return sortDuplicateGroups(result);
|
|
1743
1953
|
}
|
|
1744
1954
|
function findSimilarToLocation(vectorStore, metadataStore, filePath, line, options = {}) {
|
|
1745
1955
|
const { top = 10, threshold = 0.5 } = options;
|
|
@@ -1809,9 +2019,12 @@ async function findDuplicates(options = {}) {
|
|
|
1809
2019
|
const vectorStore = indexer.getVectorStore();
|
|
1810
2020
|
const metadataStore = indexer.getMetadataStore();
|
|
1811
2021
|
const groups = findDuplicateGroups(vectorStore, metadataStore, {
|
|
1812
|
-
threshold: options.threshold,
|
|
2022
|
+
threshold: options.threshold ?? 0.75,
|
|
1813
2023
|
minGroupSize: options.minGroupSize,
|
|
1814
|
-
kind: options.kind
|
|
2024
|
+
kind: options.kind,
|
|
2025
|
+
confidenceFilter: options.confidenceLevel,
|
|
2026
|
+
useStructuralBoost: options.useStructuralBoost ?? true,
|
|
2027
|
+
includeSameFile: options.includeSameFile ?? true
|
|
1815
2028
|
});
|
|
1816
2029
|
return groups.map((group) => ({
|
|
1817
2030
|
members: group.members.map((m) => ({
|
|
@@ -1823,7 +2036,8 @@ async function findDuplicates(options = {}) {
|
|
|
1823
2036
|
score: m.score
|
|
1824
2037
|
})),
|
|
1825
2038
|
avgSimilarity: group.avgSimilarity,
|
|
1826
|
-
kind: group.kind
|
|
2039
|
+
kind: group.kind,
|
|
2040
|
+
confidence: group.confidence
|
|
1827
2041
|
}));
|
|
1828
2042
|
}
|
|
1829
2043
|
async function searchSimilar(query, options = {}) {
|
|
@@ -1912,6 +2126,373 @@ async function getIndexStats(path = process.cwd()) {
|
|
|
1912
2126
|
};
|
|
1913
2127
|
}
|
|
1914
2128
|
|
|
2129
|
+
// src/detection/normalizer.ts
|
|
2130
|
+
/**
 * Default switches for normalizeCode: every step is on except identifier
 * renaming.
 */
var DEFAULT_NORMALIZATION_OPTIONS = {
  normalizeIdentifiers: false, // rename identifiers to positional placeholders
  normalizeSemantics: true, // replace equivalent names with shared placeholders
  stripComments: true, // remove line and block comments
  normalizeWhitespace: true // collapse whitespace
};
|
|
2136
|
+
/**
 * Ordered list of [pattern, placeholder] pairs. Each pattern matches a family
 * of interchangeable names as whole words; matches are replaced with the
 * placeholder. Order matters when a word appears in more than one pattern:
 * the earlier entry wins.
 *
 * NOTE(review): "style" is listed in both the __VARIANT__ and __CLASSNAME__
 * patterns. Because __VARIANT__ comes first, a bare "style" never reaches
 * __CLASSNAME__ — confirm which mapping is intended.
 */
var SEMANTIC_EQUIVALENTS = [
  // Size-related props
  [/\b(size|dimension|scale|magnitude)\b/gi, "__SIZE__"],
  // Variant-related props
  [/\b(variant|type|kind|style|mode)\b/gi, "__VARIANT__"],
  // Click/press handlers (case-sensitive)
  [/\b(onClick|onPress|onTap|handleClick|handlePress|onSelect)\b/g, "__CLICK_HANDLER__"],
  // Change handlers (case-sensitive)
  [/\b(onChange|onInput|handleChange|handleInput)\b/g, "__CHANGE_HANDLER__"],
  // Submit handlers (case-sensitive)
  [/\b(onSubmit|handleSubmit)\b/g, "__SUBMIT_HANDLER__"],
  // Loading states
  [/\b(loading|isLoading|pending|isFetching|isBusy)\b/gi, "__LOADING__"],
  // Error states
  [/\b(error|err|errorMsg|errorMessage|failure)\b/gi, "__ERROR__"],
  // Success states
  [/\b(success|isSuccess|succeeded|done)\b/gi, "__SUCCESS__"],
  // Data/items collections
  [/\b(data|items|list|results|entries|records)\b/gi, "__DATA__"],
  // Children/content slots
  [/\b(children|content|body|slot|inner)\b/gi, "__CHILDREN__"],
  // Label/title text
  [/\b(label|title|heading|name|text)\b/gi, "__LABEL__"],
  // Description/message
  [/\b(description|message|subtitle|detail|info)\b/gi, "__DESCRIPTION__"],
  // Value/amount
  [/\b(value|amount|total|count|number)\b/gi, "__VALUE__"],
  // Disabled state
  [/\b(disabled|isDisabled|readonly|readOnly)\b/gi, "__DISABLED__"],
  // Visible/shown state
  [/\b(visible|isVisible|shown|isShown|open|isOpen)\b/gi, "__VISIBLE__"],
  // Class names
  [/\b(className|classes|style|styles)\b/gi, "__CLASSNAME__"]
];
|
|
2170
|
+
/**
 * Returns the index just past the quoted literal that opens at `start`.
 * Backslash escapes are skipped. A single- or double-quoted literal that is
 * not closed before a line break ends at that line break; an unclosed
 * template literal runs to the end of the input.
 */
function skipQuotedLiteral(code, start) {
  const quote = code[start];
  let i = start + 1;
  while (i < code.length) {
    const ch = code[i];
    if (ch === "\\") {
      i += 2; // skip the escaped character too
      continue;
    }
    if (ch === quote) return i + 1;
    if (quote !== "`" && (ch === "\n" || ch === "\r")) return i;
    i += 1;
  }
  return code.length;
}

/**
 * Removes `//` line comments and block comments from source text.
 *
 * Text inside single-quoted, double-quoted and template literals is left
 * untouched, so a value such as "http://example.com" is no longer truncated
 * at the `//`. Line comments end at the line terminator, which is kept. An
 * unclosed block-comment opener is left in place as ordinary text.
 *
 * This is a lexical scan, not a parser: regular-expression literals, nested
 * template expressions and quote characters in JSX text are not recognized
 * specially.
 *
 * @param {string} code - Source text.
 * @returns {string} The text with comments removed.
 */
function stripComments(code) {
  const kept = [];
  const length = code.length;
  let segmentStart = 0; // start of the pending run of text to keep
  let i = 0;

  while (i < length) {
    const ch = code[i];

    if (ch === '"' || ch === "'" || ch === "`") {
      i = skipQuotedLiteral(code, i);
      continue;
    }

    if (ch === "/" && code[i + 1] === "/") {
      kept.push(code.slice(segmentStart, i));
      i += 2;
      while (
        i < length &&
        code[i] !== "\n" &&
        code[i] !== "\r" &&
        code[i] !== "\u2028" &&
        code[i] !== "\u2029"
      ) {
        i += 1;
      }
      segmentStart = i;
      continue;
    }

    if (ch === "/" && code[i + 1] === "*") {
      const close = code.indexOf("*/", i + 2);
      if (close === -1) {
        // No terminator: treat the opener as ordinary text.
        i += 2;
        continue;
      }
      kept.push(code.slice(segmentStart, i));
      i = close + 2;
      segmentStart = i;
      continue;
    }

    i += 1;
  }

  kept.push(code.slice(segmentStart));
  return kept.join("");
}
|
|
2175
|
+
/**
 * Collapses whitespace in source text.
 *
 * Every run of whitespace becomes one space, whitespace next to any of
 * `{ } [ ] ( ) ; , : < >` is removed, and the result is trimmed.
 *
 * @param {string} code - Source text.
 * @returns {string} The compacted text.
 */
function normalizeWhitespace(code) {
  const collapsed = code.replace(/\s+/g, " ");
  const tightened = collapsed.replace(/\s*([{}\[\]();,:<>])\s*/g, "$1");
  return tightened.trim();
}
|
|
2178
|
+
/**
 * Applies every SEMANTIC_EQUIVALENTS replacement to the text, in list order.
 *
 * @param {string} code - Source text.
 * @returns {string} The text with each matched name replaced by its placeholder.
 */
function applySemanticNormalization(code) {
  return SEMANTIC_EQUIVALENTS.reduce(
    (text, [pattern, placeholder]) => text.replace(pattern, placeholder),
    code
  );
}
|
|
2185
|
+
/**
 * Words that normalizeIdentifiersSimple leaves untouched. Built once at
 * module load instead of on every call.
 */
var PRESERVED_IDENTIFIERS = /* @__PURE__ */ new Set([
  // JavaScript keywords
  "const",
  "let",
  "var",
  "function",
  "return",
  "if",
  "else",
  "for",
  "while",
  "do",
  "switch",
  "case",
  "break",
  "continue",
  "default",
  "try",
  "catch",
  "finally",
  "throw",
  "new",
  "this",
  "class",
  "extends",
  "super",
  "import",
  "export",
  "from",
  "as",
  "async",
  "await",
  "yield",
  "typeof",
  "instanceof",
  "in",
  "of",
  "void",
  "delete",
  "true",
  "false",
  "null",
  "undefined",
  // TypeScript keywords
  "interface",
  "type",
  "enum",
  "implements",
  "private",
  "public",
  "protected",
  "readonly",
  "abstract",
  "declare",
  "namespace",
  "module",
  // React hooks
  "useState",
  "useEffect",
  "useCallback",
  "useMemo",
  "useRef",
  "useContext",
  "useReducer",
  "useLayoutEffect",
  "useImperativeHandle",
  "useDebugValue",
  "useDeferredValue",
  "useTransition",
  "useId",
  "useSyncExternalStore",
  "useInsertionEffect",
  // React
  "React",
  "Component",
  "Fragment",
  "Suspense",
  "memo",
  "forwardRef",
  "createContext",
  "createElement",
  // Common globals
  "console",
  "window",
  "document",
  "Math",
  "Date",
  "JSON",
  "Object",
  "Array",
  "String",
  "Number",
  "Boolean",
  "Promise",
  "Set",
  "Map",
  "WeakSet",
  "WeakMap",
  "Symbol",
  "Error",
  "RegExp",
  "Intl",
  "setTimeout",
  "setInterval",
  "clearTimeout",
  "clearInterval",
  "fetch",
  "require",
  // Common type names ("void" is already listed with the keywords)
  "string",
  "number",
  "boolean",
  "object",
  "any",
  "unknown",
  "never",
  // Normalized placeholders (preserve these)
  "__SIZE__",
  "__VARIANT__",
  "__CLICK_HANDLER__",
  "__CHANGE_HANDLER__",
  "__SUBMIT_HANDLER__",
  "__LOADING__",
  "__ERROR__",
  "__SUCCESS__",
  "__DATA__",
  "__CHILDREN__",
  "__LABEL__",
  "__DESCRIPTION__",
  "__VALUE__",
  "__DISABLED__",
  "__VISIBLE__",
  "__CLASSNAME__"
]);

/**
 * Replaces identifier-like words with positional placeholders.
 *
 * Each distinct word (compared case-insensitively) is replaced by `_ID<n>_`,
 * where `n` counts distinct words in order of first appearance within this
 * call. Words in PRESERVED_IDENTIFIERS are left as they are; a word is also
 * preserved when its lower-cased form is in that set.
 *
 * @param {string} code - Source text.
 * @returns {string} The text with non-preserved identifiers renamed.
 */
function normalizeIdentifiersSimple(code) {
  const placeholders = /* @__PURE__ */ new Map();
  let counter = 0;
  const identifierPattern = /\b([a-z_$][a-z0-9_$]*)\b/gi;

  return code.replace(identifierPattern, (match) => {
    const key = match.toLowerCase();
    if (PRESERVED_IDENTIFIERS.has(match) || PRESERVED_IDENTIFIERS.has(key)) {
      return match;
    }
    if (!placeholders.has(key)) {
      placeholders.set(key, `_ID${counter++}_`);
    }
    return placeholders.get(key);
  });
}
|
|
2335
|
+
/**
 * Runs the enabled normalization steps over source text, in this order:
 * comment stripping, semantic placeholder substitution, identifier renaming,
 * whitespace collapsing.
 *
 * @param {string} code - Source text.
 * @param {object} [options=DEFAULT_NORMALIZATION_OPTIONS] - Switches
 *   `stripComments`, `normalizeSemantics`, `normalizeIdentifiers`,
 *   `normalizeWhitespace`. Any switch left out takes its value from
 *   DEFAULT_NORMALIZATION_OPTIONS, so a partial object (such as the default
 *   used by calculateNormalizedSimilarity) no longer silently turns off the
 *   steps it does not mention.
 * @returns {string} The normalized text.
 */
function normalizeCode(code, options = DEFAULT_NORMALIZATION_OPTIONS) {
  const settings = { ...DEFAULT_NORMALIZATION_OPTIONS, ...options };
  let result = code;
  if (settings.stripComments) {
    result = stripComments(result);
  }
  if (settings.normalizeSemantics) {
    result = applySemanticNormalization(result);
  }
  if (settings.normalizeIdentifiers) {
    result = normalizeIdentifiersSimple(result);
  }
  if (settings.normalizeWhitespace) {
    result = normalizeWhitespace(result);
  }
  return result;
}
|
|
2351
|
+
/**
 * Levenshtein edit distance between two strings, using two rolling rows.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Minimum number of single-element insertions, deletions
 *   and substitutions that turn `a` into `b` (elements are compared by index,
 *   i.e. per UTF-16 code unit for strings).
 */
function levenshteinDistance(a, b) {
  if (a === b) return 0;
  const rows = a.length;
  const cols = b.length;
  if (rows === 0) return cols;
  if (cols === 0) return rows;

  // prev[j] holds the distance between a[0..i) and b[0..j).
  let prev = Array.from({ length: cols + 1 }, (_, j) => j);
  let curr = new Array(cols + 1);

  for (let i = 0; i < rows; i += 1) {
    const unitA = a[i];
    curr[0] = i + 1;
    for (let j = 0; j < cols; j += 1) {
      const deletion = prev[j + 1] + 1;
      const insertion = curr[j] + 1;
      const substitution = prev[j] + (unitA === b[j] ? 0 : 1);
      curr[j + 1] = Math.min(deletion, insertion, substitution);
    }
    const finished = curr;
    curr = prev;
    prev = finished;
  }
  return prev[cols];
}
|
|
2377
|
+
/**
 * Text similarity of two code snippets after normalization.
 *
 * Both snippets go through normalizeCode with the same options, then the
 * similarity is 1 minus the Levenshtein distance divided by the longer
 * normalized length.
 *
 * @param {string} codeA - First snippet.
 * @param {string} codeB - Second snippet.
 * @param {object} [options] - Options handed to normalizeCode; the default
 *   object sets `normalizeIdentifiers` and `normalizeSemantics` to true.
 * @returns {number} 1 when the normalized texts are equal, otherwise
 *   `1 - distance / longerLength`.
 */
function calculateNormalizedSimilarity(codeA, codeB, options = { normalizeIdentifiers: true, normalizeSemantics: true }) {
  const left = normalizeCode(codeA, options);
  const right = normalizeCode(codeB, options);
  if (left === right) return 1;

  const longest = Math.max(left.length, right.length);
  if (longest === 0) return 1;
  return 1 - levenshteinDistance(left, right) / longest;
}
|
|
2388
|
+
/**
 * Tells whether two snippets are near-identical after normalization.
 *
 * @param {string} codeA - First snippet.
 * @param {string} codeB - Second snippet.
 * @param {number} [threshold=0.95] - Minimum normalized similarity.
 * @returns {boolean} True when calculateNormalizedSimilarity (with its
 *   default options) is at least `threshold`.
 */
function isNearIdentical(codeA, codeB, threshold = 0.95) {
  const similarity = calculateNormalizedSimilarity(codeA, codeB);
  return similarity >= threshold;
}
|
|
2391
|
+
/**
 * Normalizes code before it is embedded: comments are stripped and semantic
 * placeholders applied, while identifiers and whitespace are left alone.
 *
 * @param {string} code - Source text.
 * @returns {string} The normalized text.
 */
function prepareForEmbedding(code) {
  const embeddingOptions = {
    normalizeIdentifiers: false,
    normalizeSemantics: true,
    stripComments: true,
    // Preserve structure for embedding
    normalizeWhitespace: false
  };
  return normalizeCode(code, embeddingOptions);
}
|
|
2400
|
+
|
|
2401
|
+
// src/detection/combined-scorer.ts
|
|
2402
|
+
/**
 * Default options for calculateCombinedScore: the weight of each signal and
 * whether the normalized-text signal is computed at all.
 */
var DEFAULT_COMBINED_SCORER_OPTIONS = {
  semanticWeight: 0.5, // weight of the semantic score
  structuralWeight: 0.3, // weight of the structural score
  normalizedWeight: 0.2, // weight of the normalized-text score
  includeNormalized: false // normalized-text comparison is off by default
};
|
|
2408
|
+
/**
 * Combines the semantic, structural and (optionally) normalized-text signals
 * into one similarity score with a confidence assessment.
 *
 * The weights of the signals that are actually present are rescaled to sum
 * to 1 before blending. The two-signal case already did this; it now also
 * applies when the normalized signal is included, so custom weights behave
 * the same way in both cases. With the default weights the result is
 * unchanged. If the weights in use sum to 0 the blended score is 0 rather
 * than NaN.
 *
 * @param {number} semanticScore - Semantic similarity score.
 * @param {object} metadataA - First stored chunk record.
 * @param {object} metadataB - Second stored chunk record.
 * @param {string} [codeA] - Source of the first chunk; used only when
 *   `includeNormalized` is set and both sources are non-empty.
 * @param {string} [codeB] - Source of the second chunk.
 * @param {object} [options={}] - Overrides for DEFAULT_COMBINED_SCORER_OPTIONS.
 * @returns {{final: number, semantic: number, structural: number,
 *   structuralDetails: object, normalized: (number|undefined),
 *   confidence: string, confidenceDetails: object}} The boosted score clamped
 *   to [0, 1], its inputs, and the confidence level and details.
 */
function calculateCombinedScore(semanticScore, metadataA, metadataB, codeA, codeB, options = {}) {
  const opts = { ...DEFAULT_COMBINED_SCORER_OPTIONS, ...options };
  const structuralDetails = calculateStructuralSimilarity(metadataA, metadataB);
  const structural = structuralDetails.combined;

  let normalized;
  if (opts.includeNormalized && codeA && codeB) {
    normalized = calculateNormalizedSimilarity(codeA, codeB);
  }

  // Only the signals that were actually computed take part in the blend.
  const signals = [
    [semanticScore, opts.semanticWeight],
    [structural, opts.structuralWeight]
  ];
  if (normalized !== void 0) {
    signals.push([normalized, opts.normalizedWeight]);
  }

  const totalWeight = signals.reduce((sum, [, weight]) => sum + weight, 0);
  let final = 0;
  if (totalWeight !== 0) {
    for (const [value, weight] of signals) {
      final += value * (weight / totalWeight);
    }
  }

  final = applyBoostForHighSignals(final, semanticScore, structural, normalized);
  final = Math.max(0, Math.min(1, final));

  const confidence = getConfidenceLevel(final);
  const confidenceDetails = getConfidenceResult(final);
  return {
    final,
    semantic: semanticScore,
    structural,
    structuralDetails,
    normalized,
    confidence,
    confidenceDetails
  };
}
|
|
2439
|
+
/**
 * Adds a small bonus to a base score when any single signal is very high.
 *
 * For each signal at or above 0.95, the excess over 0.95 is multiplied by a
 * per-signal factor (2 for normalized, 1.5 for semantic and structural). The
 * largest of these, scaled by 0.1, is added to the base score.
 *
 * @param {number} baseScore - Blended score before boosting.
 * @param {number} semantic - Semantic score.
 * @param {number} structural - Structural score.
 * @param {number} [normalized] - Normalized-text score, if computed.
 * @returns {number} `baseScore` plus the boost (not clamped here).
 */
function applyBoostForHighSignals(baseScore, semantic, structural, normalized) {
  const HIGH_SIGNAL_THRESHOLD = 0.95;
  const BOOST_FACTOR = 0.1;

  const weightedSignals = [
    [normalized, 2],
    [semantic, 1.5],
    [structural, 1.5]
  ];

  let boost = 0;
  for (const [value, multiplier] of weightedSignals) {
    // An undefined signal fails this comparison and is skipped.
    if (value >= HIGH_SIGNAL_THRESHOLD) {
      boost = Math.max(boost, (value - HIGH_SIGNAL_THRESHOLD) * multiplier);
    }
  }
  return baseScore + boost * BOOST_FACTOR;
}
|
|
2454
|
+
/**
 * Structure-only pre-check for whether two chunks could be duplicates.
 *
 * @param {object} metadataA - First stored chunk record.
 * @param {object} metadataB - Second stored chunk record.
 * @param {number} [threshold=0.3] - Minimum combined structural score.
 * @returns {boolean} True when the combined structural score is at least
 *   `threshold`.
 */
function isPotentialDuplicate(metadataA, metadataB, threshold = 0.3) {
  const { combined } = calculateStructuralSimilarity(metadataA, metadataB);
  return combined >= threshold;
}
|
|
2458
|
+
/**
 * Scores two chunks from their structural features only.
 *
 * @param {object} metadataA - First stored chunk record.
 * @param {object} metadataB - Second stored chunk record.
 * @returns {{score: number, confidence: string}} The combined structural
 *   score and the confidence level for that score.
 */
function calculateQuickScore(metadataA, metadataB) {
  const { combined: score } = calculateStructuralSimilarity(metadataA, metadataB);
  return { score, confidence: getConfidenceLevel(score) };
}
|
|
2463
|
+
/**
 * Picks a human-readable recommendation for a combined score.
 *
 * Rules are checked in order and the first match wins: near-identical
 * normalized text, high final score, moderate final score, structure-only
 * similarity, purpose-only similarity, and finally a low-similarity fallback.
 *
 * @param {{final: number, semantic: number, structural: number,
 *   normalized: (number|undefined)}} score - Combined score record.
 * @returns {string} The recommendation text.
 */
function getRecommendedAction(score) {
  const { normalized, final, structural, semantic } = score;

  const rules = [
    [
      normalized !== void 0 && normalized >= 0.95,
      "These appear to be near-identical. Consolidate into a single implementation."
    ],
    [
      final >= 0.9,
      "High similarity detected. Strongly consider consolidating into a shared component/function."
    ],
    [
      final >= 0.75,
      "Moderate similarity. Review for potential abstraction into a shared utility."
    ],
    [
      structural >= 0.8 && semantic < 0.7,
      "Similar structure but different semantics. May be intentionally different implementations."
    ],
    [
      semantic >= 0.8 && structural < 0.5,
      "Similar purpose but different structure. Consider if a common abstraction makes sense."
    ]
  ];

  const firstMatch = rules.find(([applies]) => applies);
  if (firstMatch) return firstMatch[1];
  return "Low similarity. Likely different implementations that happen to share some patterns.";
}
|
|
2481
|
+
/**
 * Renders a combined score as text.
 *
 * @param {{final: number, semantic: number, structural: number,
 *   normalized: (number|undefined), confidence: string}} score - Combined
 *   score record.
 * @param {boolean} [verbose=false] - Also list the individual signals and
 *   the recommended action, one per line.
 * @returns {string} A headline such as "88% similar (medium confidence)",
 *   followed in verbose mode by the detail lines joined with "\n".
 */
function formatCombinedScore(score, verbose = false) {
  const asPercent = (fraction) => Math.round(fraction * 100);
  const output = [`${asPercent(score.final)}% similar (${score.confidence} confidence)`];

  if (verbose) {
    output.push(` Semantic: ${asPercent(score.semantic)}%`);
    output.push(` Structural: ${asPercent(score.structural)}%`);
    if (score.normalized !== void 0) {
      output.push(` Normalized: ${asPercent(score.normalized)}%`);
    }
    output.push(` \u2192 ${getRecommendedAction(score)}`);
  }
  return output.join("\n");
}
|
|
2495
|
+
|
|
1915
2496
|
export {
|
|
1916
2497
|
chunkFile,
|
|
1917
2498
|
prepareEmbeddingInput,
|
|
@@ -1928,6 +2509,21 @@ export {
|
|
|
1928
2509
|
calculateDuplicateScore,
|
|
1929
2510
|
calculateGroupAverageSimilarity,
|
|
1930
2511
|
sortDuplicateGroups,
|
|
2512
|
+
jaccard,
|
|
2513
|
+
calculateSizeRatio2,
|
|
2514
|
+
calculateStructuralSimilarity,
|
|
2515
|
+
hasHighStructuralSimilarity,
|
|
2516
|
+
findStructurallySimilar,
|
|
2517
|
+
DEFAULT_CONFIDENCE_CONFIG,
|
|
2518
|
+
getConfidenceLevel,
|
|
2519
|
+
meetsMinimumThreshold,
|
|
2520
|
+
getConfidenceResult,
|
|
2521
|
+
getConfidenceEmoji,
|
|
2522
|
+
getConfidenceAnsiColor,
|
|
2523
|
+
formatConfidence,
|
|
2524
|
+
formatConfidenceVerbose,
|
|
2525
|
+
compareConfidenceLevels,
|
|
2526
|
+
filterByConfidence,
|
|
1931
2527
|
findDuplicateGroups,
|
|
1932
2528
|
findSimilarToLocation,
|
|
1933
2529
|
findSimilarToQuery,
|
|
@@ -1937,6 +2533,17 @@ export {
|
|
|
1937
2533
|
searchSimilar,
|
|
1938
2534
|
findSimilarAtLocation,
|
|
1939
2535
|
hasIndex,
|
|
1940
|
-
getIndexStats
|
|
2536
|
+
getIndexStats,
|
|
2537
|
+
normalizeCode,
|
|
2538
|
+
levenshteinDistance,
|
|
2539
|
+
calculateNormalizedSimilarity,
|
|
2540
|
+
isNearIdentical,
|
|
2541
|
+
prepareForEmbedding,
|
|
2542
|
+
DEFAULT_COMBINED_SCORER_OPTIONS,
|
|
2543
|
+
calculateCombinedScore,
|
|
2544
|
+
isPotentialDuplicate,
|
|
2545
|
+
calculateQuickScore,
|
|
2546
|
+
getRecommendedAction,
|
|
2547
|
+
formatCombinedScore
|
|
1941
2548
|
};
|
|
1942
|
-
//# sourceMappingURL=chunk-
|
|
2549
|
+
//# sourceMappingURL=chunk-BAZQUJDS.js.map
|