pastapolice 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.cjs +114 -3
  2. package/package.json +5 -1
package/dist/index.cjs CHANGED
@@ -220025,6 +220025,10 @@ function isKeyword(text) {
220025
220025
  function hashNormalized(normalized) {
220026
220026
  return (0, import_xxhash.xxh64)(normalized).toString(16);
220027
220027
  }
220028
/**
 * Split a normalized source string into tokens.
 *
 * The text is cut on runs of whitespace and the punctuation/operator
 * characters listed in the pattern; empty fragments (produced by leading or
 * trailing delimiters) are discarded.
 *
 * @param {string} normalized - Normalized source text.
 * @returns {string[]} Non-empty tokens in order of appearance.
 */
function tokenizeNormalized(normalized) {
  const tokens = [];
  for (const fragment of normalized.split(/[\s{}\[\]();,.<>+/*%=!?&|:]+/)) {
    if (fragment.length > 0) {
      tokens.push(fragment);
    }
  }
  return tokens;
}
220028
220032
 
220029
220033
  // src/processor.ts
220030
220034
  async function processFile(file, minLines, map) {
@@ -220065,6 +220069,22 @@ async function processFileSemantic(file, minLines, semanticMap) {
220065
220069
  semanticMap.get(hash).push(func);
220066
220070
  }
220067
220071
  }
220072
/**
 * Collect the functions of one file for fuzzy matching.
 *
 * Reads the file, parses it with `parseFunctions`, and for every function
 * whose line span is not below `max(1, minLines)` runs `normalizeFunction`,
 * attaches `tokens` (from `func.normalized`, presumably set by
 * `normalizeFunction` - confirm) and appends it to `allFunctions`.
 * `func.file` is set on every parsed function, including skipped ones.
 *
 * @param {string} file - Path of the file to read.
 * @param {number} minLines - Minimum line span; values below 1 are treated as 1.
 * @param {Array} [allFunctions=[]] - Accumulator that is mutated and returned.
 * @returns {Promise<Array>} The same `allFunctions` array.
 */
async function processFileFuzzy(file, minLines, allFunctions = []) {
  const { readFile } = import_promises;
  const source = await readFile(file, "utf-8");
  const parsed = parseFunctions(file, source);
  const requiredLines = Math.max(1, minLines);
  for (const func of parsed) {
    func.file = file;
    const span = func.end - func.start + 1;
    if (span < requiredLines) {
      continue;
    }
    normalizeFunction(file, source, func);
    func.tokens = tokenizeNormalized(func.normalized);
    allFunctions.push(func);
  }
  return allFunctions;
}
220068
220088
  function normalizeLine(line) {
220069
220089
  return line.trim();
220070
220090
  }
@@ -220080,6 +220100,49 @@ function getSemanticMatches(semanticMap) {
220080
220100
  }
220081
220101
  return matches;
220082
220102
  }
220103
/**
 * Jaccard similarity of two token lists, treated as sets.
 *
 * @param {string[]} tokens1 - First token list.
 * @param {string[]} tokens2 - Second token list.
 * @returns {number} |A ∩ B| / |A ∪ B|; 1 when both lists are empty, 0 when
 *   exactly one of them is empty.
 */
function jaccardSimilarity(tokens1, tokens2) {
  const firstEmpty = tokens1.length === 0;
  const secondEmpty = tokens2.length === 0;
  if (firstEmpty && secondEmpty) return 1;
  if (firstEmpty || secondEmpty) return 0;

  const left = new Set(tokens1);
  const right = new Set(tokens2);
  let shared = 0;
  for (const token of left) {
    if (right.has(token)) {
      shared += 1;
    }
  }
  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const unionSize = left.size + right.size - shared;
  return shared / unionSize;
}
220112
/**
 * Group functions from different files whose token sets are similar.
 *
 * Every pair of functions that live in different files is scored with
 * `jaccardSimilarity`. Pairs scoring at or above `threshold` are collected
 * into groups: a pair joins the group that already contains either of its
 * functions, otherwise it starts a new group.
 *
 * Group membership is tracked for BOTH functions of every pair. Looking only
 * at the first function of stored pairs would put the pair (B, C) into a
 * second group even when A-B and A-C are already grouped, so B and C would
 * be reported twice.
 *
 * @param {Array<{file: string, start: number, end: number, tokens?: string[], normalized?: string}>} allFunctions
 *   Functions to compare. `tokens` is used when present, otherwise it is
 *   derived from `normalized` via `tokenizeNormalized`.
 * @param {number} [threshold=0.6] - Minimum similarity (inclusive) for a pair.
 * @returns {Array<{pairs: Array<{func1: object, func2: object, similarity: number}>, averageSimilarity: number}>}
 *   Groups in order of creation; `averageSimilarity` is the mean over the
 *   group's pairs.
 */
function getFuzzyMatches(allFunctions, threshold = 0.6) {
  const matches = [];
  const checked = new Set();
  // "file:start" of every grouped function -> the group it belongs to.
  const groupByFunction = new Map();
  const functionKey = (func) => `${func.file}:${func.start}`;
  const tokensOf = (func) => func.tokens || tokenizeNormalized(func.normalized);

  for (let i = 0; i < allFunctions.length; i++) {
    const func1 = allFunctions[i];
    for (let j = i + 1; j < allFunctions.length; j++) {
      const func2 = allFunctions[j];
      if (func1.file === func2.file) continue;

      // Guards against the same function appearing more than once in the input.
      const pairKey = `${func1.file}:${func1.start}-${func1.end}|${func2.file}:${func2.start}-${func2.end}`;
      if (checked.has(pairKey)) continue;
      checked.add(pairKey);

      const similarity = jaccardSimilarity(tokensOf(func1), tokensOf(func2));
      if (similarity >= threshold) {
        const key1 = functionKey(func1);
        const key2 = functionKey(func2);
        let group = groupByFunction.get(key1) || groupByFunction.get(key2);
        if (group) {
          group.pairs.push({ func1, func2, similarity });
          group.averageSimilarity =
            group.pairs.reduce((sum, p) => sum + p.similarity, 0) / group.pairs.length;
        } else {
          group = {
            pairs: [{ func1, func2, similarity }],
            averageSimilarity: similarity
          };
          matches.push(group);
        }
        // A function stays with the first group it was placed in.
        if (!groupByFunction.has(key1)) groupByFunction.set(key1, group);
        if (!groupByFunction.has(key2)) groupByFunction.set(key2, group);
      }
    }
  }
  return matches;
}
220083
220146
 
220084
220147
  // node_modules/chalk/source/vendor/ansi-styles/index.js
220085
220148
  var ANSI_BACKGROUND_OFFSET = 10;
@@ -220624,12 +220687,56 @@ async function reportSemantic(matches) {
220624
220687
  }
220625
220688
  console.log(source_default.yellow(`Total semantic matches found: ${matches.length}`));
220626
220689
  }
220690
/**
 * Print the fuzzy-match groups to the console.
 *
 * For each group: the rounded average similarity, then every distinct
 * function (keyed by file and start line) with its signature, location and
 * the source lines `start`..`end` re-read from disk. A final line reports
 * the number of groups.
 *
 * @param {Array<{pairs: Array<{func1: object, func2: object}>, averageSimilarity: number}>} matches
 *   Groups to report.
 * @returns {Promise<void>}
 */
async function reportFuzzy(matches) {
  if (matches.length === 0) {
    console.log(source_default.green("\u2705 No suspicious pasta found."));
    return;
  }
  console.log(source_default.red("\n\u{1F693} Fuzzy semantic matching complete!\n"));
  for (const match of matches) {
    const similarityPercent = Math.round(match.averageSimilarity * 100);
    console.log(source_default.red(`\u{1F50D} ${similarityPercent}% similar functions detected:
`));

    // Each function is listed once even if it takes part in several pairs.
    const uniqueFunctions = new Map();
    for (const { func1, func2 } of match.pairs) {
      for (const func of [func1, func2]) {
        const key = `${func.file}:${func.start}`;
        if (!uniqueFunctions.has(key)) {
          uniqueFunctions.set(key, func);
        }
      }
    }

    for (const func of uniqueFunctions.values()) {
      console.log(source_default.yellow(` ${func.signature}`));
      console.log(source_default.gray(` File: ${func.file}:${func.start}-${func.end}`));
      try {
        const { readFile } = import_promises2;
        const content = await readFile(func.file, "utf-8");
        const snippet = content
          .split("\n")
          .slice(func.start - 1, func.end)
          .join("\n");
        console.log(source_default.gray(" Code:"));
        for (const line of snippet.split("\n")) {
          console.log(source_default.gray(` ${line}`));
        }
      } catch {
        // Best effort: the snippet is omitted when it cannot be printed.
      }
      console.log();
    }
    console.log(source_default.gray("--------------------------------\n"));
  }
  console.log(source_default.yellow(`Total fuzzy matches found: ${matches.length}`));
}
220627
220726
 
220628
220727
  // src/scan.ts
220629
- async function scan(root, minLines, syntactic = false) {
220728
+ async function scan(root, minLines, syntactic = false, fuzzy = false) {
220630
220729
  console.log("\u{1F693} PastaPolice scanning...");
220631
220730
  const files = await getSourceFiles(root);
220632
- if (!syntactic) {
220731
+ if (fuzzy) {
220732
+ console.log("\u{1F50D} Mode: Fuzzy semantic matching (AST-based with similarity scoring)");
220733
+ const allFunctions = [];
220734
+ for (const file of files) {
220735
+ await processFileFuzzy(file, 1, allFunctions);
220736
+ }
220737
+ const matches = getFuzzyMatches(allFunctions, 0.7);
220738
+ await reportFuzzy(matches);
220739
+ } else if (!syntactic) {
220633
220740
  console.log("\u{1F50D} Mode: Semantic clone detection (AST-based)");
220634
220741
  const semanticMap = /* @__PURE__ */ new Map();
220635
220742
  for (const file of files) {
@@ -220652,10 +220759,14 @@ var program2 = new Command();
220652
220759
  program2.name("PastaPolice").description("Detect copy-paste code blocks").argument("[path]", "path to scan", ".").option("-m, --min-lines <number>", "minimum lines per block", "5").option(
220653
220760
  "-s, --syntactic",
220654
220761
  "use syntactic (line-based) detection instead of semantic"
220762
+ ).option(
220763
+ "-f, --fuzzy",
220764
+ "use fuzzy semantic matching to find similar (not exact) functions"
220655
220765
  ).action(async (path, options) => {
220656
220766
  const minLines = parseInt(options.minLines, 10);
220657
220767
  const syntactic = options.syntactic || false;
220658
- await scan(path, minLines, syntactic);
220768
+ const fuzzy = options.fuzzy || false;
220769
+ await scan(path, minLines, syntactic, fuzzy);
220659
220770
  });
220660
220771
  program2.parse();
220661
220772
  /*! Bundled license information:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pastapolice",
3
- "version": "1.0.2",
3
+ "version": "1.0.3",
4
4
  "description": "Detect copy-paste and semantically similar code in TypeScript/JavaScript projects",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -10,6 +10,10 @@
10
10
  "bin": {
11
11
  "pastapolice": "./dist/index.cjs"
12
12
  },
13
+ "repository": {
14
+ "type": "git",
15
+ "url": "https://github.com/lawlesx/PastaPolice.git"
16
+ },
13
17
  "keywords": [
14
18
  "duplicate",
15
19
  "code",