jscpd-rs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CHANGELOG.md +69 -0
  2. package/Cargo.lock +1323 -0
  3. package/Cargo.toml +54 -0
  4. package/LICENSE +21 -0
  5. package/README.md +372 -0
  6. package/docs/api-parity.md +49 -0
  7. package/docs/cloning-plan.md +281 -0
  8. package/docs/compat-baseline.md +535 -0
  9. package/docs/format-porting.md +86 -0
  10. package/docs/junior-task-template.md +62 -0
  11. package/docs/junior-workflow.md +87 -0
  12. package/docs/migrating-from-jscpd.md +193 -0
  13. package/docs/npm-release.md +116 -0
  14. package/docs/public-benchmark-suite.md +81 -0
  15. package/docs/release-checklist.md +200 -0
  16. package/docs/release-decisions.md +103 -0
  17. package/docs/release-readiness.md +51 -0
  18. package/docs/upstream-bugs.md +501 -0
  19. package/docs/upstream-issue-drafts.md +393 -0
  20. package/docs/user-guide.md +309 -0
  21. package/examples/dump_oxc_tokens.rs +112 -0
  22. package/examples/library_api.rs +42 -0
  23. package/npm/bin/jscpd-rs.js +6 -0
  24. package/npm/bin/jscpd-server.js +6 -0
  25. package/npm/lib/run-binary.js +68 -0
  26. package/npm/scripts/postinstall.js +50 -0
  27. package/package.json +53 -0
  28. package/skills/dry-refactoring/SKILL.md +63 -0
  29. package/skills/jscpd/SKILL.md +85 -0
  30. package/src/app.rs +512 -0
  31. package/src/bin/jscpd-server.rs +429 -0
  32. package/src/blame.rs +130 -0
  33. package/src/cli/config.rs +543 -0
  34. package/src/cli/parsing.rs +301 -0
  35. package/src/cli/tests.rs +543 -0
  36. package/src/cli.rs +671 -0
  37. package/src/detector/matching/secondary.rs +387 -0
  38. package/src/detector/matching.rs +274 -0
  39. package/src/detector/model.rs +190 -0
  40. package/src/detector/prepare.rs +71 -0
  41. package/src/detector/skip_local.rs +40 -0
  42. package/src/detector/statistics.rs +138 -0
  43. package/src/detector/store.rs +96 -0
  44. package/src/detector/tests.rs +238 -0
  45. package/src/detector.rs +265 -0
  46. package/src/files/discovery.rs +508 -0
  47. package/src/files/gitignore.rs +203 -0
  48. package/src/files/paths.rs +68 -0
  49. package/src/files/shebang.rs +106 -0
  50. package/src/files/tests.rs +523 -0
  51. package/src/files.rs +25 -0
  52. package/src/formats.rs +570 -0
  53. package/src/lib.rs +433 -0
  54. package/src/main.rs +26 -0
  55. package/src/report/ai.rs +125 -0
  56. package/src/report/badge.rs +238 -0
  57. package/src/report/console.rs +180 -0
  58. package/src/report/console_common.rs +37 -0
  59. package/src/report/console_full.rs +139 -0
  60. package/src/report/csv.rs +65 -0
  61. package/src/report/escape.rs +8 -0
  62. package/src/report/file_output.rs +28 -0
  63. package/src/report/html/assets.rs +47 -0
  64. package/src/report/html.rs +336 -0
  65. package/src/report/json.rs +119 -0
  66. package/src/report/markdown.rs +125 -0
  67. package/src/report/sarif.rs +302 -0
  68. package/src/report/silent.rs +22 -0
  69. package/src/report/source.rs +38 -0
  70. package/src/report/summary.rs +50 -0
  71. package/src/report/test_support.rs +133 -0
  72. package/src/report/threshold.rs +76 -0
  73. package/src/report/xcode.rs +90 -0
  74. package/src/report/xml.rs +119 -0
  75. package/src/report.rs +250 -0
  76. package/src/server/mcp.rs +942 -0
  77. package/src/server.rs +1081 -0
  78. package/src/tokenizer/apex.rs +97 -0
  79. package/src/tokenizer/blocks.rs +532 -0
  80. package/src/tokenizer/embedded.rs +106 -0
  81. package/src/tokenizer/generic.rs +511 -0
  82. package/src/tokenizer/hash.rs +27 -0
  83. package/src/tokenizer/ignore.rs +33 -0
  84. package/src/tokenizer/line_index.rs +33 -0
  85. package/src/tokenizer/markdown.rs +289 -0
  86. package/src/tokenizer/markup_attrs.rs +289 -0
  87. package/src/tokenizer/oxc/fallback.rs +275 -0
  88. package/src/tokenizer/oxc/jsx.rs +168 -0
  89. package/src/tokenizer/oxc/kind.rs +177 -0
  90. package/src/tokenizer/oxc/lexical.rs +67 -0
  91. package/src/tokenizer/oxc.rs +659 -0
  92. package/src/tokenizer/scan.rs +88 -0
  93. package/src/tokenizer/tap.rs +150 -0
  94. package/src/tokenizer/tests.rs +915 -0
  95. package/src/tokenizer.rs +328 -0
  96. package/src/verbose.rs +195 -0
@@ -0,0 +1,112 @@
1
+ use std::{env, fs, path::Path};
2
+
3
+ use oxc_allocator::Allocator;
4
+ use oxc_parser::{
5
+ Parser,
6
+ config::{TokensLexerConfig, TokensParserConfig},
7
+ lexer::Lexer,
8
+ };
9
+ use oxc_span::SourceType;
10
+
11
+ fn main() {
12
+ let args = env::args().collect::<Vec<_>>();
13
+ if args.len() < 2 {
14
+ eprintln!("usage: dump_oxc_tokens [--lexer] <file> [start-line] [end-line]");
15
+ std::process::exit(2);
16
+ }
17
+
18
+ let use_lexer = args.get(1).is_some_and(|arg| arg == "--lexer");
19
+ let first_positional = if use_lexer { 2 } else { 1 };
20
+ let path = &args[first_positional];
21
+ let start_line = args
22
+ .get(first_positional + 1)
23
+ .and_then(|value| value.parse().ok())
24
+ .unwrap_or(1);
25
+ let end_line = args
26
+ .get(first_positional + 2)
27
+ .and_then(|value| value.parse().ok())
28
+ .unwrap_or(usize::MAX);
29
+ let content = fs::read_to_string(path).expect("read file");
30
+ let source_type = SourceType::from_path(Path::new(path)).unwrap_or_default();
31
+ let allocator = Allocator::new();
32
+ let line_index = LineIndex::new(&content);
33
+ let mut count = 0usize;
34
+
35
+ if use_lexer {
36
+ let mut lexer =
37
+ Lexer::new_for_benchmarks(&allocator, &content, source_type, TokensLexerConfig);
38
+ let mut token = lexer.first_token();
39
+ while !token.kind().is_eof() {
40
+ let start = token.start() as usize;
41
+ let end = token.end() as usize;
42
+ let location = line_index.location(start);
43
+ if location.line >= start_line && location.line <= end_line {
44
+ println!(
45
+ "{}:{} {:?}:{}",
46
+ location.line,
47
+ location.column,
48
+ token.kind(),
49
+ &content[start..end]
50
+ );
51
+ }
52
+ count += 1;
53
+ token = lexer.next_token_for_benchmarks();
54
+ }
55
+ } else {
56
+ let parser_return = Parser::new(&allocator, &content, source_type)
57
+ .with_config(TokensParserConfig)
58
+ .parse();
59
+ for token in parser_return.tokens {
60
+ let start = token.start() as usize;
61
+ let end = token.end() as usize;
62
+ let location = line_index.location(start);
63
+ if location.line >= start_line && location.line <= end_line {
64
+ println!(
65
+ "{}:{} {:?}:{}",
66
+ location.line,
67
+ location.column,
68
+ token.kind(),
69
+ &content[start..end]
70
+ );
71
+ }
72
+ count += 1;
73
+ }
74
+ }
75
+ eprintln!("tokens: {count}");
76
+ }
77
+
78
/// Byte offsets of every `\n` in a source text, in ascending order. Used to
/// translate a byte offset into a 1-based line/column pair.
struct LineIndex {
    newlines: Vec<usize>,
}

impl LineIndex {
    /// Records the byte offset of each newline byte found in `content`.
    fn new(content: &str) -> Self {
        let mut newlines = Vec::new();
        for (position, byte) in content.bytes().enumerate() {
            if byte == b'\n' {
                newlines.push(position);
            }
        }
        Self { newlines }
    }

    /// Maps a byte `offset` to its 1-based line and 1-based byte column.
    fn location(&self, offset: usize) -> Location {
        // Count the newlines strictly before `offset`; a newline byte itself
        // is attributed to the line it terminates.
        let breaks_before = self.newlines.partition_point(|&newline| newline < offset);
        let line_start = match breaks_before.checked_sub(1) {
            Some(last_break) => self.newlines[last_break] + 1,
            None => 0,
        };
        Location {
            line: breaks_before + 1,
            column: offset - line_start + 1,
        }
    }
}

/// A 1-based source position; `column` is counted in bytes.
struct Location {
    line: usize,
    column: usize,
}
@@ -0,0 +1,42 @@
1
+ use jscpd_rs::{SourceFile, detect_source_files, get_default_options};
2
+
3
+ fn main() {
4
+ let mut options = get_default_options();
5
+ options.reporters.clear();
6
+ options.silent = true;
7
+ options.no_tips = true;
8
+ options.min_lines = 2;
9
+ options.min_tokens = 5;
10
+
11
+ let files = vec![
12
+ SourceFile {
13
+ source_id: "a.js".to_string(),
14
+ format: "javascript".to_string(),
15
+ content: duplicate_body(),
16
+ },
17
+ SourceFile {
18
+ source_id: "b.js".to_string(),
19
+ format: "javascript".to_string(),
20
+ content: duplicate_body(),
21
+ },
22
+ ];
23
+
24
+ let result = detect_source_files(files, &options);
25
+ println!(
26
+ "{} clones, {} duplicated lines, {:.2}% duplicated",
27
+ result.clones.len(),
28
+ result.statistics.total.duplicated_lines,
29
+ result.statistics.total.percentage
30
+ );
31
+ }
32
+
33
/// Returns the sample JavaScript snippet used for both input files: four
/// statements, each terminated by a newline.
fn duplicate_body() -> String {
    const STATEMENTS: [&str; 4] = [
        "const alpha = 1;",
        "const beta = 2;",
        "const gamma = alpha + beta;",
        "console.log(gamma);",
    ];

    let mut body = String::new();
    for statement in STATEMENTS.iter() {
        body.push_str(statement);
        body.push('\n');
    }
    body
}
@@ -0,0 +1,6 @@
1
#!/usr/bin/env node
"use strict";

// npm `bin` shim: hands the command-line arguments to the `jscpd` binary
// through the shared launcher.
const launcher = require("../lib/run-binary");

launcher.runBinary("jscpd", process.argv.slice(2));
@@ -0,0 +1,6 @@
1
#!/usr/bin/env node
"use strict";

// npm `bin` shim: hands the command-line arguments to the `jscpd-server`
// binary through the shared launcher.
const launcher = require("../lib/run-binary");

launcher.runBinary("jscpd-server", process.argv.slice(2));
@@ -0,0 +1,68 @@
1
+ "use strict";
2
+
3
+ const fs = require("node:fs");
4
+ const path = require("node:path");
5
+ const { spawnSync } = require("node:child_process");
6
+
7
+ function packageRoot() {
8
+ return path.resolve(__dirname, "..", "..");
9
+ }
10
+
11
/**
 * Returns the expected location of a release binary built by Cargo.
 *
 * The target directory is `CARGO_TARGET_DIR` when set, otherwise `target`
 * under the package root. A relative `CARGO_TARGET_DIR` is resolved against
 * the package root, because the build script runs Cargo with the package root
 * as its working directory; resolving it against the caller's working
 * directory would look for the binary in the wrong place.
 *
 * @param {string} name Binary name without extension.
 * @returns {string} Absolute path to the binary (`.exe` appended on Windows).
 */
function binaryPath(name) {
  const exe = process.platform === "win32" ? `${name}.exe` : name;
  const root = packageRoot();
  // `path.resolve` leaves an absolute CARGO_TARGET_DIR untouched.
  const targetDir = path.resolve(root, process.env.CARGO_TARGET_DIR || "target");
  return path.join(targetDir, "release", exe);
}
17
+
18
/**
 * Returns the path of the named release binary, running the package's
 * postinstall build script first when the binary does not exist yet.
 *
 * Failures of the build script are propagated: a spawn error is thrown, a
 * terminating signal is re-raised on this process, and a non-zero status
 * becomes this process's exit status. If the script succeeds but the binary
 * is still absent, an error is printed and the process exits with status 1.
 *
 * @param {string} name Binary name without extension.
 * @returns {string} Path to the existing binary.
 */
function buildIfMissing(name) {
  const target = binaryPath(name);

  if (!fs.existsSync(target)) {
    const root = packageRoot();
    const { error, signal, status } = spawnSync(
      process.execPath,
      [path.join(root, "npm", "scripts", "postinstall.js")],
      { cwd: root, stdio: "inherit", env: process.env },
    );

    if (error) {
      throw error;
    }
    if (signal) {
      process.kill(process.pid, signal);
    }
    if (status !== 0) {
      process.exit(status ?? 1);
    }
    if (!fs.existsSync(target)) {
      console.error(`jscpd-rs: expected binary was not built: ${target}`);
      process.exit(1);
    }
  }

  return target;
}
47
+
48
/**
 * Runs the named native binary with `args`, building it first if necessary,
 * and terminates this process with the child's outcome.
 *
 * stdio and the environment are inherited. A child killed by a signal has
 * that signal re-raised on this process. This function does not return: it
 * always ends in `process.exit` or a thrown spawn error.
 *
 * @param {string} name Binary name without extension.
 * @param {string[]} args Arguments passed through to the binary.
 */
function runBinary(name, args) {
  const binary = buildIfMissing(name);
  const result = spawnSync(binary, args, {
    stdio: "inherit",
    env: process.env,
  });

  if (result.error) {
    if (result.error.code === "ENOENT") {
      console.error(`jscpd-rs: binary not found: ${binary}`);
      process.exit(1);
    }
    throw result.error;
  }
  if (result.signal) {
    process.kill(process.pid, result.signal);
  }
  // `status` is null only when the child did not exit normally. If the
  // re-raised signal did not end this process, report failure rather than
  // success (this matches the fallback used in buildIfMissing).
  process.exit(result.status ?? 1);
}

module.exports = { runBinary };
@@ -0,0 +1,50 @@
1
+ "use strict";
2
+
3
+ const fs = require("node:fs");
4
+ const path = require("node:path");
5
+ const { spawnSync } = require("node:child_process");
6
+
7
// Builds the native `jscpd` and `jscpd-server` release binaries with Cargo
// unless they already exist or the build is explicitly skipped.
const root = path.resolve(__dirname, "..", "..");
const cargo = process.env.CARGO || "cargo";
// Cargo is run with `cwd: root` below, so a relative CARGO_TARGET_DIR must be
// resolved against the package root to find what Cargo produced.
// `path.resolve` leaves an absolute value untouched.
const targetDir = path.resolve(root, process.env.CARGO_TARGET_DIR || "target");
const releaseDir = path.join(targetDir, "release");
const exeSuffix = process.platform === "win32" ? ".exe" : "";
const binaries = ["jscpd", "jscpd-server"].map((name) =>
  path.join(releaseDir, `${name}${exeSuffix}`),
);

if (process.env.JSCPD_RS_SKIP_POSTINSTALL === "1") {
  console.log("jscpd-rs: skipping native build because JSCPD_RS_SKIP_POSTINSTALL=1");
  process.exit(0);
}

// Nothing to do when both binaries are already present.
if (binaries.every((binary) => fs.existsSync(binary))) {
  process.exit(0);
}

console.log("jscpd-rs: building native binaries with Cargo");
const result = spawnSync(
  cargo,
  ["build", "--release", "--locked", "--bin", "jscpd", "--bin", "jscpd-server"],
  {
    cwd: root,
    stdio: "inherit",
    env: process.env,
  },
);

if (result.error) {
  if (result.error.code === "ENOENT") {
    console.error(
      "jscpd-rs: Cargo was not found. Install Rust from https://rustup.rs/ and retry.",
    );
    process.exit(1);
  }
  throw result.error;
}

if (result.signal) {
  process.kill(process.pid, result.signal);
}

// `status` is null only when Cargo did not exit normally. If the re-raised
// signal did not end this process, report failure rather than success.
process.exit(result.status ?? 1);
package/package.json ADDED
@@ -0,0 +1,53 @@
1
+ {
2
+ "name": "jscpd-rs",
3
+ "version": "0.1.0",
4
+ "description": "Fast native Rust clone of jscpd for duplicate-code detection",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/vv-bogdanov/jscpd-rs.git"
9
+ },
10
+ "homepage": "https://github.com/vv-bogdanov/jscpd-rs#readme",
11
+ "bugs": {
12
+ "url": "https://github.com/vv-bogdanov/jscpd-rs/issues"
13
+ },
14
+ "keywords": [
15
+ "jscpd",
16
+ "copy-paste",
17
+ "copy-paste-detector",
18
+ "duplication",
19
+ "duplicate-code",
20
+ "code-duplication",
21
+ "clone-detection",
22
+ "ci",
23
+ "sarif",
24
+ "cli"
25
+ ],
26
+ "bin": {
27
+ "jscpd-rs": "npm/bin/jscpd-rs.js",
28
+ "jscpd": "npm/bin/jscpd-rs.js",
29
+ "jscpd-server": "npm/bin/jscpd-server.js"
30
+ },
31
+ "scripts": {
32
+ "postinstall": "node npm/scripts/postinstall.js",
33
+ "test:npm-package": "scripts/npm-package-check.sh"
34
+ },
35
+ "files": [
36
+ "CHANGELOG.md",
37
+ "Cargo.lock",
38
+ "Cargo.toml",
39
+ "docs/**",
40
+ "examples/**",
41
+ "LICENSE",
42
+ "README.md",
43
+ "npm/**",
44
+ "skills/**",
45
+ "src/**"
46
+ ],
47
+ "engines": {
48
+ "node": ">=18"
49
+ },
50
+ "publishConfig": {
51
+ "access": "public"
52
+ }
53
+ }
@@ -0,0 +1,63 @@
1
+ ---
2
+ name: dry-refactoring
3
+ description: Guided workflow to eliminate copy-paste duplication detected by jscpd-rs.
4
+ ---
5
+
6
+ # dry-refactoring
7
+
8
+ Guided workflow to eliminate copy-paste duplication in source code. Use after
9
+ running `jscpd-rs` to detect clones.
10
+
11
+ ## Prerequisites
12
+
13
+ First, run `jscpd-rs` to identify duplications:
14
+
15
+ ```bash
16
+ npx jscpd-rs --reporters ai <path>
17
+ ```
18
+
19
+ See the [jscpd-rs skill](../jscpd/SKILL.md) for option details.
20
+
21
+ ## Workflow
22
+
23
+ 1. Run `jscpd-rs` with `--reporters ai` on the target path.
24
+ 2. Parse each clone line to identify the two files and line ranges involved.
25
+ 3. Read both code fragments from the source files.
26
+ 4. Understand what the duplicated code does.
27
+ 5. Design a refactoring: extract a shared function, class, module, constant, or
28
+ base abstraction.
29
+ 6. Apply the refactoring and update all call sites, not just the two reported
30
+ locations.
31
+ 7. Run tests for the touched area.
32
+ 8. Re-run `jscpd-rs` to confirm the clone is gone or reduced.
33
+ 9. Repeat for the highest-impact remaining clones.
34
+
35
+ ## Refactoring Strategies
36
+
37
+ Extract function: use when the duplicate is a repeated logic block.
38
+
39
+ Extract module or utility: use when the duplicate spans files that can depend on
40
+ a shared helper.
41
+
42
+ Extract constant or config: use when the duplicate is repeated data,
43
+ configuration, selectors, strings, or magic values.
44
+
45
+ Template or base abstraction: use when the duplicate is structural and the
46
+ shared shape is stable.
47
+
48
+ ## Guardrails
49
+
50
+ - Do not refactor unrelated behavior while removing duplication.
51
+ - Keep the extracted abstraction named after the domain concept, not the clone.
52
+ - Check all similar call sites, because one clone pair can represent a larger
53
+ duplicated family.
54
+ - Keep test fixtures duplicated when the duplication is intentional and improves
55
+ readability.
56
+ - Prefer small, reversible refactors over broad rewrites.
57
+
58
+ ## Useful Commands
59
+
60
+ ```bash
61
+ npx jscpd-rs --reporters ai --min-lines 10 <path>
62
+ npx jscpd-rs --reporters json --output /tmp/jscpd-report <path>
63
+ ```
@@ -0,0 +1,85 @@
1
+ ---
2
+ name: jscpd
3
+ description: Fast native Rust clone of jscpd. Detect duplicated code and measure duplication percentages.
4
+ ---
5
+
6
+ # jscpd-rs
7
+
8
+ Fast native Rust clone of jscpd. Use this skill to run `jscpd-rs` and
9
+ understand its output.
10
+
11
+ ## Quick Start
12
+
13
+ ```bash
14
+ # Run with ai reporter (compact output optimized for agents)
15
+ npx jscpd-rs --reporters ai <path>
16
+
17
+ # With ignore patterns
18
+ npx jscpd-rs --reporters ai --ignore "**/node_modules/**,**/dist/**" <path>
19
+
20
+ # Scope to specific formats
21
+ npx jscpd-rs --reporters ai --format "javascript,typescript" <path>
22
+ ```
23
+
24
+ ## AI Reporter Output Format
25
+
26
+ The `ai` reporter produces compact, token-efficient output designed for agent
27
+ consumption:
28
+
29
+ ```text
30
+ Clones:
31
+ src/ foo.ts:10-25 ~ bar.ts:42-57
32
+ src/utils/helpers.ts:100-120 ~ src/utils/other.ts:5-25
33
+ ---
34
+ 2 clones · 4.2% duplication
35
+ ```
36
+
37
+ Each line represents one clone pair:
38
+
39
+ - Same file: `path/file.ts 10-25 ~ 45-60`
40
+ - Same directory: `shared/prefix/ file-a.ts:10-25 ~ file-b.ts:42-57`
41
+ - Different paths: `path/a.ts:10-25 ~ path/b.ts:42-57`
42
+
43
+ ## Options
44
+
45
+ | Option | Description |
46
+ | --- | --- |
47
+ | `--reporters ai` | Use the AI-optimized reporter |
48
+ | `--reporters html` | Generate HTML report |
49
+ | `--reporters json` | Output JSON report |
50
+ | `--min-tokens N` | Minimum tokens to consider a duplication |
51
+ | `--min-lines N` | Minimum lines to consider a duplication |
52
+ | `--threshold N` | Exit with error if duplication percentage exceeds N |
53
+ | `--ignore "glob"` | Ignore patterns, comma-separated |
54
+ | `--format "list"` | Limit to specific languages |
55
+ | `--pattern "glob"` | Glob pattern to select files |
56
+ | `--gitignore` | Respect `.gitignore` |
57
+ | `--output "path"` | Directory to write reports to |
58
+ | `--silent` | Suppress output |
59
+ | `--no-tips` | Disable terminal tips |
60
+ | `--config "path"` | Path to `.jscpd.json` config file |
61
+
62
+ ## Configuration File
63
+
64
+ Create a `.jscpd.json` in your project root:
65
+
66
+ ```json
67
+ {
68
+ "threshold": 0,
69
+ "reporters": ["ai"],
70
+ "ignore": ["**/node_modules/**", "**/dist/**", "**/*.min.*"],
71
+ "format": ["typescript", "javascript"],
72
+ "minLines": 5,
73
+ "minTokens": 50,
74
+ "output": "./reports/jscpd"
75
+ }
76
+ ```
77
+
78
+ ## Refactoring Duplicated Code
79
+
80
+ Once duplicates are detected, use the `dry-refactoring` skill for a guided
81
+ workflow to eliminate them:
82
+
83
+ ```bash
84
+ npx skills add vv-bogdanov/jscpd-rs --skill dry-refactoring
85
+ ```