jscpd-rs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CHANGELOG.md +69 -0
  2. package/Cargo.lock +1323 -0
  3. package/Cargo.toml +54 -0
  4. package/LICENSE +21 -0
  5. package/README.md +372 -0
  6. package/docs/api-parity.md +49 -0
  7. package/docs/cloning-plan.md +281 -0
  8. package/docs/compat-baseline.md +535 -0
  9. package/docs/format-porting.md +86 -0
  10. package/docs/junior-task-template.md +62 -0
  11. package/docs/junior-workflow.md +87 -0
  12. package/docs/migrating-from-jscpd.md +193 -0
  13. package/docs/npm-release.md +116 -0
  14. package/docs/public-benchmark-suite.md +81 -0
  15. package/docs/release-checklist.md +200 -0
  16. package/docs/release-decisions.md +103 -0
  17. package/docs/release-readiness.md +51 -0
  18. package/docs/upstream-bugs.md +501 -0
  19. package/docs/upstream-issue-drafts.md +393 -0
  20. package/docs/user-guide.md +309 -0
  21. package/examples/dump_oxc_tokens.rs +112 -0
  22. package/examples/library_api.rs +42 -0
  23. package/npm/bin/jscpd-rs.js +6 -0
  24. package/npm/bin/jscpd-server.js +6 -0
  25. package/npm/lib/run-binary.js +68 -0
  26. package/npm/scripts/postinstall.js +50 -0
  27. package/package.json +53 -0
  28. package/skills/dry-refactoring/SKILL.md +63 -0
  29. package/skills/jscpd/SKILL.md +85 -0
  30. package/src/app.rs +512 -0
  31. package/src/bin/jscpd-server.rs +429 -0
  32. package/src/blame.rs +130 -0
  33. package/src/cli/config.rs +543 -0
  34. package/src/cli/parsing.rs +301 -0
  35. package/src/cli/tests.rs +543 -0
  36. package/src/cli.rs +671 -0
  37. package/src/detector/matching/secondary.rs +387 -0
  38. package/src/detector/matching.rs +274 -0
  39. package/src/detector/model.rs +190 -0
  40. package/src/detector/prepare.rs +71 -0
  41. package/src/detector/skip_local.rs +40 -0
  42. package/src/detector/statistics.rs +138 -0
  43. package/src/detector/store.rs +96 -0
  44. package/src/detector/tests.rs +238 -0
  45. package/src/detector.rs +265 -0
  46. package/src/files/discovery.rs +508 -0
  47. package/src/files/gitignore.rs +203 -0
  48. package/src/files/paths.rs +68 -0
  49. package/src/files/shebang.rs +106 -0
  50. package/src/files/tests.rs +523 -0
  51. package/src/files.rs +25 -0
  52. package/src/formats.rs +570 -0
  53. package/src/lib.rs +433 -0
  54. package/src/main.rs +26 -0
  55. package/src/report/ai.rs +125 -0
  56. package/src/report/badge.rs +238 -0
  57. package/src/report/console.rs +180 -0
  58. package/src/report/console_common.rs +37 -0
  59. package/src/report/console_full.rs +139 -0
  60. package/src/report/csv.rs +65 -0
  61. package/src/report/escape.rs +8 -0
  62. package/src/report/file_output.rs +28 -0
  63. package/src/report/html/assets.rs +47 -0
  64. package/src/report/html.rs +336 -0
  65. package/src/report/json.rs +119 -0
  66. package/src/report/markdown.rs +125 -0
  67. package/src/report/sarif.rs +302 -0
  68. package/src/report/silent.rs +22 -0
  69. package/src/report/source.rs +38 -0
  70. package/src/report/summary.rs +50 -0
  71. package/src/report/test_support.rs +133 -0
  72. package/src/report/threshold.rs +76 -0
  73. package/src/report/xcode.rs +90 -0
  74. package/src/report/xml.rs +119 -0
  75. package/src/report.rs +250 -0
  76. package/src/server/mcp.rs +942 -0
  77. package/src/server.rs +1081 -0
  78. package/src/tokenizer/apex.rs +97 -0
  79. package/src/tokenizer/blocks.rs +532 -0
  80. package/src/tokenizer/embedded.rs +106 -0
  81. package/src/tokenizer/generic.rs +511 -0
  82. package/src/tokenizer/hash.rs +27 -0
  83. package/src/tokenizer/ignore.rs +33 -0
  84. package/src/tokenizer/line_index.rs +33 -0
  85. package/src/tokenizer/markdown.rs +289 -0
  86. package/src/tokenizer/markup_attrs.rs +289 -0
  87. package/src/tokenizer/oxc/fallback.rs +275 -0
  88. package/src/tokenizer/oxc/jsx.rs +168 -0
  89. package/src/tokenizer/oxc/kind.rs +177 -0
  90. package/src/tokenizer/oxc/lexical.rs +67 -0
  91. package/src/tokenizer/oxc.rs +659 -0
  92. package/src/tokenizer/scan.rs +88 -0
  93. package/src/tokenizer/tap.rs +150 -0
  94. package/src/tokenizer/tests.rs +915 -0
  95. package/src/tokenizer.rs +328 -0
  96. package/src/verbose.rs +195 -0
@@ -0,0 +1,393 @@
1
+ # Upstream Issue Drafts
2
+
3
+ These drafts are prepared from `docs/upstream-bugs.md` for filing issues in
4
+ `kucherenko/jscpd`. Re-verify each issue against the current upstream default
5
+ branch before posting. Drafts that use public repositories include pinned
6
+ commits; the other drafts use upstream fixtures or minimal inline
7
+ reproductions.
8
+
9
+ Verification snapshot: on 2026-05-31, upstream remote `HEAD` resolved to
10
+ `refs/heads/master` at `50290cf`; no `refs/heads/main` was advertised. The
11
+ local upstream submodule is clean at that SHA and was used as the current
12
+ upstream checkout for quick verification. Drafts 1, 2, 3, 5, 6, and 7
13
+ reproduced. Draft 4 is covered by the public benchmark compatibility gate
14
+ recorded in `docs/compat-baseline.md`; the full GitHub Actions release-candidate
15
+ gate also passed on 2026-05-31.
16
+
17
+ Quick verification notes:
18
+
19
+ - Draft 1 reproduced the tokenizer output
20
+ `comment "//${b}${c?\":\"+c:\"\"}`}function k(){return 1}"`.
21
+ - Draft 2 reproduced `git blame` exit 128 from the parent repository on
22
+ `jscpd/fixtures/javascript/file_4.js`.
23
+ - Draft 3 reproduced the reported fixture ranges
24
+ `pug file1.pug:1-274 <> file2.pug:1-266`,
25
+ `haml file1.haml:1-26 <> file2.haml:1-26`, and
26
+ `aspnet file1.aspx:18-36 <> file2.aspx:18-43`.
27
+ - Draft 5 reproduced
28
+ `TypeError: Cannot read properties of undefined (reading 'range')`.
29
+ - Draft 6 was rechecked by source search: `cache` is defaulted/copied but has
30
+ no CLI flag or runtime read, `listeners` is only normalized, and
31
+ `tokensToSkip` appears only in the options interface.
32
+ - Draft 7 reproduced the documented bare-flag behavior for `--threshold`,
33
+ `--exitCode`, and `--ignore`, and the malformed `--formats-exts javascript` crash.
34
+
35
+ ## Draft 1: JavaScript tokenizer treats `//` inside a template literal as a comment
36
+
37
+ Suggested title:
38
+
39
+ ```text
40
+ JavaScript tokenizer treats `//` inside a template literal as a line comment
41
+ ```
42
+
43
+ Suggested labels: `bug`, `javascript`, `tokenizer`.
44
+
45
+ Summary:
46
+
47
+ The Prism-backed JavaScript tokenizer can treat `//` inside a JavaScript
48
+ template literal as a line comment. The resulting comment token consumes the
49
+ rest of the physical line, including ordinary JavaScript after the template
50
+ literal. This can hide valid duplicated code behind one large comment token in
51
+ minified or bundled JavaScript.
52
+
53
+ Minimal tokenizer repro:
54
+
55
+ Run this from the upstream `jscpd` repository after building packages:
56
+
57
+ ```bash
58
+ node - <<'NODE'
59
+ const { Tokenizer } = require('./packages/tokenizer/dist/index.js');
60
+ const { mild } = require('./packages/core/dist/index.js');
61
+
62
+ const code =
63
+ 'function h(a){let j="//";return`${j}`}function j(){let{protocol:a,hostname:b,port:c}=window.location;return`${a}//${b}${c?":"+c:""}`}function k(){return 1}\n';
64
+
65
+ const tokens = new Tokenizer()
66
+ .generateMaps('repro.js', code, 'javascript', { minTokens: 1, mode: mild })[0]
67
+ .tokens;
68
+
69
+ for (const token of tokens.filter((t) => t.type === 'comment' || t.value.includes('//'))) {
70
+ console.log(`${token.type} ${JSON.stringify(token.value)} line=${token.loc.start.line} col=${token.loc.start.column}`);
71
+ }
72
+ NODE
73
+ ```
74
+
75
+ Observed symptoms:
76
+
77
+ - The first `//` string literal is emitted correctly as a `string` token.
78
+ - The `//` inside the template literal is emitted as a `comment` token:
79
+
80
+ ```text
81
+ string "\"//\"" line=1 col=21
82
+ comment "//${b}${c?\":\"+c:\"\"}`}function k(){return 1}" line=1 col=113
83
+ ```
84
+
85
+ Expected behavior:
86
+
87
+ `//` inside template literal text should remain a template string segment and
88
+ should not comment out the rest of the generated line.
89
+
90
+ Impact:
91
+
92
+ Generated/minified SSR bundles can lose duplicate coverage because ordinary
93
+ module code after the template literal is hidden inside one incorrect comment
94
+ token.
95
+
96
+ ## Draft 2: `--blame` fails for paths inside a nested Git repository
97
+
98
+ Suggested title:
99
+
100
+ ```text
101
+ --blame fails when scanned files are inside a nested Git repository or submodule
102
+ ```
103
+
104
+ Suggested labels: `bug`, `blame`, `git`.
105
+
106
+ Summary:
107
+
108
+ When `jscpd` is launched from a parent repository and the scan target is inside a
109
+ nested Git repository or submodule, `--blame` invokes `git blame` from the
110
+ parent working directory with the nested file path. The parent repository does
111
+ not track that nested file as a normal file, so `git blame` exits with 128 and
112
+ the whole detection run fails.
113
+
114
+ Repro (run from the parent repository that contains the nested `jscpd` checkout):
115
+
116
+ ```bash
117
+ node jscpd/apps/jscpd/bin/jscpd jscpd/fixtures/javascript \
118
+ --format javascript \
119
+ --reporters json \
120
+ --output /tmp/jscpd-upstream-blame \
121
+ --silent \
122
+ --noTips \
123
+ --blame \
124
+ --min-tokens 20 \
125
+ --min-lines 3 \
126
+ --max-size 1mb \
127
+ --exitCode 0
128
+ ```
129
+
130
+ Observed first error on the current submodule:
131
+
132
+ ```text
133
+ Error: Command failed with exit code 128: /usr/bin/git blame -w jscpd/fixtures/javascript/file_2.mjs
134
+ fatal: no such path 'jscpd/fixtures/javascript/file_2.mjs' in HEAD
135
+ ```
136
+
137
+ Expected behavior:
138
+
139
+ Blame should run from the scanned file's own repository/worktree, or blame
140
+ should fail per file without aborting the whole detection run.
141
+
142
+ ## Draft 3: Clone ranges can extend through non-matching embedded/template blocks
143
+
144
+ Suggested title:
145
+
146
+ ```text
147
+ Clone ranges can extend through non-matching embedded or template blocks
148
+ ```
149
+
150
+ Suggested labels: `bug`, `detector`, `reporting`.
151
+
152
+ Summary:
153
+
154
+ Some reported clone ranges include neighboring source that does not match the
155
+ paired fragment. The issue is visible in fixture formats that contain large
156
+ block tokens or embedded markup.
157
+
158
+ Fixture repros:
159
+
160
+ ```bash
161
+ FORMAT=pug MIN_TOKENS=20 MIN_LINES=3 MAX_SIZE=1mb KEEP=1 \
162
+ scripts/compat.sh jscpd/fixtures/pug
163
+
164
+ FORMAT=haml MIN_TOKENS=20 MIN_LINES=3 MAX_SIZE=1mb KEEP=1 \
165
+ scripts/compat.sh jscpd/fixtures/haml
166
+
167
+ FORMAT=aspnet MIN_TOKENS=20 MIN_LINES=3 MAX_SIZE=1mb KEEP=1 \
168
+ scripts/compat.sh jscpd/fixtures/htmlembedded
169
+ ```
170
+
171
+ Observed examples:
172
+
173
+ - Pug reports `file1.pug:1-274` against `file2.pug:1-266`, including a
174
+ `style.` plain-text block whose CSS values differ.
175
+ - HAML reports `file1.haml:1-26` against `file2.haml:1-26`, including
176
+ different silent-comment blocks.
177
+ - ASP.NET reports `file1.aspx:18-36` against `file2.aspx:18-43`, including an
178
+ inserted email form group that is not present in the paired file.
179
+
180
+ Expected behavior:
181
+
182
+ Clone ranges should stop at the last matching token run, split around inserted
183
+ content, or document that specific ignored block types may intentionally extend
184
+ reported source ranges through non-matching text.
185
+
186
+ ## Draft 4: Public benchmark clone ranges are sometimes overextended or reversed
187
+
188
+ Suggested title:
189
+
190
+ ```text
191
+ Some clone ranges are overextended or reversed on large public repositories
192
+ ```
193
+
194
+ Suggested labels: `bug`, `detector`, `reporting`.
195
+
196
+ Summary:
197
+
198
+ Large-repository runs show reported fragments whose line ranges extend across
199
+ neighboring non-matching tests, table entries, or declarations. Several ranges
200
+ also have reversed start/end ordering.
201
+
202
+ Repro shapes (run from the upstream `jscpd` repository root):
203
+
204
+ ```bash
205
+ git clone https://github.com/facebook/react.git /tmp/jscpd-react
206
+ git -C /tmp/jscpd-react checkout f0dfee3
207
+ node apps/jscpd/bin/jscpd /tmp/jscpd-react \
208
+ --format javascript \
209
+ --reporters json \
210
+ --output /tmp/jscpd-react-report \
211
+ --silent \
212
+ --noTips \
213
+ --min-tokens 50 \
214
+ --min-lines 5 \
215
+ --max-size 1mb \
216
+ --exitCode 0
217
+
218
+ git clone https://github.com/vercel/next.js.git /tmp/jscpd-next
219
+ git -C /tmp/jscpd-next checkout 2bbb67b9
220
+ node apps/jscpd/bin/jscpd /tmp/jscpd-next \
221
+ --format typescript \
222
+ --reporters json \
223
+ --output /tmp/jscpd-next-report \
224
+ --silent \
225
+ --noTips \
226
+ --min-tokens 50 \
227
+ --min-lines 5 \
228
+ --max-size 1mb \
229
+ --exitCode 0
230
+
231
+ git clone https://github.com/prometheus/prometheus.git /tmp/jscpd-prometheus
232
+ git -C /tmp/jscpd-prometheus checkout a0524ee
233
+ node apps/jscpd/bin/jscpd /tmp/jscpd-prometheus \
234
+ --format go \
235
+ --reporters json \
236
+ --output /tmp/jscpd-prometheus-report \
237
+ --silent \
238
+ --noTips \
239
+ --min-tokens 50 \
240
+ --min-lines 5 \
241
+ --max-size 1mb \
242
+ --exitCode 0
243
+ ```
244
+
245
+ Observed examples:
246
+
247
+ - React reports ranges such as `ReactDOMFizzServerNode.js:229-179` and clone
248
+ fragments that continue into neighboring test bodies.
249
+ - Next.js reports broad ranges around inline snapshots and multi-test files,
250
+ including reversed endpoints like `459-314` and `892-745`.
251
+ - Prometheus reports broad table-test ranges where one early case is stretched
252
+ through unrelated later cases.
253
+
254
+ Expected behavior:
255
+
256
+ Reported fragments should keep start/end ordering stable and should stop at the
257
+ actual matching token run instead of stretching one match across unrelated
258
+ neighboring source.
259
+
260
+ ## Draft 5: Config string `minTokens` can corrupt token-window indexing
261
+
262
+ Suggested title:
263
+
264
+ ```text
265
+ String minTokens values from config can corrupt detector token windows
266
+ ```
267
+
268
+ Suggested labels: `bug`, `config`.
269
+
270
+ Summary:
271
+
272
+ Runtime config loaded from `.jscpd.json` or `package.json#jscpd` is merged
273
+ without the CLI numeric parser. Numeric-looking strings in some fields still
274
+ work through JavaScript coercion, but `minTokens` is used in token-window
275
+ indexing with `+` before any numeric subtraction. A value such as `"5"` can produce
276
+ string-concatenated indices and eventually an undefined token frame.
277
+
278
+ Minimal repro:
279
+
280
+ Run this from the upstream `jscpd` repository root:
281
+
282
+ ```bash
283
+ tmp=$(mktemp -d)
284
+ JSCPD_REPO=$(pwd)
285
+ mkdir -p "$tmp/src"
286
+ printf 'function alpha(){\n return [1,2,3,4,5,6,7,8,9,10].map((x)=>x+1).join(",");\n}\nfunction beta(){\n return alpha();\n}\n' > "$tmp/src/a.js"
287
+ cp "$tmp/src/a.js" "$tmp/src/b.js"
288
+ printf '{"path":["src"],"format":["javascript"],"reporters":["json"],"silent":true,"minTokens":"5","minLines":1,"maxSize":"1mb","exitCode":0}\n' > "$tmp/.jscpd.json"
289
+
290
+ cd "$tmp"
291
+ node "$JSCPD_REPO/apps/jscpd/bin/jscpd" --config .jscpd.json --noTips
292
+ ```
293
+
294
+ Observed first error:
295
+
296
+ ```text
297
+ TypeError: Cannot read properties of undefined (reading 'range')
298
+ at _RabinKarp.enlargeClone (.../packages/core/dist/index.js:100:49)
299
+ ```
300
+
301
+ Expected behavior:
302
+
303
+ Config numeric fields should be parsed and validated before detector execution,
304
+ or invalid string values should fail with a clear configuration error.
305
+
306
+ ## Draft 6: Public option fields are exposed but unused at runtime
307
+
308
+ Suggested title:
309
+
310
+ ```text
311
+ Some public option fields are exposed but unused at runtime
312
+ ```
313
+
314
+ Suggested labels: `bug`, `options`, `documentation`.
315
+
316
+ Summary:
317
+
318
+ The option surface exposes fields that look user-facing but are not consumed by
319
+ the runtime:
320
+
321
+ - `cache` is defined and defaulted, but there is no `--cache` CLI option and no
322
+ detector/tokenizer read of `options.cache`.
323
+ - `listeners` is normalized to an array, but runtime subscriptions come only
324
+ from built-in verbose/progress handling.
325
+ - `tokensToSkip` appears in the options interface but is not consumed by
326
+ tokenizer or detector code.
327
+
328
+ Expected behavior:
329
+
330
+ These fields should either be documented as reserved/no-op, removed from the
331
+ public option surface, or wired to actual runtime behavior.
332
+
333
+ ## Draft 7: Optional CLI values can produce TypeErrors or accidental behavior
334
+
335
+ Suggested title:
336
+
337
+ ```text
338
+ Bare optional CLI flags can produce TypeErrors or accidental behavior
339
+ ```
340
+
341
+ Suggested labels: `bug`, `cli`.
342
+
343
+ Summary:
344
+
345
+ Several Commander options accept optional values. When passed without a value,
346
+ Commander supplies boolean `true`, and later runtime code either crashes with a
347
+ TypeError or continues with surprising semantics.
348
+
349
+ Repro examples:
350
+
351
+ ```bash
352
+ node jscpd/apps/jscpd/bin/jscpd jscpd/fixtures/javascript \
353
+ --threshold \
354
+ --silent \
355
+ --noTips \
356
+ --min-tokens 20 \
357
+ --min-lines 3 \
358
+ --max-size 1mb
359
+
360
+ node jscpd/apps/jscpd/bin/jscpd jscpd/fixtures/javascript \
361
+ --exitCode \
362
+ --silent \
363
+ --noTips \
364
+ --min-tokens 20 \
365
+ --min-lines 3 \
366
+ --max-size 1mb
367
+
368
+ node jscpd/apps/jscpd/bin/jscpd jscpd/fixtures/custom \
369
+ --formats-exts javascript \
370
+ --silent \
371
+ --noTips \
372
+ --min-tokens 20 \
373
+ --min-lines 3 \
374
+ --max-size 1mb
375
+ ```
376
+
377
+ Observed behavior:
378
+
379
+ - Bare `--threshold` becomes `Number(true)`, so the threshold is treated as
380
+ `1%`.
381
+ - Bare `--exitCode` stores boolean `true`, which Node rejects as
382
+ `process.exitCode` when clones are found.
383
+ - Bare string flags such as `--ignore`, `--reporters`, `--mode`, `--format`,
384
+ `--formats-exts`, and `--formats-names` later crash because boolean `true`
385
+ is used as a string.
386
+ - Malformed mapping values like `--formats-exts javascript` crash during option
387
+ conversion because the parser assumes each entry contains `:`.
388
+
389
+ Expected behavior:
390
+
391
+ Flags that require values should declare required values, validate the bare flag
392
+ case explicitly, or normalize bare flags to documented defaults before option
393
+ conversion.
@@ -0,0 +1,309 @@
1
+ # jscpd-rs User Guide
2
+
3
+ `jscpd-rs` is a native Rust implementation of the common `jscpd` workflows:
4
+ scan source trees, detect copy-paste fragments, write reports, fail CI on
5
+ thresholds, and serve snippet checks over HTTP/MCP.
6
+
7
+ This guide is intentionally user-facing. Release policy, benchmark evidence,
8
+ and compatibility details live in the release docs linked from the README.
9
+
10
+ ## Installation
11
+
12
+ Install the Rust binaries from crates.io after publication:
13
+
14
+ ```bash
15
+ cargo install jscpd-rs --locked
16
+ ```
17
+
18
+ Install through npm/npx when Node is already part of the workflow:
19
+
20
+ ```bash
21
+ npm install -g jscpd-rs
22
+ npx jscpd-rs --version
23
+ ```
24
+
25
+ The first npm release builds the native binaries from source during
26
+ `postinstall`, so a Rust toolchain must be available. Prebuilt npm platform
27
+ packages are planned after the first release.
28
+
29
+ Install from a checkout:
30
+
31
+ ```bash
32
+ git clone https://github.com/vv-bogdanov/jscpd-rs.git
33
+ cd jscpd-rs
34
+ cargo install --path . --bins --locked
35
+ ```
36
+
37
+ ## CLI Usage
38
+
39
+ Run a scan:
40
+
41
+ ```bash
42
+ jscpd .
43
+ ```
44
+
45
+ Scan selected paths and formats:
46
+
47
+ ```bash
48
+ jscpd --format javascript,typescript apps packages
49
+ ```
50
+
51
+ Tune clone size:
52
+
53
+ ```bash
54
+ jscpd --min-lines 5 --min-tokens 50 src
55
+ ```
56
+
57
+ Write machine-readable reports:
58
+
59
+ ```bash
60
+ jscpd --reporters console,json,sarif --output report src
61
+ ```
62
+
63
+ Fail CI when the duplicated-line percentage is too high:
64
+
65
+ ```bash
66
+ jscpd --threshold 5 --exitCode 1 .
67
+ ```
68
+
69
+ Inspect the upstream-compatible help and format registry:
70
+
71
+ ```bash
72
+ jscpd --help
73
+ jscpd --list
74
+ ```
75
+
76
+ ## Common Options
77
+
78
+ | Option | Purpose | Default |
79
+ | --- | --- | --- |
80
+ | `--min-lines`, `-l` | minimum clone size in source lines | `5` |
81
+ | `--min-tokens`, `-k` | minimum clone size in tokens | `50` |
82
+ | `--max-lines`, `-x` | skip sources with more lines | `1000` |
83
+ | `--max-size`, `-z` | skip sources above a byte size such as `100kb` or `1mb` | `100kb` |
84
+ | `--threshold`, `-t` | fail when duplicated percentage is above the threshold | unset |
85
+ | `--exitCode` | exit code to use when duplication is detected | `0` (upstream default) |
86
+ | `--format`, `-f` | comma-separated format allowlist | all formats |
87
+ | `--pattern`, `-p` | glob pattern used during discovery | `**/*` |
88
+ | `--ignore`, `-i` | comma-separated ignore globs | unset |
89
+ | `--ignore-pattern` | skip code blocks matching regular expressions | unset |
90
+ | `--mode`, `-m` | detection mode: `strict`, `mild`, or `weak` | `mild` |
91
+ | `--skipComments` | shorthand for `--mode weak` | `false` |
92
+ | `--reporters`, `-r` | comma-separated reporter list | `console` |
93
+ | `--output`, `-o` | report output directory | `./report` |
94
+ | `--blame`, `-b` | add Git author/date information to duplicate fragments | `false` |
95
+ | `--absolute`, `-a` | write absolute source paths in reports | `false` |
96
+ | `--noSymlinks`, `-n` | do not follow symlinks during discovery | `false` |
97
+ | `--gitignore` / `--no-gitignore` | enable or disable Git ignore handling | enabled |
98
+ | `--skipLocal` | report only cross-folder duplication | `false` |
99
+ | `--noTips` | suppress post-run tips and promotional output | `false` |
100
+
101
+ The CLI accepts multiple `--ignore` flags and comma-separated ignore lists:
102
+
103
+ ```bash
104
+ jscpd --ignore "target/**" --ignore "node_modules/**,dist/**" .
105
+ ```
106
+
107
+ ## Configuration
108
+
109
+ `jscpd-rs` reads `.jscpd.json` by default when present. It also reads a `jscpd`
110
+ section from `package.json`, matching the common upstream workflow.
111
+
112
+ Example `.jscpd.json`:
113
+
114
+ ```json
115
+ {
116
+ "path": ["src", "packages"],
117
+ "format": ["javascript", "typescript", "rust"],
118
+ "minLines": 5,
119
+ "minTokens": 50,
120
+ "threshold": 5,
121
+ "reporters": ["console", "json", "html", "sarif"],
122
+ "output": "report",
123
+ "ignore": ["target/**", "node_modules/**", "dist/**"],
124
+ "gitignore": true,
125
+ "noTips": true
126
+ }
127
+ ```
128
+
129
+ Example `package.json` section:
130
+
131
+ ```json
132
+ {
133
+ "jscpd": {
134
+ "path": ["src"],
135
+ "reporters": ["console", "json"],
136
+ "threshold": 5,
137
+ "ignore": ["coverage/**", "**/*.snap"]
138
+ }
139
+ }
140
+ ```
141
+
142
+ CLI arguments are applied after config loading, so command-line values can
143
+ override project defaults.
144
+
145
+ ## Reporters
146
+
147
+ Built-in native reporters:
148
+
149
+ | Reporter | Output |
150
+ | --- | --- |
151
+ | `console` | compact terminal summary and clone list |
152
+ | `consoleFull` | terminal output with duplicated fragments |
153
+ | `ai` | compact, token-efficient clone list for agent prompts |
154
+ | `json` | `jscpd-report.json` |
155
+ | `xml` | `jscpd-report.xml` |
156
+ | `csv` | `jscpd-report.csv` |
157
+ | `markdown` | `jscpd-report.md` |
158
+ | `html` | self-contained HTML report under `html/` |
159
+ | `sarif` | `jscpd-sarif.json` for code-scanning pipelines |
160
+ | `xcode` | diagnostics formatted for Xcode-style tooling |
161
+ | `threshold` | threshold-only error output |
162
+ | `badge` | `jscpd-badge.svg` |
163
+ | `silent` | suppress report output |
164
+
165
+ Dynamic npm reporters are intentionally not loaded in the first release. Unknown
166
+ external reporter names produce upstream-style warnings, and the run continues
167
+ wherever upstream would continue.
168
+
169
+ ## Server And MCP
170
+
171
+ Start the native server:
172
+
173
+ ```bash
174
+ jscpd-server . --host 127.0.0.1 --port 3000
175
+ ```
176
+
177
+ Health and statistics:
178
+
179
+ ```bash
180
+ curl http://127.0.0.1:3000/api/health
181
+ curl http://127.0.0.1:3000/api/stats
182
+ ```
183
+
184
+ Check a snippet against the scanned project:
185
+
186
+ ```bash
187
+ curl -X POST http://127.0.0.1:3000/api/check \
188
+ -H "Content-Type: application/json" \
189
+ -d '{"format":"javascript","code":"function sum(a,b){return a+b;}"}'
190
+ ```
191
+
192
+ Refresh the project token maps after changing files:
193
+
194
+ ```bash
195
+ curl -X POST http://127.0.0.1:3000/api/recheck
196
+ ```
197
+
198
+ MCP endpoint:
199
+
200
+ ```text
201
+ http://127.0.0.1:3000/mcp
202
+ ```
203
+
204
+ Example MCP client entry:
205
+
206
+ ```json
207
+ {
208
+ "mcpServers": {
209
+ "jscpd": {
210
+ "type": "streamable-http",
211
+ "url": "http://127.0.0.1:3000/mcp"
212
+ }
213
+ }
214
+ }
215
+ ```
216
+
217
+ The server exposes core duplication tools and a statistics resource over native
218
+ JSON-RPC HTTP.
219
+
220
+ ## Rust API
221
+
222
+ Path-based detection:
223
+
224
+ ```rust
225
+ use std::path::PathBuf;
226
+
227
+ fn main() -> anyhow::Result<()> {
228
+ let mut options = jscpd_rs::get_default_options();
229
+ options.paths = vec![PathBuf::from("src")];
230
+ options.reporters.clear();
231
+ options.silent = true;
232
+
233
+ let result = jscpd_rs::detect_clones_and_statistics(&options)?;
234
+ println!("{} clones", result.clones.len());
235
+ Ok(())
236
+ }
237
+ ```
238
+
239
+ In-memory detection:
240
+
241
+ ```rust
242
+ let mut options = jscpd_rs::get_default_options();
243
+ options.reporters.clear();
244
+ options.min_lines = 2;
245
+ options.min_tokens = 5;
246
+
247
+ let files = vec![
248
+ jscpd_rs::SourceFile {
249
+ source_id: "a.js".to_string(),
250
+ format: "javascript".to_string(),
251
+ content: "const a = 1;\nconst b = 2;\nconst c = a + b;\n".to_string(),
252
+ },
253
+ jscpd_rs::SourceFile {
254
+ source_id: "b.js".to_string(),
255
+ format: "javascript".to_string(),
256
+ content: "const a = 1;\nconst b = 2;\nconst c = a + b;\n".to_string(),
257
+ },
258
+ ];
259
+
260
+ let result = jscpd_rs::detect_source_files(files, &options);
261
+ assert!(!result.clones.is_empty());
262
+ ```
263
+
264
+ Useful entry points:
265
+
266
+ - `get_default_options`
267
+ - `get_options_from_args`
268
+ - `detect_clones`
269
+ - `detect_clones_and_statistics`
270
+ - `detect_source_files`
271
+ - `get_supported_formats`
272
+ - `get_format_by_file`
273
+ - `Tokenizer`
274
+ - `Detector`
275
+ - `MemoryStore`
276
+
277
+ ## AI Skills
278
+
279
+ Install the tool-reference and dry-refactoring skills:
280
+
281
+ ```bash
282
+ npx skills add vv-bogdanov/jscpd-rs --skill jscpd
283
+ npx skills add vv-bogdanov/jscpd-rs --skill dry-refactoring
284
+ ```
285
+
286
+ The `ai` reporter is designed for short clone summaries that can be passed to
287
+ coding agents without including duplicated source fragments by default:
288
+
289
+ ```bash
290
+ jscpd --reporters ai src
291
+ ```
292
+
293
+ ## Compatibility Notes
294
+
295
+ The first release target is practical, coverage-first upstream compatibility.
296
+ For the same inputs and options, `jscpd-rs` must not miss duplicated source
297
+ lines reported by upstream `jscpd`. Extra Rust findings are allowed while
298
+ compatibility converges and remain visible in compatibility reports.
299
+
300
+ Intentional first-release limits:
301
+
302
+ - dynamic npm reporters, stores, listeners, and plugins are not loaded;
303
+ - exact token totals and pair ordering may differ from upstream while
304
+ duplicated upstream lines remain covered;
305
+ - HTML output is self-contained and practically compatible, not pixel-perfect;
306
+ - the Rust crate exposes a native Rust API, not the upstream JavaScript API.
307
+
308
+ See `docs/release-decisions.md` and `docs/compat-baseline.md` for the current
309
+ release policy and compatibility evidence.