jscpd-rs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +69 -0
- package/Cargo.lock +1323 -0
- package/Cargo.toml +54 -0
- package/LICENSE +21 -0
- package/README.md +372 -0
- package/docs/api-parity.md +49 -0
- package/docs/cloning-plan.md +281 -0
- package/docs/compat-baseline.md +535 -0
- package/docs/format-porting.md +86 -0
- package/docs/junior-task-template.md +62 -0
- package/docs/junior-workflow.md +87 -0
- package/docs/migrating-from-jscpd.md +193 -0
- package/docs/npm-release.md +116 -0
- package/docs/public-benchmark-suite.md +81 -0
- package/docs/release-checklist.md +200 -0
- package/docs/release-decisions.md +103 -0
- package/docs/release-readiness.md +51 -0
- package/docs/upstream-bugs.md +501 -0
- package/docs/upstream-issue-drafts.md +393 -0
- package/docs/user-guide.md +309 -0
- package/examples/dump_oxc_tokens.rs +112 -0
- package/examples/library_api.rs +42 -0
- package/npm/bin/jscpd-rs.js +6 -0
- package/npm/bin/jscpd-server.js +6 -0
- package/npm/lib/run-binary.js +68 -0
- package/npm/scripts/postinstall.js +50 -0
- package/package.json +53 -0
- package/skills/dry-refactoring/SKILL.md +63 -0
- package/skills/jscpd/SKILL.md +85 -0
- package/src/app.rs +512 -0
- package/src/bin/jscpd-server.rs +429 -0
- package/src/blame.rs +130 -0
- package/src/cli/config.rs +543 -0
- package/src/cli/parsing.rs +301 -0
- package/src/cli/tests.rs +543 -0
- package/src/cli.rs +671 -0
- package/src/detector/matching/secondary.rs +387 -0
- package/src/detector/matching.rs +274 -0
- package/src/detector/model.rs +190 -0
- package/src/detector/prepare.rs +71 -0
- package/src/detector/skip_local.rs +40 -0
- package/src/detector/statistics.rs +138 -0
- package/src/detector/store.rs +96 -0
- package/src/detector/tests.rs +238 -0
- package/src/detector.rs +265 -0
- package/src/files/discovery.rs +508 -0
- package/src/files/gitignore.rs +203 -0
- package/src/files/paths.rs +68 -0
- package/src/files/shebang.rs +106 -0
- package/src/files/tests.rs +523 -0
- package/src/files.rs +25 -0
- package/src/formats.rs +570 -0
- package/src/lib.rs +433 -0
- package/src/main.rs +26 -0
- package/src/report/ai.rs +125 -0
- package/src/report/badge.rs +238 -0
- package/src/report/console.rs +180 -0
- package/src/report/console_common.rs +37 -0
- package/src/report/console_full.rs +139 -0
- package/src/report/csv.rs +65 -0
- package/src/report/escape.rs +8 -0
- package/src/report/file_output.rs +28 -0
- package/src/report/html/assets.rs +47 -0
- package/src/report/html.rs +336 -0
- package/src/report/json.rs +119 -0
- package/src/report/markdown.rs +125 -0
- package/src/report/sarif.rs +302 -0
- package/src/report/silent.rs +22 -0
- package/src/report/source.rs +38 -0
- package/src/report/summary.rs +50 -0
- package/src/report/test_support.rs +133 -0
- package/src/report/threshold.rs +76 -0
- package/src/report/xcode.rs +90 -0
- package/src/report/xml.rs +119 -0
- package/src/report.rs +250 -0
- package/src/server/mcp.rs +942 -0
- package/src/server.rs +1081 -0
- package/src/tokenizer/apex.rs +97 -0
- package/src/tokenizer/blocks.rs +532 -0
- package/src/tokenizer/embedded.rs +106 -0
- package/src/tokenizer/generic.rs +511 -0
- package/src/tokenizer/hash.rs +27 -0
- package/src/tokenizer/ignore.rs +33 -0
- package/src/tokenizer/line_index.rs +33 -0
- package/src/tokenizer/markdown.rs +289 -0
- package/src/tokenizer/markup_attrs.rs +289 -0
- package/src/tokenizer/oxc/fallback.rs +275 -0
- package/src/tokenizer/oxc/jsx.rs +168 -0
- package/src/tokenizer/oxc/kind.rs +177 -0
- package/src/tokenizer/oxc/lexical.rs +67 -0
- package/src/tokenizer/oxc.rs +659 -0
- package/src/tokenizer/scan.rs +88 -0
- package/src/tokenizer/tap.rs +150 -0
- package/src/tokenizer/tests.rs +915 -0
- package/src/tokenizer.rs +328 -0
- package/src/verbose.rs +195 -0
package/Cargo.toml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "jscpd-rs"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2024"
|
|
5
|
+
rust-version = "1.93"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
description = "Fast Rust clone of jscpd"
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
repository = "https://github.com/vv-bogdanov/jscpd-rs"
|
|
10
|
+
documentation = "https://docs.rs/jscpd-rs"
|
|
11
|
+
keywords = ["jscpd", "copy-paste", "duplication", "clone-detection", "cli"]
|
|
12
|
+
categories = ["command-line-utilities", "development-tools"]
|
|
13
|
+
include = [
|
|
14
|
+
"/CHANGELOG.md",
|
|
15
|
+
"/Cargo.lock",
|
|
16
|
+
"/Cargo.toml",
|
|
17
|
+
"/LICENSE",
|
|
18
|
+
"/README.md",
|
|
19
|
+
"/docs/**",
|
|
20
|
+
"/examples/**",
|
|
21
|
+
"/skills/**",
|
|
22
|
+
"/src/**",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[[bin]]
|
|
26
|
+
name = "jscpd"
|
|
27
|
+
path = "src/main.rs"
|
|
28
|
+
|
|
29
|
+
[[bin]]
|
|
30
|
+
name = "jscpd-server"
|
|
31
|
+
path = "src/bin/jscpd-server.rs"
|
|
32
|
+
|
|
33
|
+
[dependencies]
|
|
34
|
+
anyhow = "1.0"
|
|
35
|
+
axum = "0.8"
|
|
36
|
+
clap = { version = "4.5", features = ["derive"] }
|
|
37
|
+
form_urlencoded = "1.2"
|
|
38
|
+
getrandom = "0.2"
|
|
39
|
+
globset = "0.4"
|
|
40
|
+
ignore = "0.4"
|
|
41
|
+
oxc_allocator = "0.133.0"
|
|
42
|
+
oxc_parser = { version = "0.133.0", features = ["benchmarking"] }
|
|
43
|
+
oxc_span = "0.133.0"
|
|
44
|
+
rayon = "1.11"
|
|
45
|
+
regex = "1.11"
|
|
46
|
+
rustc-hash = "2.1"
|
|
47
|
+
serde = { version = "1.0", features = ["derive"] }
|
|
48
|
+
serde_json = "1.0"
|
|
49
|
+
time = { version = "0.3", features = ["formatting"] }
|
|
50
|
+
tokio = { version = "1.48", features = ["macros", "net", "rt-multi-thread"] }
|
|
51
|
+
xxhash-rust = { version = "0.8", features = ["xxh3"] }
|
|
52
|
+
|
|
53
|
+
[dev-dependencies]
|
|
54
|
+
tower = { version = "0.5", features = ["util"] }
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Viacheslav V Bogdanov
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
# jscpd-rs
|
|
2
|
+
|
|
3
|
+
[Release gate](https://github.com/vv-bogdanov/jscpd-rs/actions/workflows/release-gate.yml)
|
|
4
|
+
[crates.io](https://crates.io/crates/jscpd-rs)
|
|
5
|
+
[docs.rs](https://docs.rs/jscpd-rs)
|
|
6
|
+
[npm](https://www.npmjs.com/package/jscpd-rs)
|
|
7
|
+
[License: MIT](LICENSE)
|
|
8
|
+
[Rust](https://www.rust-lang.org/)
|
|
9
|
+
|
|
10
|
+
Fast native Rust clone of [`jscpd`](https://github.com/kucherenko/jscpd)
|
|
11
|
+
for copy-paste and duplicate-code detection in local development and CI/CD.
|
|
12
|
+
It scans a codebase, finds duplicated source fragments across files, writes
|
|
13
|
+
reports for humans and automation, and can fail a build when duplication
|
|
14
|
+
crosses a configured threshold.
|
|
15
|
+
|
|
16
|
+
`jscpd-rs` keeps the upstream command shape, configuration formats, reports,
|
|
17
|
+
exit-code workflows, and server workflow, while moving the hot path to native
|
|
18
|
+
Rust. The practical goal is simple: keep duplication checks always-on without
|
|
19
|
+
spending unnecessary CI minutes, developer waiting time, cloud compute budget,
|
|
20
|
+
or electricity on repeated quality gates.
|
|
21
|
+
|
|
22
|
+
Recorded public release-candidate benchmarks currently show 50x+ speedups over
|
|
23
|
+
upstream on the covered repositories. The compatibility gate is coverage-first:
|
|
24
|
+
on the same inputs and options, `jscpd-rs` must not miss duplicated source lines
|
|
25
|
+
reported by upstream `jscpd`.
|
|
26
|
+
|
|
27
|
+
## Install
|
|
28
|
+
|
|
29
|
+
Cargo:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
cargo install jscpd-rs --locked
|
|
33
|
+
jscpd --version
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
npm/npx:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
npm install -g jscpd-rs
|
|
40
|
+
jscpd --version
|
|
41
|
+
|
|
42
|
+
npx jscpd-rs --version
|
|
43
|
+
npx jscpd-rs .
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
The first npm package is a source-build package: install/postinstall compiles
|
|
47
|
+
the native Rust binaries with Cargo. A Rust toolchain must be available on the
|
|
48
|
+
installing machine. Prebuilt platform packages are a planned publication
|
|
49
|
+
improvement.
|
|
50
|
+
|
|
51
|
+
From this repository:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
git clone https://github.com/vv-bogdanov/jscpd-rs.git
|
|
55
|
+
cd jscpd-rs
|
|
56
|
+
cargo install --path . --bins --locked
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Build without installing:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
cargo build --release --bin jscpd
|
|
63
|
+
cargo build --release --bin jscpd-server
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Quick Start
|
|
67
|
+
|
|
68
|
+
Scan a project:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
jscpd .
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Tune the detection threshold:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
jscpd --min-lines 5 --min-tokens 50 src
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Generate reports:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
jscpd --reporters console,json,html --output report src
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Fail CI when duplication is above a threshold:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
jscpd --threshold 5 --exitCode 1 src
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Use the upstream-compatible command help and format list:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
jscpd --help
|
|
96
|
+
jscpd --list
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Start the native REST/MCP server:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
jscpd-server . --host 127.0.0.1 --port 3000
|
|
103
|
+
curl http://127.0.0.1:3000/api/health
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
The current server exposes `/`, `/api/health`, `/api/stats`, `/api/check`,
|
|
107
|
+
`/api/recheck`, and `/mcp`. Snippet checks reuse project token maps refreshed
|
|
108
|
+
by `/api/recheck`.
|
|
109
|
+
|
|
110
|
+
For full CLI, configuration, reporter, server, MCP, and Rust API examples, see
|
|
111
|
+
[docs/user-guide.md](docs/user-guide.md).
|
|
112
|
+
If you already use upstream `jscpd`, see
|
|
113
|
+
[docs/migrating-from-jscpd.md](docs/migrating-from-jscpd.md).
|
|
114
|
+
|
|
115
|
+
## GitHub Actions
|
|
116
|
+
|
|
117
|
+
Install from crates.io after publication:
|
|
118
|
+
|
|
119
|
+
```yaml
|
|
120
|
+
jobs:
|
|
121
|
+
duplication:
|
|
122
|
+
runs-on: ubuntu-latest
|
|
123
|
+
steps:
|
|
124
|
+
- uses: actions/checkout@v5
|
|
125
|
+
- uses: dtolnay/rust-toolchain@stable
|
|
126
|
+
- run: cargo install jscpd-rs --locked
|
|
127
|
+
- run: jscpd src --reporters console,json --threshold 5 --exitCode 1
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Use npm/npx when a Node-based CI environment is already available:
|
|
131
|
+
|
|
132
|
+
```yaml
|
|
133
|
+
jobs:
|
|
134
|
+
duplication:
|
|
135
|
+
runs-on: ubuntu-latest
|
|
136
|
+
steps:
|
|
137
|
+
- uses: actions/checkout@v5
|
|
138
|
+
- uses: dtolnay/rust-toolchain@stable
|
|
139
|
+
- uses: actions/setup-node@v5
|
|
140
|
+
with:
|
|
141
|
+
node-version: 22
|
|
142
|
+
- run: npx jscpd-rs src --reporters console,json --threshold 5 --exitCode 1
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Install from a checked-out source tree:
|
|
146
|
+
|
|
147
|
+
```yaml
|
|
148
|
+
jobs:
|
|
149
|
+
duplication:
|
|
150
|
+
runs-on: ubuntu-latest
|
|
151
|
+
steps:
|
|
152
|
+
- uses: actions/checkout@v5
|
|
153
|
+
- uses: dtolnay/rust-toolchain@stable
|
|
154
|
+
- run: cargo install --path . --bins --locked
|
|
155
|
+
- run: jscpd src --reporters console,json --threshold 5 --exitCode 1
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## AI Refactoring Skills
|
|
159
|
+
|
|
160
|
+
Install the project skills for duplication detection and guided DRY
|
|
161
|
+
refactoring:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
npx skills add vv-bogdanov/jscpd-rs --skill jscpd
|
|
165
|
+
npx skills add vv-bogdanov/jscpd-rs --skill dry-refactoring
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Why jscpd-rs
|
|
169
|
+
|
|
170
|
+
- **Fast CI/CD gates:** duplicate detection should be cheap enough to run on
|
|
171
|
+
every pull request.
|
|
172
|
+
- **Lower operating cost:** shorter scans reduce paid compute minutes and
|
|
173
|
+
repeated developer wait time.
|
|
174
|
+
- **Native detector path:** the detector does not embed or spawn JavaScript for
|
|
175
|
+
core behavior.
|
|
176
|
+
- **Practical compatibility:** the CLI, config, reporter, server, and exit-code
|
|
177
|
+
workflows are designed to map onto common upstream `jscpd` usage.
|
|
178
|
+
- **Small project-specific core:** uses battle-tested Rust crates for CLI
|
|
179
|
+
parsing, config formats, ignore handling, serialization, token processing,
|
|
180
|
+
concurrency, and reporting wherever that keeps the implementation simpler.
|
|
181
|
+
|
|
182
|
+
## What Works Today
|
|
183
|
+
|
|
184
|
+
This is pre-release software. The first release target is a coverage-first
|
|
185
|
+
compatible CLI replacement for common `jscpd` workflows:
|
|
186
|
+
|
|
187
|
+
- `jscpd` and `jscpd-server` binaries with upstream-compatible command names;
|
|
188
|
+
- CLI and config option surface covered by compatibility scripts;
|
|
189
|
+
- native built-in reporters: `ai`, `console`, `consoleFull`, `csv`, `html`,
|
|
190
|
+
`json`, `markdown`, `silent`, `sarif`, `threshold`, `xcode`, `xml`, and
|
|
191
|
+
`badge`;
|
|
192
|
+
- upstream-synchronized format registry with native JS/TS/JSX/TSX tokenization
|
|
193
|
+
and generic native tokenization for long-tail formats;
|
|
194
|
+
- native Git blame support;
|
|
195
|
+
- native REST/MCP server workflow;
|
|
196
|
+
- Rust library API for running detection from paths or prepared in-memory
|
|
197
|
+
sources.
|
|
198
|
+
|
|
199
|
+
Dynamic npm reporters, stores, listeners, and plugins are intentionally out of
|
|
200
|
+
scope for the first release. Unknown external reporters/stores keep
|
|
201
|
+
upstream-style warnings and continue where upstream continues.
|
|
202
|
+
|
|
203
|
+
## Compatibility Contract
|
|
204
|
+
|
|
205
|
+
The release gate is coverage-first. For the same inputs and options,
|
|
206
|
+
`jscpd-rs` must not miss duplicated lines reported by upstream `jscpd`. Extra
|
|
207
|
+
Rust duplicates are allowed while compatibility converges, but compatibility
|
|
208
|
+
reports keep them visible as `extra` findings.
|
|
209
|
+
|
|
210
|
+
Exact pair ordering and token totals are quality metrics rather than the
|
|
211
|
+
default blocking gate. This matters for multi-way clones: different pair
|
|
212
|
+
selection can still cover the same duplicated source lines.
|
|
213
|
+
|
|
214
|
+
The upstream repository is checked out as `jscpd/` and treated as the
|
|
215
|
+
executable specification. Compatibility scripts run both implementations and
|
|
216
|
+
compare their reports.
|
|
217
|
+
|
|
218
|
+
## Performance
|
|
219
|
+
|
|
220
|
+
Latest recorded public benchmark baseline:
|
|
221
|
+
|
|
222
|
+
| Repo | Format | Rust avg | Upstream avg | Speedup |
|
|
223
|
+
| --- | --- | ---: | ---: | ---: |
|
|
224
|
+
| React | JavaScript | 0.199097s | 10.079214s | 50.62x |
|
|
225
|
+
| Next.js | TypeScript | 0.262433s | 14.715736s | 56.07x |
|
|
226
|
+
| Prometheus | Go | 0.085239s | 4.642435s | 54.46x |
|
|
227
|
+
|
|
228
|
+
Reproduce the public benchmark and coverage suite:
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
PUBLIC=1 PUBLIC_RUNS=3 scripts/release-gate.sh
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
The release-candidate workflow reruns the public suite before publication so
|
|
235
|
+
README numbers stay tied to a concrete commit and gate output.
|
|
236
|
+
|
|
237
|
+
## Library API
|
|
238
|
+
|
|
239
|
+
The crate exposes the detector core for native integrations:
|
|
240
|
+
|
|
241
|
+
```rust
|
|
242
|
+
let options = jscpd_rs::get_default_options();
|
|
243
|
+
let result = jscpd_rs::detect_clones_and_statistic(&options)?;
|
|
244
|
+
let clones = result.clones;
|
|
245
|
+
|
|
246
|
+
let clones = jscpd_rs::jscpd(["jscpd", "src", "--silent", "--noTips"])?;
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
`detect_clones_and_statistics` is also available as the idiomatic Rust spelling.
|
|
250
|
+
`jscpd` and `jscpd_with_exit_callback` provide a native embeddable argv runner
|
|
251
|
+
similar to upstream `jscpd(argv, exitCallback?)`. `get_options_from_args` parses
|
|
252
|
+
upstream-style argv into normalized `Options` for native integrations.
|
|
253
|
+
|
|
254
|
+
`Tokenizer` provides a native generate-maps entrypoint over the same tokenizer
|
|
255
|
+
used by detection. `Detector`, `Statistic`, and `MemoryStore` expose native
|
|
256
|
+
counterparts for the main upstream core classes without loading JavaScript.
|
|
257
|
+
`detect_source_files` accepts in-memory `SourceFile` values, which is the
|
|
258
|
+
foundation for the upstream-style snippet/server workflow. Format helpers are
|
|
259
|
+
available through `get_supported_formats`, `get_format_by_file`, and
|
|
260
|
+
`get_format_by_file_with_mappings`.
|
|
261
|
+
|
|
262
|
+
## Architecture
|
|
263
|
+
|
|
264
|
+
The implementation keeps the hot path small and native:
|
|
265
|
+
|
|
266
|
+
```text
|
|
267
|
+
paths/config
|
|
268
|
+
-> ignore-aware discovery
|
|
269
|
+
-> native token maps
|
|
270
|
+
-> parallel duplicate detection
|
|
271
|
+
-> reporters / exit codes / server API
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Core crates and libraries:
|
|
275
|
+
|
|
276
|
+
- `clap`, `serde`, and config parsers for the CLI/config surface;
|
|
277
|
+
- `ignore`, `globset`, and Git ignore handling for file discovery;
|
|
278
|
+
- Oxc-backed JS/TS/JSX/TSX token processing for the highest-volume languages;
|
|
279
|
+
- native generic tokenizers for long-tail formats and embedded code blocks;
|
|
280
|
+
- `rayon` and Rust data structures for parallel discovery/detection work;
|
|
281
|
+
- native reporters for JSON, SARIF, XML, CSV, Markdown, HTML, console, badge,
|
|
282
|
+
Xcode, threshold, silent, and AI refactoring output.
|
|
283
|
+
|
|
284
|
+
## Known First-Release Deviations
|
|
285
|
+
|
|
286
|
+
The first release is native-only and coverage-first. These differences from the
|
|
287
|
+
JavaScript package are intentional unless a real workflow proves otherwise:
|
|
288
|
+
|
|
289
|
+
- dynamic npm reporters, stores, listeners, and plugins are not loaded;
|
|
290
|
+
- token totals and exact clone pair ordering may differ from Prism-based
|
|
291
|
+
upstream reports while duplicated upstream lines remain covered;
|
|
292
|
+
- HTML reports are self-contained and practically compatible, not pixel-perfect;
|
|
293
|
+
- the Rust crate exposes a native Rust API, not the upstream JavaScript package
|
|
294
|
+
API.
|
|
295
|
+
|
|
296
|
+
## Development
|
|
297
|
+
|
|
298
|
+
The upstream repository is checked out as the `jscpd/` git submodule and is the
|
|
299
|
+
executable specification for behavior.
|
|
300
|
+
|
|
301
|
+
```bash
|
|
302
|
+
git submodule update --init --recursive
|
|
303
|
+
cargo test
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
Useful focused checks:
|
|
307
|
+
|
|
308
|
+
```bash
|
|
309
|
+
scripts/compat-cli.sh
|
|
310
|
+
scripts/compat-config.sh
|
|
311
|
+
scripts/compat-reporters.sh
|
|
312
|
+
STRICT=coverage scripts/compat-matrix.sh
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
Known upstream bug candidates and intentional compatibility exceptions are
|
|
316
|
+
tracked in [docs/upstream-bugs.md](docs/upstream-bugs.md). GitHub-ready issue
|
|
317
|
+
drafts are prepared in
|
|
318
|
+
[docs/upstream-issue-drafts.md](docs/upstream-issue-drafts.md).
|
|
319
|
+
|
|
320
|
+
## Release Gates
|
|
321
|
+
|
|
322
|
+
Fast local gate:
|
|
323
|
+
|
|
324
|
+
```bash
|
|
325
|
+
scripts/release-gate.sh
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
Package/install gate:
|
|
329
|
+
|
|
330
|
+
```bash
|
|
331
|
+
scripts/package-check.sh
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
npm package/npx gate:
|
|
335
|
+
|
|
336
|
+
```bash
|
|
337
|
+
scripts/npm-package-check.sh
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
Full compatibility matrix:
|
|
341
|
+
|
|
342
|
+
```bash
|
|
343
|
+
FULL=1 scripts/release-gate.sh
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
Public benchmark and coverage gate:
|
|
347
|
+
|
|
348
|
+
```bash
|
|
349
|
+
PUBLIC=1 PUBLIC_RUNS=3 scripts/release-gate.sh
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
Release candidate gate:
|
|
353
|
+
|
|
354
|
+
```bash
|
|
355
|
+
scripts/release-candidate.sh
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
The GitHub Actions workflow runs the fast gate on pushes and pull requests.
|
|
359
|
+
Manual workflow runs can enable the full compatibility matrix and public
|
|
360
|
+
benchmark suite before a release, or set `release_candidate=true` to run the
|
|
361
|
+
full release-candidate gate in CI.
|
|
362
|
+
|
|
363
|
+
See [docs/compat-baseline.md](docs/compat-baseline.md) for the current gate
|
|
364
|
+
baseline, [docs/release-readiness.md](docs/release-readiness.md) for component
|
|
365
|
+
status, [docs/release-checklist.md](docs/release-checklist.md) for the
|
|
366
|
+
publication checklist, [CHANGELOG.md](CHANGELOG.md) for release notes, and
|
|
367
|
+
[docs/release-decisions.md](docs/release-decisions.md) for approved
|
|
368
|
+
first-release compatibility decisions.
|
|
369
|
+
|
|
370
|
+
## License
|
|
371
|
+
|
|
372
|
+
MIT
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# API Parity
|
|
2
|
+
|
|
3
|
+
Last updated: 2026-05-31.
|
|
4
|
+
|
|
5
|
+
This document tracks the upstream JavaScript API surface against the native Rust
|
|
6
|
+
API. The first release remains native-only: the crate does not embed Node.js,
|
|
7
|
+
does not call JavaScript at runtime, and does not ship a JavaScript package
|
|
8
|
+
wrapper unless that is chosen as a separate release target.
|
|
9
|
+
|
|
10
|
+
## App-Level API
|
|
11
|
+
|
|
12
|
+
| Upstream API | Rust status | Notes |
|
|
13
|
+
| --- | --- | --- |
|
|
14
|
+
| `detectClones(opts, store?, statisticProvider?)` | covered | Use `detect_clones(&Options)` for path-based detection. Native `MemoryStore` and `Statistic` helpers are exposed, but custom provider injection is not part of the first-release API. |
|
|
15
|
+
| `detectClonesAndStatistic(opts, store?)` | covered | Use `detect_clones_and_statistic(&Options)`. `detect_clones_and_statistics(&Options)` remains the idiomatic Rust spelling. |
|
|
16
|
+
| `jscpd(argv, exitCallback?)` | covered natively | Use the `jscpd` binary for process behavior, `jscpd(args)` for embeddable argv execution, or `jscpd_with_exit_callback(args, callback)` for upstream-style duplicate exit callback semantics. Exact JavaScript package export shape is not implemented. |
|
|
17
|
+
|
|
18
|
+
## Core And Tokenizer Helpers
|
|
19
|
+
|
|
20
|
+
| Upstream API | Rust status | Notes |
|
|
21
|
+
| --- | --- | --- |
|
|
22
|
+
| `getDefaultOptions()` | covered | Use `get_default_options()`. Defaults are also available through `Options::default()`. |
|
|
23
|
+
| `getSupportedFormats()` | covered | Use `get_supported_formats()`. The registry is generated from upstream and currently has 223 formats. |
|
|
24
|
+
| `getFormatByFile(path, formatsExts?, formatsNames?)` | covered | Use `get_format_by_file(path)` for default mappings or `get_format_by_file_with_mappings(path, formats_exts, formats_names)` for explicit mappings. |
|
|
25
|
+
| `Tokenizer` class | covered natively | Use `Tokenizer::new()` or `Tokenizer::with_options(options)` and `generate_maps(source_id, content, format)`. Exact JavaScript package export shape is not implemented. |
|
|
26
|
+
| `Detector` class | covered natively | Use `Detector::new(options)` for stateful in-memory source detection or `detect_source_files(files, options)` for batch detection. Exact JavaScript constructor shape is not implemented. |
|
|
27
|
+
| `Statistic` class | covered natively | Use `Statistic::new()`, `match_source(...)`, `clone_found(...)`, and `get_statistic()` for upstream-style statistics accumulation. |
|
|
28
|
+
| `MemoryStore` class | covered natively | Use `MemoryStore<T>::new()`, `namespace(...)`, `get(...)`, `set(...)`, and `close()` for the native in-memory store concept. |
|
|
29
|
+
| Validators, subscribers, custom stores, custom reporters | option-surface only | CLI/config options are preserved where practical; dynamic npm loading is intentionally out of scope for the first release. |
|
|
30
|
+
|
|
31
|
+
## Server API
|
|
32
|
+
|
|
33
|
+
| Upstream API | Rust status | Notes |
|
|
34
|
+
| --- | --- | --- |
|
|
35
|
+
| `jscpd-server` binary | partial | Native binary covers help, startup, core HTTP routes, and MCP smoke contracts through `scripts/compat-server.sh`. Exact Streamable HTTP SDK edge cases remain follow-up work. |
|
|
36
|
+
| `startServer`, `JscpdServer`, `JscpdServerService` exports | partial | Native server modules exist, but JavaScript export shape parity is not implemented. |
|
|
37
|
+
|
|
38
|
+
## Remaining API Gaps
|
|
39
|
+
|
|
40
|
+
- Decide later whether a JavaScript package wrapper is worth shipping. The
|
|
41
|
+
current recommendation is to keep the first release native-only.
|
|
42
|
+
- Keep `jscpd(args)` / `jscpd_with_exit_callback(args, callback)` as the native
|
|
43
|
+
embeddable path; add a JavaScript wrapper only if publishing an npm package
|
|
44
|
+
becomes an explicit target.
|
|
45
|
+
- Keep `Tokenizer`, `Detector`, `Statistic`, and `MemoryStore` native and
|
|
46
|
+
detection-oriented for now; add exact JavaScript iterator/object-shape
|
|
47
|
+
wrappers only if an npm/API compatibility release is chosen.
|
|
48
|
+
- Keep custom store/reporter/provider APIs out of the release path until a real
|
|
49
|
+
integration requires native hooks.
|