jscpd-rs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +69 -0
- package/Cargo.lock +1323 -0
- package/Cargo.toml +54 -0
- package/LICENSE +21 -0
- package/README.md +372 -0
- package/docs/api-parity.md +49 -0
- package/docs/cloning-plan.md +281 -0
- package/docs/compat-baseline.md +535 -0
- package/docs/format-porting.md +86 -0
- package/docs/junior-task-template.md +62 -0
- package/docs/junior-workflow.md +87 -0
- package/docs/migrating-from-jscpd.md +193 -0
- package/docs/npm-release.md +116 -0
- package/docs/public-benchmark-suite.md +81 -0
- package/docs/release-checklist.md +200 -0
- package/docs/release-decisions.md +103 -0
- package/docs/release-readiness.md +51 -0
- package/docs/upstream-bugs.md +501 -0
- package/docs/upstream-issue-drafts.md +393 -0
- package/docs/user-guide.md +309 -0
- package/examples/dump_oxc_tokens.rs +112 -0
- package/examples/library_api.rs +42 -0
- package/npm/bin/jscpd-rs.js +6 -0
- package/npm/bin/jscpd-server.js +6 -0
- package/npm/lib/run-binary.js +68 -0
- package/npm/scripts/postinstall.js +50 -0
- package/package.json +53 -0
- package/skills/dry-refactoring/SKILL.md +63 -0
- package/skills/jscpd/SKILL.md +85 -0
- package/src/app.rs +512 -0
- package/src/bin/jscpd-server.rs +429 -0
- package/src/blame.rs +130 -0
- package/src/cli/config.rs +543 -0
- package/src/cli/parsing.rs +301 -0
- package/src/cli/tests.rs +543 -0
- package/src/cli.rs +671 -0
- package/src/detector/matching/secondary.rs +387 -0
- package/src/detector/matching.rs +274 -0
- package/src/detector/model.rs +190 -0
- package/src/detector/prepare.rs +71 -0
- package/src/detector/skip_local.rs +40 -0
- package/src/detector/statistics.rs +138 -0
- package/src/detector/store.rs +96 -0
- package/src/detector/tests.rs +238 -0
- package/src/detector.rs +265 -0
- package/src/files/discovery.rs +508 -0
- package/src/files/gitignore.rs +203 -0
- package/src/files/paths.rs +68 -0
- package/src/files/shebang.rs +106 -0
- package/src/files/tests.rs +523 -0
- package/src/files.rs +25 -0
- package/src/formats.rs +570 -0
- package/src/lib.rs +433 -0
- package/src/main.rs +26 -0
- package/src/report/ai.rs +125 -0
- package/src/report/badge.rs +238 -0
- package/src/report/console.rs +180 -0
- package/src/report/console_common.rs +37 -0
- package/src/report/console_full.rs +139 -0
- package/src/report/csv.rs +65 -0
- package/src/report/escape.rs +8 -0
- package/src/report/file_output.rs +28 -0
- package/src/report/html/assets.rs +47 -0
- package/src/report/html.rs +336 -0
- package/src/report/json.rs +119 -0
- package/src/report/markdown.rs +125 -0
- package/src/report/sarif.rs +302 -0
- package/src/report/silent.rs +22 -0
- package/src/report/source.rs +38 -0
- package/src/report/summary.rs +50 -0
- package/src/report/test_support.rs +133 -0
- package/src/report/threshold.rs +76 -0
- package/src/report/xcode.rs +90 -0
- package/src/report/xml.rs +119 -0
- package/src/report.rs +250 -0
- package/src/server/mcp.rs +942 -0
- package/src/server.rs +1081 -0
- package/src/tokenizer/apex.rs +97 -0
- package/src/tokenizer/blocks.rs +532 -0
- package/src/tokenizer/embedded.rs +106 -0
- package/src/tokenizer/generic.rs +511 -0
- package/src/tokenizer/hash.rs +27 -0
- package/src/tokenizer/ignore.rs +33 -0
- package/src/tokenizer/line_index.rs +33 -0
- package/src/tokenizer/markdown.rs +289 -0
- package/src/tokenizer/markup_attrs.rs +289 -0
- package/src/tokenizer/oxc/fallback.rs +275 -0
- package/src/tokenizer/oxc/jsx.rs +168 -0
- package/src/tokenizer/oxc/kind.rs +177 -0
- package/src/tokenizer/oxc/lexical.rs +67 -0
- package/src/tokenizer/oxc.rs +659 -0
- package/src/tokenizer/scan.rs +88 -0
- package/src/tokenizer/tap.rs +150 -0
- package/src/tokenizer/tests.rs +915 -0
- package/src/tokenizer.rs +328 -0
- package/src/verbose.rs +195 -0
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
use std::ffi::OsString;
|
|
2
|
+
use std::path::PathBuf;
|
|
3
|
+
|
|
4
|
+
use anyhow::{Result, bail};
|
|
5
|
+
use clap::Parser;
|
|
6
|
+
use jscpd_rs::cli::{Cli, Options};
|
|
7
|
+
|
|
8
|
+
#[tokio::main]
|
|
9
|
+
async fn main() {
|
|
10
|
+
if let Err(error) = run().await {
|
|
11
|
+
eprintln!("{}", server_error_message(&error.to_string()));
|
|
12
|
+
std::process::exit(1);
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
async fn run() -> Result<()> {
|
|
17
|
+
let server_args = ServerArgs::from_env()?;
|
|
18
|
+
if server_args.help {
|
|
19
|
+
print_server_help();
|
|
20
|
+
return Ok(());
|
|
21
|
+
}
|
|
22
|
+
if let Some(option) = server_args.unknown_option {
|
|
23
|
+
eprintln!("error: unknown option '{option}'");
|
|
24
|
+
std::process::exit(1);
|
|
25
|
+
}
|
|
26
|
+
let cli = Cli::parse_from(server_args.jscpd_args);
|
|
27
|
+
if cli.version {
|
|
28
|
+
println!("{}", env!("CARGO_PKG_VERSION"));
|
|
29
|
+
return Ok(());
|
|
30
|
+
}
|
|
31
|
+
let working_directory = server_cli_working_directory(&cli);
|
|
32
|
+
let options = Options::from_cli(cli)?;
|
|
33
|
+
jscpd_rs::server::serve_with_working_directory(
|
|
34
|
+
options,
|
|
35
|
+
working_directory,
|
|
36
|
+
&server_args.host,
|
|
37
|
+
server_args.port,
|
|
38
|
+
)
|
|
39
|
+
.await
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/// Result of splitting the `jscpd-server` command line into server settings
/// and the arguments that are forwarded to the jscpd `Cli` parser.
#[derive(Debug)]
|
|
43
|
+
struct ServerArgs {
|
|
44
|
+
// Host to bind; `parse` starts from "0.0.0.0" and replaces it on `--host`/`-H`.
host: String,
|
|
45
|
+
// Port to listen on; `parse` starts from 3000 and replaces it on `--port`/`-p`.
port: u16,
|
|
46
|
+
// Program name followed by every argument that `parse` forwards to `Cli`.
jscpd_args: Vec<OsString>,
|
|
47
|
+
// Set when `--help` is seen.
help: bool,
|
|
48
|
+
// First option-like argument that is neither a server flag nor a forwarded jscpd option.
unknown_option: Option<String>,
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
impl ServerArgs {
|
|
52
|
+
fn from_env() -> Result<Self> {
|
|
53
|
+
Self::parse(std::env::args_os())
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
fn parse<I>(args: I) -> Result<Self>
|
|
57
|
+
where
|
|
58
|
+
I: IntoIterator<Item = OsString>,
|
|
59
|
+
{
|
|
60
|
+
let mut args = args.into_iter().collect::<Vec<_>>();
|
|
61
|
+
let program = args
|
|
62
|
+
.first()
|
|
63
|
+
.cloned()
|
|
64
|
+
.unwrap_or_else(|| OsString::from("jscpd-server"));
|
|
65
|
+
if !args.is_empty() {
|
|
66
|
+
args.remove(0);
|
|
67
|
+
}
|
|
68
|
+
let mut args = args.into_iter().peekable();
|
|
69
|
+
let mut host = "0.0.0.0".to_string();
|
|
70
|
+
let mut port = 3000u16;
|
|
71
|
+
let mut help = false;
|
|
72
|
+
let mut unknown_option = None;
|
|
73
|
+
let mut jscpd_args = vec![program];
|
|
74
|
+
|
|
75
|
+
while let Some(arg) = args.next() {
|
|
76
|
+
if arg == "--help" {
|
|
77
|
+
help = true;
|
|
78
|
+
} else if arg == "--host" || arg == "-H" {
|
|
79
|
+
host = next_optional_value(&mut args).unwrap_or_else(|| "true".to_string());
|
|
80
|
+
} else if arg == "--port" || arg == "-p" {
|
|
81
|
+
let value = next_optional_value(&mut args).unwrap_or_else(|| "true".to_string());
|
|
82
|
+
port = parse_port(&value)?;
|
|
83
|
+
} else if let Some(value) = prefixed_value(&arg, "--host=") {
|
|
84
|
+
host = value;
|
|
85
|
+
} else if let Some(value) = prefixed_value(&arg, "--port=") {
|
|
86
|
+
port = parse_port(&value)?;
|
|
87
|
+
} else if is_supported_jscpd_server_option(&arg) || !is_option_like(&arg) {
|
|
88
|
+
jscpd_args.push(arg);
|
|
89
|
+
} else {
|
|
90
|
+
unknown_option = arg.to_str().map(str::to_string);
|
|
91
|
+
break;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
Ok(Self {
|
|
96
|
+
host,
|
|
97
|
+
port,
|
|
98
|
+
jscpd_args,
|
|
99
|
+
help,
|
|
100
|
+
unknown_option,
|
|
101
|
+
})
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/// Takes the value belonging to an option whose argument is optional.
///
/// Returns `None` without consuming anything when the argument list is
/// exhausted or the next argument looks like another option (starts with `-`
/// and is longer than one character). A lone `-` is accepted as a value,
/// matching how commander treats optional option-arguments.
///
/// A value that is not valid UTF-8 is converted lossily instead of being
/// consumed and discarded, so the caller sees what was actually passed rather
/// than behaving as if the flag had been given without a value.
fn next_optional_value<I>(args: &mut std::iter::Peekable<I>) -> Option<String>
where
    I: Iterator<Item = OsString>,
{
    let looks_like_option = args
        .peek()?
        .to_str()
        .is_some_and(|value| value.len() > 1 && value.starts_with('-'));
    if looks_like_option {
        return None;
    }

    args.next().map(|value| {
        value
            .into_string()
            .unwrap_or_else(|raw| raw.to_string_lossy().into_owned())
    })
}
|
|
115
|
+
|
|
116
|
+
fn print_server_help() {
|
|
117
|
+
println!("{}", server_help());
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
/// Reports whether `arg` should be treated as an option rather than an operand.
///
/// An argument is option-like when it is valid UTF-8, starts with `-`, and is
/// longer than one character. A lone `-` is an ordinary operand (commander,
/// the upstream parser, applies the same length check), so it is forwarded
/// instead of being reported as an unknown option. Arguments that are not
/// valid UTF-8 are never option-like.
fn is_option_like(arg: &OsString) -> bool {
    arg.to_str()
        .is_some_and(|value| value.len() > 1 && value.starts_with('-'))
}
|
|
123
|
+
|
|
124
|
+
/// Reports whether `arg` names a jscpd option that the server binary forwards.
///
/// Only the part before the first `=` is compared, so `--format=typescript`
/// is recognised through `--format`. Arguments that are not valid UTF-8 are
/// never supported.
fn is_supported_jscpd_server_option(arg: &OsString) -> bool {
    // Short and long spellings of every forwarded option.
    const FORWARDED: &[&str] = &[
        "-V",
        "--version",
        "-c",
        "--config",
        "-f",
        "--format",
        "-i",
        "--ignore",
        "--ignore-pattern",
        "-l",
        "--min-lines",
        "-k",
        "--min-tokens",
        "-x",
        "--max-lines",
        "-z",
        "--max-size",
        "-m",
        "--mode",
        "--store",
        "--store-path",
        "-a",
        "--absolute",
        "-n",
        "--noSymlinks",
        "--ignoreCase",
        "-g",
        "--gitignore",
        "--skipLocal",
    ];

    arg.to_str().is_some_and(|text| {
        // `split` always yields at least one piece: the text before the first `=`.
        let name = text.split('=').next().unwrap_or(text);
        FORWARDED.contains(&name)
    })
}
|
|
163
|
+
|
|
164
|
+
/// Returns the static help text that `print_server_help` writes for `--help`.
///
/// The `--mode` default intentionally shows the source of a JavaScript
/// function (`function mild(token) { … }`); the in-file test asserts that this
/// upstream wording is preserved.
fn server_help() -> &'static str {
|
|
165
|
+
r#"Usage: jscpd-server [options] <path>
|
|
166
|
+
|
|
167
|
+
Start jscpd as a server
|
|
168
|
+
|
|
169
|
+
Options:
|
|
170
|
+
-V, --version output the version number
|
|
171
|
+
-p, --port [number] port to run the server on (Default is 3000)
|
|
172
|
+
-H, --host [string] host to bind the server to (Default is 0.0.0.0)
|
|
173
|
+
-c, --config [string] path to config file (Default is .jscpd.json in
|
|
174
|
+
<path>)
|
|
175
|
+
-f, --format [string] format or formats separated by comma
|
|
176
|
+
-i, --ignore [string] glob pattern for files to exclude
|
|
177
|
+
--ignore-pattern [string] ignore code blocks matching regexp patterns
|
|
178
|
+
-l, --min-lines [number] min size of duplication in code lines (Default is
|
|
179
|
+
5)
|
|
180
|
+
-k, --min-tokens [number] min size of duplication in code tokens (Default is
|
|
181
|
+
50)
|
|
182
|
+
-x, --max-lines [number] max size of source in lines (Default is 1000)
|
|
183
|
+
-z, --max-size [string] max size of source in bytes, examples: 1kb, 1mb,
|
|
184
|
+
120kb (Default is 100kb)
|
|
185
|
+
-m, --mode [string] mode of quality of search, can be "strict", "mild" and "weak" (Default is "function mild(token) {
|
|
186
|
+
return strict(token) && token.type !== "empty" && token.type !== "new_line";
|
|
187
|
+
}")
|
|
188
|
+
--store [string] use for define custom store (e.g. --store leveldb
|
|
189
|
+
used for big codebase)
|
|
190
|
+
--store-path [string] directory to use for store cache (e.g.
|
|
191
|
+
--store-path /tmp/jscpd-cache, useful when running
|
|
192
|
+
multiple instances in parallel)
|
|
193
|
+
-a, --absolute use absolute path in reports
|
|
194
|
+
-n, --noSymlinks dont use symlinks for detection
|
|
195
|
+
--ignoreCase ignore case of symbols in code (experimental)
|
|
196
|
+
-g, --gitignore ignore all files from .gitignore file
|
|
197
|
+
--skipLocal skip duplicates in local folders
|
|
198
|
+
--help display help for command"#
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
/// Extracts the text after `prefix` from an argument such as `--host=localhost`.
///
/// Returns `None` when `arg` is not valid UTF-8 or does not start with
/// `prefix`. The remainder may be empty.
fn prefixed_value(arg: &OsString, prefix: &str) -> Option<String> {
    let text = arg.to_str()?;
    let remainder = text.strip_prefix(prefix)?;
    Some(remainder.to_string())
}
|
|
206
|
+
|
|
207
|
+
fn parse_port(value: &str) -> Result<u16> {
|
|
208
|
+
let Ok(port) = value.parse::<u16>() else {
|
|
209
|
+
bail!("Invalid port number: {value}");
|
|
210
|
+
};
|
|
211
|
+
if port == 0 {
|
|
212
|
+
bail!("Invalid port number: {value}");
|
|
213
|
+
}
|
|
214
|
+
Ok(port)
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
fn server_cli_working_directory(cli: &Cli) -> PathBuf {
|
|
218
|
+
cli.paths
|
|
219
|
+
.first()
|
|
220
|
+
.cloned()
|
|
221
|
+
.unwrap_or_else(|| std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
/// Formats a startup error for stderr.
///
/// * `TypeError: mode is not a function` is prefixed with
///   `Failed to start server: `.
/// * Other messages starting with `TypeError [ERR_INVALID_ARG_TYPE]`,
///   `TypeError:` or `SyntaxError:` are returned unchanged.
/// * Everything else is prefixed with `Failed to start server: Error: `.
fn server_error_message(message: &str) -> String {
    const PASSTHROUGH_PREFIXES: [&str; 3] = [
        "TypeError [ERR_INVALID_ARG_TYPE]",
        "TypeError:",
        "SyntaxError:",
    ];

    // The exact-match case must be tested first: it also starts with `TypeError:`.
    if message == "TypeError: mode is not a function" {
        return format!("Failed to start server: {message}");
    }

    let passes_through = PASSTHROUGH_PREFIXES
        .iter()
        .any(|prefix| message.starts_with(*prefix));
    if passes_through {
        return message.to_string();
    }

    format!("Failed to start server: Error: {message}")
}
|
|
239
|
+
|
|
240
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `ServerArgs::parse` over string literals, panicking if parsing fails.
    fn parse(args: &[&str]) -> ServerArgs {
        let os_args = args.iter().map(OsString::from);
        ServerArgs::parse(os_args).expect("parse server args")
    }

    /// Converts string literals into the `OsString` vector stored in `jscpd_args`.
    fn os_strings(values: &[&str]) -> Vec<OsString> {
        values.iter().map(OsString::from).collect()
    }

    #[test]
    fn extracts_server_host_and_port() {
        let parsed = parse(&[
            "jscpd-server",
            ".",
            "--host",
            "127.0.0.1",
            "--port",
            "4567",
            "--format",
            "javascript",
        ]);

        assert_eq!(parsed.host, "127.0.0.1");
        assert_eq!(parsed.port, 4567);
        // Server flags and their values are stripped; everything else is forwarded.
        assert_eq!(
            parsed.jscpd_args,
            os_strings(&["jscpd-server", ".", "--format", "javascript"])
        );
        assert_eq!(parsed.unknown_option, None);
    }

    #[test]
    fn supports_equals_server_flags() {
        let parsed = parse(&["jscpd-server", "--host=localhost", "--port=3001", "src"]);

        assert_eq!(parsed.host, "localhost");
        assert_eq!(parsed.port, 3001);
        assert_eq!(parsed.jscpd_args, os_strings(&["jscpd-server", "src"]));
        assert_eq!(parsed.unknown_option, None);
    }

    #[test]
    fn server_working_directory_uses_cli_arg_before_config_paths() {
        // No positional path: the current directory is used.
        let without_path = Cli::parse_from(["jscpd-server", "--config", ".jscpd.json"]);
        assert_eq!(
            server_cli_working_directory(&without_path),
            std::env::current_dir().unwrap()
        );

        // With a positional path: that path is used.
        let with_path = Cli::parse_from(["jscpd-server", "--config", ".jscpd.json", "src"]);
        assert_eq!(
            server_cli_working_directory(&with_path),
            PathBuf::from("src")
        );
    }

    #[test]
    fn detects_server_help_without_forwarding_to_jscpd_cli() {
        let parsed = parse(&["jscpd-server", "--help"]);

        assert!(parsed.help);
        assert_eq!(parsed.unknown_option, None);

        let help = server_help();
        for expected in [
            "Usage: jscpd-server [options] <path>",
            "Start jscpd as a server",
            "-p, --port [number]",
            "-H, --host [string]",
        ] {
            assert!(help.contains(expected));
        }
        assert!(
            help.contains("function mild(token)"),
            "server help should preserve upstream default mode text"
        );
        assert!(!help.contains("detector of copy/paste in files"));
    }

    #[test]
    fn bare_or_invalid_server_port_matches_upstream_error() {
        let bare = ServerArgs::parse(os_strings(&["jscpd-server", "--port"]))
            .expect_err("bare port should fail");
        assert_eq!(bare.to_string(), "Invalid port number: true");

        let invalid = ServerArgs::parse(os_strings(&["jscpd-server", "--port", "abc"]))
            .expect_err("invalid port should fail");
        assert_eq!(invalid.to_string(), "Invalid port number: abc");
    }

    #[test]
    fn formats_server_start_errors_like_upstream() {
        let cases = [
            (
                "Invalid port number: true",
                "Failed to start server: Error: Invalid port number: true",
            ),
            (
                "TypeError: cli.format.split is not a function",
                "TypeError: cli.format.split is not a function",
            ),
            (
                "TypeError: mode is not a function",
                "Failed to start server: TypeError: mode is not a function",
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(server_error_message(input), expected);
        }
    }

    #[test]
    fn rejects_options_not_supported_by_upstream_server() {
        let rejected = [
            "--list",
            "-h",
            "--reporters",
            "--output",
            "--debug",
            "--verbose",
            "--exitCode",
            "--noTips",
            "--skipComments",
            "--formats-exts",
            "--formats-names",
            "--pattern",
            "--blame",
            "--silent",
            "--threshold",
            "--no-gitignore",
        ];

        for option in rejected {
            let parsed = parse(&["jscpd-server", option]);

            assert_eq!(parsed.unknown_option, Some(option.to_string()));
        }
    }

    #[test]
    fn forwards_only_upstream_server_common_options() {
        let input = [
            "jscpd-server",
            "src",
            "-V",
            "--version",
            "-c",
            ".jscpd.json",
            "--config=custom.json",
            "-f",
            "javascript",
            "--format=typescript",
            "-i",
            "**/*.min.js",
            "--ignore=dist/**",
            "--ignore-pattern",
            "generated",
            "-l",
            "5",
            "--min-lines=6",
            "-k",
            "50",
            "--min-tokens=60",
            "-x",
            "1000",
            "--max-lines=2000",
            "-z",
            "1mb",
            "--max-size=2mb",
            "-m",
            "strict",
            "--mode=weak",
            "--store",
            "memory",
            "--store-path",
            ".cache",
            "-a",
            "--absolute",
            "-n",
            "--noSymlinks",
            "--ignoreCase",
            "-g",
            "--gitignore",
            "--skipLocal",
        ];

        let parsed = parse(&input);

        // Every argument is forwarded untouched and in order.
        assert_eq!(parsed.unknown_option, None);
        assert_eq!(parsed.jscpd_args, os_strings(&input));
    }
}
|
package/src/blame.rs
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
use std::collections::HashMap;
|
|
2
|
+
use std::path::Path;
|
|
3
|
+
use std::process::Command;
|
|
4
|
+
use std::sync::OnceLock;
|
|
5
|
+
|
|
6
|
+
use regex::Regex;
|
|
7
|
+
|
|
8
|
+
use crate::detector::{BlamedLine, BlamedLines, DetectionResult, Fragment};
|
|
9
|
+
|
|
10
|
+
pub fn apply_blame(result: &mut DetectionResult) {
|
|
11
|
+
let mut cache = HashMap::<String, Option<BlamedLines>>::new();
|
|
12
|
+
for clone in &mut result.clones {
|
|
13
|
+
apply_fragment_blame(&mut clone.duplication_a, &mut cache);
|
|
14
|
+
apply_fragment_blame(&mut clone.duplication_b, &mut cache);
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
fn apply_fragment_blame(fragment: &mut Fragment, cache: &mut HashMap<String, Option<BlamedLines>>) {
|
|
19
|
+
let blamed_file = cache
|
|
20
|
+
.entry(fragment.source_id.clone())
|
|
21
|
+
.or_insert_with(|| blame_file(&fragment.source_id));
|
|
22
|
+
fragment.blame = blamed_file
|
|
23
|
+
.as_ref()
|
|
24
|
+
.map(|blame| slice_blame(blame, fragment.start.line, fragment.end.line))
|
|
25
|
+
.filter(|blame| !blame.is_empty());
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
fn blame_file(path: &str) -> Option<BlamedLines> {
|
|
29
|
+
let path = Path::new(path);
|
|
30
|
+
let parent = path
|
|
31
|
+
.parent()
|
|
32
|
+
.filter(|parent| !parent.as_os_str().is_empty())
|
|
33
|
+
.unwrap_or_else(|| Path::new("."));
|
|
34
|
+
let file_name = path.file_name()?;
|
|
35
|
+
let output = Command::new("git")
|
|
36
|
+
.arg("-C")
|
|
37
|
+
.arg(parent)
|
|
38
|
+
.arg("blame")
|
|
39
|
+
.arg("-w")
|
|
40
|
+
.arg("--")
|
|
41
|
+
.arg(file_name)
|
|
42
|
+
.output()
|
|
43
|
+
.ok()?;
|
|
44
|
+
if !output.status.success() {
|
|
45
|
+
return None;
|
|
46
|
+
}
|
|
47
|
+
let stdout = String::from_utf8(output.stdout).ok()?;
|
|
48
|
+
let blamed = parse_git_blame(&stdout);
|
|
49
|
+
(!blamed.is_empty()).then_some(blamed)
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/// Copies the blame entries for lines `start..=end` (inclusive) out of `blame`.
///
/// Entries are keyed by the decimal line number as a string. Lines with no
/// entry are skipped.
fn slice_blame(blame: &BlamedLines, start: usize, end: usize) -> BlamedLines {
    let lookup = |line: usize| {
        let key = line.to_string();
        let entry = blame.get(&key)?;
        Some((key, entry.clone()))
    };

    (start..=end).filter_map(lookup).collect()
}
|
|
60
|
+
|
|
61
|
+
fn parse_git_blame(output: &str) -> BlamedLines {
|
|
62
|
+
output
|
|
63
|
+
.lines()
|
|
64
|
+
.filter_map(parse_git_blame_line)
|
|
65
|
+
.map(|line| (line.line.clone(), line))
|
|
66
|
+
.collect()
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
fn parse_git_blame_line(raw_line: &str) -> Option<BlamedLine> {
|
|
70
|
+
let captures = blame_line_regex().captures(raw_line)?;
|
|
71
|
+
let line = captures.get(4)?.as_str().to_string();
|
|
72
|
+
if line.is_empty() {
|
|
73
|
+
return None;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
Some(BlamedLine {
|
|
77
|
+
rev: captures.get(1)?.as_str().to_string(),
|
|
78
|
+
author: captures.get(2)?.as_str().to_string(),
|
|
79
|
+
date: captures.get(3)?.as_str().to_string(),
|
|
80
|
+
line,
|
|
81
|
+
})
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
fn blame_line_regex() -> &'static Regex {
|
|
85
|
+
static REGEX: OnceLock<Regex> = OnceLock::new();
|
|
86
|
+
REGEX.get_or_init(|| {
|
|
87
|
+
Regex::new(
|
|
88
|
+
r"^(.+)\s+\((.+)\s+(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} [+-]\d{4})\s+(\d+)\)(.*)$",
|
|
89
|
+
)
|
|
90
|
+
.expect("valid git blame regex")
|
|
91
|
+
})
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// One line whose leading column also carries a file path, one without.
    #[test]
    fn parses_git_blame_lines() {
        let parsed = parse_git_blame(
            "\
ca40bf24 tests/fixtures/file_4.js (Andrey Kucherenko 2013-06-02 23:31:50 +0300 56) footprints = typeof yeti !== \"undefined\";
bbbbbbbb (Bob Smith 2024-01-02 03:04:05 -0700 57) second
",
        );

        let first = &parsed["56"];
        assert_eq!(first.author, "Andrey Kucherenko");
        assert_eq!(first.rev, "ca40bf24 tests/fixtures/file_4.js");
        assert_eq!(first.date, "2013-06-02 23:31:50 +0300");
        assert_eq!(first.line, "56");
        assert_eq!(parsed["57"].author, "Bob Smith");
    }

    /// Only the entries inside the requested inclusive range are kept.
    #[test]
    fn slices_blame_to_fragment_range() {
        let output = "\
aaaaaaaa (Alice 2024-01-01 00:00:00 +0000 1) first
bbbbbbbb (Bob 2024-01-02 00:00:00 +0000 2) second
cccccccc (Carol 2024-01-03 00:00:00 +0000 3) third
";
        let whole_file = parse_git_blame(output);

        let sliced = slice_blame(&whole_file, 2, 3);

        let keys = sliced.keys().cloned().collect::<Vec<_>>();
        assert_eq!(keys, vec!["2", "3"]);
        assert_eq!(sliced["2"].author, "Bob");
        assert_eq!(sliced["3"].author, "Carol");
    }
}
|