jscpd-rs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CHANGELOG.md +69 -0
  2. package/Cargo.lock +1323 -0
  3. package/Cargo.toml +54 -0
  4. package/LICENSE +21 -0
  5. package/README.md +372 -0
  6. package/docs/api-parity.md +49 -0
  7. package/docs/cloning-plan.md +281 -0
  8. package/docs/compat-baseline.md +535 -0
  9. package/docs/format-porting.md +86 -0
  10. package/docs/junior-task-template.md +62 -0
  11. package/docs/junior-workflow.md +87 -0
  12. package/docs/migrating-from-jscpd.md +193 -0
  13. package/docs/npm-release.md +116 -0
  14. package/docs/public-benchmark-suite.md +81 -0
  15. package/docs/release-checklist.md +200 -0
  16. package/docs/release-decisions.md +103 -0
  17. package/docs/release-readiness.md +51 -0
  18. package/docs/upstream-bugs.md +501 -0
  19. package/docs/upstream-issue-drafts.md +393 -0
  20. package/docs/user-guide.md +309 -0
  21. package/examples/dump_oxc_tokens.rs +112 -0
  22. package/examples/library_api.rs +42 -0
  23. package/npm/bin/jscpd-rs.js +6 -0
  24. package/npm/bin/jscpd-server.js +6 -0
  25. package/npm/lib/run-binary.js +68 -0
  26. package/npm/scripts/postinstall.js +50 -0
  27. package/package.json +53 -0
  28. package/skills/dry-refactoring/SKILL.md +63 -0
  29. package/skills/jscpd/SKILL.md +85 -0
  30. package/src/app.rs +512 -0
  31. package/src/bin/jscpd-server.rs +429 -0
  32. package/src/blame.rs +130 -0
  33. package/src/cli/config.rs +543 -0
  34. package/src/cli/parsing.rs +301 -0
  35. package/src/cli/tests.rs +543 -0
  36. package/src/cli.rs +671 -0
  37. package/src/detector/matching/secondary.rs +387 -0
  38. package/src/detector/matching.rs +274 -0
  39. package/src/detector/model.rs +190 -0
  40. package/src/detector/prepare.rs +71 -0
  41. package/src/detector/skip_local.rs +40 -0
  42. package/src/detector/statistics.rs +138 -0
  43. package/src/detector/store.rs +96 -0
  44. package/src/detector/tests.rs +238 -0
  45. package/src/detector.rs +265 -0
  46. package/src/files/discovery.rs +508 -0
  47. package/src/files/gitignore.rs +203 -0
  48. package/src/files/paths.rs +68 -0
  49. package/src/files/shebang.rs +106 -0
  50. package/src/files/tests.rs +523 -0
  51. package/src/files.rs +25 -0
  52. package/src/formats.rs +570 -0
  53. package/src/lib.rs +433 -0
  54. package/src/main.rs +26 -0
  55. package/src/report/ai.rs +125 -0
  56. package/src/report/badge.rs +238 -0
  57. package/src/report/console.rs +180 -0
  58. package/src/report/console_common.rs +37 -0
  59. package/src/report/console_full.rs +139 -0
  60. package/src/report/csv.rs +65 -0
  61. package/src/report/escape.rs +8 -0
  62. package/src/report/file_output.rs +28 -0
  63. package/src/report/html/assets.rs +47 -0
  64. package/src/report/html.rs +336 -0
  65. package/src/report/json.rs +119 -0
  66. package/src/report/markdown.rs +125 -0
  67. package/src/report/sarif.rs +302 -0
  68. package/src/report/silent.rs +22 -0
  69. package/src/report/source.rs +38 -0
  70. package/src/report/summary.rs +50 -0
  71. package/src/report/test_support.rs +133 -0
  72. package/src/report/threshold.rs +76 -0
  73. package/src/report/xcode.rs +90 -0
  74. package/src/report/xml.rs +119 -0
  75. package/src/report.rs +250 -0
  76. package/src/server/mcp.rs +942 -0
  77. package/src/server.rs +1081 -0
  78. package/src/tokenizer/apex.rs +97 -0
  79. package/src/tokenizer/blocks.rs +532 -0
  80. package/src/tokenizer/embedded.rs +106 -0
  81. package/src/tokenizer/generic.rs +511 -0
  82. package/src/tokenizer/hash.rs +27 -0
  83. package/src/tokenizer/ignore.rs +33 -0
  84. package/src/tokenizer/line_index.rs +33 -0
  85. package/src/tokenizer/markdown.rs +289 -0
  86. package/src/tokenizer/markup_attrs.rs +289 -0
  87. package/src/tokenizer/oxc/fallback.rs +275 -0
  88. package/src/tokenizer/oxc/jsx.rs +168 -0
  89. package/src/tokenizer/oxc/kind.rs +177 -0
  90. package/src/tokenizer/oxc/lexical.rs +67 -0
  91. package/src/tokenizer/oxc.rs +659 -0
  92. package/src/tokenizer/scan.rs +88 -0
  93. package/src/tokenizer/tap.rs +150 -0
  94. package/src/tokenizer/tests.rs +915 -0
  95. package/src/tokenizer.rs +328 -0
  96. package/src/verbose.rs +195 -0
package/src/app.rs ADDED
@@ -0,0 +1,512 @@
1
+ use std::ffi::OsString;
2
+ use std::time::{Duration, Instant};
3
+
4
+ use anyhow::{Result, bail};
5
+ use clap::Parser;
6
+
7
+ use crate::cli::{Cli, ExitCode, Options};
8
+ use crate::detector::CloneMatch;
9
+ use crate::files::SourceFile;
10
+ use crate::{cli, files, formats, report, verbose};
11
+
12
+ #[derive(Clone, Debug, Default)]
13
+ pub struct JscpdOutcome {
14
+ pub clones: Vec<CloneMatch>,
15
+ pub exit_code: Option<i32>,
16
+ }
17
+
18
+ pub fn jscpd<I, T>(args: I) -> Result<Vec<CloneMatch>>
19
+ where
20
+ I: IntoIterator<Item = T>,
21
+ T: Into<OsString> + Clone,
22
+ {
23
+ Ok(run_cli_args(args)?.clones)
24
+ }
25
+
26
+ pub fn jscpd_with_exit_callback<I, T, F>(args: I, mut exit_callback: F) -> Result<Vec<CloneMatch>>
27
+ where
28
+ I: IntoIterator<Item = T>,
29
+ T: Into<OsString> + Clone,
30
+ F: FnMut(i32),
31
+ {
32
+ let outcome = run_cli_args(args)?;
33
+ if let Some(code) = outcome.exit_code {
34
+ exit_callback(code);
35
+ }
36
+ Ok(outcome.clones)
37
+ }
38
+
39
+ pub fn run_cli_args<I, T>(args: I) -> Result<JscpdOutcome>
40
+ where
41
+ I: IntoIterator<Item = T>,
42
+ T: Into<OsString> + Clone,
43
+ {
44
+ run_cli(Cli::try_parse_from(args)?)
45
+ }
46
+
47
+ pub fn run_current_process() -> Result<JscpdOutcome> {
48
+ run_cli(Cli::parse())
49
+ }
50
+
51
+ fn run_cli(cli: Cli) -> Result<JscpdOutcome> {
52
+ if cli.version {
53
+ println!("{}", env!("CARGO_PKG_VERSION"));
54
+ return Ok(JscpdOutcome::default());
55
+ }
56
+ let list = cli.list;
57
+
58
+ let options = Options::from_cli(cli)?;
59
+ if list {
60
+ print!("{}", list_output());
61
+ return Ok(JscpdOutcome::default());
62
+ }
63
+
64
+ let files = files::discover(&options)?;
65
+ if options.debug {
66
+ print_debug(&options, &files);
67
+ return Ok(JscpdOutcome::default());
68
+ }
69
+
70
+ print_store_warning(&options);
71
+ report::write_unknown_reporter_warnings(&options);
72
+
73
+ let started = Instant::now();
74
+ if files.is_empty() {
75
+ print_terminal_footer(&options, started.elapsed());
76
+ return Ok(JscpdOutcome::default());
77
+ }
78
+
79
+ let result = crate::detect_source_files(files, &options);
80
+
81
+ if options.verbose {
82
+ verbose::write_detection_events(&result);
83
+ }
84
+ report::write_progress(&result, &options);
85
+ report::write_reports(&result, &options)?;
86
+ print_terminal_footer(&options, started.elapsed());
87
+
88
+ let clones = result.clones;
89
+ let exit_code = if clones.is_empty() {
90
+ None
91
+ } else {
92
+ Some(match cli::resolve_node_exit_code(&options.exit_code) {
93
+ Ok(code) => code,
94
+ Err(message) => bail!("{message}"),
95
+ })
96
+ };
97
+
98
+ Ok(JscpdOutcome { clones, exit_code })
99
+ }
100
+
101
/// Maps an error message to the text that should be shown for Node-style errors.
///
/// Messages starting with one of the recognised JavaScript error prefixes are
/// returned unchanged. The `Mode … does not supported yet.` message is
/// returned with an `Error: ` prefix. Every other message yields `None`.
pub fn upstream_stdout_error(message: &str) -> Option<String> {
    const PASSTHROUGH_PREFIXES: [&str; 4] = [
        "TypeError [ERR_INVALID_ARG_TYPE]",
        "TypeError:",
        "RangeError ",
        "SyntaxError:",
    ];

    let passes_through = PASSTHROUGH_PREFIXES
        .iter()
        .any(|prefix| message.starts_with(*prefix));
    if passes_through {
        return Some(message.to_owned());
    }

    let is_mode_error =
        message.starts_with("Mode ") && message.ends_with(" does not supported yet.");
    is_mode_error.then(|| format!("Error: {message}"))
}
114
+
115
+ fn print_debug(options: &Options, files: &[SourceFile]) {
116
+ print!("{}", debug_output(options, files));
117
+ }
118
+
119
+ fn print_store_warning(options: &Options) {
120
+ if let Some(warning) = cli::store_warning(options) {
121
+ eprintln!("{warning}");
122
+ }
123
+ }
124
+
125
+ fn print_terminal_footer(options: &Options, elapsed: Duration) {
126
+ if let Some(output) = terminal_footer_output(options, elapsed) {
127
+ print!("{output}");
128
+ }
129
+ }
130
+
131
+ fn terminal_footer_output(options: &Options, elapsed: Duration) -> Option<String> {
132
+ if options.silent {
133
+ return None;
134
+ }
135
+
136
+ let mut output = format!("time: {:.3}ms\n", elapsed.as_secs_f64() * 1000.0);
137
+ if !options.no_tips {
138
+ output.push('\n');
139
+ for tip in TIPS {
140
+ output.push_str(tip);
141
+ output.push('\n');
142
+ }
143
+ }
144
+ Some(output)
145
+ }
146
+
147
/// Tip lines appended to the terminal footer when tips are enabled.
const TIPS: &[&str] = &[
    "💡 Auto-refactor with AI: npx skills add vv-bogdanov/jscpd-rs --skill dry-refactoring",
];
149
+
150
+ fn debug_output(options: &Options, files: &[SourceFile]) -> String {
151
+ let mut output = String::new();
152
+ output.push_str("Options:\n");
153
+ output.push_str(&debug_options_output(options));
154
+ output.push('\n');
155
+ for file in files {
156
+ output.push_str(&file.source_id);
157
+ output.push('\n');
158
+ }
159
+ output.push_str(&format!("Found {} files to detect.\n", files.len()));
160
+ output
161
+ }
162
+
163
+ fn debug_options_output(options: &Options) -> String {
164
+ let mut fields = vec![
165
+ debug_string_field("executionId", options.execution_id.as_deref().unwrap_or("")),
166
+ debug_array_field(
167
+ "path",
168
+ &options
169
+ .paths
170
+ .iter()
171
+ .map(|path| path.display().to_string())
172
+ .collect::<Vec<_>>(),
173
+ ),
174
+ format!(" mode: [Function: {}]", mode_name(options.mode)),
175
+ format!(" minLines: {}", options.min_lines),
176
+ format!(" maxLines: {}", options.max_lines),
177
+ debug_string_field("maxSize", &debug_size(options.max_size_bytes)),
178
+ format!(" minTokens: {}", options.min_tokens),
179
+ debug_output_field(options),
180
+ debug_array_field("reporters", &options.reporters),
181
+ debug_array_field("ignore", &options.ignore),
182
+ debug_optional_number_field("threshold", options.threshold),
183
+ debug_format_mappings_field("formatsExts", &options.formats_exts),
184
+ debug_format_mappings_field("formatsNames", &options.formats_names),
185
+ format!(" debug: {}", options.debug),
186
+ format!(" silent: {}", options.silent),
187
+ format!(" blame: {}", options.blame),
188
+ format!(" cache: {}", options.cache),
189
+ format!(" absolute: {}", options.absolute),
190
+ format!(" noSymlinks: {}", options.no_symlinks),
191
+ format!(" skipLocal: {}", options.skip_local),
192
+ format!(" ignoreCase: {}", options.ignore_case),
193
+ format!(" gitignore: {}", options.gitignore),
194
+ debug_reporter_options_field(options),
195
+ debug_exit_code_field(&options.exit_code),
196
+ format!(" noTips: {}", options.no_tips),
197
+ ];
198
+ if let Some(config) = &options.config {
199
+ fields.push(debug_string_field("config", &config.display().to_string()));
200
+ }
201
+ fields.extend([
202
+ debug_array_field("listeners", &options.listeners),
203
+ debug_array_field("format", &debug_formats(options)),
204
+ ]);
205
+
206
+ if options.pattern != "**/*" {
207
+ fields.push(debug_string_field("pattern", &options.pattern));
208
+ }
209
+ if let Some(store) = &options.store {
210
+ fields.push(debug_string_field("store", store));
211
+ }
212
+ if let Some(store_path) = &options.store_path {
213
+ fields.push(debug_string_field(
214
+ "storePath",
215
+ &store_path.display().to_string(),
216
+ ));
217
+ }
218
+ if !options.tokens_to_skip.is_empty() {
219
+ fields.push(debug_array_field("tokensToSkip", &options.tokens_to_skip));
220
+ }
221
+
222
+ format!("{{\n{}\n}}", fields.join(",\n"))
223
+ }
224
+
225
+ fn debug_string_field(name: &str, value: &str) -> String {
226
+ format!(" {name}: '{}'", js_quote(value))
227
+ }
228
+
229
+ fn debug_output_field(options: &Options) -> String {
230
+ if options.output_is_bare {
231
+ " output: true".to_string()
232
+ } else {
233
+ debug_string_field("output", &options.output.display().to_string())
234
+ }
235
+ }
236
+
237
+ fn debug_array_field(name: &str, values: &[String]) -> String {
238
+ if values.is_empty() {
239
+ return format!(" {name}: []");
240
+ }
241
+ let values = values
242
+ .iter()
243
+ .map(|value| format!("'{}'", js_quote(value)))
244
+ .collect::<Vec<_>>()
245
+ .join(", ");
246
+ format!(" {name}: [ {values} ]")
247
+ }
248
+
249
/// Renders `name: <number>`, or `name: undefined` when no value is set.
fn debug_optional_number_field(name: &str, value: Option<f64>) -> String {
    let rendered = value.map_or_else(|| String::from("undefined"), |number| number.to_string());
    format!(" {name}: {rendered}")
}
255
+
256
+ fn debug_exit_code_field(exit_code: &ExitCode) -> String {
257
+ match exit_code {
258
+ ExitCode::Number(value) => format!(" exitCode: {}", debug_js_number(*value)),
259
+ ExitCode::String(value) => debug_string_field("exitCode", value),
260
+ ExitCode::Boolean(value) => format!(" exitCode: {value}"),
261
+ }
262
+ }
263
+
264
/// Formats a float using JavaScript spellings for the special values.
///
/// NaN becomes `NaN`, the infinities become `Infinity` / `-Infinity`, values
/// without a fractional part are printed without decimals, and everything
/// else uses the default float formatting.
fn debug_js_number(value: f64) -> String {
    if value.is_nan() {
        return String::from("NaN");
    }
    if value.is_infinite() {
        let spelled = if value.is_sign_positive() {
            "Infinity"
        } else {
            "-Infinity"
        };
        return String::from(spelled);
    }

    let is_whole = value.fract() == 0.0;
    if is_whole {
        format!("{value:.0}")
    } else {
        value.to_string()
    }
}
277
+
278
+ fn debug_format_mappings_field(name: &str, mappings: &cli::FormatMappings) -> String {
279
+ if mappings.is_empty() {
280
+ return format!(" {name}: {{}}");
281
+ }
282
+ let entries = mappings
283
+ .iter()
284
+ .map(|(format, values)| {
285
+ let values = values
286
+ .iter()
287
+ .map(|value| format!("'{}'", js_quote(value)))
288
+ .collect::<Vec<_>>()
289
+ .join(", ");
290
+ format!("{}: [ {values} ]", js_quote(format))
291
+ })
292
+ .collect::<Vec<_>>()
293
+ .join(", ");
294
+ format!(" {name}: {{ {entries} }}")
295
+ }
296
+
297
+ fn debug_reporter_options_field(options: &Options) -> String {
298
+ if options.reporters_options.is_empty() {
299
+ return " reportersOptions: {}".to_string();
300
+ }
301
+ let json = serde_json::to_string(&options.reporters_options).unwrap_or_else(|_| "{}".into());
302
+ format!(" reportersOptions: {json}")
303
+ }
304
+
305
+ fn debug_formats(options: &Options) -> Vec<String> {
306
+ if let Some(formats) = &options.format_order {
307
+ return formats.clone();
308
+ }
309
+
310
+ let supported = formats::supported_formats();
311
+ match &options.formats {
312
+ Some(selected) => supported
313
+ .into_iter()
314
+ .filter(|format| selected.contains(*format))
315
+ .map(str::to_string)
316
+ .collect(),
317
+ None => supported.into_iter().map(str::to_string).collect(),
318
+ }
319
+ }
320
+
321
/// Formats a byte count using the largest unit that divides it exactly:
/// `mb` (1024 * 1024 bytes), then `kb` (1024 bytes), otherwise plain `b`.
fn debug_size(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = KIB * KIB;

    match bytes {
        whole if whole.is_multiple_of(MIB) => format!("{}mb", whole / MIB),
        whole if whole.is_multiple_of(KIB) => format!("{}kb", whole / KIB),
        other => format!("{other}b"),
    }
}
330
+
331
+ fn mode_name(mode: cli::Mode) -> &'static str {
332
+ match mode {
333
+ cli::Mode::Strict => "strict",
334
+ cli::Mode::Mild => "mild",
335
+ cli::Mode::Weak => "weak",
336
+ }
337
+ }
338
+
339
/// Escapes `value` for embedding inside a single-quoted JavaScript-style string.
///
/// Backslashes and single quotes are backslash-escaped. Control characters
/// are escaped as well, so a value containing a newline or tab cannot break
/// the one-field-per-line layout of the debug dump. Backspace, tab, newline,
/// form feed and carriage return use their short escapes (`\b`, `\t`, `\n`,
/// `\f`, `\r`); every other control character is written as `\xNN` with
/// uppercase hex digits. The surrounding quotes are added by the caller.
fn js_quote(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\'' => escaped.push_str("\\'"),
            '\u{08}' => escaped.push_str("\\b"),
            '\t' => escaped.push_str("\\t"),
            '\n' => escaped.push_str("\\n"),
            '\u{0C}' => escaped.push_str("\\f"),
            '\r' => escaped.push_str("\\r"),
            // Remaining control characters (U+0000..=U+001F, U+007F..=U+009F)
            // all fit in two hex digits.
            other if other.is_control() => {
                escaped.push_str(&format!("\\x{:02X}", other as u32));
            }
            other => escaped.push(other),
        }
    }
    escaped
}
342
+
343
+ fn list_output() -> String {
344
+ format!(
345
+ "Supported formats: \n{}\n",
346
+ formats::supported_formats().join(", ")
347
+ )
348
+ }
349
+
350
#[cfg(test)]
mod tests {
    use super::*;

    /// The callback API returns the clones and forwards the configured exit code.
    #[test]
    fn jscpd_api_returns_clones_and_calls_exit_callback() {
        let args = [
            "jscpd",
            "jscpd/fixtures/clike/file2.c",
            "--format",
            "c",
            "--min-tokens",
            "20",
            "--min-lines",
            "3",
            "--max-size",
            "1mb",
            "--silent",
            "--noTips",
            "--exitCode",
            "7",
        ];
        let mut seen_codes = Vec::new();

        let clones = jscpd_with_exit_callback(args, |code| seen_codes.push(code))
            .expect("run jscpd app API");

        assert_eq!(clones.len(), 1);
        assert_eq!(seen_codes, vec![7]);
    }

    /// `--version` returns an empty outcome without running detection.
    #[test]
    fn run_cli_args_handles_version_without_detection() {
        let JscpdOutcome { clones, exit_code } =
            run_cli_args(["jscpd", "--version"]).expect("run version");

        assert!(clones.is_empty());
        assert_eq!(exit_code, None);
    }

    /// The debug dump lists options and file ids, but never file contents.
    #[test]
    fn debug_output_lists_options_and_files() {
        let typescript_file = |id: &str, body: &str| SourceFile {
            source_id: id.to_string(),
            format: "typescript".to_string(),
            content: body.to_string(),
        };
        let options = Options {
            debug: true,
            config: Some(std::path::PathBuf::from("/repo/.jscpd.json")),
            formats: Some(std::collections::HashSet::from(["typescript".to_string()])),
            format_order: Some(vec!["typescript".to_string(), "javascript".to_string()]),
            ..Options::default()
        };
        let files = vec![
            typescript_file("src/a.ts", "const a = 1;"),
            typescript_file("src/b.ts", "const b = 1;"),
        ];

        let output = debug_output(&options, &files);

        assert!(output.starts_with("Options:\n"));
        assert!(!output.contains("Options {"));
        for expected in [
            "executionId: '",
            "path: [ '",
            "debug: true",
            "config: '/repo/.jscpd.json'",
            "mode: [Function: mild]",
            "maxSize: '100kb'",
            "format: [ 'typescript', 'javascript' ]",
            "src/a.ts\nsrc/b.ts",
        ] {
            assert!(output.contains(expected), "missing {expected:?} in debug output");
        }
        assert!(output.ends_with("Found 2 files to detect.\n"));
        assert!(!output.contains("const a = 1"));
    }

    /// `--list` prints all formats on one comma-separated line after the header.
    #[test]
    fn list_output_matches_upstream_shape() {
        let output = list_output();

        assert!(output.starts_with("Supported formats: \n"));
        assert!(output.contains("abap, actionscript, ada"));
        assert!(output.contains(", typescript, "));
        assert!(!output.lines().skip(1).any(|line| line == "typescript"));
    }

    /// A configured store produces the "not installed" warning; no store, no warning.
    #[test]
    fn store_warning_matches_upstream_fallback_shape() {
        let with_store = Options {
            store: Some("leveldb".to_string()),
            ..Options::default()
        };

        assert_eq!(
            cli::store_warning(&with_store).as_deref(),
            Some("store name leveldb not installed.")
        );
        assert!(cli::store_warning(&Options::default()).is_none());
    }

    /// Node-style error messages pass through; the mode error gains an `Error: ` prefix.
    #[test]
    fn node_like_errors_match_upstream_stdout_shape() {
        assert_eq!(
            upstream_stdout_error("Mode zzz does not supported yet.").as_deref(),
            Some("Error: Mode zzz does not supported yet.")
        );

        let passthrough_messages = [
            "TypeError [ERR_INVALID_ARG_TYPE]: The \"paths[0]\" argument must be of type string.",
            "TypeError: cli.ignore.split is not a function",
            "RangeError [ERR_OUT_OF_RANGE]: The value of \"code\" is out of range.",
        ];
        for message in passthrough_messages {
            assert_eq!(upstream_stdout_error(message).as_deref(), Some(message));
        }

        assert!(upstream_stdout_error("regular anyhow failure").is_none());
    }

    /// The footer honours the tips switch and is suppressed entirely in silent mode.
    #[test]
    fn terminal_footer_matches_upstream_silent_and_tips_rules() {
        let elapsed = Duration::from_millis(42);

        let with_tips = Options {
            no_tips: false,
            ..Options::default()
        };
        let footer = terminal_footer_output(&with_tips, elapsed).unwrap();
        assert!(footer.starts_with("time: "));
        assert!(footer.contains(
            "Auto-refactor with AI: npx skills add vv-bogdanov/jscpd-rs --skill dry-refactoring"
        ));
        assert!(!footer.contains("Gangsta Agents"));
        assert!(!footer.contains("Support jscpd project"));

        let without_tips = Options {
            no_tips: true,
            ..Options::default()
        };
        let footer = terminal_footer_output(&without_tips, elapsed).unwrap();
        assert!(footer.starts_with("time: "));
        assert!(!footer.contains("Auto-refactor with AI"));

        let silent = Options {
            silent: true,
            ..Options::default()
        };
        assert!(terminal_footer_output(&silent, elapsed).is_none());
    }
}