jscpd-rs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CHANGELOG.md +69 -0
  2. package/Cargo.lock +1323 -0
  3. package/Cargo.toml +54 -0
  4. package/LICENSE +21 -0
  5. package/README.md +372 -0
  6. package/docs/api-parity.md +49 -0
  7. package/docs/cloning-plan.md +281 -0
  8. package/docs/compat-baseline.md +535 -0
  9. package/docs/format-porting.md +86 -0
  10. package/docs/junior-task-template.md +62 -0
  11. package/docs/junior-workflow.md +87 -0
  12. package/docs/migrating-from-jscpd.md +193 -0
  13. package/docs/npm-release.md +116 -0
  14. package/docs/public-benchmark-suite.md +81 -0
  15. package/docs/release-checklist.md +200 -0
  16. package/docs/release-decisions.md +103 -0
  17. package/docs/release-readiness.md +51 -0
  18. package/docs/upstream-bugs.md +501 -0
  19. package/docs/upstream-issue-drafts.md +393 -0
  20. package/docs/user-guide.md +309 -0
  21. package/examples/dump_oxc_tokens.rs +112 -0
  22. package/examples/library_api.rs +42 -0
  23. package/npm/bin/jscpd-rs.js +6 -0
  24. package/npm/bin/jscpd-server.js +6 -0
  25. package/npm/lib/run-binary.js +68 -0
  26. package/npm/scripts/postinstall.js +50 -0
  27. package/package.json +53 -0
  28. package/skills/dry-refactoring/SKILL.md +63 -0
  29. package/skills/jscpd/SKILL.md +85 -0
  30. package/src/app.rs +512 -0
  31. package/src/bin/jscpd-server.rs +429 -0
  32. package/src/blame.rs +130 -0
  33. package/src/cli/config.rs +543 -0
  34. package/src/cli/parsing.rs +301 -0
  35. package/src/cli/tests.rs +543 -0
  36. package/src/cli.rs +671 -0
  37. package/src/detector/matching/secondary.rs +387 -0
  38. package/src/detector/matching.rs +274 -0
  39. package/src/detector/model.rs +190 -0
  40. package/src/detector/prepare.rs +71 -0
  41. package/src/detector/skip_local.rs +40 -0
  42. package/src/detector/statistics.rs +138 -0
  43. package/src/detector/store.rs +96 -0
  44. package/src/detector/tests.rs +238 -0
  45. package/src/detector.rs +265 -0
  46. package/src/files/discovery.rs +508 -0
  47. package/src/files/gitignore.rs +203 -0
  48. package/src/files/paths.rs +68 -0
  49. package/src/files/shebang.rs +106 -0
  50. package/src/files/tests.rs +523 -0
  51. package/src/files.rs +25 -0
  52. package/src/formats.rs +570 -0
  53. package/src/lib.rs +433 -0
  54. package/src/main.rs +26 -0
  55. package/src/report/ai.rs +125 -0
  56. package/src/report/badge.rs +238 -0
  57. package/src/report/console.rs +180 -0
  58. package/src/report/console_common.rs +37 -0
  59. package/src/report/console_full.rs +139 -0
  60. package/src/report/csv.rs +65 -0
  61. package/src/report/escape.rs +8 -0
  62. package/src/report/file_output.rs +28 -0
  63. package/src/report/html/assets.rs +47 -0
  64. package/src/report/html.rs +336 -0
  65. package/src/report/json.rs +119 -0
  66. package/src/report/markdown.rs +125 -0
  67. package/src/report/sarif.rs +302 -0
  68. package/src/report/silent.rs +22 -0
  69. package/src/report/source.rs +38 -0
  70. package/src/report/summary.rs +50 -0
  71. package/src/report/test_support.rs +133 -0
  72. package/src/report/threshold.rs +76 -0
  73. package/src/report/xcode.rs +90 -0
  74. package/src/report/xml.rs +119 -0
  75. package/src/report.rs +250 -0
  76. package/src/server/mcp.rs +942 -0
  77. package/src/server.rs +1081 -0
  78. package/src/tokenizer/apex.rs +97 -0
  79. package/src/tokenizer/blocks.rs +532 -0
  80. package/src/tokenizer/embedded.rs +106 -0
  81. package/src/tokenizer/generic.rs +511 -0
  82. package/src/tokenizer/hash.rs +27 -0
  83. package/src/tokenizer/ignore.rs +33 -0
  84. package/src/tokenizer/line_index.rs +33 -0
  85. package/src/tokenizer/markdown.rs +289 -0
  86. package/src/tokenizer/markup_attrs.rs +289 -0
  87. package/src/tokenizer/oxc/fallback.rs +275 -0
  88. package/src/tokenizer/oxc/jsx.rs +168 -0
  89. package/src/tokenizer/oxc/kind.rs +177 -0
  90. package/src/tokenizer/oxc/lexical.rs +67 -0
  91. package/src/tokenizer/oxc.rs +659 -0
  92. package/src/tokenizer/scan.rs +88 -0
  93. package/src/tokenizer/tap.rs +150 -0
  94. package/src/tokenizer/tests.rs +915 -0
  95. package/src/tokenizer.rs +328 -0
  96. package/src/verbose.rs +195 -0
@@ -0,0 +1,68 @@
1
+ use std::cmp::Ordering;
2
+ use std::ffi::OsString;
3
+ use std::path::{Path, PathBuf};
4
+
5
+ pub(super) fn display_relative_to(path: &Path, cwd: &Path) -> String {
6
+ relative_path(path, cwd)
7
+ .unwrap_or_else(|| path.to_path_buf())
8
+ .display()
9
+ .to_string()
10
+ }
11
+
12
+ pub(super) fn relative_path(path: &Path, base: &Path) -> Option<PathBuf> {
13
+ if !path.is_absolute() {
14
+ return Some(path.to_path_buf());
15
+ }
16
+ if !base.is_absolute() {
17
+ return None;
18
+ }
19
+
20
+ let path_components = normal_components(path);
21
+ let base_components = normal_components(base);
22
+ let common_len = path_components
23
+ .iter()
24
+ .zip(&base_components)
25
+ .take_while(|(left, right)| left == right)
26
+ .count();
27
+
28
+ let mut relative = PathBuf::new();
29
+ for _ in common_len..base_components.len() {
30
+ relative.push("..");
31
+ }
32
+ for component in &path_components[common_len..] {
33
+ relative.push(component);
34
+ }
35
+ Some(relative)
36
+ }
37
+
38
/// Collects the named (`Normal`) components of `path`, resolving `..` lexically.
///
/// Prefix, root and `.` components carry no name and are skipped. A `..`
/// component removes the most recently collected name instead of being
/// silently discarded, so `/a/b/../c` yields `[a, c]` rather than `[a, b, c]`.
/// A `..` with nothing left to remove is a no-op, the same way `/..` resolves
/// to `/`. No filesystem access happens, so symlinks are not followed.
fn normal_components(path: &Path) -> Vec<OsString> {
    let mut names = Vec::new();
    for component in path.components() {
        match component {
            std::path::Component::Normal(value) => names.push(value.to_os_string()),
            std::path::Component::ParentDir => {
                names.pop();
            }
            std::path::Component::Prefix(_)
            | std::path::Component::RootDir
            | std::path::Component::CurDir => {}
        }
    }
    names
}
46
+
47
+ pub(super) fn fast_glob_like_path_cmp(left: &Path, right: &Path) -> Ordering {
48
+ let left_components = left.components().collect::<Vec<_>>();
49
+ let right_components = right.components().collect::<Vec<_>>();
50
+ match left_components.len().cmp(&right_components.len()) {
51
+ Ordering::Equal => {}
52
+ ordering => return ordering,
53
+ }
54
+
55
+ for idx in 0..left_components.len() {
56
+ let left_component = left_components[idx].as_os_str();
57
+ let right_component = right_components[idx].as_os_str();
58
+ if left_component == right_component {
59
+ continue;
60
+ }
61
+
62
+ return left_component
63
+ .to_string_lossy()
64
+ .cmp(&right_component.to_string_lossy());
65
+ }
66
+
67
+ Ordering::Equal
68
+ }
@@ -0,0 +1,106 @@
1
+ use std::fs;
2
+ use std::io::Read;
3
+ use std::path::Path;
4
+
5
+ use anyhow::{Context, Result};
6
+
7
+ pub(super) fn shebang_format_for_path(
8
+ path: &Path,
9
+ metadata: &fs::Metadata,
10
+ ) -> Result<Option<&'static str>> {
11
+ if !is_executable(metadata) || is_symlink(path) {
12
+ return Ok(None);
13
+ }
14
+
15
+ let mut file =
16
+ fs::File::open(path).with_context(|| format!("failed to read `{}`", path.display()))?;
17
+ let mut buf = [0u8; 128];
18
+ let read = file
19
+ .read(&mut buf)
20
+ .with_context(|| format!("failed to read `{}`", path.display()))?;
21
+ let head = String::from_utf8_lossy(&buf[..read]);
22
+ let Some(first_line) = head.lines().next() else {
23
+ return Ok(None);
24
+ };
25
+ if !first_line.starts_with("#!") {
26
+ return Ok(None);
27
+ }
28
+
29
+ let mut tokens = first_line[2..].split_whitespace();
30
+ let Some(first_token) = tokens.next() else {
31
+ return Ok(None);
32
+ };
33
+ let interpreter = if Path::new(first_token)
34
+ .file_name()
35
+ .is_some_and(|name| name.to_string_lossy().starts_with("env"))
36
+ {
37
+ let Some(second_token) = tokens.next() else {
38
+ return Ok(None);
39
+ };
40
+ if second_token.starts_with('-') {
41
+ return Ok(None);
42
+ }
43
+ second_token
44
+ } else {
45
+ first_token
46
+ };
47
+
48
+ let Some(raw_name) = Path::new(interpreter).file_name() else {
49
+ return Ok(None);
50
+ };
51
+ let raw_name = raw_name.to_string_lossy();
52
+ if raw_name.as_bytes().first().is_some_and(u8::is_ascii_digit) {
53
+ return Ok(None);
54
+ }
55
+
56
+ Ok(shebang_name_to_format(&normalize_shebang_name(&raw_name)))
57
+ }
58
+
59
/// Maps a normalized interpreter name to the format it runs, if one is known.
///
/// Matching is exact and case-sensitive; unknown names yield `None`.
fn shebang_name_to_format(name: &str) -> Option<&'static str> {
    // Each entry pairs the accepted interpreter names with the resulting format.
    const INTERPRETERS: &[(&[&str], &str)] = &[
        (&["bash", "sh", "zsh", "dash", "ksh"], "bash"),
        (&["python"], "python"),
        (&["ruby"], "ruby"),
        (&["perl"], "perl"),
        (&["php"], "php"),
        (&["node", "nodejs"], "javascript"),
        (&["lua"], "lua"),
        (&["tclsh", "wish"], "tcl"),
        (&["groovy"], "groovy"),
        (&["awk", "gawk", "nawk"], "awk"),
        (&["rscript"], "r"),
    ];

    INTERPRETERS
        .iter()
        .find(|(names, _)| names.contains(&name))
        .map(|&(_, format)| format)
}
75
+
76
/// Lowercases an interpreter name after removing a trailing version suffix.
///
/// The suffix is only removed when the name ends in an ASCII digit; in that
/// case the whole trailing run of ASCII digits and `.` characters is dropped
/// (`python3.11` becomes `python`). Names ending in anything else are only
/// lowercased.
fn normalize_shebang_name(raw_name: &str) -> String {
    let has_version_suffix = raw_name.ends_with(|ch: char| ch.is_ascii_digit());
    let stem = if has_version_suffix {
        raw_name.trim_end_matches(|ch: char| ch.is_ascii_digit() || ch == '.')
    } else {
        raw_name
    };
    stem.to_ascii_lowercase()
}
89
+
90
/// Reports whether `path` itself is a symbolic link.
///
/// The link is inspected without being followed. Any failure to read the
/// metadata (for example a missing path) is reported as `false`.
fn is_symlink(path: &Path) -> bool {
    match fs::symlink_metadata(path) {
        Ok(metadata) => metadata.file_type().is_symlink(),
        Err(_) => false,
    }
}
95
+
96
/// Unix: reports whether any execute permission bit (owner, group or other)
/// is set in `metadata`.
#[cfg(unix)]
fn is_executable(metadata: &fs::Metadata) -> bool {
    use std::os::unix::fs::PermissionsExt;

    // Owner, group and other execute bits combined.
    const ANY_EXECUTE_BIT: u32 = 0o111;

    let mode = metadata.permissions().mode();
    (mode & ANY_EXECUTE_BIT) != 0
}

/// Non-Unix: no file is ever reported as executable.
#[cfg(not(unix))]
fn is_executable(_metadata: &fs::Metadata) -> bool {
    false
}
@@ -0,0 +1,523 @@
1
+ use std::cmp::Ordering;
2
+ use std::collections::HashSet;
3
+ use std::path::{Path, PathBuf};
4
+
5
+ use crate::cli::Options;
6
+
7
+ use super::SourceFile;
8
+ use super::discover;
9
+ use super::discovery::{
10
+ build_ignore_matcher, count_lines, decode_source, format_filter_skip_message, is_ignored,
11
+ };
12
+ use super::gitignore::{
13
+ collect_cwd_gitignore_patterns, collect_gitignore_patterns_with_global, gitignore_line_to_globs,
14
+ };
15
+ use super::paths::{display_relative_to, fast_glob_like_path_cmp, relative_path};
16
+
17
/// Builds a path inside the system temp directory named
/// `jscpd-rs-<label>-<pid>-<nanoseconds since the Unix epoch>`.
///
/// Nothing is created on disk; callers create the file or directory themselves.
fn unique_temp_path(label: &str) -> PathBuf {
    use std::time::{SystemTime, UNIX_EPOCH};

    let pid = std::process::id();
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_nanos();

    let mut location = std::env::temp_dir();
    location.push(format!("jscpd-rs-{label}-{pid}-{nanos}"));
    location
}
24
+
25
+ fn discovery_options(paths: Vec<PathBuf>) -> Options {
26
+ Options {
27
+ paths,
28
+ min_lines: 1,
29
+ reporters: vec!["json".to_string()],
30
+ silent: true,
31
+ gitignore: false,
32
+ ..Options::default()
33
+ }
34
+ }
35
+
36
+ fn javascript_discovery_options(paths: Vec<PathBuf>) -> Options {
37
+ let mut options = discovery_options(paths);
38
+ options.formats = Some(HashSet::from(["javascript".to_string()]));
39
+ options
40
+ }
41
+
42
+ fn source_ids(files: &[SourceFile]) -> Vec<&str> {
43
+ files.iter().map(|file| file.source_id.as_str()).collect()
44
+ }
45
+
46
/// Checks `fast_glob_like_path_cmp` on pairs where the first path must sort
/// before the second.
#[test]
fn fast_glob_like_order_places_parent_files_before_child_files() {
    // (path expected first, path expected after it)
    let ordered_pairs = [
        (
            "pkg/tokenizer/src/tokenize.ts",
            "pkg/tokenizer/src/languages/markdown-tokenizer.ts",
        ),
        (
            "pkg/tokenizer/src/languages/astro.ts",
            "pkg/tokenizer/src/languages/vue.ts",
        ),
        (
            "../example-app/landing/.next/types/validator.ts",
            "../example-app/landing/.next/dev/types/validator.ts",
        ),
    ];

    for (earlier, later) in ordered_pairs {
        assert_eq!(
            fast_glob_like_path_cmp(Path::new(earlier), Path::new(later)),
            Ordering::Less
        );
    }
}
70
+
71
/// Files passed explicitly must come back from `discover` in the order given.
#[test]
fn explicit_file_paths_preserve_cli_order_like_upstream() {
    let root = unique_temp_path("explicit-order");
    let under_root = |segments: &[&str]| {
        segments
            .iter()
            .fold(root.clone(), |path, segment| path.join(segment))
    };

    // Listed in the order they are handed to discovery.
    let sources = [
        (
            under_root(&["fixtures", "setupTests.js"]),
            "const setup = 1;\n",
        ),
        (
            under_root(&["packages", "react-devtools-shared", "utils.js"]),
            "const utils = 1;\n",
        ),
        (
            under_root(&["packages", "internal-test-utils", "consoleMock.js"]),
            "const consoleMock = 1;\n",
        ),
    ];
    for (file, _) in &sources {
        std::fs::create_dir_all(file.parent().unwrap()).unwrap();
    }
    for (file, body) in &sources {
        std::fs::write(file, *body).unwrap();
    }

    let requested = sources.iter().map(|(file, _)| file.clone()).collect();
    let options = javascript_discovery_options(requested);

    let discovered = discover(&options).unwrap();
    let _ = std::fs::remove_dir_all(&root);
    let ids = source_ids(&discovered);

    assert_eq!(ids.len(), 3);
    assert!(ids[0].ends_with("fixtures/setupTests.js"));
    assert!(ids[1].ends_with("packages/react-devtools-shared/utils.js"));
    assert!(ids[2].ends_with("packages/internal-test-utils/consoleMock.js"));
}
102
+
103
/// Scanning a directory must list the shallower file first, then the files of
/// sibling subdirectories in name order.
#[test]
fn directory_discovery_preserves_glob_like_order_with_parallel_walk() {
    use std::fs;

    let root = unique_temp_path("parallel-order");
    for subdir in ["packages/a", "packages/b"] {
        fs::create_dir_all(root.join(subdir)).unwrap();
    }
    let fixtures = [
        ("packages/root.js", "const root = 1;\n"),
        ("packages/a/file.js", "const a = 1;\n"),
        ("packages/b/file.js", "const b = 1;\n"),
    ];
    for (relative, body) in fixtures {
        fs::write(root.join(relative), body).unwrap();
    }

    let options = javascript_discovery_options(vec![root.clone()]);

    let discovered = discover(&options).unwrap();
    let _ = fs::remove_dir_all(&root);
    let ids = source_ids(&discovered);

    assert_eq!(ids.len(), 3);
    assert!(ids[0].ends_with("packages/root.js"));
    assert!(ids[1].ends_with("packages/a/file.js"));
    assert!(ids[2].ends_with("packages/b/file.js"));
}
123
+
124
/// A file in a sibling directory of the base is reached through one `..`.
#[test]
fn relative_path_formats_sibling_paths_like_upstream() {
    let target = Path::new("/workspace/example-app/file.ts");
    let base = Path::new("/workspace/jscpd-rs");

    let relative = relative_path(target, base).unwrap();

    assert_eq!(relative, Path::new("../example-app/file.ts"));
}
135
+
136
/// A rooted gitignore line must produce globs prefixed with the base directory.
#[test]
fn gitignore_line_to_globs_anchors_rooted_patterns_to_base_dir() {
    let base = Path::new("/repo/app");
    let globs = gitignore_line_to_globs("/node_modules/", Some(base));

    for expected in ["/repo/app/node_modules", "/repo/app/node_modules/**"] {
        assert!(globs.iter().any(|glob| glob == expected));
    }
}
142
+
143
/// A negated gitignore line keeps its leading `!` in the generated globs.
#[test]
fn gitignore_line_to_globs_preserves_negations_like_upstream() {
    let base = Path::new("/repo/app");
    let globs = gitignore_line_to_globs("!ignored/keep.js", Some(base));

    let has_glob = |wanted: &str| globs.iter().any(|glob| glob == wanted);

    assert!(has_glob("!/repo/app/ignored/keep.js"));
    assert!(has_glob("!/repo/app/ignored/keep.js/**"));
}
158
+
159
/// Pins the exact glob lists produced when no base directory is supplied, and
/// that comment and blank lines produce nothing.
#[test]
fn gitignore_line_to_globs_matches_upstream_conversion_without_base_dir() {
    // (gitignore line, globs it must convert to, in order)
    let conversions = [
        ("/node_modules", vec!["node_modules", "node_modules/**"]),
        (
            "src/dist",
            vec!["src/dist", "src/dist/**", "**/src/dist", "**/src/dist/**"],
        ),
        ("**/dist", vec!["**/dist", "**/dist/**"]),
        ("!test.js", vec!["!**/test.js", "!**/test.js/**"]),
    ];
    for (line, expected) in conversions {
        assert_eq!(gitignore_line_to_globs(line, None), expected);
    }

    for skipped in ["# ignored", " "] {
        assert!(gitignore_line_to_globs(skipped, None).is_empty());
    }
}
180
+
181
/// With the current directory as base, the unanchored glob variants must still
/// be present for both plain and negated lines.
#[test]
fn gitignore_line_to_globs_keeps_upstream_variants_for_cwd_base_dir() {
    let cwd = std::env::current_dir().unwrap();

    let plain = gitignore_line_to_globs("src/dist", Some(&cwd));
    for expected in ["src/dist", "src/dist/**", "**/src/dist", "**/src/dist/**"] {
        assert!(plain.iter().any(|glob| glob == expected));
    }

    let negated = gitignore_line_to_globs("!test.js", Some(&cwd));
    for expected in ["!**/test.js", "!**/test.js/**"] {
        assert!(negated.iter().any(|glob| glob == expected));
    }
}
195
+
196
/// Patterns from a global excludes file must be collected, with comment and
/// blank lines left out.
#[test]
fn collect_gitignore_patterns_includes_global_excludes_like_upstream() {
    let root = unique_temp_path("global-excludes");
    std::fs::create_dir_all(&root).unwrap();
    let excludes_file = root.join("globalignore");
    std::fs::write(&excludes_file, "*.swp\n.DS_Store\n# comment\n\n").unwrap();

    let scan_roots = std::slice::from_ref(&root);
    let patterns = collect_gitignore_patterns_with_global(scan_roots, Some(&excludes_file));
    let _ = std::fs::remove_dir_all(&root);

    for expected in ["**/*.swp", "**/*.swp/**", "**/.DS_Store"] {
        assert!(patterns.iter().any(|pattern| pattern == expected));
    }
    assert!(!patterns.iter().any(|pattern| pattern.contains("comment")));
}
212
+
213
/// Patterns read from a directory's `.gitignore` must come out in the
/// unscoped form, with comment and blank lines left out.
#[test]
fn collect_cwd_gitignore_patterns_uses_upstream_unscoped_conversion() {
    let root = unique_temp_path("cwd-gitignore");
    std::fs::create_dir_all(&root).unwrap();
    let gitignore = root.join(".gitignore");
    std::fs::write(gitignore, "/target/\nreport\n# comment\n\n").unwrap();

    let patterns = collect_cwd_gitignore_patterns(&root);
    let _ = std::fs::remove_dir_all(&root);

    for expected in ["target", "target/**", "**/report"] {
        assert!(patterns.iter().any(|pattern| pattern == expected));
    }
    assert!(!patterns.iter().any(|pattern| pattern.contains("comment")));
}
227
+
228
/// Pins the exact wording of the skip message, including the path shown
/// relative to the working directory.
#[test]
fn format_filter_skip_message_matches_upstream_shape() {
    let working_dir = Path::new("/repo");
    let skipped_file = Path::new("/repo/src/file.ts");

    let message = format_filter_skip_message(skipped_file, "typescript", working_dir);

    assert_eq!(
        message,
        "File src/file.ts skipped! Format \"typescript\" does not included to supported formats."
    );
}
238
+
239
/// Valid UTF-8 decodes unchanged; an invalid byte becomes U+FFFD.
#[test]
fn decode_source_reuses_valid_utf8_and_falls_back_to_lossy() {
    let valid = b"const answer = 42;\n".to_vec();
    assert_eq!(decode_source(valid), "const answer = 42;\n");

    let with_invalid_byte = vec![b'a', 0xff, b'b'];
    assert_eq!(decode_source(with_invalid_byte), "a\u{fffd}b");
}
247
+
248
/// Empty input counts as one line, and a trailing newline starts another line.
#[test]
fn count_lines_matches_upstream_empty_and_newline_rules() {
    let empty = b"";
    let unterminated = b"one";
    let terminated = b"one\n";
    let two_lines = b"one\ntwo";

    assert_eq!(count_lines(empty), 1);
    assert_eq!(count_lines(unterminated), 1);
    assert_eq!(count_lines(terminated), 2);
    assert_eq!(count_lines(two_lines), 2);
}
255
+
256
/// An extensionless executable with a `node` shebang is discovered as
/// `javascript`.
#[cfg(unix)]
#[test]
fn discovers_executable_node_shebang_without_extension() {
    use std::fs;
    use std::os::unix::fs::PermissionsExt;

    let script = unique_temp_path("node-shebang");
    fs::write(&script, "#!/usr/bin/env node\nconsole.log(1);\n").unwrap();
    // Start from the file's current permissions and mark it executable.
    let mut mode = fs::metadata(&script).unwrap().permissions();
    mode.set_mode(0o755);
    fs::set_permissions(&script, mode).unwrap();

    let options = javascript_discovery_options(vec![script.clone()]);

    let discovered = discover(&options).unwrap();
    let _ = fs::remove_file(&script);

    assert_eq!(discovered.len(), 1);
    assert_eq!(discovered[0].format, "javascript");
}
275
+
276
/// CSS, HTML, YAML, TOML and Vue files are each discovered under the expected
/// format name.
#[test]
fn discovers_common_non_native_formats() {
    let root = unique_temp_path("formats");
    std::fs::create_dir_all(&root).unwrap();
    let fixtures = [
        ("style.css", "body { color: red; }\n"),
        ("index.html", "<main>hello</main>\n"),
        ("config.yaml", "enabled: true\n"),
        ("settings.toml", "enabled = true\n"),
        ("Component.vue", "<template><div /></template>\n"),
    ];
    for (file_name, body) in fixtures {
        std::fs::write(root.join(file_name), body).unwrap();
    }

    let options = discovery_options(vec![root.clone()]);

    let discovered = discover(&options).unwrap();
    let _ = std::fs::remove_dir_all(&root);
    let seen: HashSet<_> = discovered.iter().map(|file| file.format.as_str()).collect();

    for expected in ["css", "markup", "yaml", "toml", "vue"] {
        assert!(seen.contains(expected));
    }
}
301
+
302
/// A user-supplied extension mapping makes a `.foo` file discoverable as
/// `javascript`.
#[test]
fn discovers_custom_extension_mappings() {
    let root = unique_temp_path("custom-exts");
    std::fs::create_dir_all(&root).unwrap();
    std::fs::write(root.join("component.foo"), "const answer = 42;\n").unwrap();

    let foo_as_javascript = (String::from("javascript"), vec![String::from("foo")]);
    let options = Options {
        formats_exts: crate::cli::FormatMappings::from_pairs(vec![foo_as_javascript]),
        ..discovery_options(vec![root.clone()])
    };

    let discovered = discover(&options).unwrap();
    let _ = std::fs::remove_dir_all(&root);

    assert_eq!(discovered.len(), 1);
    assert_eq!(discovered[0].format, "javascript");
}
320
+
321
/// A user-supplied file-name mapping makes the extensionless `Recipe` file
/// discoverable as `makefile`.
#[test]
fn discovers_custom_extensionless_name_mappings() {
    let root = unique_temp_path("custom-names");
    std::fs::create_dir_all(&root).unwrap();
    std::fs::write(root.join("Recipe"), "target:\n\tprintf ok\n").unwrap();

    let recipe_as_makefile = (String::from("makefile"), vec![String::from("Recipe")]);
    let options = Options {
        formats_names: crate::cli::FormatMappings::from_pairs(vec![recipe_as_makefile]),
        ..discovery_options(vec![root.clone()])
    };

    let discovered = discover(&options).unwrap();
    let _ = std::fs::remove_dir_all(&root);

    assert_eq!(discovered.len(), 1);
    assert_eq!(discovered[0].format, "makefile");
}
339
+
340
/// With the `html` reporter in silent mode, the source id equals the path
/// displayed relative to the working directory.
#[test]
fn reporter_uses_report_paths_when_silent() {
    let root = unique_temp_path("reporter-paths");
    let source = root.join("file.js");
    std::fs::create_dir_all(&root).unwrap();
    std::fs::write(&source, "const alpha = 1;\n").unwrap();

    let options = Options {
        reporters: vec![String::from("html")],
        ..discovery_options(vec![source.clone()])
    };
    let working_dir = std::env::current_dir().unwrap();

    let discovered = discover(&options).unwrap();
    let _ = std::fs::remove_dir_all(&root);

    assert_eq!(discovered.len(), 1);
    let expected_id = display_relative_to(&source, &working_dir);
    assert_eq!(discovered[0].source_id, expected_id);
}
357
+
358
/// The relative ignore pattern `patches/**` must exclude files under an
/// absolute scan root.
#[test]
fn relative_ignore_pattern_matches_absolute_scan_root_like_upstream() {
    use std::fs;

    let root = unique_temp_path("relative-ignore");
    let patches = root.join("patches");
    let src = root.join("src");
    fs::create_dir_all(&patches).unwrap();
    fs::create_dir_all(&src).unwrap();
    fs::write(patches.join("patch.js"), "const patch = 1;\n").unwrap();
    fs::write(src.join("main.js"), "const main = 1;\n").unwrap();

    let options = Options {
        ignore: vec![String::from("patches/**")],
        ..javascript_discovery_options(vec![root.clone()])
    };

    let discovered = discover(&options).unwrap();
    let _ = fs::remove_dir_all(&root);
    let ids = source_ids(&discovered);

    assert_eq!(ids.len(), 1);
    assert!(ids[0].ends_with("src/main.js"));
}
376
+
377
/// The `./`-prefixed ignore pattern `./patches/**` must exclude files under an
/// absolute scan root.
#[test]
fn dot_relative_ignore_pattern_matches_absolute_scan_root_like_upstream() {
    use std::fs;

    let root = unique_temp_path("dot-relative-ignore");
    let patches = root.join("patches");
    let src = root.join("src");
    fs::create_dir_all(&patches).unwrap();
    fs::create_dir_all(&src).unwrap();
    fs::write(patches.join("patch.js"), "const patch = 1;\n").unwrap();
    fs::write(src.join("main.js"), "const main = 1;\n").unwrap();

    let options = Options {
        ignore: vec![String::from("./patches/**")],
        ..javascript_discovery_options(vec![root.clone()])
    };

    let discovered = discover(&options).unwrap();
    let _ = fs::remove_dir_all(&root);
    let ids = source_ids(&discovered);

    assert_eq!(ids.len(), 1);
    assert!(ids[0].ends_with("src/main.js"));
}
395
+
396
/// Relative ignore patterns must match walk paths that start with `./`, and
/// must leave unrelated paths alone.
#[test]
fn relative_ignore_patterns_match_dot_relative_walk_paths() {
    let patterns = ["jscpd/**", "target/**", ".git/**"].map(String::from);
    let matcher = build_ignore_matcher(&patterns).unwrap();
    let working_dir = std::env::current_dir().unwrap();

    for ignored in ["./jscpd/file.js", "./target/debug/app", "./.git/config"] {
        assert!(is_ignored(Path::new(ignored), &matcher, &working_dir));
    }
    assert!(!is_ignored(
        Path::new("./src/lib.rs"),
        &matcher,
        &working_dir
    ));
}
411
+
412
/// With `no_symlinks` set, a scan root that is a symlink to a directory
/// yields no files.
#[cfg(unix)]
#[test]
fn no_symlinks_skips_symlink_scan_directory_like_upstream() {
    use std::fs;
    use std::os::unix::fs::symlink;

    let root = unique_temp_path("no-symlink-dir");
    let target_dir = root.join("real");
    let linked_dir = root.join("linkdir");
    fs::create_dir_all(&target_dir).unwrap();
    fs::write(target_dir.join("file.js"), "const linked = 1;\n").unwrap();
    symlink(&target_dir, &linked_dir).unwrap();

    let options = Options {
        no_symlinks: true,
        ..javascript_discovery_options(vec![linked_dir])
    };

    let discovered = discover(&options).unwrap();
    let _ = fs::remove_dir_all(&root);

    assert!(discovered.is_empty());
}
430
+
431
/// With `no_symlinks` set, an explicitly passed symlink to a file yields no
/// files.
#[cfg(unix)]
#[test]
fn no_symlinks_skips_symlink_scan_file_like_upstream() {
    use std::fs;
    use std::os::unix::fs::symlink;

    let root = unique_temp_path("no-symlink-file");
    let target_file = root.join("real.js");
    let linked_file = root.join("link.js");
    fs::create_dir_all(&root).unwrap();
    fs::write(&target_file, "const linked = 1;\n").unwrap();
    symlink(&target_file, &linked_file).unwrap();

    let options = Options {
        no_symlinks: true,
        ..javascript_discovery_options(vec![linked_file])
    };

    let discovered = discover(&options).unwrap();
    let _ = fs::remove_dir_all(&root);

    assert!(discovered.is_empty());
}
449
+
450
/// An empty file must still be discovered when `max_lines` is 1; with no
/// reporters configured its source id is the path as given.
#[test]
fn empty_file_counts_as_one_line_like_upstream() {
    let root = unique_temp_path("empty-lines");
    let empty_file = root.join("empty.js");
    std::fs::create_dir_all(&root).unwrap();
    std::fs::write(&empty_file, "").unwrap();

    let options = Options {
        max_lines: 1,
        reporters: Vec::new(),
        ..discovery_options(vec![empty_file.clone()])
    };

    let discovered = discover(&options).unwrap();
    let _ = std::fs::remove_dir_all(&root);

    assert_eq!(discovered.len(), 1);
    assert_eq!(discovered[0].source_id, empty_file.display().to_string());
}
467
+
468
/// A `.js` file larger than `max_size_bytes` must not be discovered.
#[test]
fn known_extension_files_over_max_size_are_filtered() {
    let root = unique_temp_path("max-size-filter");
    let oversized = root.join("large.js");
    std::fs::create_dir_all(&root).unwrap();
    std::fs::write(
        &oversized,
        "const value = 'larger than the configured size';\n",
    )
    .unwrap();

    let options = Options {
        max_size_bytes: 10,
        reporters: Vec::new(),
        ..discovery_options(vec![oversized])
    };

    let discovered = discover(&options).unwrap();
    let _ = std::fs::remove_dir_all(&root);

    assert!(discovered.is_empty());
}
484
+
485
/// A negated `.gitignore` entry must bring back a file that a broader entry
/// ignored.
#[test]
fn gitignore_negation_reincludes_files_during_compat_discovery() {
    use std::fs;

    let root = unique_temp_path("gitignore-negation");
    let ignored_dir = root.join("ignored");
    fs::create_dir_all(&ignored_dir).unwrap();
    fs::write(root.join(".gitignore"), "ignored/**\n!ignored/keep.js\n").unwrap();
    fs::write(ignored_dir.join("drop.js"), "const drop = 1;\n").unwrap();
    fs::write(ignored_dir.join("keep.js"), "const keep = 1;\n").unwrap();

    let options = Options {
        gitignore: true,
        ..discovery_options(vec![root.clone()])
    };

    let discovered = discover(&options).unwrap();
    let _ = fs::remove_dir_all(&root);
    let ids = source_ids(&discovered);

    assert_eq!(ids.len(), 1);
    assert!(ids[0].ends_with("ignored/keep.js"));
}
504
+
505
/// A catch-all `.gitignore` entry followed by a negated bare file name must
/// keep that file even when it sits in a nested directory.
#[test]
fn gitignore_broad_ignore_with_negated_filename_keeps_nested_file() {
    use std::fs;

    let root = unique_temp_path("gitignore-issue-723");
    let nested_dir = root.join("nested");
    fs::create_dir_all(&nested_dir).unwrap();
    fs::write(root.join(".gitignore"), "**/**/*\n!test.js\n").unwrap();
    fs::write(nested_dir.join("drop.js"), "const drop = 1;\n").unwrap();
    fs::write(nested_dir.join("test.js"), "const keep = 1;\n").unwrap();

    let options = Options {
        gitignore: true,
        ..discovery_options(vec![root.clone()])
    };

    let discovered = discover(&options).unwrap();
    let _ = fs::remove_dir_all(&root);
    let ids = source_ids(&discovered);

    assert_eq!(ids.len(), 1);
    assert!(ids[0].ends_with("nested/test.js"));
}