jscpd-rs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CHANGELOG.md +69 -0
  2. package/Cargo.lock +1323 -0
  3. package/Cargo.toml +54 -0
  4. package/LICENSE +21 -0
  5. package/README.md +372 -0
  6. package/docs/api-parity.md +49 -0
  7. package/docs/cloning-plan.md +281 -0
  8. package/docs/compat-baseline.md +535 -0
  9. package/docs/format-porting.md +86 -0
  10. package/docs/junior-task-template.md +62 -0
  11. package/docs/junior-workflow.md +87 -0
  12. package/docs/migrating-from-jscpd.md +193 -0
  13. package/docs/npm-release.md +116 -0
  14. package/docs/public-benchmark-suite.md +81 -0
  15. package/docs/release-checklist.md +200 -0
  16. package/docs/release-decisions.md +103 -0
  17. package/docs/release-readiness.md +51 -0
  18. package/docs/upstream-bugs.md +501 -0
  19. package/docs/upstream-issue-drafts.md +393 -0
  20. package/docs/user-guide.md +309 -0
  21. package/examples/dump_oxc_tokens.rs +112 -0
  22. package/examples/library_api.rs +42 -0
  23. package/npm/bin/jscpd-rs.js +6 -0
  24. package/npm/bin/jscpd-server.js +6 -0
  25. package/npm/lib/run-binary.js +68 -0
  26. package/npm/scripts/postinstall.js +50 -0
  27. package/package.json +53 -0
  28. package/skills/dry-refactoring/SKILL.md +63 -0
  29. package/skills/jscpd/SKILL.md +85 -0
  30. package/src/app.rs +512 -0
  31. package/src/bin/jscpd-server.rs +429 -0
  32. package/src/blame.rs +130 -0
  33. package/src/cli/config.rs +543 -0
  34. package/src/cli/parsing.rs +301 -0
  35. package/src/cli/tests.rs +543 -0
  36. package/src/cli.rs +671 -0
  37. package/src/detector/matching/secondary.rs +387 -0
  38. package/src/detector/matching.rs +274 -0
  39. package/src/detector/model.rs +190 -0
  40. package/src/detector/prepare.rs +71 -0
  41. package/src/detector/skip_local.rs +40 -0
  42. package/src/detector/statistics.rs +138 -0
  43. package/src/detector/store.rs +96 -0
  44. package/src/detector/tests.rs +238 -0
  45. package/src/detector.rs +265 -0
  46. package/src/files/discovery.rs +508 -0
  47. package/src/files/gitignore.rs +203 -0
  48. package/src/files/paths.rs +68 -0
  49. package/src/files/shebang.rs +106 -0
  50. package/src/files/tests.rs +523 -0
  51. package/src/files.rs +25 -0
  52. package/src/formats.rs +570 -0
  53. package/src/lib.rs +433 -0
  54. package/src/main.rs +26 -0
  55. package/src/report/ai.rs +125 -0
  56. package/src/report/badge.rs +238 -0
  57. package/src/report/console.rs +180 -0
  58. package/src/report/console_common.rs +37 -0
  59. package/src/report/console_full.rs +139 -0
  60. package/src/report/csv.rs +65 -0
  61. package/src/report/escape.rs +8 -0
  62. package/src/report/file_output.rs +28 -0
  63. package/src/report/html/assets.rs +47 -0
  64. package/src/report/html.rs +336 -0
  65. package/src/report/json.rs +119 -0
  66. package/src/report/markdown.rs +125 -0
  67. package/src/report/sarif.rs +302 -0
  68. package/src/report/silent.rs +22 -0
  69. package/src/report/source.rs +38 -0
  70. package/src/report/summary.rs +50 -0
  71. package/src/report/test_support.rs +133 -0
  72. package/src/report/threshold.rs +76 -0
  73. package/src/report/xcode.rs +90 -0
  74. package/src/report/xml.rs +119 -0
  75. package/src/report.rs +250 -0
  76. package/src/server/mcp.rs +942 -0
  77. package/src/server.rs +1081 -0
  78. package/src/tokenizer/apex.rs +97 -0
  79. package/src/tokenizer/blocks.rs +532 -0
  80. package/src/tokenizer/embedded.rs +106 -0
  81. package/src/tokenizer/generic.rs +511 -0
  82. package/src/tokenizer/hash.rs +27 -0
  83. package/src/tokenizer/ignore.rs +33 -0
  84. package/src/tokenizer/line_index.rs +33 -0
  85. package/src/tokenizer/markdown.rs +289 -0
  86. package/src/tokenizer/markup_attrs.rs +289 -0
  87. package/src/tokenizer/oxc/fallback.rs +275 -0
  88. package/src/tokenizer/oxc/jsx.rs +168 -0
  89. package/src/tokenizer/oxc/kind.rs +177 -0
  90. package/src/tokenizer/oxc/lexical.rs +67 -0
  91. package/src/tokenizer/oxc.rs +659 -0
  92. package/src/tokenizer/scan.rs +88 -0
  93. package/src/tokenizer/tap.rs +150 -0
  94. package/src/tokenizer/tests.rs +915 -0
  95. package/src/tokenizer.rs +328 -0
  96. package/src/verbose.rs +195 -0
@@ -0,0 +1,301 @@
1
+ use anyhow::{Context, Result, bail};
2
+ use regex::Regex;
3
+
4
+ use super::{ExitCode, FormatMappings};
5
+
6
+ pub(super) fn split_csv(value: &str) -> Vec<String> {
7
+ value
8
+ .split(',')
9
+ .map(str::trim)
10
+ .filter(|item| !item.is_empty())
11
+ .map(ToOwned::to_owned)
12
+ .collect()
13
+ }
14
+
15
+ pub(super) fn parse_format_mappings(value: &str) -> FormatMappings {
16
+ let mappings = value
17
+ .split(';')
18
+ .filter_map(|entry| {
19
+ let (format, values) = entry.split_once(':')?;
20
+ let values = split_csv(values);
21
+ (!format.trim().is_empty() && !values.is_empty())
22
+ .then(|| (format.trim().to_string(), values))
23
+ })
24
+ .collect();
25
+ FormatMappings(mappings)
26
+ }
27
+
28
+ pub(super) fn parse_format_mappings_like_upstream(value: &str) -> Result<FormatMappings> {
29
+ if value.is_empty() {
30
+ return Ok(FormatMappings::default());
31
+ }
32
+
33
+ let mut mappings = Vec::new();
34
+ for entry in value.split(';') {
35
+ let Some((format, values)) = entry.split_once(':') else {
36
+ bail!("TypeError: Cannot read properties of undefined (reading 'split')");
37
+ };
38
+ mappings.push((
39
+ format.to_string(),
40
+ values.split(',').map(ToOwned::to_owned).collect(),
41
+ ));
42
+ }
43
+
44
+ Ok(FormatMappings(mappings))
45
+ }
46
+
47
+ pub(super) fn compile_patterns(patterns: Vec<String>) -> Result<Vec<Regex>> {
48
+ patterns
49
+ .into_iter()
50
+ .map(|pattern| Regex::new(&pattern).with_context(|| format!("invalid regex `{pattern}`")))
51
+ .collect()
52
+ }
53
+
54
+ pub(super) fn parse_js_usize(value: &str) -> std::result::Result<usize, String> {
55
+ let trimmed = value.trim_start();
56
+ let rest = trimmed.strip_prefix('+').unwrap_or(trimmed);
57
+ if rest.starts_with('-') {
58
+ return Err(format!("invalid integer `{value}`"));
59
+ }
60
+
61
+ let (digits, radix) =
62
+ if let Some(hex) = rest.strip_prefix("0x").or_else(|| rest.strip_prefix("0X")) {
63
+ let digits = hex
64
+ .chars()
65
+ .take_while(|ch| ch.is_ascii_hexdigit())
66
+ .collect::<String>();
67
+ (digits, 16)
68
+ } else {
69
+ let digits = rest
70
+ .chars()
71
+ .take_while(|ch| ch.is_ascii_digit())
72
+ .collect::<String>();
73
+ (digits, 10)
74
+ };
75
+ if digits.is_empty() {
76
+ return Err(format!("invalid integer `{value}`"));
77
+ }
78
+
79
+ let mut parsed = 0usize;
80
+ for digit in digits.chars().filter_map(|ch| ch.to_digit(radix)) {
81
+ parsed = parsed
82
+ .saturating_mul(radix as usize)
83
+ .saturating_add(digit as usize);
84
+ }
85
+ Ok(parsed)
86
+ }
87
+
88
+ pub(super) fn parse_js_number(value: &str) -> std::result::Result<f64, String> {
89
+ let trimmed = value.trim();
90
+ if trimmed.is_empty() {
91
+ return Ok(0.0);
92
+ }
93
+ if trimmed == "NaN" {
94
+ return Ok(f64::NAN);
95
+ }
96
+ if trimmed == "Infinity" || trimmed == "+Infinity" {
97
+ return Ok(f64::INFINITY);
98
+ }
99
+ if trimmed == "-Infinity" {
100
+ return Ok(f64::NEG_INFINITY);
101
+ }
102
+ if let Some(hex) = trimmed
103
+ .strip_prefix("0x")
104
+ .or_else(|| trimmed.strip_prefix("0X"))
105
+ {
106
+ return Ok(u64::from_str_radix(hex, 16)
107
+ .map(|value| value as f64)
108
+ .unwrap_or(f64::NAN));
109
+ }
110
+ if let Some(binary) = trimmed
111
+ .strip_prefix("0b")
112
+ .or_else(|| trimmed.strip_prefix("0B"))
113
+ {
114
+ return Ok(u64::from_str_radix(binary, 2)
115
+ .map(|value| value as f64)
116
+ .unwrap_or(f64::NAN));
117
+ }
118
+ if let Some(octal) = trimmed
119
+ .strip_prefix("0o")
120
+ .or_else(|| trimmed.strip_prefix("0O"))
121
+ {
122
+ return Ok(u64::from_str_radix(octal, 8)
123
+ .map(|value| value as f64)
124
+ .unwrap_or(f64::NAN));
125
+ }
126
+
127
+ Ok(trimmed.parse::<f64>().unwrap_or(f64::NAN))
128
+ }
129
+
130
+ pub(super) fn node_exit_code(value: &ExitCode) -> std::result::Result<i32, NodeExitCodeError> {
131
+ match value {
132
+ ExitCode::Boolean(false) => Ok(0),
133
+ ExitCode::Boolean(true) => Err(NodeExitCodeError::InvalidType {
134
+ type_name: "boolean",
135
+ received: "true".to_string(),
136
+ }),
137
+ ExitCode::Number(number) if number.is_nan() || *number == 0.0 => Ok(0),
138
+ ExitCode::Number(number) => validate_node_exit_number(*number, format_js_number(*number)),
139
+ ExitCode::String(value) if value.is_empty() => Ok(0),
140
+ ExitCode::String(value) => {
141
+ let number = parse_js_number(value).unwrap_or(f64::NAN);
142
+ if number.is_nan() {
143
+ return Err(NodeExitCodeError::InvalidType {
144
+ type_name: "string",
145
+ received: format!("'{value}'"),
146
+ });
147
+ }
148
+ validate_node_exit_number(number, value.to_string())
149
+ }
150
+ }
151
+ }
152
+
153
+ fn validate_node_exit_number(
154
+ number: f64,
155
+ received: String,
156
+ ) -> std::result::Result<i32, NodeExitCodeError> {
157
+ if !number.is_finite()
158
+ || number.fract() != 0.0
159
+ || number < i32::MIN as f64
160
+ || number > i32::MAX as f64
161
+ {
162
+ return Err(NodeExitCodeError::OutOfRange { received });
163
+ }
164
+ Ok(number as i32)
165
+ }
166
+
167
/// Formats `number` the way JavaScript's `String(number)` does.
///
/// * `NaN`, `Infinity` and `-Infinity` use their JavaScript spellings.
/// * Both zeros print as `0`.
/// * Magnitudes in `[1e-6, 1e21)` print in plain decimal using the shortest
///   digits that round-trip (e.g. `3`, `1.5`, `123456789012345680000`).
/// * Everything else prints in exponent notation with an explicit exponent
///   sign (e.g. `1e+21`, `1.5e-7`).
fn format_js_number(number: f64) -> String {
    if number.is_nan() {
        return "NaN".to_string();
    }
    if number.is_infinite() {
        let text = if number > 0.0 { "Infinity" } else { "-Infinity" };
        return text.to_string();
    }
    if number == 0.0 {
        // Covers -0.0 too, which Rust would otherwise print as "-0".
        return "0".to_string();
    }

    if (1e-6..1e21).contains(&number.abs()) {
        // `Display` without a precision emits the shortest round-trip digits
        // and omits ".0" for integral values. A fixed precision (`{:.0}`)
        // would instead print the exact binary expansion of large values.
        return number.to_string();
    }

    // Rust writes "1e21" / "1e-7"; JavaScript writes "1e+21" / "1e-7".
    let scientific = format!("{number:e}");
    match scientific.split_once('e') {
        Some((mantissa, exponent)) if !exponent.starts_with('-') => {
            format!("{mantissa}e+{exponent}")
        }
        _ => scientific,
    }
}
180
+
181
+ #[derive(Clone, Debug, PartialEq, Eq)]
182
+ pub(super) enum NodeExitCodeError {
183
+ InvalidType {
184
+ type_name: &'static str,
185
+ received: String,
186
+ },
187
+ OutOfRange {
188
+ received: String,
189
+ },
190
+ }
191
+
192
+ impl NodeExitCodeError {
193
+ pub(super) fn message(&self) -> String {
194
+ match self {
195
+ Self::InvalidType {
196
+ type_name,
197
+ received,
198
+ } => format!(
199
+ "TypeError [ERR_INVALID_ARG_TYPE]: The \"code\" argument must be of type number. Received type {type_name} ({received})"
200
+ ),
201
+ Self::OutOfRange { received } => format!(
202
+ "RangeError [ERR_OUT_OF_RANGE]: The value of \"code\" is out of range. It must be an integer. Received {received}"
203
+ ),
204
+ }
205
+ }
206
+ }
207
+
208
+ pub(super) fn parse_size(value: &str) -> Result<u64> {
209
+ let trimmed = value.trim();
210
+ if let Some(bytes) = parse_bytes_unit(trimmed) {
211
+ return Ok(bytes);
212
+ }
213
+ Ok(parse_js_int_bytes(trimmed))
214
+ }
215
+
216
+ fn parse_bytes_unit(value: &str) -> Option<u64> {
217
+ let (number_part, rest) = split_decimal_prefix(value)?;
218
+ let suffix = rest.trim_start().to_ascii_lowercase();
219
+ let multiplier = match suffix.as_str() {
220
+ "kb" => 1024.0,
221
+ "mb" => 1024.0 * 1024.0,
222
+ "gb" => 1024.0 * 1024.0 * 1024.0,
223
+ "tb" => 1024.0 * 1024.0 * 1024.0 * 1024.0,
224
+ "pb" => 1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0,
225
+ _ => return None,
226
+ };
227
+ let number = number_part.parse::<f64>().ok()?;
228
+ Some(float_bytes_to_u64(number * multiplier))
229
+ }
230
+
231
/// Splits `value` into a leading decimal number and the remaining text.
///
/// The number is an optional `+`/`-` sign, one or more ASCII digits, and an
/// optional fraction (`.` followed by at least one digit). A `.` with no
/// digits after it is left in the remainder. Returns `None` when there is no
/// digit after the optional sign.
fn split_decimal_prefix(value: &str) -> Option<(&str, &str)> {
    let raw = value.as_bytes();
    let digits_from = |start: usize| {
        raw[start..]
            .iter()
            .take_while(|byte| byte.is_ascii_digit())
            .count()
    };

    let sign_len = usize::from(matches!(raw.first(), Some(b'-' | b'+')));
    let integer_len = digits_from(sign_len);
    if integer_len == 0 {
        return None;
    }

    let mut end = sign_len + integer_len;
    if raw.get(end) == Some(&b'.') {
        let fraction_len = digits_from(end + 1);
        // Only absorb the dot when at least one fraction digit follows it.
        if fraction_len > 0 {
            end += 1 + fraction_len;
        }
    }

    // Everything before `end` is ASCII, so `end` is a valid char boundary.
    Some(value.split_at(end))
}
260
+
261
/// Reads the leading decimal integer of `value` as a byte count.
///
/// A leading `-` makes the result `0`; a single leading `+` is skipped.
/// Reading stops at the first non-digit, inputs without any leading digit
/// yield `0`, and values beyond `u64::MAX` saturate.
fn parse_js_int_bytes(value: &str) -> u64 {
    if value.starts_with('-') {
        return 0;
    }

    let unsigned = value.strip_prefix('+').unwrap_or(value);
    unsigned
        .bytes()
        .take_while(u8::is_ascii_digit)
        .fold(0_u64, |total, byte| {
            total
                .saturating_mul(10)
                .saturating_add((byte - b'0') as u64)
        })
}
292
+
293
/// Converts a floating-point byte count to `u64`, rounding down.
///
/// Non-finite, zero and negative inputs become `0`; inputs at or above
/// `u64::MAX` become `u64::MAX`.
fn float_bytes_to_u64(bytes: f64) -> u64 {
    let positive = bytes.is_finite() && bytes > 0.0;
    if !positive {
        0
    } else if bytes >= u64::MAX as f64 {
        u64::MAX
    } else {
        bytes.floor() as u64
    }
}