jscpd-rs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CHANGELOG.md +69 -0
  2. package/Cargo.lock +1323 -0
  3. package/Cargo.toml +54 -0
  4. package/LICENSE +21 -0
  5. package/README.md +372 -0
  6. package/docs/api-parity.md +49 -0
  7. package/docs/cloning-plan.md +281 -0
  8. package/docs/compat-baseline.md +535 -0
  9. package/docs/format-porting.md +86 -0
  10. package/docs/junior-task-template.md +62 -0
  11. package/docs/junior-workflow.md +87 -0
  12. package/docs/migrating-from-jscpd.md +193 -0
  13. package/docs/npm-release.md +116 -0
  14. package/docs/public-benchmark-suite.md +81 -0
  15. package/docs/release-checklist.md +200 -0
  16. package/docs/release-decisions.md +103 -0
  17. package/docs/release-readiness.md +51 -0
  18. package/docs/upstream-bugs.md +501 -0
  19. package/docs/upstream-issue-drafts.md +393 -0
  20. package/docs/user-guide.md +309 -0
  21. package/examples/dump_oxc_tokens.rs +112 -0
  22. package/examples/library_api.rs +42 -0
  23. package/npm/bin/jscpd-rs.js +6 -0
  24. package/npm/bin/jscpd-server.js +6 -0
  25. package/npm/lib/run-binary.js +68 -0
  26. package/npm/scripts/postinstall.js +50 -0
  27. package/package.json +53 -0
  28. package/skills/dry-refactoring/SKILL.md +63 -0
  29. package/skills/jscpd/SKILL.md +85 -0
  30. package/src/app.rs +512 -0
  31. package/src/bin/jscpd-server.rs +429 -0
  32. package/src/blame.rs +130 -0
  33. package/src/cli/config.rs +543 -0
  34. package/src/cli/parsing.rs +301 -0
  35. package/src/cli/tests.rs +543 -0
  36. package/src/cli.rs +671 -0
  37. package/src/detector/matching/secondary.rs +387 -0
  38. package/src/detector/matching.rs +274 -0
  39. package/src/detector/model.rs +190 -0
  40. package/src/detector/prepare.rs +71 -0
  41. package/src/detector/skip_local.rs +40 -0
  42. package/src/detector/statistics.rs +138 -0
  43. package/src/detector/store.rs +96 -0
  44. package/src/detector/tests.rs +238 -0
  45. package/src/detector.rs +265 -0
  46. package/src/files/discovery.rs +508 -0
  47. package/src/files/gitignore.rs +203 -0
  48. package/src/files/paths.rs +68 -0
  49. package/src/files/shebang.rs +106 -0
  50. package/src/files/tests.rs +523 -0
  51. package/src/files.rs +25 -0
  52. package/src/formats.rs +570 -0
  53. package/src/lib.rs +433 -0
  54. package/src/main.rs +26 -0
  55. package/src/report/ai.rs +125 -0
  56. package/src/report/badge.rs +238 -0
  57. package/src/report/console.rs +180 -0
  58. package/src/report/console_common.rs +37 -0
  59. package/src/report/console_full.rs +139 -0
  60. package/src/report/csv.rs +65 -0
  61. package/src/report/escape.rs +8 -0
  62. package/src/report/file_output.rs +28 -0
  63. package/src/report/html/assets.rs +47 -0
  64. package/src/report/html.rs +336 -0
  65. package/src/report/json.rs +119 -0
  66. package/src/report/markdown.rs +125 -0
  67. package/src/report/sarif.rs +302 -0
  68. package/src/report/silent.rs +22 -0
  69. package/src/report/source.rs +38 -0
  70. package/src/report/summary.rs +50 -0
  71. package/src/report/test_support.rs +133 -0
  72. package/src/report/threshold.rs +76 -0
  73. package/src/report/xcode.rs +90 -0
  74. package/src/report/xml.rs +119 -0
  75. package/src/report.rs +250 -0
  76. package/src/server/mcp.rs +942 -0
  77. package/src/server.rs +1081 -0
  78. package/src/tokenizer/apex.rs +97 -0
  79. package/src/tokenizer/blocks.rs +532 -0
  80. package/src/tokenizer/embedded.rs +106 -0
  81. package/src/tokenizer/generic.rs +511 -0
  82. package/src/tokenizer/hash.rs +27 -0
  83. package/src/tokenizer/ignore.rs +33 -0
  84. package/src/tokenizer/line_index.rs +33 -0
  85. package/src/tokenizer/markdown.rs +289 -0
  86. package/src/tokenizer/markup_attrs.rs +289 -0
  87. package/src/tokenizer/oxc/fallback.rs +275 -0
  88. package/src/tokenizer/oxc/jsx.rs +168 -0
  89. package/src/tokenizer/oxc/kind.rs +177 -0
  90. package/src/tokenizer/oxc/lexical.rs +67 -0
  91. package/src/tokenizer/oxc.rs +659 -0
  92. package/src/tokenizer/scan.rs +88 -0
  93. package/src/tokenizer/tap.rs +150 -0
  94. package/src/tokenizer/tests.rs +915 -0
  95. package/src/tokenizer.rs +328 -0
  96. package/src/verbose.rs +195 -0
@@ -0,0 +1,88 @@
1
+ #[derive(Clone, Copy)]
2
+ pub(super) struct LineSpan {
3
+ pub start: usize,
4
+ pub end: usize,
5
+ pub next_start: usize,
6
+ }
7
+
8
+ pub(super) fn line_spans(content: &str) -> Vec<LineSpan> {
9
+ let mut spans = Vec::new();
10
+ let mut start = 0usize;
11
+ while start < content.len() {
12
+ let rest = &content[start..];
13
+ let newline = rest.find('\n');
14
+ let end = newline
15
+ .map(|offset| start + offset)
16
+ .unwrap_or(content.len());
17
+ let next_start = newline.map(|offset| start + offset + 1).unwrap_or(end);
18
+ spans.push(LineSpan {
19
+ start,
20
+ end,
21
+ next_start,
22
+ });
23
+ start = next_start;
24
+ }
25
+ spans
26
+ }
27
+
28
+ pub(super) fn scan_line_comment(bytes: &[u8], start: usize, limit: usize) -> usize {
29
+ let mut idx = start + 2;
30
+ while idx < limit && bytes[idx] != b'\n' {
31
+ idx += 1;
32
+ }
33
+ idx
34
+ }
35
+
36
+ pub(super) fn scan_block_comment(bytes: &[u8], start: usize, limit: usize) -> usize {
37
+ let mut idx = start + 2;
38
+ while idx + 1 < limit {
39
+ if bytes[idx] == b'*' && bytes[idx + 1] == b'/' {
40
+ return idx + 2;
41
+ }
42
+ idx += 1;
43
+ }
44
+ limit
45
+ }
46
+
47
+ pub(super) fn has_code_in_gap(content: &str, start: usize, end: usize) -> bool {
48
+ let bytes = content.as_bytes();
49
+ let mut idx = start;
50
+ while idx < end {
51
+ let ch = content[idx..].chars().next().unwrap_or('\0');
52
+ if ch.is_whitespace() {
53
+ idx += ch.len_utf8();
54
+ } else if idx + 1 < end && bytes[idx] == b'/' && bytes[idx + 1] == b'/' {
55
+ idx = scan_line_comment(bytes, idx, end);
56
+ } else if idx + 1 < end && bytes[idx] == b'/' && bytes[idx + 1] == b'*' {
57
+ idx = scan_block_comment(bytes, idx, end);
58
+ } else {
59
+ return true;
60
+ }
61
+ }
62
+ false
63
+ }
64
+
65
+ pub(super) fn count_prism_whitespace_tokens(content: &str, start: usize, end: usize) -> usize {
66
+ let bytes = content.as_bytes();
67
+ let mut idx = start;
68
+ let mut count = 0usize;
69
+
70
+ while idx < end {
71
+ match bytes[idx] {
72
+ b'\n' => {
73
+ count += 1;
74
+ idx += 1;
75
+ }
76
+ b' ' | b'\t' | b'\r' | b'\x0c' | b'\x0b' => {
77
+ count += 1;
78
+ idx += 1;
79
+ while idx < end && matches!(bytes[idx], b' ' | b'\t' | b'\r' | b'\x0c' | b'\x0b') {
80
+ idx += 1;
81
+ }
82
+ }
83
+ _ => idx += 1,
84
+ }
85
+ }
86
+
87
+ count
88
+ }
@@ -0,0 +1,150 @@
1
+ use crate::cli::Options;
2
+
3
+ use super::embedded::{assign_sequential_positions, blank_ranges_preserve_newlines, offset_tokens};
4
+ use super::scan::line_spans;
5
+ use super::{
6
+ ByteSpan, DetectionToken, LineIndex, TokenContext, TokenKind, TokenMap, find_ignore_regions,
7
+ push_token, tokenize_generic,
8
+ };
9
+
10
+ pub(super) fn tokenize_maps(
11
+ content: &str,
12
+ options: &Options,
13
+ ignore_regions: &[[usize; 2]],
14
+ ) -> Vec<TokenMap> {
15
+ let blocks = tap_yaml_blocks(content);
16
+ let mut maps = Vec::new();
17
+ let sanitized = blank_ranges_preserve_newlines(
18
+ content,
19
+ blocks
20
+ .iter()
21
+ .map(|block| [block.start, block.end])
22
+ .collect::<Vec<_>>()
23
+ .as_slice(),
24
+ );
25
+ let tap_tokens = tokenize_tap_outer(&sanitized, options, ignore_regions);
26
+ if !tap_tokens.is_empty() {
27
+ maps.push(TokenMap {
28
+ format: "tap".to_string(),
29
+ tokens: tap_tokens,
30
+ positions_assigned: false,
31
+ });
32
+ }
33
+
34
+ let line_index = LineIndex::new(content);
35
+ let mut yaml_tokens = Vec::<DetectionToken>::new();
36
+ for block in blocks {
37
+ let inner = &content[block.start..block.end];
38
+ let inner_ignore_regions = find_ignore_regions(inner, options);
39
+ let mut tokens = tokenize_generic(inner, "yaml", options, &inner_ignore_regions);
40
+ let start = line_index.location(block.start);
41
+ offset_tokens(&mut tokens, block.start, &start);
42
+ yaml_tokens.extend(tokens);
43
+ }
44
+ if !yaml_tokens.is_empty() {
45
+ assign_sequential_positions(&mut yaml_tokens);
46
+ maps.push(TokenMap {
47
+ format: "yaml".to_string(),
48
+ tokens: yaml_tokens,
49
+ positions_assigned: true,
50
+ });
51
+ }
52
+
53
+ maps
54
+ }
55
+
56
+ fn tokenize_tap_outer(
57
+ content: &str,
58
+ options: &Options,
59
+ ignore_regions: &[[usize; 2]],
60
+ ) -> Vec<DetectionToken> {
61
+ let context = TokenContext {
62
+ content,
63
+ options,
64
+ ignore_regions,
65
+ };
66
+ let line_index = LineIndex::new(content);
67
+ let mut tokens = Vec::new();
68
+
69
+ for span in line_spans(content) {
70
+ let line = &content[span.start..span.end];
71
+ let Some(start_offset) = first_non_whitespace(line) else {
72
+ continue;
73
+ };
74
+ let end_offset = trim_line_end(line);
75
+ if start_offset >= end_offset {
76
+ continue;
77
+ }
78
+ let start = span.start + start_offset;
79
+ let end = span.start + end_offset;
80
+ push_token(
81
+ &mut tokens,
82
+ &context,
83
+ TokenKind::Default,
84
+ ByteSpan { start, end },
85
+ line_index.location(start),
86
+ line_index.location(end),
87
+ );
88
+ }
89
+
90
+ tokens
91
+ }
92
+
93
+ #[derive(Clone, Copy)]
94
+ struct TapYamlBlock {
95
+ start: usize,
96
+ end: usize,
97
+ }
98
+
99
+ fn tap_yaml_blocks(content: &str) -> Vec<TapYamlBlock> {
100
+ let lines = line_spans(content);
101
+ let mut blocks = Vec::new();
102
+ let mut idx = 0usize;
103
+
104
+ while idx < lines.len() {
105
+ let span = lines[idx];
106
+ let line = &content[span.start..span.end];
107
+ let Some(open_start) = tap_yaml_marker_start(line, "---") else {
108
+ idx += 1;
109
+ continue;
110
+ };
111
+ let Some(close_idx) = lines[idx + 1..]
112
+ .iter()
113
+ .position(|span| tap_yaml_marker_start(&content[span.start..span.end], "...").is_some())
114
+ .map(|position| idx + 1 + position)
115
+ else {
116
+ idx += 1;
117
+ continue;
118
+ };
119
+ let close_span = lines[close_idx];
120
+ let close_line = &content[close_span.start..close_span.end];
121
+ let close_start = tap_yaml_marker_start(close_line, "...").unwrap_or(0);
122
+
123
+ blocks.push(TapYamlBlock {
124
+ start: span.start + open_start,
125
+ end: close_span.start + close_start + "...".len(),
126
+ });
127
+ idx = close_idx + 1;
128
+ }
129
+
130
+ blocks
131
+ }
132
+
133
/// Returns the byte offset at which `marker` begins when `line` consists of
/// exactly `marker` surrounded only by spaces and tabs, and `None` otherwise.
///
/// Only spaces and tabs are stripped, so any other surrounding character
/// (a trailing `\r` included) makes the line a non-match.
fn tap_yaml_marker_start(line: &str, marker: &str) -> Option<usize> {
    let body = line.trim_start_matches([' ', '\t']);
    let indent = line.len() - body.len();
    if body.trim_end_matches([' ', '\t']) == marker {
        Some(indent)
    } else {
        None
    }
}
140
+
141
/// Returns the byte offset of the first character in `line` that is neither
/// a space nor a tab, or `None` when the line holds nothing else.
fn first_non_whitespace(line: &str) -> Option<usize> {
    line.find(|ch: char| ch != ' ' && ch != '\t')
}
144
+
145
/// Returns the length of `line` in bytes once trailing spaces and tabs are
/// dropped; zero when the line contains nothing but spaces and tabs.
fn trim_line_end(line: &str) -> usize {
    line.trim_end_matches([' ', '\t']).len()
}