jscpd-rs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +69 -0
- package/Cargo.lock +1323 -0
- package/Cargo.toml +54 -0
- package/LICENSE +21 -0
- package/README.md +372 -0
- package/docs/api-parity.md +49 -0
- package/docs/cloning-plan.md +281 -0
- package/docs/compat-baseline.md +535 -0
- package/docs/format-porting.md +86 -0
- package/docs/junior-task-template.md +62 -0
- package/docs/junior-workflow.md +87 -0
- package/docs/migrating-from-jscpd.md +193 -0
- package/docs/npm-release.md +116 -0
- package/docs/public-benchmark-suite.md +81 -0
- package/docs/release-checklist.md +200 -0
- package/docs/release-decisions.md +103 -0
- package/docs/release-readiness.md +51 -0
- package/docs/upstream-bugs.md +501 -0
- package/docs/upstream-issue-drafts.md +393 -0
- package/docs/user-guide.md +309 -0
- package/examples/dump_oxc_tokens.rs +112 -0
- package/examples/library_api.rs +42 -0
- package/npm/bin/jscpd-rs.js +6 -0
- package/npm/bin/jscpd-server.js +6 -0
- package/npm/lib/run-binary.js +68 -0
- package/npm/scripts/postinstall.js +50 -0
- package/package.json +53 -0
- package/skills/dry-refactoring/SKILL.md +63 -0
- package/skills/jscpd/SKILL.md +85 -0
- package/src/app.rs +512 -0
- package/src/bin/jscpd-server.rs +429 -0
- package/src/blame.rs +130 -0
- package/src/cli/config.rs +543 -0
- package/src/cli/parsing.rs +301 -0
- package/src/cli/tests.rs +543 -0
- package/src/cli.rs +671 -0
- package/src/detector/matching/secondary.rs +387 -0
- package/src/detector/matching.rs +274 -0
- package/src/detector/model.rs +190 -0
- package/src/detector/prepare.rs +71 -0
- package/src/detector/skip_local.rs +40 -0
- package/src/detector/statistics.rs +138 -0
- package/src/detector/store.rs +96 -0
- package/src/detector/tests.rs +238 -0
- package/src/detector.rs +265 -0
- package/src/files/discovery.rs +508 -0
- package/src/files/gitignore.rs +203 -0
- package/src/files/paths.rs +68 -0
- package/src/files/shebang.rs +106 -0
- package/src/files/tests.rs +523 -0
- package/src/files.rs +25 -0
- package/src/formats.rs +570 -0
- package/src/lib.rs +433 -0
- package/src/main.rs +26 -0
- package/src/report/ai.rs +125 -0
- package/src/report/badge.rs +238 -0
- package/src/report/console.rs +180 -0
- package/src/report/console_common.rs +37 -0
- package/src/report/console_full.rs +139 -0
- package/src/report/csv.rs +65 -0
- package/src/report/escape.rs +8 -0
- package/src/report/file_output.rs +28 -0
- package/src/report/html/assets.rs +47 -0
- package/src/report/html.rs +336 -0
- package/src/report/json.rs +119 -0
- package/src/report/markdown.rs +125 -0
- package/src/report/sarif.rs +302 -0
- package/src/report/silent.rs +22 -0
- package/src/report/source.rs +38 -0
- package/src/report/summary.rs +50 -0
- package/src/report/test_support.rs +133 -0
- package/src/report/threshold.rs +76 -0
- package/src/report/xcode.rs +90 -0
- package/src/report/xml.rs +119 -0
- package/src/report.rs +250 -0
- package/src/server/mcp.rs +942 -0
- package/src/server.rs +1081 -0
- package/src/tokenizer/apex.rs +97 -0
- package/src/tokenizer/blocks.rs +532 -0
- package/src/tokenizer/embedded.rs +106 -0
- package/src/tokenizer/generic.rs +511 -0
- package/src/tokenizer/hash.rs +27 -0
- package/src/tokenizer/ignore.rs +33 -0
- package/src/tokenizer/line_index.rs +33 -0
- package/src/tokenizer/markdown.rs +289 -0
- package/src/tokenizer/markup_attrs.rs +289 -0
- package/src/tokenizer/oxc/fallback.rs +275 -0
- package/src/tokenizer/oxc/jsx.rs +168 -0
- package/src/tokenizer/oxc/kind.rs +177 -0
- package/src/tokenizer/oxc/lexical.rs +67 -0
- package/src/tokenizer/oxc.rs +659 -0
- package/src/tokenizer/scan.rs +88 -0
- package/src/tokenizer/tap.rs +150 -0
- package/src/tokenizer/tests.rs +915 -0
- package/src/tokenizer.rs +328 -0
- package/src/verbose.rs +195 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
use super::super::scan::{scan_block_comment, scan_line_comment};
|
|
2
|
+
use super::super::{
|
|
3
|
+
ByteSpan, DetectionToken, LineIndex, TokenContext, TokenKind, push_strict_whitespace_tokens,
|
|
4
|
+
push_token,
|
|
5
|
+
};
|
|
6
|
+
use super::lexical::{is_js_constant, is_js_keyword};
|
|
7
|
+
use super::push_line_comment_tokens;
|
|
8
|
+
use super::scan_regex_literal_end;
|
|
9
|
+
|
|
10
|
+
pub(super) fn tokenize_js_like_range(
|
|
11
|
+
tokens: &mut Vec<DetectionToken>,
|
|
12
|
+
context: &TokenContext<'_>,
|
|
13
|
+
range_start: usize,
|
|
14
|
+
range_end: usize,
|
|
15
|
+
line_index: &LineIndex,
|
|
16
|
+
) {
|
|
17
|
+
let bytes = context.content.as_bytes();
|
|
18
|
+
let mut idx = range_start;
|
|
19
|
+
|
|
20
|
+
while idx < range_end {
|
|
21
|
+
let ch = context.content[idx..].chars().next().unwrap_or('\0');
|
|
22
|
+
if ch.is_whitespace() {
|
|
23
|
+
let whitespace_end = scan_whitespace(context.content, idx, range_end);
|
|
24
|
+
push_strict_whitespace_tokens(
|
|
25
|
+
tokens,
|
|
26
|
+
context,
|
|
27
|
+
ByteSpan {
|
|
28
|
+
start: idx,
|
|
29
|
+
end: whitespace_end,
|
|
30
|
+
},
|
|
31
|
+
line_index,
|
|
32
|
+
);
|
|
33
|
+
idx = whitespace_end.max(idx + ch.len_utf8());
|
|
34
|
+
continue;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
if idx + 1 < range_end && bytes[idx] == b'/' && bytes[idx + 1] == b'/' {
|
|
38
|
+
let end = scan_line_comment(bytes, idx, range_end);
|
|
39
|
+
if context.options.mode != crate::cli::Mode::Weak {
|
|
40
|
+
push_line_comment_tokens(tokens, context, ByteSpan { start: idx, end }, line_index);
|
|
41
|
+
}
|
|
42
|
+
idx = end.max(idx + 1);
|
|
43
|
+
continue;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
let (end, kind) = if idx + 1 < range_end && bytes[idx] == b'/' && bytes[idx + 1] == b'*' {
|
|
47
|
+
(
|
|
48
|
+
scan_block_comment(bytes, idx, range_end),
|
|
49
|
+
TokenKind::Comment,
|
|
50
|
+
)
|
|
51
|
+
} else if bytes[idx] == b'/' {
|
|
52
|
+
if let Some(end) = scan_regex_literal_end(context.content, idx, range_end) {
|
|
53
|
+
(end, TokenKind::String)
|
|
54
|
+
} else {
|
|
55
|
+
scan_operator_or_punctuation(bytes, idx, range_end)
|
|
56
|
+
}
|
|
57
|
+
} else if bytes[idx] == b'`' {
|
|
58
|
+
(
|
|
59
|
+
scan_template_literal(bytes, idx, range_end).unwrap_or(range_end),
|
|
60
|
+
TokenKind::String,
|
|
61
|
+
)
|
|
62
|
+
} else if matches!(bytes[idx], b'\'' | b'"') {
|
|
63
|
+
if let Some(end) = scan_closed_string(bytes, idx, bytes[idx], range_end) {
|
|
64
|
+
(end, TokenKind::String)
|
|
65
|
+
} else {
|
|
66
|
+
(
|
|
67
|
+
scan_unclosed_quote_fragment(context.content, idx, range_end),
|
|
68
|
+
TokenKind::Default,
|
|
69
|
+
)
|
|
70
|
+
}
|
|
71
|
+
} else if is_identifier_start(ch) {
|
|
72
|
+
let end = scan_identifier(context.content, idx, range_end);
|
|
73
|
+
let value = &context.content[idx..end];
|
|
74
|
+
let kind = if is_js_constant(value) {
|
|
75
|
+
TokenKind::Constant
|
|
76
|
+
} else if is_js_keyword(value) {
|
|
77
|
+
TokenKind::Keyword
|
|
78
|
+
} else {
|
|
79
|
+
TokenKind::Default
|
|
80
|
+
};
|
|
81
|
+
(end, kind)
|
|
82
|
+
} else if bytes[idx].is_ascii_digit() {
|
|
83
|
+
(scan_number(bytes, idx, range_end), TokenKind::Number)
|
|
84
|
+
} else {
|
|
85
|
+
scan_operator_or_punctuation(bytes, idx, range_end)
|
|
86
|
+
};
|
|
87
|
+
|
|
88
|
+
push_token(
|
|
89
|
+
tokens,
|
|
90
|
+
context,
|
|
91
|
+
kind,
|
|
92
|
+
ByteSpan { start: idx, end },
|
|
93
|
+
line_index.location(idx),
|
|
94
|
+
line_index.location(end),
|
|
95
|
+
);
|
|
96
|
+
idx = end.max(idx + 1);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
/// Scans a quoted string literal whose opening `quote` byte is at `start`.
///
/// Returns `Some(end)` with `end` one past the closing quote, or `None` when
/// an unescaped line break is hit or `limit` is reached before the literal
/// closes.
///
/// A backslash escapes the byte that follows it. A backslash followed by
/// `\r\n` is treated as a single line continuation, so strings continued
/// across CRLF line endings are still recognised as closed.
fn scan_closed_string(bytes: &[u8], start: usize, quote: u8, limit: usize) -> Option<usize> {
    let mut idx = start + 1;
    while idx < limit {
        match bytes[idx] {
            b'\\' => {
                // `\` + CRLF is one escape: skip all three bytes so the `\n`
                // is not mistaken for a raw line break.
                let escaped_crlf =
                    idx + 2 < limit && bytes[idx + 1] == b'\r' && bytes[idx + 2] == b'\n';
                let skip = if escaped_crlf { 3 } else { 2 };
                idx = (idx + skip).min(limit);
            }
            // A raw line break ends the scan: the literal is unclosed.
            b'\n' | b'\r' => return None,
            byte if byte == quote => return Some(idx + 1),
            _ => idx += 1,
        }
    }
    None
}
|
|
117
|
+
|
|
118
|
+
/// Scans a template literal whose opening backtick is at `start`.
///
/// Returns `Some(end)` with `end` one past the first unescaped backtick found
/// before `limit`, or `None` if there is none. Line breaks do not end the
/// scan, and a backslash skips the byte that follows it.
fn scan_template_literal(bytes: &[u8], start: usize, limit: usize) -> Option<usize> {
    let mut cursor = start + 1;
    while cursor < limit {
        match bytes[cursor] {
            b'\\' => cursor = (cursor + 2).min(limit),
            b'`' => return Some(cursor + 1),
            _ => cursor += 1,
        }
    }
    None
}
|
|
132
|
+
|
|
133
|
+
fn scan_unclosed_quote_fragment(content: &str, start: usize, limit: usize) -> usize {
|
|
134
|
+
let bytes = content.as_bytes();
|
|
135
|
+
let mut idx = start + 1;
|
|
136
|
+
while idx < limit {
|
|
137
|
+
let ch = content[idx..].chars().next().unwrap_or('\0');
|
|
138
|
+
if ch.is_whitespace() || is_js_text_delimiter(bytes[idx]) {
|
|
139
|
+
break;
|
|
140
|
+
}
|
|
141
|
+
idx += ch.len_utf8();
|
|
142
|
+
}
|
|
143
|
+
idx
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/// Returns the byte offset of the first non-whitespace character at or after
/// `start`, stopping once the offset reaches `limit`.
///
/// Characters are consumed whole, so the result always lies on a character
/// boundary.
fn scan_whitespace(content: &str, start: usize, limit: usize) -> usize {
    content[start..]
        .char_indices()
        .map(|(offset, ch)| (start + offset, ch))
        .find(|&(position, ch)| position >= limit || !ch.is_whitespace())
        .map_or(content.len(), |(position, _)| position)
}
|
|
157
|
+
|
|
158
|
+
fn scan_identifier(content: &str, start: usize, limit: usize) -> usize {
|
|
159
|
+
let mut idx = start;
|
|
160
|
+
while idx < limit {
|
|
161
|
+
let ch = content[idx..].chars().next().unwrap_or('\0');
|
|
162
|
+
if !is_identifier_continue(ch) {
|
|
163
|
+
break;
|
|
164
|
+
}
|
|
165
|
+
idx += ch.len_utf8();
|
|
166
|
+
}
|
|
167
|
+
idx
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/// Scans a numeric literal that begins at `start` and returns the offset one
/// past its last byte (never beyond `limit`).
///
/// ASCII letters and digits, `.` and `_` always continue the literal. That
/// covers decimal, radix-prefixed and BigInt forms as well as numeric
/// separators.
///
/// A `+` or `-` continues the literal only directly after an exponent marker
/// (`e`/`E`) in a literal without a `0x`/`0o`/`0b` prefix. This keeps
/// `1e-5` as one token while `1+2`, `42-y` and `0xE-1` stop at the sign, so
/// the sign is left for the operator scanner.
fn scan_number(bytes: &[u8], start: usize, limit: usize) -> usize {
    // In a radix-prefixed literal `e`/`E` is a digit (hex) or invalid, never
    // an exponent marker.
    let has_radix_prefix = start + 1 < limit
        && bytes[start] == b'0'
        && matches!(bytes[start + 1], b'x' | b'X' | b'o' | b'O' | b'b' | b'B');

    let mut idx = start;
    while idx < limit {
        let byte = bytes[idx];
        let continues_literal = match byte {
            b'.' | b'_' => true,
            b'+' | b'-' => {
                !has_radix_prefix && idx > start && matches!(bytes[idx - 1], b'e' | b'E')
            }
            other => other.is_ascii_alphanumeric(),
        };
        if !continues_literal {
            break;
        }
        idx += 1;
    }
    idx
}
|
|
179
|
+
|
|
180
|
+
fn scan_operator_or_punctuation(bytes: &[u8], start: usize, limit: usize) -> (usize, TokenKind) {
|
|
181
|
+
const OPERATORS: &[&[u8]] = &[
|
|
182
|
+
b">>>=", b"===", b"!==", b">>>", b"<<=", b">>=", b"**=", b"=>", b"==", b"!=", b"<=", b">=",
|
|
183
|
+
b"++", b"--", b"&&", b"||", b"??", b"?.", b"...", b"+=", b"-=", b"*=", b"/=", b"%=", b"&=",
|
|
184
|
+
b"|=", b"^=", b"<<", b">>", b"**",
|
|
185
|
+
];
|
|
186
|
+
for operator in OPERATORS {
|
|
187
|
+
if bytes[start..limit].starts_with(operator) {
|
|
188
|
+
return (start + operator.len(), TokenKind::Operator);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
let kind = if matches!(
|
|
192
|
+
bytes[start],
|
|
193
|
+
b'{' | b'}' | b'[' | b']' | b'(' | b')' | b';' | b',' | b':' | b'.'
|
|
194
|
+
) {
|
|
195
|
+
TokenKind::Punctuation
|
|
196
|
+
} else {
|
|
197
|
+
TokenKind::Operator
|
|
198
|
+
};
|
|
199
|
+
(start + 1, kind)
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
/// Returns `true` when `byte` is one of the ASCII bracket, separator or
/// operator characters that terminate a run of plain text.
fn is_js_text_delimiter(byte: u8) -> bool {
    const DELIMITERS: &[u8] = b"{}[]();,:.<>=+-*/%&|^!?~";
    DELIMITERS.contains(&byte)
}
|
|
230
|
+
|
|
231
|
+
/// Returns `true` when `ch` may begin an identifier: `_`, `$`, an ASCII
/// letter, or any non-ASCII character.
fn is_identifier_start(ch: char) -> bool {
    matches!(ch, '_' | '$' | 'a'..='z' | 'A'..='Z') || !ch.is_ascii()
}
|
|
234
|
+
|
|
235
|
+
/// Returns `true` when `ch` may appear inside an identifier: `_`, `$`, an
/// ASCII letter or digit, or any non-ASCII character.
fn is_identifier_continue(ch: char) -> bool {
    if ch.is_ascii() {
        ch.is_ascii_alphanumeric() || matches!(ch, '_' | '$')
    } else {
        true
    }
}
|
|
238
|
+
|
|
239
|
+
#[cfg(test)]
mod tests {
    use crate::cli::Options;
    use crate::tokenizer::hash_token;

    use super::*;

    /// A template literal that spans several lines must be emitted as one
    /// string token covering every line it touches.
    #[test]
    fn multiline_template_literals_are_single_string_tokens() {
        let content = "expect(store).toMatchInlineSnapshot(`\n [root]\n`);\n";
        let options = Options::default();
        let line_index = LineIndex::new(content);
        let context = TokenContext {
            content,
            options: &options,
            ignore_regions: &[],
        };

        let mut tokens = Vec::new();
        tokenize_js_like_range(&mut tokens, &context, 0, content.len(), &line_index);

        // Source text covered by a token.
        let text_of = |token: &DetectionToken| &content[token.range[0]..token.range[1]];

        let literal = tokens
            .iter()
            .find(|token| text_of(token).starts_with('`'))
            .expect("template token");

        assert_eq!(text_of(literal), "`\n [root]\n`");
        assert_eq!(literal.start.line, 1);
        assert_eq!(literal.end.line, 3);
        assert_eq!(
            literal.hash,
            hash_token(TokenKind::String, "`\n [root]\n`", false)
        );
    }
}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
use oxc_parser::Kind;
|
|
2
|
+
|
|
3
|
+
use super::super::scan::count_prism_whitespace_tokens;
|
|
4
|
+
use super::super::{ByteSpan, DetectionToken, LineIndex, TokenContext, TokenKind};
|
|
5
|
+
use super::{RawOxcToken, push_oxc_token, push_token_part};
|
|
6
|
+
|
|
7
|
+
pub(super) fn tokenize_jsx_attribute_scripts(
|
|
8
|
+
parser_tokens: &[RawOxcToken],
|
|
9
|
+
groups: &[(usize, usize)],
|
|
10
|
+
context: &TokenContext<'_>,
|
|
11
|
+
line_index: &LineIndex,
|
|
12
|
+
) -> Vec<DetectionToken> {
|
|
13
|
+
let mut tokens = Vec::new();
|
|
14
|
+
let mut next_position = 0usize;
|
|
15
|
+
let mut previous_group_end = None;
|
|
16
|
+
|
|
17
|
+
for &(group_start_idx, group_end_idx) in groups {
|
|
18
|
+
let group_start = parser_tokens[group_start_idx].span.start;
|
|
19
|
+
if let Some(previous_end) = previous_group_end {
|
|
20
|
+
next_position += count_embedded_gap_positions(
|
|
21
|
+
context.content,
|
|
22
|
+
parser_tokens,
|
|
23
|
+
previous_end,
|
|
24
|
+
group_start,
|
|
25
|
+
);
|
|
26
|
+
}
|
|
27
|
+
let mut expression_depth = 0usize;
|
|
28
|
+
let mut previous_token_end = None;
|
|
29
|
+
for raw in &parser_tokens[group_start_idx..=group_end_idx] {
|
|
30
|
+
let before = tokens.len();
|
|
31
|
+
// Prism keeps default whitespace string tokens inside nested JSX
|
|
32
|
+
// script objects, and those tokens can decide minTokens windows.
|
|
33
|
+
if expression_depth >= 2
|
|
34
|
+
&& let Some(gap_start) = previous_token_end
|
|
35
|
+
{
|
|
36
|
+
push_embedded_default_gap(
|
|
37
|
+
&mut tokens,
|
|
38
|
+
context,
|
|
39
|
+
gap_start,
|
|
40
|
+
raw.span.start,
|
|
41
|
+
line_index,
|
|
42
|
+
);
|
|
43
|
+
}
|
|
44
|
+
push_oxc_token(&mut tokens, context, raw.kind, raw.span, line_index);
|
|
45
|
+
for pushed in &mut tokens[before..] {
|
|
46
|
+
pushed.start.position = next_position;
|
|
47
|
+
pushed.end.position = next_position;
|
|
48
|
+
next_position += 1;
|
|
49
|
+
}
|
|
50
|
+
match raw.kind {
|
|
51
|
+
Kind::LCurly => expression_depth += 1,
|
|
52
|
+
Kind::RCurly => expression_depth = expression_depth.saturating_sub(1),
|
|
53
|
+
_ => {}
|
|
54
|
+
}
|
|
55
|
+
previous_token_end = Some(raw.span.end);
|
|
56
|
+
}
|
|
57
|
+
previous_group_end = Some(parser_tokens[group_end_idx].span.end);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
tokens
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
fn push_embedded_default_gap(
|
|
64
|
+
tokens: &mut Vec<DetectionToken>,
|
|
65
|
+
context: &TokenContext<'_>,
|
|
66
|
+
gap_start: usize,
|
|
67
|
+
gap_end: usize,
|
|
68
|
+
line_index: &LineIndex,
|
|
69
|
+
) {
|
|
70
|
+
if gap_start >= gap_end {
|
|
71
|
+
return;
|
|
72
|
+
}
|
|
73
|
+
if !context.content[gap_start..gap_end]
|
|
74
|
+
.chars()
|
|
75
|
+
.all(char::is_whitespace)
|
|
76
|
+
{
|
|
77
|
+
return;
|
|
78
|
+
}
|
|
79
|
+
push_token_part(
|
|
80
|
+
tokens,
|
|
81
|
+
context,
|
|
82
|
+
TokenKind::Default,
|
|
83
|
+
ByteSpan {
|
|
84
|
+
start: gap_start,
|
|
85
|
+
end: gap_end,
|
|
86
|
+
},
|
|
87
|
+
line_index,
|
|
88
|
+
);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
pub(super) fn jsx_attribute_script_groups(parser_tokens: &[RawOxcToken]) -> Vec<(usize, usize)> {
|
|
92
|
+
let mut groups = Vec::new();
|
|
93
|
+
let mut in_jsx_tag = false;
|
|
94
|
+
let mut idx = 0usize;
|
|
95
|
+
|
|
96
|
+
while idx < parser_tokens.len() {
|
|
97
|
+
let token = parser_tokens[idx];
|
|
98
|
+
if !in_jsx_tag && token.kind == Kind::LAngle && looks_like_jsx_tag_start(parser_tokens, idx)
|
|
99
|
+
{
|
|
100
|
+
in_jsx_tag = true;
|
|
101
|
+
idx += 1;
|
|
102
|
+
continue;
|
|
103
|
+
}
|
|
104
|
+
if in_jsx_tag && token.kind == Kind::RAngle {
|
|
105
|
+
in_jsx_tag = false;
|
|
106
|
+
idx += 1;
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
109
|
+
if in_jsx_tag
|
|
110
|
+
&& token.kind == Kind::Eq
|
|
111
|
+
&& parser_tokens
|
|
112
|
+
.get(idx + 1)
|
|
113
|
+
.is_some_and(|next| next.kind == Kind::LCurly)
|
|
114
|
+
&& let Some(group_end_idx) = jsx_attribute_expression_end(parser_tokens, idx + 1)
|
|
115
|
+
{
|
|
116
|
+
groups.push((idx, group_end_idx));
|
|
117
|
+
idx = group_end_idx + 1;
|
|
118
|
+
continue;
|
|
119
|
+
}
|
|
120
|
+
idx += 1;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
groups
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
fn looks_like_jsx_tag_start(parser_tokens: &[RawOxcToken], idx: usize) -> bool {
|
|
127
|
+
matches!(
|
|
128
|
+
parser_tokens.get(idx + 1).map(|token| token.kind),
|
|
129
|
+
Some(Kind::Ident) | Some(Kind::This) | Some(Kind::PrivateIdentifier)
|
|
130
|
+
) || matches!(
|
|
131
|
+
(
|
|
132
|
+
parser_tokens.get(idx + 1).map(|token| token.kind),
|
|
133
|
+
parser_tokens.get(idx + 2).map(|token| token.kind),
|
|
134
|
+
),
|
|
135
|
+
(Some(Kind::Slash), Some(Kind::Ident))
|
|
136
|
+
)
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
fn jsx_attribute_expression_end(parser_tokens: &[RawOxcToken], lcurly_idx: usize) -> Option<usize> {
|
|
140
|
+
let mut depth = 0usize;
|
|
141
|
+
for (idx, token) in parser_tokens.iter().enumerate().skip(lcurly_idx) {
|
|
142
|
+
match token.kind {
|
|
143
|
+
Kind::LCurly => depth += 1,
|
|
144
|
+
Kind::RCurly => {
|
|
145
|
+
depth = depth.saturating_sub(1);
|
|
146
|
+
if depth == 0 {
|
|
147
|
+
return Some(idx);
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
_ => {}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
None
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
fn count_embedded_gap_positions(
|
|
157
|
+
content: &str,
|
|
158
|
+
parser_tokens: &[RawOxcToken],
|
|
159
|
+
gap_start: usize,
|
|
160
|
+
gap_end: usize,
|
|
161
|
+
) -> usize {
|
|
162
|
+
count_prism_whitespace_tokens(content, gap_start, gap_end)
|
|
163
|
+
+ parser_tokens
|
|
164
|
+
.iter()
|
|
165
|
+
.filter(|token| token.span.start >= gap_start && token.span.end <= gap_end)
|
|
166
|
+
.filter(|token| token.kind != Kind::Skip)
|
|
167
|
+
.count()
|
|
168
|
+
}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
use oxc_parser::Kind;
|
|
2
|
+
|
|
3
|
+
use super::super::TokenKind;
|
|
4
|
+
use super::lexical::is_js_constant;
|
|
5
|
+
|
|
6
|
+
pub(super) fn oxc_token_kind(kind: Kind, value: &str) -> TokenKind {
|
|
7
|
+
if kind == Kind::Ident && is_js_constant(value) {
|
|
8
|
+
TokenKind::Constant
|
|
9
|
+
} else {
|
|
10
|
+
token_kind_for_oxc(kind)
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
fn token_kind_for_oxc(kind: Kind) -> TokenKind {
|
|
15
|
+
if kind.is_number() {
|
|
16
|
+
return TokenKind::Number;
|
|
17
|
+
}
|
|
18
|
+
if matches!(
|
|
19
|
+
kind,
|
|
20
|
+
Kind::Str
|
|
21
|
+
| Kind::NoSubstitutionTemplate
|
|
22
|
+
| Kind::TemplateHead
|
|
23
|
+
| Kind::TemplateMiddle
|
|
24
|
+
| Kind::TemplateTail
|
|
25
|
+
| Kind::RegExp
|
|
26
|
+
) {
|
|
27
|
+
return TokenKind::String;
|
|
28
|
+
}
|
|
29
|
+
if is_oxc_keyword(kind) {
|
|
30
|
+
return TokenKind::Keyword;
|
|
31
|
+
}
|
|
32
|
+
if is_oxc_punctuation(kind) {
|
|
33
|
+
return TokenKind::Punctuation;
|
|
34
|
+
}
|
|
35
|
+
if is_oxc_operator(kind) {
|
|
36
|
+
return TokenKind::Operator;
|
|
37
|
+
}
|
|
38
|
+
TokenKind::Default
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
fn is_oxc_keyword(kind: Kind) -> bool {
|
|
42
|
+
matches!(
|
|
43
|
+
kind,
|
|
44
|
+
Kind::Await
|
|
45
|
+
| Kind::Break
|
|
46
|
+
| Kind::Case
|
|
47
|
+
| Kind::Catch
|
|
48
|
+
| Kind::Class
|
|
49
|
+
| Kind::Const
|
|
50
|
+
| Kind::Continue
|
|
51
|
+
| Kind::Debugger
|
|
52
|
+
| Kind::Default
|
|
53
|
+
| Kind::Delete
|
|
54
|
+
| Kind::Do
|
|
55
|
+
| Kind::Else
|
|
56
|
+
| Kind::Enum
|
|
57
|
+
| Kind::Export
|
|
58
|
+
| Kind::Extends
|
|
59
|
+
| Kind::Finally
|
|
60
|
+
| Kind::For
|
|
61
|
+
| Kind::Function
|
|
62
|
+
| Kind::If
|
|
63
|
+
| Kind::Import
|
|
64
|
+
| Kind::In
|
|
65
|
+
| Kind::Instanceof
|
|
66
|
+
| Kind::New
|
|
67
|
+
| Kind::Return
|
|
68
|
+
| Kind::Super
|
|
69
|
+
| Kind::Switch
|
|
70
|
+
| Kind::This
|
|
71
|
+
| Kind::Throw
|
|
72
|
+
| Kind::Try
|
|
73
|
+
| Kind::Typeof
|
|
74
|
+
| Kind::Var
|
|
75
|
+
| Kind::Void
|
|
76
|
+
| Kind::While
|
|
77
|
+
| Kind::With
|
|
78
|
+
| Kind::Async
|
|
79
|
+
| Kind::From
|
|
80
|
+
| Kind::Get
|
|
81
|
+
| Kind::Of
|
|
82
|
+
| Kind::Set
|
|
83
|
+
| Kind::As
|
|
84
|
+
| Kind::Type
|
|
85
|
+
| Kind::Undefined
|
|
86
|
+
| Kind::Implements
|
|
87
|
+
| Kind::Interface
|
|
88
|
+
| Kind::Let
|
|
89
|
+
| Kind::Package
|
|
90
|
+
| Kind::Private
|
|
91
|
+
| Kind::Protected
|
|
92
|
+
| Kind::Public
|
|
93
|
+
| Kind::Static
|
|
94
|
+
| Kind::Yield
|
|
95
|
+
| Kind::True
|
|
96
|
+
| Kind::False
|
|
97
|
+
| Kind::Null
|
|
98
|
+
)
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
fn is_oxc_punctuation(kind: Kind) -> bool {
|
|
102
|
+
matches!(
|
|
103
|
+
kind,
|
|
104
|
+
Kind::Colon
|
|
105
|
+
| Kind::Comma
|
|
106
|
+
| Kind::Dot
|
|
107
|
+
| Kind::LBrack
|
|
108
|
+
| Kind::LCurly
|
|
109
|
+
| Kind::LParen
|
|
110
|
+
| Kind::RBrack
|
|
111
|
+
| Kind::RCurly
|
|
112
|
+
| Kind::RParen
|
|
113
|
+
| Kind::Semicolon
|
|
114
|
+
)
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
fn is_oxc_operator(kind: Kind) -> bool {
|
|
118
|
+
!matches!(kind, Kind::Ident | Kind::PrivateIdentifier | Kind::JSXText)
|
|
119
|
+
&& !matches!(token_kind_for_operator_check(kind), TokenKind::Default)
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
fn token_kind_for_operator_check(kind: Kind) -> TokenKind {
|
|
123
|
+
if matches!(
|
|
124
|
+
kind,
|
|
125
|
+
Kind::Amp
|
|
126
|
+
| Kind::Amp2
|
|
127
|
+
| Kind::Amp2Eq
|
|
128
|
+
| Kind::AmpEq
|
|
129
|
+
| Kind::Bang
|
|
130
|
+
| Kind::Caret
|
|
131
|
+
| Kind::CaretEq
|
|
132
|
+
| Kind::Dot3
|
|
133
|
+
| Kind::Eq
|
|
134
|
+
| Kind::Eq2
|
|
135
|
+
| Kind::Eq3
|
|
136
|
+
| Kind::GtEq
|
|
137
|
+
| Kind::LAngle
|
|
138
|
+
| Kind::LtEq
|
|
139
|
+
| Kind::Minus
|
|
140
|
+
| Kind::Minus2
|
|
141
|
+
| Kind::MinusEq
|
|
142
|
+
| Kind::Neq
|
|
143
|
+
| Kind::Neq2
|
|
144
|
+
| Kind::Percent
|
|
145
|
+
| Kind::PercentEq
|
|
146
|
+
| Kind::Pipe
|
|
147
|
+
| Kind::Pipe2
|
|
148
|
+
| Kind::Pipe2Eq
|
|
149
|
+
| Kind::PipeEq
|
|
150
|
+
| Kind::Plus
|
|
151
|
+
| Kind::Plus2
|
|
152
|
+
| Kind::PlusEq
|
|
153
|
+
| Kind::Question
|
|
154
|
+
| Kind::Question2
|
|
155
|
+
| Kind::Question2Eq
|
|
156
|
+
| Kind::QuestionDot
|
|
157
|
+
| Kind::RAngle
|
|
158
|
+
| Kind::ShiftLeft
|
|
159
|
+
| Kind::ShiftLeftEq
|
|
160
|
+
| Kind::ShiftRight
|
|
161
|
+
| Kind::ShiftRight3
|
|
162
|
+
| Kind::ShiftRight3Eq
|
|
163
|
+
| Kind::ShiftRightEq
|
|
164
|
+
| Kind::Slash
|
|
165
|
+
| Kind::SlashEq
|
|
166
|
+
| Kind::Star
|
|
167
|
+
| Kind::Star2
|
|
168
|
+
| Kind::Star2Eq
|
|
169
|
+
| Kind::StarEq
|
|
170
|
+
| Kind::Tilde
|
|
171
|
+
| Kind::Arrow
|
|
172
|
+
) {
|
|
173
|
+
TokenKind::Operator
|
|
174
|
+
} else {
|
|
175
|
+
TokenKind::Default
|
|
176
|
+
}
|
|
177
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
pub(super) fn is_js_keyword(value: &str) -> bool {
|
|
2
|
+
matches!(
|
|
3
|
+
value,
|
|
4
|
+
"as" | "async"
|
|
5
|
+
| "await"
|
|
6
|
+
| "break"
|
|
7
|
+
| "case"
|
|
8
|
+
| "catch"
|
|
9
|
+
| "class"
|
|
10
|
+
| "const"
|
|
11
|
+
| "continue"
|
|
12
|
+
| "debugger"
|
|
13
|
+
| "default"
|
|
14
|
+
| "delete"
|
|
15
|
+
| "do"
|
|
16
|
+
| "else"
|
|
17
|
+
| "enum"
|
|
18
|
+
| "export"
|
|
19
|
+
| "extends"
|
|
20
|
+
| "false"
|
|
21
|
+
| "finally"
|
|
22
|
+
| "for"
|
|
23
|
+
| "from"
|
|
24
|
+
| "function"
|
|
25
|
+
| "get"
|
|
26
|
+
| "if"
|
|
27
|
+
| "implements"
|
|
28
|
+
| "import"
|
|
29
|
+
| "in"
|
|
30
|
+
| "instanceof"
|
|
31
|
+
| "interface"
|
|
32
|
+
| "let"
|
|
33
|
+
| "new"
|
|
34
|
+
| "null"
|
|
35
|
+
| "of"
|
|
36
|
+
| "package"
|
|
37
|
+
| "private"
|
|
38
|
+
| "protected"
|
|
39
|
+
| "public"
|
|
40
|
+
| "return"
|
|
41
|
+
| "set"
|
|
42
|
+
| "static"
|
|
43
|
+
| "super"
|
|
44
|
+
| "switch"
|
|
45
|
+
| "this"
|
|
46
|
+
| "throw"
|
|
47
|
+
| "true"
|
|
48
|
+
| "try"
|
|
49
|
+
| "type"
|
|
50
|
+
| "typeof"
|
|
51
|
+
| "undefined"
|
|
52
|
+
| "var"
|
|
53
|
+
| "void"
|
|
54
|
+
| "while"
|
|
55
|
+
| "with"
|
|
56
|
+
| "yield"
|
|
57
|
+
)
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
pub(super) fn is_js_constant(value: &str) -> bool {
|
|
61
|
+
let mut chars = value.chars();
|
|
62
|
+
let Some(first) = chars.next() else {
|
|
63
|
+
return false;
|
|
64
|
+
};
|
|
65
|
+
first.is_ascii_uppercase()
|
|
66
|
+
&& chars.all(|ch| ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_')
|
|
67
|
+
}
|