jscpd-rs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CHANGELOG.md +69 -0
  2. package/Cargo.lock +1323 -0
  3. package/Cargo.toml +54 -0
  4. package/LICENSE +21 -0
  5. package/README.md +372 -0
  6. package/docs/api-parity.md +49 -0
  7. package/docs/cloning-plan.md +281 -0
  8. package/docs/compat-baseline.md +535 -0
  9. package/docs/format-porting.md +86 -0
  10. package/docs/junior-task-template.md +62 -0
  11. package/docs/junior-workflow.md +87 -0
  12. package/docs/migrating-from-jscpd.md +193 -0
  13. package/docs/npm-release.md +116 -0
  14. package/docs/public-benchmark-suite.md +81 -0
  15. package/docs/release-checklist.md +200 -0
  16. package/docs/release-decisions.md +103 -0
  17. package/docs/release-readiness.md +51 -0
  18. package/docs/upstream-bugs.md +501 -0
  19. package/docs/upstream-issue-drafts.md +393 -0
  20. package/docs/user-guide.md +309 -0
  21. package/examples/dump_oxc_tokens.rs +112 -0
  22. package/examples/library_api.rs +42 -0
  23. package/npm/bin/jscpd-rs.js +6 -0
  24. package/npm/bin/jscpd-server.js +6 -0
  25. package/npm/lib/run-binary.js +68 -0
  26. package/npm/scripts/postinstall.js +50 -0
  27. package/package.json +53 -0
  28. package/skills/dry-refactoring/SKILL.md +63 -0
  29. package/skills/jscpd/SKILL.md +85 -0
  30. package/src/app.rs +512 -0
  31. package/src/bin/jscpd-server.rs +429 -0
  32. package/src/blame.rs +130 -0
  33. package/src/cli/config.rs +543 -0
  34. package/src/cli/parsing.rs +301 -0
  35. package/src/cli/tests.rs +543 -0
  36. package/src/cli.rs +671 -0
  37. package/src/detector/matching/secondary.rs +387 -0
  38. package/src/detector/matching.rs +274 -0
  39. package/src/detector/model.rs +190 -0
  40. package/src/detector/prepare.rs +71 -0
  41. package/src/detector/skip_local.rs +40 -0
  42. package/src/detector/statistics.rs +138 -0
  43. package/src/detector/store.rs +96 -0
  44. package/src/detector/tests.rs +238 -0
  45. package/src/detector.rs +265 -0
  46. package/src/files/discovery.rs +508 -0
  47. package/src/files/gitignore.rs +203 -0
  48. package/src/files/paths.rs +68 -0
  49. package/src/files/shebang.rs +106 -0
  50. package/src/files/tests.rs +523 -0
  51. package/src/files.rs +25 -0
  52. package/src/formats.rs +570 -0
  53. package/src/lib.rs +433 -0
  54. package/src/main.rs +26 -0
  55. package/src/report/ai.rs +125 -0
  56. package/src/report/badge.rs +238 -0
  57. package/src/report/console.rs +180 -0
  58. package/src/report/console_common.rs +37 -0
  59. package/src/report/console_full.rs +139 -0
  60. package/src/report/csv.rs +65 -0
  61. package/src/report/escape.rs +8 -0
  62. package/src/report/file_output.rs +28 -0
  63. package/src/report/html/assets.rs +47 -0
  64. package/src/report/html.rs +336 -0
  65. package/src/report/json.rs +119 -0
  66. package/src/report/markdown.rs +125 -0
  67. package/src/report/sarif.rs +302 -0
  68. package/src/report/silent.rs +22 -0
  69. package/src/report/source.rs +38 -0
  70. package/src/report/summary.rs +50 -0
  71. package/src/report/test_support.rs +133 -0
  72. package/src/report/threshold.rs +76 -0
  73. package/src/report/xcode.rs +90 -0
  74. package/src/report/xml.rs +119 -0
  75. package/src/report.rs +250 -0
  76. package/src/server/mcp.rs +942 -0
  77. package/src/server.rs +1081 -0
  78. package/src/tokenizer/apex.rs +97 -0
  79. package/src/tokenizer/blocks.rs +532 -0
  80. package/src/tokenizer/embedded.rs +106 -0
  81. package/src/tokenizer/generic.rs +511 -0
  82. package/src/tokenizer/hash.rs +27 -0
  83. package/src/tokenizer/ignore.rs +33 -0
  84. package/src/tokenizer/line_index.rs +33 -0
  85. package/src/tokenizer/markdown.rs +289 -0
  86. package/src/tokenizer/markup_attrs.rs +289 -0
  87. package/src/tokenizer/oxc/fallback.rs +275 -0
  88. package/src/tokenizer/oxc/jsx.rs +168 -0
  89. package/src/tokenizer/oxc/kind.rs +177 -0
  90. package/src/tokenizer/oxc/lexical.rs +67 -0
  91. package/src/tokenizer/oxc.rs +659 -0
  92. package/src/tokenizer/scan.rs +88 -0
  93. package/src/tokenizer/tap.rs +150 -0
  94. package/src/tokenizer/tests.rs +915 -0
  95. package/src/tokenizer.rs +328 -0
  96. package/src/verbose.rs +195 -0
@@ -0,0 +1,659 @@
1
+ use std::path::Path;
2
+
3
+ use oxc_allocator::Allocator;
4
+ use oxc_parser::{Kind, Parser, Token as OxcToken, config::TokensParserConfig};
5
+ use oxc_span::SourceType;
6
+
7
+ use crate::cli::{Mode, Options};
8
+
9
+ use super::scan::{has_code_in_gap, scan_block_comment, scan_line_comment};
10
+ use super::{
11
+ ByteSpan, DetectionToken, LineIndex, TokenContext, TokenKind, TokenMap, hash_token,
12
+ push_strict_whitespace_tokens, push_token,
13
+ };
14
+
15
+ mod fallback;
16
+ mod jsx;
17
+ mod kind;
18
+ mod lexical;
19
+
20
+ use fallback::tokenize_js_like_range;
21
+ use jsx::{jsx_attribute_script_groups, tokenize_jsx_attribute_scripts};
22
+ use kind::oxc_token_kind;
23
+
24
/// A parser token reduced to the two fields this module reads: its kind and its byte span.
///
/// Values are built by `raw_oxc_token`, which clamps both span offsets to the content length.
#[derive(Clone, Copy)]
struct RawOxcToken {
    /// Lexical kind the parser reported for the token.
    kind: Kind,
    /// Byte range of the token within the tokenized content.
    span: ByteSpan,
}
29
+
30
+ pub(super) fn is_oxc_format(format: &str) -> bool {
31
+ matches!(format, "javascript" | "typescript" | "jsx" | "tsx" | "json")
32
+ }
33
+
34
+ pub(super) fn tokenize_oxc_maps(
35
+ content: &str,
36
+ format: &str,
37
+ options: &Options,
38
+ ignore_regions: &[[usize; 2]],
39
+ ) -> Vec<TokenMap> {
40
+ let context = TokenContext {
41
+ content,
42
+ options,
43
+ ignore_regions,
44
+ };
45
+ let allocator = Allocator::new();
46
+ let source_type = source_type_for_format(format);
47
+ let parser_return = Parser::new(&allocator, content, source_type)
48
+ .with_config(TokensParserConfig)
49
+ .parse();
50
+ let line_index = LineIndex::new(content);
51
+ let mut tokens = Vec::with_capacity(content.len().saturating_div(6));
52
+ let mut previous_end = 0usize;
53
+ let parser_tokens = parser_return.tokens;
54
+ let raw_jsx_tokens = if matches!(format, "jsx" | "tsx") {
55
+ Some(
56
+ parser_tokens
57
+ .iter()
58
+ .map(|token| raw_oxc_token(token, content.len()))
59
+ .collect::<Vec<_>>(),
60
+ )
61
+ } else {
62
+ None
63
+ };
64
+ let jsx_script_groups = if let Some(parser_tokens) = raw_jsx_tokens.as_deref() {
65
+ jsx_attribute_script_groups(parser_tokens)
66
+ } else {
67
+ Vec::new()
68
+ };
69
+ let mut idx = 0usize;
70
+ let mut template_expression_depth = 0usize;
71
+
72
+ while idx < parser_tokens.len() {
73
+ let token = raw_oxc_token(&parser_tokens[idx], content.len());
74
+ let start_byte = token.span.start;
75
+ let mut end_byte = token.span.end;
76
+ if start_byte > previous_end {
77
+ push_comments_in_gap(
78
+ &mut tokens,
79
+ &context,
80
+ previous_end,
81
+ start_byte,
82
+ &line_index,
83
+ template_expression_depth > 0,
84
+ );
85
+ }
86
+ if token.kind == Kind::RAngle {
87
+ while idx + 1 < parser_tokens.len() {
88
+ let next = raw_oxc_token(&parser_tokens[idx + 1], content.len());
89
+ if next.kind != Kind::RAngle || next.span.start != end_byte {
90
+ break;
91
+ }
92
+ idx += 1;
93
+ end_byte = next.span.end;
94
+ }
95
+ }
96
+ let span = ByteSpan {
97
+ start: start_byte,
98
+ end: end_byte,
99
+ };
100
+ if token.kind == Kind::Slash
101
+ && context.slice(span) == "/"
102
+ && let Some(regex_end) = scan_regex_literal_end(content, start_byte, content.len())
103
+ {
104
+ push_token_part(
105
+ &mut tokens,
106
+ &context,
107
+ TokenKind::String,
108
+ ByteSpan {
109
+ start: start_byte,
110
+ end: regex_end,
111
+ },
112
+ &line_index,
113
+ );
114
+ previous_end = previous_end.max(regex_end);
115
+ idx += 1;
116
+ while idx < parser_tokens.len() {
117
+ let skipped = raw_oxc_token(&parser_tokens[idx], content.len());
118
+ if skipped.span.start >= regex_end {
119
+ break;
120
+ }
121
+ previous_end = previous_end.max(skipped.span.end);
122
+ idx += 1;
123
+ }
124
+ continue;
125
+ }
126
+ push_oxc_token(&mut tokens, &context, token.kind, span, &line_index);
127
+ match token.kind {
128
+ Kind::TemplateHead => template_expression_depth += 1,
129
+ Kind::TemplateTail => {
130
+ template_expression_depth = template_expression_depth.saturating_sub(1);
131
+ }
132
+ _ => {}
133
+ }
134
+ previous_end = previous_end.max(end_byte);
135
+ idx += 1;
136
+ }
137
+
138
+ if previous_end < content.len() {
139
+ if has_code_in_gap(content, previous_end, content.len()) {
140
+ tokenize_js_like_range(
141
+ &mut tokens,
142
+ &context,
143
+ previous_end,
144
+ content.len(),
145
+ &line_index,
146
+ );
147
+ } else {
148
+ push_comments_in_gap(
149
+ &mut tokens,
150
+ &context,
151
+ previous_end,
152
+ content.len(),
153
+ &line_index,
154
+ false,
155
+ );
156
+ }
157
+ }
158
+
159
+ let mut maps = vec![TokenMap {
160
+ format: format.to_string(),
161
+ tokens,
162
+ positions_assigned: false,
163
+ }];
164
+ if matches!(format, "jsx" | "tsx") {
165
+ let parser_tokens = raw_jsx_tokens.as_deref().unwrap_or_default();
166
+ let embedded = tokenize_jsx_attribute_scripts(
167
+ parser_tokens,
168
+ &jsx_script_groups,
169
+ &context,
170
+ &line_index,
171
+ );
172
+ if !embedded.is_empty() {
173
+ maps.push(TokenMap {
174
+ format: "javascript".to_string(),
175
+ tokens: embedded,
176
+ positions_assigned: true,
177
+ });
178
+ }
179
+ }
180
+ maps
181
+ }
182
+
183
+ fn raw_oxc_token(token: &OxcToken, content_len: usize) -> RawOxcToken {
184
+ RawOxcToken {
185
+ kind: token.kind(),
186
+ span: ByteSpan {
187
+ start: (token.start() as usize).min(content_len),
188
+ end: (token.end() as usize).min(content_len),
189
+ },
190
+ }
191
+ }
192
+
193
+ fn source_type_for_format(format: &str) -> SourceType {
194
+ let filename = match format {
195
+ "javascript" => "input.jsx",
196
+ "typescript" => "input.ts",
197
+ "tsx" => "input.tsx",
198
+ "jsx" => "input.jsx",
199
+ _ => "input.js",
200
+ };
201
+ SourceType::from_path(Path::new(filename)).unwrap_or_else(|_| SourceType::default())
202
+ }
203
+
204
+ fn push_oxc_token(
205
+ tokens: &mut Vec<DetectionToken>,
206
+ context: &TokenContext<'_>,
207
+ kind: Kind,
208
+ span: ByteSpan,
209
+ line_index: &LineIndex,
210
+ ) {
211
+ if span.start >= span.end {
212
+ return;
213
+ }
214
+ let value = context.slice(span);
215
+ if value.starts_with("//") {
216
+ if context.options.mode != Mode::Weak {
217
+ push_line_comment_tokens(tokens, context, span, line_index);
218
+ }
219
+ return;
220
+ }
221
+ if value.starts_with("#!") {
222
+ push_hashbang_tokens(tokens, context, span, line_index);
223
+ return;
224
+ }
225
+ if value.starts_with("/*") || value.starts_with("<!--") {
226
+ if context.options.mode != Mode::Weak {
227
+ push_comment_token(tokens, context, span, line_index);
228
+ }
229
+ return;
230
+ }
231
+ if kind == Kind::Skip {
232
+ return;
233
+ }
234
+ if kind == Kind::JSXText {
235
+ tokenize_js_like_range(tokens, context, span.start, span.end, line_index);
236
+ return;
237
+ }
238
+ if kind == Kind::Ident && value.contains('-') {
239
+ tokenize_js_like_range(tokens, context, span.start, span.end, line_index);
240
+ return;
241
+ }
242
+ if kind == Kind::RegExp && !regex_literal_allowed_at(context.content, span.start) {
243
+ tokenize_js_like_range(tokens, context, span.start, span.end, line_index);
244
+ return;
245
+ }
246
+ if matches!(
247
+ kind,
248
+ Kind::TemplateHead | Kind::TemplateMiddle | Kind::TemplateTail
249
+ ) {
250
+ push_template_token_parts(tokens, context, kind, span, line_index);
251
+ return;
252
+ }
253
+ if kind == Kind::QuestionDot && context.slice(span) == "?." {
254
+ push_token_part(
255
+ tokens,
256
+ context,
257
+ TokenKind::Operator,
258
+ ByteSpan {
259
+ start: span.start,
260
+ end: span.start + 1,
261
+ },
262
+ line_index,
263
+ );
264
+ push_token_part(
265
+ tokens,
266
+ context,
267
+ TokenKind::Punctuation,
268
+ ByteSpan {
269
+ start: span.start + 1,
270
+ end: span.end,
271
+ },
272
+ line_index,
273
+ );
274
+ return;
275
+ }
276
+ if context.overlaps_ignore_region(span) {
277
+ return;
278
+ }
279
+ tokens.push(DetectionToken {
280
+ hash: hash_token(
281
+ oxc_token_kind(kind, context.slice(span)),
282
+ context.slice(span),
283
+ context.options.ignore_case,
284
+ ),
285
+ start: line_index.location(span.start),
286
+ end: line_index.location(span.end),
287
+ range: [span.start, span.end],
288
+ });
289
+ }
290
+
291
+ pub(super) fn scan_regex_literal_end(
292
+ content: &str,
293
+ slash_start: usize,
294
+ limit: usize,
295
+ ) -> Option<usize> {
296
+ if !regex_literal_allowed_at(content, slash_start) {
297
+ return None;
298
+ }
299
+ let bytes = content.as_bytes();
300
+ if bytes.get(slash_start) != Some(&b'/')
301
+ || matches!(bytes.get(slash_start + 1), Some(b'/' | b'*'))
302
+ {
303
+ return None;
304
+ }
305
+
306
+ let mut idx = slash_start + 1;
307
+ let mut escaped = false;
308
+ let mut in_class = false;
309
+ let mut saw_body = false;
310
+ while idx < bytes.len().min(limit) {
311
+ let byte = bytes[idx];
312
+ if byte == b'\n' || byte == b'\r' {
313
+ return None;
314
+ }
315
+ if escaped {
316
+ escaped = false;
317
+ saw_body = true;
318
+ idx += 1;
319
+ continue;
320
+ }
321
+ match byte {
322
+ b'\\' => {
323
+ escaped = true;
324
+ saw_body = true;
325
+ }
326
+ b'[' => {
327
+ in_class = true;
328
+ saw_body = true;
329
+ }
330
+ b']' => {
331
+ in_class = false;
332
+ saw_body = true;
333
+ }
334
+ b'/' if !in_class => {
335
+ if !saw_body {
336
+ return None;
337
+ }
338
+ idx += 1;
339
+ while idx < bytes.len().min(limit) && bytes[idx].is_ascii_alphabetic() {
340
+ idx += 1;
341
+ }
342
+ return Some(idx);
343
+ }
344
+ _ => {
345
+ saw_body = true;
346
+ }
347
+ }
348
+ idx += 1;
349
+ }
350
+ None
351
+ }
352
+
353
/// Decides, from the text before `slash_start`, whether a `/` at that byte offset may begin a
/// regex literal.
///
/// The decision looks at the last non-whitespace character before the slash:
/// - none at all: allowed;
/// - a `!` whose own preceding non-whitespace character is `#` (hashbang): not allowed;
/// - one of `( { = : , ; ! ? & | + - * ~ ^ < >`: allowed;
/// - otherwise that character, together with the run of ASCII alphanumerics, `_` and `$`
///   directly before it, must spell one of the keywords `return`, `throw`, `case`, `delete`,
///   `typeof`, `void`, `new`, `yield` or `await`.
///
/// Panics if `slash_start` is past the end of `content` or not on a character boundary.
fn regex_literal_allowed_at(content: &str, slash_start: usize) -> bool {
    const REGEX_PRECEDERS: &str = "({=:,;!?&|+-*~^<>";

    let preceding = content[..slash_start].trim_end();
    let Some(previous) = preceding.chars().next_back() else {
        return true;
    };
    let before_previous = &preceding[..preceding.len() - previous.len_utf8()];

    if previous == '!' && before_previous.trim_end().ends_with('#') {
        return false;
    }
    if REGEX_PRECEDERS.contains(previous) {
        return true;
    }

    // The candidate word always includes `previous` itself and grows backwards over
    // identifier-like ASCII characters.
    let word_start = before_previous
        .trim_end_matches(|ch: char| ch.is_ascii_alphanumeric() || ch == '_' || ch == '$')
        .len();
    matches!(
        &preceding[word_start..],
        "return" | "throw" | "case" | "delete" | "typeof" | "void" | "new" | "yield" | "await"
    )
}
404
+
405
+ fn push_template_token_parts(
406
+ tokens: &mut Vec<DetectionToken>,
407
+ context: &TokenContext<'_>,
408
+ kind: Kind,
409
+ span: ByteSpan,
410
+ line_index: &LineIndex,
411
+ ) {
412
+ match kind {
413
+ Kind::TemplateHead => {
414
+ let interpolation_start = span.end.saturating_sub(2);
415
+ push_token_part(
416
+ tokens,
417
+ context,
418
+ TokenKind::String,
419
+ ByteSpan {
420
+ start: span.start,
421
+ end: interpolation_start,
422
+ },
423
+ line_index,
424
+ );
425
+ push_token_part(
426
+ tokens,
427
+ context,
428
+ TokenKind::Punctuation,
429
+ ByteSpan {
430
+ start: interpolation_start,
431
+ end: span.end,
432
+ },
433
+ line_index,
434
+ );
435
+ }
436
+ Kind::TemplateMiddle => {
437
+ push_token_part(
438
+ tokens,
439
+ context,
440
+ TokenKind::Punctuation,
441
+ ByteSpan {
442
+ start: span.start,
443
+ end: span.start.saturating_add(1),
444
+ },
445
+ line_index,
446
+ );
447
+ let interpolation_start = span.end.saturating_sub(2);
448
+ push_token_part(
449
+ tokens,
450
+ context,
451
+ TokenKind::String,
452
+ ByteSpan {
453
+ start: span.start.saturating_add(1),
454
+ end: interpolation_start,
455
+ },
456
+ line_index,
457
+ );
458
+ push_token_part(
459
+ tokens,
460
+ context,
461
+ TokenKind::Punctuation,
462
+ ByteSpan {
463
+ start: interpolation_start,
464
+ end: span.end,
465
+ },
466
+ line_index,
467
+ );
468
+ }
469
+ Kind::TemplateTail => {
470
+ push_token_part(
471
+ tokens,
472
+ context,
473
+ TokenKind::Punctuation,
474
+ ByteSpan {
475
+ start: span.start,
476
+ end: span.start.saturating_add(1),
477
+ },
478
+ line_index,
479
+ );
480
+ push_token_part(
481
+ tokens,
482
+ context,
483
+ TokenKind::String,
484
+ ByteSpan {
485
+ start: span.start.saturating_add(1),
486
+ end: span.end,
487
+ },
488
+ line_index,
489
+ );
490
+ }
491
+ _ => {}
492
+ }
493
+ }
494
+
495
+ fn push_token_part(
496
+ tokens: &mut Vec<DetectionToken>,
497
+ context: &TokenContext<'_>,
498
+ kind: TokenKind,
499
+ span: ByteSpan,
500
+ line_index: &LineIndex,
501
+ ) {
502
+ if span.start >= span.end || context.overlaps_ignore_region(span) {
503
+ return;
504
+ }
505
+ push_token(
506
+ tokens,
507
+ context,
508
+ kind,
509
+ span,
510
+ line_index.location(span.start),
511
+ line_index.location(span.end),
512
+ );
513
+ }
514
+
515
+ fn push_comments_in_gap(
516
+ tokens: &mut Vec<DetectionToken>,
517
+ context: &TokenContext<'_>,
518
+ gap_start: usize,
519
+ gap_end: usize,
520
+ line_index: &LineIndex,
521
+ preserve_whitespace_as_default: bool,
522
+ ) {
523
+ if gap_start >= gap_end {
524
+ return;
525
+ }
526
+
527
+ let bytes = context.content.as_bytes();
528
+ let mut idx = gap_start;
529
+ while idx < gap_end {
530
+ let ch = context.content[idx..].chars().next().unwrap_or('\0');
531
+ if ch.is_whitespace() {
532
+ let whitespace_end = scan_whitespace(context.content, idx, gap_end);
533
+ let span = ByteSpan {
534
+ start: idx,
535
+ end: whitespace_end,
536
+ };
537
+ if preserve_whitespace_as_default {
538
+ push_token_part(tokens, context, TokenKind::Default, span, line_index);
539
+ } else {
540
+ push_strict_whitespace_tokens(tokens, context, span, line_index);
541
+ }
542
+ idx = whitespace_end.max(idx + ch.len_utf8());
543
+ continue;
544
+ }
545
+ if idx + 1 >= gap_end {
546
+ break;
547
+ }
548
+ let is_hashbang = idx == 0 && bytes[idx] == b'#' && bytes[idx + 1] == b'!';
549
+ let is_line_comment = (bytes[idx] == b'/' && bytes[idx + 1] == b'/')
550
+ || bytes[idx..gap_end].starts_with(b"<!--");
551
+ let comment_end = if is_line_comment || is_hashbang {
552
+ Some(scan_line_comment(bytes, idx, gap_end))
553
+ } else if bytes[idx] == b'/' && bytes[idx + 1] == b'*' {
554
+ Some(scan_block_comment(bytes, idx, gap_end))
555
+ } else {
556
+ None
557
+ };
558
+
559
+ if let Some(comment_end) = comment_end {
560
+ if is_hashbang {
561
+ let span = ByteSpan {
562
+ start: idx,
563
+ end: comment_end,
564
+ };
565
+ push_hashbang_tokens(tokens, context, span, line_index);
566
+ } else if context.options.mode != Mode::Weak {
567
+ let span = ByteSpan {
568
+ start: idx,
569
+ end: comment_end,
570
+ };
571
+ if bytes[idx] == b'/' && bytes[idx + 1] == b'/' {
572
+ push_line_comment_tokens(tokens, context, span, line_index);
573
+ } else {
574
+ push_comment_token(tokens, context, span, line_index);
575
+ }
576
+ }
577
+ idx = comment_end.max(idx + 1);
578
+ } else {
579
+ idx += ch.len_utf8();
580
+ }
581
+ }
582
+ }
583
+
584
+ fn push_hashbang_tokens(
585
+ tokens: &mut Vec<DetectionToken>,
586
+ context: &TokenContext<'_>,
587
+ span: ByteSpan,
588
+ line_index: &LineIndex,
589
+ ) {
590
+ let hash_span = ByteSpan {
591
+ start: span.start,
592
+ end: span.start + 1,
593
+ };
594
+ push_token_part(tokens, context, TokenKind::Default, hash_span, line_index);
595
+ tokenize_js_like_range(tokens, context, span.start + 1, span.end, line_index);
596
+ }
597
+
598
+ pub(super) fn push_line_comment_tokens(
599
+ tokens: &mut Vec<DetectionToken>,
600
+ context: &TokenContext<'_>,
601
+ span: ByteSpan,
602
+ line_index: &LineIndex,
603
+ ) {
604
+ let mut part_start = None;
605
+ for (offset, ch) in context.slice(span).char_indices() {
606
+ let idx = span.start + offset;
607
+ if ch.is_whitespace() {
608
+ if let Some(start) = part_start.take() {
609
+ push_comment_token(tokens, context, ByteSpan { start, end: idx }, line_index);
610
+ }
611
+ } else if part_start.is_none() {
612
+ part_start = Some(idx);
613
+ }
614
+ }
615
+ if let Some(start) = part_start {
616
+ push_comment_token(
617
+ tokens,
618
+ context,
619
+ ByteSpan {
620
+ start,
621
+ end: span.end,
622
+ },
623
+ line_index,
624
+ );
625
+ }
626
+ }
627
+
628
/// Returns the byte offset where the run of whitespace characters beginning at `start` ends.
///
/// Scanning stops at the first non-whitespace character, at the end of `content`, or once
/// the offset reaches `limit`. Characters are consumed whole, so a multi-byte whitespace
/// character that begins before `limit` may carry the result past it. When `start >= limit`
/// the function returns `start` unchanged.
fn scan_whitespace(content: &str, start: usize, limit: usize) -> usize {
    if start >= limit {
        return start;
    }
    let mut end = start;
    for ch in content[start..].chars() {
        if end >= limit || !ch.is_whitespace() {
            break;
        }
        end += ch.len_utf8();
    }
    end
}
639
+
640
+ fn push_comment_token(
641
+ tokens: &mut Vec<DetectionToken>,
642
+ context: &TokenContext<'_>,
643
+ span: ByteSpan,
644
+ line_index: &LineIndex,
645
+ ) {
646
+ if span.start >= span.end || context.overlaps_ignore_region(span) {
647
+ return;
648
+ }
649
+ tokens.push(DetectionToken {
650
+ hash: hash_token(
651
+ TokenKind::Comment,
652
+ context.slice(span),
653
+ context.options.ignore_case,
654
+ ),
655
+ start: line_index.location(span.start),
656
+ end: line_index.location(span.end),
657
+ range: [span.start, span.end],
658
+ });
659
+ }