jscpd-rs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CHANGELOG.md +69 -0
  2. package/Cargo.lock +1323 -0
  3. package/Cargo.toml +54 -0
  4. package/LICENSE +21 -0
  5. package/README.md +372 -0
  6. package/docs/api-parity.md +49 -0
  7. package/docs/cloning-plan.md +281 -0
  8. package/docs/compat-baseline.md +535 -0
  9. package/docs/format-porting.md +86 -0
  10. package/docs/junior-task-template.md +62 -0
  11. package/docs/junior-workflow.md +87 -0
  12. package/docs/migrating-from-jscpd.md +193 -0
  13. package/docs/npm-release.md +116 -0
  14. package/docs/public-benchmark-suite.md +81 -0
  15. package/docs/release-checklist.md +200 -0
  16. package/docs/release-decisions.md +103 -0
  17. package/docs/release-readiness.md +51 -0
  18. package/docs/upstream-bugs.md +501 -0
  19. package/docs/upstream-issue-drafts.md +393 -0
  20. package/docs/user-guide.md +309 -0
  21. package/examples/dump_oxc_tokens.rs +112 -0
  22. package/examples/library_api.rs +42 -0
  23. package/npm/bin/jscpd-rs.js +6 -0
  24. package/npm/bin/jscpd-server.js +6 -0
  25. package/npm/lib/run-binary.js +68 -0
  26. package/npm/scripts/postinstall.js +50 -0
  27. package/package.json +53 -0
  28. package/skills/dry-refactoring/SKILL.md +63 -0
  29. package/skills/jscpd/SKILL.md +85 -0
  30. package/src/app.rs +512 -0
  31. package/src/bin/jscpd-server.rs +429 -0
  32. package/src/blame.rs +130 -0
  33. package/src/cli/config.rs +543 -0
  34. package/src/cli/parsing.rs +301 -0
  35. package/src/cli/tests.rs +543 -0
  36. package/src/cli.rs +671 -0
  37. package/src/detector/matching/secondary.rs +387 -0
  38. package/src/detector/matching.rs +274 -0
  39. package/src/detector/model.rs +190 -0
  40. package/src/detector/prepare.rs +71 -0
  41. package/src/detector/skip_local.rs +40 -0
  42. package/src/detector/statistics.rs +138 -0
  43. package/src/detector/store.rs +96 -0
  44. package/src/detector/tests.rs +238 -0
  45. package/src/detector.rs +265 -0
  46. package/src/files/discovery.rs +508 -0
  47. package/src/files/gitignore.rs +203 -0
  48. package/src/files/paths.rs +68 -0
  49. package/src/files/shebang.rs +106 -0
  50. package/src/files/tests.rs +523 -0
  51. package/src/files.rs +25 -0
  52. package/src/formats.rs +570 -0
  53. package/src/lib.rs +433 -0
  54. package/src/main.rs +26 -0
  55. package/src/report/ai.rs +125 -0
  56. package/src/report/badge.rs +238 -0
  57. package/src/report/console.rs +180 -0
  58. package/src/report/console_common.rs +37 -0
  59. package/src/report/console_full.rs +139 -0
  60. package/src/report/csv.rs +65 -0
  61. package/src/report/escape.rs +8 -0
  62. package/src/report/file_output.rs +28 -0
  63. package/src/report/html/assets.rs +47 -0
  64. package/src/report/html.rs +336 -0
  65. package/src/report/json.rs +119 -0
  66. package/src/report/markdown.rs +125 -0
  67. package/src/report/sarif.rs +302 -0
  68. package/src/report/silent.rs +22 -0
  69. package/src/report/source.rs +38 -0
  70. package/src/report/summary.rs +50 -0
  71. package/src/report/test_support.rs +133 -0
  72. package/src/report/threshold.rs +76 -0
  73. package/src/report/xcode.rs +90 -0
  74. package/src/report/xml.rs +119 -0
  75. package/src/report.rs +250 -0
  76. package/src/server/mcp.rs +942 -0
  77. package/src/server.rs +1081 -0
  78. package/src/tokenizer/apex.rs +97 -0
  79. package/src/tokenizer/blocks.rs +532 -0
  80. package/src/tokenizer/embedded.rs +106 -0
  81. package/src/tokenizer/generic.rs +511 -0
  82. package/src/tokenizer/hash.rs +27 -0
  83. package/src/tokenizer/ignore.rs +33 -0
  84. package/src/tokenizer/line_index.rs +33 -0
  85. package/src/tokenizer/markdown.rs +289 -0
  86. package/src/tokenizer/markup_attrs.rs +289 -0
  87. package/src/tokenizer/oxc/fallback.rs +275 -0
  88. package/src/tokenizer/oxc/jsx.rs +168 -0
  89. package/src/tokenizer/oxc/kind.rs +177 -0
  90. package/src/tokenizer/oxc/lexical.rs +67 -0
  91. package/src/tokenizer/oxc.rs +659 -0
  92. package/src/tokenizer/scan.rs +88 -0
  93. package/src/tokenizer/tap.rs +150 -0
  94. package/src/tokenizer/tests.rs +915 -0
  95. package/src/tokenizer.rs +328 -0
  96. package/src/verbose.rs +195 -0
@@ -0,0 +1,387 @@
1
+ use rustc_hash::FxHashMap;
2
+
3
+ use crate::cli::Options;
4
+
5
+ use super::super::model::{
6
+ CloneMatch, Fragment, Occurrence, PreparedSource, SkippedClone, SourceId,
7
+ };
8
+ use super::{create_clone, enlarge_clone, flush_clone, windows_match};
9
+
10
+ const SECONDARY_OCCURRENCE_CAP: usize = 2;
11
+
12
+ pub(super) fn remember_repeated_window(
13
+ repeated_windows: &mut FxHashMap<u64, Vec<Occurrence>>,
14
+ hash: u64,
15
+ occurrence: Occurrence,
16
+ ) {
17
+ let bucket = repeated_windows.entry(hash).or_default();
18
+ if bucket.iter().any(|stored| {
19
+ stored.source_id == occurrence.source_id && stored.token_start == occurrence.token_start
20
+ }) {
21
+ return;
22
+ }
23
+ if bucket.len() < SECONDARY_OCCURRENCE_CAP {
24
+ bucket.push(occurrence);
25
+ }
26
+ }
27
+
28
/// A pair of matching windows, identified by source index and starting token.
///
/// The field order is significant: the derived ordering compares `source_a`,
/// `source_b`, `token_start_a` and `token_start_b` in that sequence, and
/// `add_secondary_clones` sorts candidates with it before merging consecutive
/// windows.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct CandidateWindow {
    source_a: usize,
    source_b: usize,
    token_start_a: usize,
    token_start_b: usize,
}
35
+
36
+ struct SecondaryOpen {
37
+ clone: CloneMatch,
38
+ source_a: usize,
39
+ source_b: usize,
40
+ last_token_start_a: usize,
41
+ last_token_start_b: usize,
42
+ }
43
+
44
+ pub(super) fn add_secondary_clones(
45
+ format: &str,
46
+ repeated_windows: FxHashMap<u64, Vec<Occurrence>>,
47
+ prepared_files: &[PreparedSource],
48
+ options: &Options,
49
+ clones: &mut Vec<CloneMatch>,
50
+ skipped_clones: &mut Vec<SkippedClone>,
51
+ ) {
52
+ if repeated_windows.is_empty() {
53
+ return;
54
+ }
55
+
56
+ let mut candidates = Vec::new();
57
+ for occurrences in repeated_windows.values() {
58
+ if occurrences.len() < 2 {
59
+ continue;
60
+ }
61
+ for left_idx in 0..occurrences.len() {
62
+ for right_idx in left_idx + 1..occurrences.len() {
63
+ let left = occurrences[left_idx];
64
+ let right = occurrences[right_idx];
65
+ if left.source_id == right.source_id && left.token_start == right.token_start {
66
+ continue;
67
+ }
68
+ if !windows_match(
69
+ &prepared_files[left.source_id.0].stream,
70
+ left.token_start,
71
+ &prepared_files[right.source_id.0].stream,
72
+ right.token_start,
73
+ options.min_tokens,
74
+ ) {
75
+ continue;
76
+ }
77
+ let (source_a, token_start_a, source_b, token_start_b) =
78
+ canonical_candidate_pair(left, right);
79
+ candidates.push(CandidateWindow {
80
+ source_a,
81
+ source_b,
82
+ token_start_a,
83
+ token_start_b,
84
+ });
85
+ }
86
+ }
87
+ }
88
+ if candidates.is_empty() {
89
+ return;
90
+ }
91
+ candidates.sort_unstable();
92
+ candidates.dedup();
93
+
94
+ let mut coverage = LineCoverage::from_clones(prepared_files, clones);
95
+ let mut open: Option<SecondaryOpen> = None;
96
+
97
+ for candidate in candidates {
98
+ if let Some(current) = open.as_mut()
99
+ && current.source_a == candidate.source_a
100
+ && current.source_b == candidate.source_b
101
+ && current.last_token_start_a + 1 == candidate.token_start_a
102
+ && current.last_token_start_b + 1 == candidate.token_start_b
103
+ {
104
+ enlarge_clone(
105
+ &mut current.clone,
106
+ Occurrence {
107
+ source_id: SourceId(candidate.source_a),
108
+ token_start: candidate.token_start_a,
109
+ },
110
+ Occurrence {
111
+ source_id: SourceId(candidate.source_b),
112
+ token_start: candidate.token_start_b,
113
+ },
114
+ prepared_files,
115
+ options,
116
+ );
117
+ current.last_token_start_a = candidate.token_start_a;
118
+ current.last_token_start_b = candidate.token_start_b;
119
+ continue;
120
+ }
121
+
122
+ flush_secondary_clone(open.take(), clones, skipped_clones, options, &mut coverage);
123
+ let occurrence_a = Occurrence {
124
+ source_id: SourceId(candidate.source_a),
125
+ token_start: candidate.token_start_a,
126
+ };
127
+ let occurrence_b = Occurrence {
128
+ source_id: SourceId(candidate.source_b),
129
+ token_start: candidate.token_start_b,
130
+ };
131
+ open = Some(SecondaryOpen {
132
+ clone: create_clone(format, occurrence_a, occurrence_b, prepared_files, options),
133
+ source_a: candidate.source_a,
134
+ source_b: candidate.source_b,
135
+ last_token_start_a: candidate.token_start_a,
136
+ last_token_start_b: candidate.token_start_b,
137
+ });
138
+ }
139
+
140
+ flush_secondary_clone(open.take(), clones, skipped_clones, options, &mut coverage);
141
+ }
142
+
143
+ fn canonical_candidate_pair(left: Occurrence, right: Occurrence) -> (usize, usize, usize, usize) {
144
+ let left_key = (left.source_id.0, left.token_start);
145
+ let right_key = (right.source_id.0, right.token_start);
146
+ if left_key <= right_key {
147
+ (
148
+ left.source_id.0,
149
+ left.token_start,
150
+ right.source_id.0,
151
+ right.token_start,
152
+ )
153
+ } else {
154
+ (
155
+ right.source_id.0,
156
+ right.token_start,
157
+ left.source_id.0,
158
+ left.token_start,
159
+ )
160
+ }
161
+ }
162
+
163
+ fn flush_secondary_clone(
164
+ open: Option<SecondaryOpen>,
165
+ clones: &mut Vec<CloneMatch>,
166
+ skipped_clones: &mut Vec<SkippedClone>,
167
+ options: &Options,
168
+ coverage: &mut LineCoverage,
169
+ ) {
170
+ let Some(open) = open else {
171
+ return;
172
+ };
173
+ let range_a = fragment_line_range(&open.clone.duplication_a);
174
+ let range_b = fragment_line_range(&open.clone.duplication_b);
175
+ if !coverage.extends(open.source_a, range_a) && !coverage.extends(open.source_b, range_b) {
176
+ return;
177
+ }
178
+
179
+ let before = clones.len();
180
+ flush_clone(Some(open.clone), clones, skipped_clones, options);
181
+ if clones.len() > before {
182
+ coverage.insert(open.source_a, range_a);
183
+ coverage.insert(open.source_b, range_b);
184
+ }
185
+ }
186
+
187
+ struct LineCoverage {
188
+ ranges_by_source: Vec<Vec<(usize, usize)>>,
189
+ }
190
+
191
+ impl LineCoverage {
192
+ fn from_clones(prepared_files: &[PreparedSource], clones: &[CloneMatch]) -> Self {
193
+ let mut source_lookup = FxHashMap::default();
194
+ for (idx, source) in prepared_files.iter().enumerate() {
195
+ source_lookup.insert(source.meta.source_id.as_str(), idx);
196
+ }
197
+ let mut coverage = Self {
198
+ ranges_by_source: vec![Vec::new(); prepared_files.len()],
199
+ };
200
+ for clone in clones {
201
+ if let Some(source_idx) = source_lookup.get(clone.duplication_a.source_id.as_str()) {
202
+ coverage.insert(*source_idx, fragment_line_range(&clone.duplication_a));
203
+ }
204
+ if let Some(source_idx) = source_lookup.get(clone.duplication_b.source_id.as_str()) {
205
+ coverage.insert(*source_idx, fragment_line_range(&clone.duplication_b));
206
+ }
207
+ }
208
+ coverage
209
+ }
210
+
211
+ fn extends(&self, source_idx: usize, range: (usize, usize)) -> bool {
212
+ let Some(ranges) = self.ranges_by_source.get(source_idx) else {
213
+ return true;
214
+ };
215
+ let mut next_line = range.0;
216
+ for &(start, end) in ranges {
217
+ if end < next_line {
218
+ continue;
219
+ }
220
+ if start > next_line {
221
+ return true;
222
+ }
223
+ next_line = next_line.max(end.saturating_add(1));
224
+ if next_line > range.1 {
225
+ return false;
226
+ }
227
+ }
228
+ next_line <= range.1
229
+ }
230
+
231
+ fn insert(&mut self, source_idx: usize, range: (usize, usize)) {
232
+ let Some(ranges) = self.ranges_by_source.get_mut(source_idx) else {
233
+ return;
234
+ };
235
+ ranges.push(range);
236
+ ranges.sort_unstable();
237
+
238
+ let mut merged: Vec<(usize, usize)> = Vec::with_capacity(ranges.len());
239
+ for &(start, end) in ranges.iter() {
240
+ if let Some((_, previous_end)) = merged.last_mut()
241
+ && start <= previous_end.saturating_add(1)
242
+ {
243
+ *previous_end = (*previous_end).max(end);
244
+ continue;
245
+ }
246
+ merged.push((start, end));
247
+ }
248
+ *ranges = merged;
249
+ }
250
+ }
251
+
252
+ fn fragment_line_range(fragment: &Fragment) -> (usize, usize) {
253
+ let start = fragment.start.line.min(fragment.end.line);
254
+ let end = fragment.start.line.max(fragment.end.line);
255
+ (start, end)
256
+ }
257
+
258
#[cfg(test)]
mod tests {
    use rustc_hash::FxHashMap;

    use crate::tokenizer::Location;

    use super::super::super::model::{FormatId, SourceMeta, TokenSpan, TokenStream};
    use super::*;

    /// Format name used by every fixture in this module.
    const FORMAT: &str = "javascript";

    /// A secondary clone is added when it reaches lines the existing clones do
    /// not cover, and a second pass over the same windows adds nothing more.
    #[test]
    fn secondary_clones_only_extend_uncovered_lines() {
        let options = Options {
            min_tokens: 3,
            min_lines: 0,
            ..Options::default()
        };
        // One token per line, so token index `i` sits on line `i + 1`.
        let prepared_files = [
            prepared_source(0, "a.js", &[1, 2, 3, 4, 5, 6]),
            prepared_source(1, "b.js", &[8, 9, 3, 4, 5, 6]),
        ];
        let already_reported = CloneMatch {
            format: FORMAT.to_string(),
            duplication_a: fragment("a.js", 1, 2),
            duplication_b: fragment("b.js", 1, 2),
            tokens: 3,
        };
        let mut clones = vec![already_reported];
        let mut skipped_clones = Vec::new();

        run_secondary_pass(&prepared_files, &options, &mut clones, &mut skipped_clones);

        assert_eq!(clones.len(), 2);
        let added = &clones[1];
        assert_eq!(added.duplication_a.source_id, "a.js");
        assert_eq!(added.duplication_a.start.line, 3);
        assert_eq!(added.duplication_a.end.line, 6);
        assert_eq!(added.duplication_b.source_id, "b.js");

        run_secondary_pass(&prepared_files, &options, &mut clones, &mut skipped_clones);

        assert_eq!(clones.len(), 2);
    }

    /// Runs `add_secondary_clones` with one remembered window that starts at
    /// token 2 in source 0 and in source 1.
    fn run_secondary_pass(
        prepared_files: &[PreparedSource],
        options: &Options,
        clones: &mut Vec<CloneMatch>,
        skipped_clones: &mut Vec<SkippedClone>,
    ) {
        let occurrences = [0usize, 1].map(|source_idx| Occurrence {
            source_id: SourceId(source_idx),
            token_start: 2,
        });
        let mut repeated_windows = FxHashMap::default();
        repeated_windows.insert(42, occurrences.to_vec());

        add_secondary_clones(
            FORMAT,
            repeated_windows,
            prepared_files,
            options,
            clones,
            skipped_clones,
        );
    }

    /// Builds a prepared source with the given token hashes and one span per
    /// token.
    fn prepared_source(source_idx: usize, source_id: &str, hashes: &[u64]) -> PreparedSource {
        let token_count = hashes.len();
        let meta = SourceMeta {
            source_id: source_id.to_string(),
            format: FORMAT.to_string(),
            content: String::new(),
            lines: token_count,
            tokens: token_count,
        };
        let stream = TokenStream {
            source_id: SourceId(source_idx),
            format_id: FormatId(0),
            hashes: hashes.to_vec(),
            spans: (0..token_count).map(token_span).collect(),
        };
        PreparedSource { meta, stream }
    }

    /// Span of the token at `idx`, placed on line `idx + 1`.
    fn token_span(idx: usize) -> TokenSpan {
        let line = idx + 1;
        TokenSpan {
            start: location(line, 1, idx),
            end: location(line, 2, idx),
            range: [idx, idx + 1],
        }
    }

    /// Fragment of `source_id` running from line `start` to line `end`.
    fn fragment(source_id: &str, start: usize, end: usize) -> Fragment {
        Fragment {
            source_id: source_id.to_string(),
            start: location(start, 1, start),
            end: location(end, 2, end),
            range: [start, end],
            blame: None,
        }
    }

    fn location(line: usize, column: usize, position: usize) -> Location {
        Location {
            line,
            column,
            position,
        }
    }
}
@@ -0,0 +1,274 @@
1
+ use rustc_hash::FxHashMap;
2
+
3
+ use crate::cli::Options;
4
+
5
+ use super::model::{
6
+ CloneMatch, FormatId, Fragment, Occurrence, PreparedSource, SkippedClone, TokenStream,
7
+ };
8
+ use super::skip_local::same_configured_root;
9
+ use super::statistics::clone_stat_lines;
10
+
11
+ mod secondary;
12
+
13
+ use secondary::{add_secondary_clones, remember_repeated_window};
14
+
15
+ const WINDOW_HASH_BASE: u64 = 0x9e37_79b9_7f4a_7c15;
16
+
17
+ pub(super) struct FormatDetection {
18
+ pub(super) clones: Vec<CloneMatch>,
19
+ pub(super) skipped_clones: Vec<SkippedClone>,
20
+ }
21
+
22
+ pub(super) fn detect_format(
23
+ format_id: FormatId,
24
+ source_indices: &[usize],
25
+ prepared_files: &[PreparedSource],
26
+ format_names: &[String],
27
+ options: &Options,
28
+ ) -> FormatDetection {
29
+ let mut store: FxHashMap<u64, Occurrence> = FxHashMap::default();
30
+ let mut repeated_windows: FxHashMap<u64, Vec<Occurrence>> = FxHashMap::default();
31
+ store.reserve(total_windows(
32
+ source_indices,
33
+ prepared_files,
34
+ options.min_tokens,
35
+ ));
36
+ let mut clones = Vec::new();
37
+ let mut skipped_clones = Vec::new();
38
+
39
+ for &source_idx in source_indices.iter().rev() {
40
+ let stream = &prepared_files[source_idx].stream;
41
+ debug_assert_eq!(stream.source_id.0, source_idx);
42
+ debug_assert_eq!(stream.format_id, format_id);
43
+
44
+ if stream.hashes.len() <= options.min_tokens {
45
+ continue;
46
+ }
47
+
48
+ let mut open_clone: Option<CloneMatch> = None;
49
+ let mut hash = initial_window_hash(&stream.hashes, options.min_tokens);
50
+ let window_power = WINDOW_HASH_BASE.wrapping_pow((options.min_tokens - 1) as u32);
51
+ let windows_len = stream.hashes.len() - options.min_tokens;
52
+
53
+ for token_start in 0..windows_len {
54
+ let current = Occurrence {
55
+ source_id: stream.source_id,
56
+ token_start,
57
+ };
58
+ match store.get(&hash).copied() {
59
+ Some(stored)
60
+ if windows_match(
61
+ stream,
62
+ token_start,
63
+ &prepared_files[stored.source_id.0].stream,
64
+ stored.token_start,
65
+ options.min_tokens,
66
+ ) =>
67
+ {
68
+ if open_clone.is_none() {
69
+ open_clone = Some(create_clone(
70
+ &format_names[format_id.0],
71
+ current,
72
+ stored,
73
+ prepared_files,
74
+ options,
75
+ ));
76
+ } else if let Some(clone) = open_clone.as_mut() {
77
+ enlarge_clone(clone, current, stored, prepared_files, options);
78
+ }
79
+ remember_repeated_window(&mut repeated_windows, hash, stored);
80
+ remember_repeated_window(&mut repeated_windows, hash, current);
81
+ }
82
+ _ => {
83
+ flush_clone(open_clone.take(), &mut clones, &mut skipped_clones, options);
84
+ store.insert(hash, current);
85
+ }
86
+ }
87
+
88
+ if token_start + options.min_tokens < stream.hashes.len() {
89
+ hash = next_window_hash(
90
+ hash,
91
+ stream.hashes[token_start],
92
+ stream.hashes[token_start + options.min_tokens],
93
+ window_power,
94
+ );
95
+ }
96
+ }
97
+ flush_clone(open_clone.take(), &mut clones, &mut skipped_clones, options);
98
+ }
99
+
100
+ add_secondary_clones(
101
+ &format_names[format_id.0],
102
+ repeated_windows,
103
+ prepared_files,
104
+ options,
105
+ &mut clones,
106
+ &mut skipped_clones,
107
+ );
108
+
109
+ FormatDetection {
110
+ clones,
111
+ skipped_clones,
112
+ }
113
+ }
114
+
115
+ fn total_windows(
116
+ source_indices: &[usize],
117
+ prepared_files: &[PreparedSource],
118
+ min_tokens: usize,
119
+ ) -> usize {
120
+ source_indices
121
+ .iter()
122
+ .map(|&source_idx| {
123
+ prepared_files[source_idx]
124
+ .stream
125
+ .hashes
126
+ .len()
127
+ .saturating_sub(min_tokens)
128
+ })
129
+ .sum()
130
+ }
131
+
132
+ fn initial_window_hash(hashes: &[u64], min_tokens: usize) -> u64 {
133
+ hashes[..min_tokens].iter().fold(0u64, |hash, token_hash| {
134
+ hash.wrapping_mul(WINDOW_HASH_BASE)
135
+ .wrapping_add(*token_hash)
136
+ })
137
+ }
138
+
139
+ fn next_window_hash(hash: u64, outgoing: u64, incoming: u64, window_power: u64) -> u64 {
140
+ hash.wrapping_sub(outgoing.wrapping_mul(window_power))
141
+ .wrapping_mul(WINDOW_HASH_BASE)
142
+ .wrapping_add(incoming)
143
+ }
144
+
145
+ fn windows_match(
146
+ stream_a: &TokenStream,
147
+ start_a: usize,
148
+ stream_b: &TokenStream,
149
+ start_b: usize,
150
+ min_tokens: usize,
151
+ ) -> bool {
152
+ stream_a.hashes[start_a..start_a + min_tokens] == stream_b.hashes[start_b..start_b + min_tokens]
153
+ }
154
+
155
+ fn create_clone(
156
+ format: &str,
157
+ occurrence_a: Occurrence,
158
+ occurrence_b: Occurrence,
159
+ prepared_files: &[PreparedSource],
160
+ options: &Options,
161
+ ) -> CloneMatch {
162
+ CloneMatch {
163
+ format: format.to_string(),
164
+ duplication_a: fragment_from_occurrence(occurrence_a, prepared_files, options.min_tokens),
165
+ duplication_b: fragment_from_occurrence(occurrence_b, prepared_files, options.min_tokens),
166
+ tokens: options.min_tokens,
167
+ }
168
+ }
169
+
170
+ fn enlarge_clone(
171
+ clone: &mut CloneMatch,
172
+ occurrence_a: Occurrence,
173
+ occurrence_b: Occurrence,
174
+ prepared_files: &[PreparedSource],
175
+ options: &Options,
176
+ ) {
177
+ enlarge_fragment_end(
178
+ &mut clone.duplication_a,
179
+ occurrence_a,
180
+ prepared_files,
181
+ options.min_tokens,
182
+ );
183
+ enlarge_fragment_end(
184
+ &mut clone.duplication_b,
185
+ occurrence_b,
186
+ prepared_files,
187
+ options.min_tokens,
188
+ );
189
+ clone.tokens += 1;
190
+ }
191
+
192
+ fn fragment_from_occurrence(
193
+ occurrence: Occurrence,
194
+ prepared_files: &[PreparedSource],
195
+ min_tokens: usize,
196
+ ) -> Fragment {
197
+ let source = &prepared_files[occurrence.source_id.0];
198
+ let start_span = &source.stream.spans[occurrence.token_start];
199
+ let end_span = &source.stream.spans[occurrence.token_start + min_tokens];
200
+ Fragment {
201
+ source_id: source.meta.source_id.clone(),
202
+ start: start_span.start.clone(),
203
+ end: end_span.end.clone(),
204
+ range: [start_span.range[0], end_span.range[1]],
205
+ blame: None,
206
+ }
207
+ }
208
+
209
+ fn enlarge_fragment_end(
210
+ fragment: &mut Fragment,
211
+ occurrence: Occurrence,
212
+ prepared_files: &[PreparedSource],
213
+ min_tokens: usize,
214
+ ) {
215
+ let source = &prepared_files[occurrence.source_id.0];
216
+ let end_span = &source.stream.spans[occurrence.token_start + min_tokens];
217
+ fragment.end = end_span.end.clone();
218
+ fragment.range[1] = end_span.range[1];
219
+ }
220
+
221
+ fn flush_clone(
222
+ clone: Option<CloneMatch>,
223
+ clones: &mut Vec<CloneMatch>,
224
+ skipped_clones: &mut Vec<SkippedClone>,
225
+ options: &Options,
226
+ ) {
227
+ let Some(clone) = clone else {
228
+ return;
229
+ };
230
+ if options.skip_local
231
+ && same_configured_root(
232
+ &clone.duplication_a.source_id,
233
+ &clone.duplication_b.source_id,
234
+ options,
235
+ )
236
+ {
237
+ push_skipped_clone(skipped_clones, options, clone, |clone| {
238
+ format!(
239
+ "Sources of duplication located in same local folder ({}, {})",
240
+ clone.duplication_a.source_id, clone.duplication_b.source_id
241
+ )
242
+ });
243
+ return;
244
+ }
245
+ let lines = clone_stat_lines(&clone);
246
+ if lines < options.min_lines {
247
+ push_skipped_clone(skipped_clones, options, clone, |_| {
248
+ format!(
249
+ "Lines of code less than limit ({lines} < {})",
250
+ options.min_lines
251
+ )
252
+ });
253
+ return;
254
+ }
255
+
256
+ clones.push(clone);
257
+ }
258
+
259
+ fn push_skipped_clone<F>(
260
+ skipped_clones: &mut Vec<SkippedClone>,
261
+ options: &Options,
262
+ clone: CloneMatch,
263
+ message: F,
264
+ ) where
265
+ F: FnOnce(&CloneMatch) -> String,
266
+ {
267
+ if options.verbose {
268
+ let message = message(&clone);
269
+ skipped_clones.push(SkippedClone {
270
+ clone,
271
+ message: vec![message],
272
+ });
273
+ }
274
+ }