jscpd-rs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +69 -0
- package/Cargo.lock +1323 -0
- package/Cargo.toml +54 -0
- package/LICENSE +21 -0
- package/README.md +372 -0
- package/docs/api-parity.md +49 -0
- package/docs/cloning-plan.md +281 -0
- package/docs/compat-baseline.md +535 -0
- package/docs/format-porting.md +86 -0
- package/docs/junior-task-template.md +62 -0
- package/docs/junior-workflow.md +87 -0
- package/docs/migrating-from-jscpd.md +193 -0
- package/docs/npm-release.md +116 -0
- package/docs/public-benchmark-suite.md +81 -0
- package/docs/release-checklist.md +200 -0
- package/docs/release-decisions.md +103 -0
- package/docs/release-readiness.md +51 -0
- package/docs/upstream-bugs.md +501 -0
- package/docs/upstream-issue-drafts.md +393 -0
- package/docs/user-guide.md +309 -0
- package/examples/dump_oxc_tokens.rs +112 -0
- package/examples/library_api.rs +42 -0
- package/npm/bin/jscpd-rs.js +6 -0
- package/npm/bin/jscpd-server.js +6 -0
- package/npm/lib/run-binary.js +68 -0
- package/npm/scripts/postinstall.js +50 -0
- package/package.json +53 -0
- package/skills/dry-refactoring/SKILL.md +63 -0
- package/skills/jscpd/SKILL.md +85 -0
- package/src/app.rs +512 -0
- package/src/bin/jscpd-server.rs +429 -0
- package/src/blame.rs +130 -0
- package/src/cli/config.rs +543 -0
- package/src/cli/parsing.rs +301 -0
- package/src/cli/tests.rs +543 -0
- package/src/cli.rs +671 -0
- package/src/detector/matching/secondary.rs +387 -0
- package/src/detector/matching.rs +274 -0
- package/src/detector/model.rs +190 -0
- package/src/detector/prepare.rs +71 -0
- package/src/detector/skip_local.rs +40 -0
- package/src/detector/statistics.rs +138 -0
- package/src/detector/store.rs +96 -0
- package/src/detector/tests.rs +238 -0
- package/src/detector.rs +265 -0
- package/src/files/discovery.rs +508 -0
- package/src/files/gitignore.rs +203 -0
- package/src/files/paths.rs +68 -0
- package/src/files/shebang.rs +106 -0
- package/src/files/tests.rs +523 -0
- package/src/files.rs +25 -0
- package/src/formats.rs +570 -0
- package/src/lib.rs +433 -0
- package/src/main.rs +26 -0
- package/src/report/ai.rs +125 -0
- package/src/report/badge.rs +238 -0
- package/src/report/console.rs +180 -0
- package/src/report/console_common.rs +37 -0
- package/src/report/console_full.rs +139 -0
- package/src/report/csv.rs +65 -0
- package/src/report/escape.rs +8 -0
- package/src/report/file_output.rs +28 -0
- package/src/report/html/assets.rs +47 -0
- package/src/report/html.rs +336 -0
- package/src/report/json.rs +119 -0
- package/src/report/markdown.rs +125 -0
- package/src/report/sarif.rs +302 -0
- package/src/report/silent.rs +22 -0
- package/src/report/source.rs +38 -0
- package/src/report/summary.rs +50 -0
- package/src/report/test_support.rs +133 -0
- package/src/report/threshold.rs +76 -0
- package/src/report/xcode.rs +90 -0
- package/src/report/xml.rs +119 -0
- package/src/report.rs +250 -0
- package/src/server/mcp.rs +942 -0
- package/src/server.rs +1081 -0
- package/src/tokenizer/apex.rs +97 -0
- package/src/tokenizer/blocks.rs +532 -0
- package/src/tokenizer/embedded.rs +106 -0
- package/src/tokenizer/generic.rs +511 -0
- package/src/tokenizer/hash.rs +27 -0
- package/src/tokenizer/ignore.rs +33 -0
- package/src/tokenizer/line_index.rs +33 -0
- package/src/tokenizer/markdown.rs +289 -0
- package/src/tokenizer/markup_attrs.rs +289 -0
- package/src/tokenizer/oxc/fallback.rs +275 -0
- package/src/tokenizer/oxc/jsx.rs +168 -0
- package/src/tokenizer/oxc/kind.rs +177 -0
- package/src/tokenizer/oxc/lexical.rs +67 -0
- package/src/tokenizer/oxc.rs +659 -0
- package/src/tokenizer/scan.rs +88 -0
- package/src/tokenizer/tap.rs +150 -0
- package/src/tokenizer/tests.rs +915 -0
- package/src/tokenizer.rs +328 -0
- package/src/verbose.rs +195 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
use std::collections::BTreeSet;
|
|
2
|
+
|
|
3
|
+
use crate::cli::Options;
|
|
4
|
+
use crate::files::SourceFile;
|
|
5
|
+
use crate::tokenizer::Location;
|
|
6
|
+
|
|
7
|
+
use super::{
|
|
8
|
+
CloneMatch, Fragment, dedup_exact_clones, detect, detect_prepared_drafts, prepare_source_drafts,
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
fn detection_options() -> Options {
|
|
12
|
+
Options {
|
|
13
|
+
min_tokens: 3,
|
|
14
|
+
min_lines: 0,
|
|
15
|
+
..Options::default()
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/// The same snippet placed in two JavaScript sources must yield at least one clone.
#[test]
fn detects_cross_file_duplicates() {
    let content = "alpha beta gamma delta epsilon\n";
    // The second file wraps the shared snippet between unrelated lines.
    let wrapped = format!("prefix\n{content}\nsuffix\n");
    let files = vec![source("a.js", content), source("b.js", &wrapped)];

    let outcome = detect(files, &detection_options());

    assert!(!outcome.clones.is_empty());
}
|
|
31
|
+
|
|
32
|
+
/// The same snippet placed in two sources tagged `css` must yield at least one clone.
#[test]
fn detects_generic_format_duplicates() {
    let content = "alpha beta gamma delta epsilon\n";
    // The second file wraps the shared snippet between unrelated lines.
    let wrapped = format!("prefix\n{content}\nsuffix\n");
    let files = vec![
        source_with_format("a.css", "css", content),
        source_with_format("b.css", "css", &wrapped),
    ];

    let outcome = detect(files, &detection_options());

    assert!(!outcome.clones.is_empty());
}
|
|
44
|
+
|
|
45
|
+
/// With `skip_local` enabled and a single configured path, identical files
/// located under that path must not be reported as clones.
#[test]
fn skip_local_skips_clones_inside_same_configured_root() {
    let content = "alpha beta gamma delta epsilon\n";
    let files = vec![
        source("project/dir1/a.js", content),
        source("project/dir2/b.js", content),
    ];
    // `detection_options()` supplies the same `min_tokens: 3` / `min_lines: 0` baseline.
    let options = Options {
        paths: vec!["project".into()],
        skip_local: true,
        ..detection_options()
    };

    let outcome = detect(files, &options);

    assert!(outcome.clones.is_empty());
}
|
|
64
|
+
|
|
65
|
+
/// With `skip_local` enabled and two configured paths, identical files located
/// under different paths must still be reported as clones.
#[test]
fn skip_local_keeps_clones_across_configured_roots() {
    let content = "alpha beta gamma delta epsilon\n";
    let files = vec![source("left/a.js", content), source("right/b.js", content)];
    // `detection_options()` supplies the same `min_tokens: 3` / `min_lines: 0` baseline.
    let options = Options {
        paths: vec!["left".into(), "right".into()],
        skip_local: true,
        ..detection_options()
    };

    let outcome = detect(files, &options);

    assert!(!outcome.clones.is_empty());
}
|
|
81
|
+
|
|
82
|
+
/// A source whose whole body sits between `jscpd:ignore-start` and
/// `jscpd:ignore-end` markers must appear neither in the source list nor in
/// the total source count.
#[test]
fn skips_empty_token_sources_in_statistics() {
    let content = "// jscpd:ignore-start\nignored\n// jscpd:ignore-end\n";
    let files = vec![source("ignored.js", content)];

    let outcome = detect(files, &Options::default());

    assert!(outcome.sources.is_empty());
    assert_eq!(outcome.statistics.total.sources, 0);
}
|
|
91
|
+
|
|
92
|
+
/// Running `prepare_source_drafts` followed by `detect_prepared_drafts` must
/// agree with a direct `detect` call on clone count, total source and clone
/// statistics, and the set of retained source-content keys.
#[test]
fn prepared_drafts_detection_matches_direct_detection() {
    let content = "alpha beta gamma delta epsilon\n";
    let wrapped = format!("prefix\n{content}\nsuffix\n");
    let files = vec![source("a.js", content), source("b.js", &wrapped)];
    let options = Options {
        reporters: vec![String::from("json")],
        ..detection_options()
    };

    let direct = detect(files.clone(), &options);
    let drafts = prepare_source_drafts(files, &options);
    let prepared = detect_prepared_drafts(drafts, &options);

    assert_eq!(prepared.clones.len(), direct.clones.len());

    let prepared_total = &prepared.statistics.total;
    let direct_total = &direct.statistics.total;
    assert_eq!(prepared_total.sources, direct_total.sources);
    assert_eq!(prepared_total.clones, direct_total.clones);

    // Compare keys as ordered sets so map iteration order cannot matter.
    let prepared_keys: BTreeSet<_> = prepared.source_contents.keys().collect();
    let direct_keys: BTreeSet<_> = direct.source_contents.keys().collect();
    assert_eq!(prepared_keys, direct_keys);
}
|
|
121
|
+
|
|
122
|
+
/// Two TypeScript functions that differ only in a member name (`log` vs
/// `error`) must produce a clone whose fragments span at least fixture lines
/// 24-32 and 7-15 respectively.
#[test]
fn detects_typescript_template_tail_clone_before_member_name_difference() {
    // The fixture starts with a newline, so `function first` is on line 2.
    let content = r#"
function first(workUnitAsyncStorage, reportResult) {
console.log = function (...args: Array<any>) {
const store = workUnitAsyncStorage.getStore()
reportResult({
type: 'console-call',
method: 'log',
input: `${store ? '[Store]' : '[No Store]'}: ${args.join(' ')}`,
})
}

require('next/dist/server/node-environment-extensions/console-exit')

workUnitAsyncStorage.run({ type: 'request' } as WorkUnitStore, () => {
console.log('inside')
})
}

function second(workUnitAsyncStorage, reportResult) {
console.error = function (...args: Array<any>) {
const store = workUnitAsyncStorage.getStore()
reportResult({
type: 'console-call',
method: 'error',
input: `${store ? '[Store]' : '[No Store]'}: ${args.join(' ')}`,
})
}

require('next/dist/server/node-environment-extensions/console-exit')

workUnitAsyncStorage.run({ type: 'request' } as WorkUnitStore, () => {
console.error('inside')
})
}
"#;
    let options = Options {
        min_tokens: 50,
        min_lines: 5,
        ..Options::default()
    };
    let files = vec![source_with_format("console.ts", "typescript", content)];

    let result = detect(files, &options);

    // Fragment A is expected in the second function, fragment B in the first.
    let covers_expected_lines = |clone: &CloneMatch| {
        let (frag_a, frag_b) = (&clone.duplication_a, &clone.duplication_b);
        frag_a.start.line <= 24
            && frag_a.end.line >= 32
            && frag_b.start.line <= 7
            && frag_b.end.line >= 15
    };
    assert!(result.clones.iter().any(covers_expected_lines));
}
|
|
177
|
+
|
|
178
|
+
/// `dedup_exact_clones` must drop the repeated record and keep the remaining
/// two in their original order.
#[test]
fn deduplicates_exact_clone_records() {
    let record = |first_line: usize, last_line: usize| {
        clone_with_lines(
            "javascript",
            "a.js",
            first_line,
            last_line,
            "b.js",
            first_line,
            last_line,
        )
    };
    let mut clones = vec![record(1, 5), record(1, 5), record(6, 10)];

    dedup_exact_clones(&mut clones);

    // Two survivors, starting on lines 1 and 6 in that order.
    let start_lines: Vec<usize> = clones
        .iter()
        .map(|clone| clone.duplication_a.start.line)
        .collect();
    assert_eq!(start_lines, [1, 6]);
}
|
|
192
|
+
|
|
193
|
+
fn source(path: &str, content: &str) -> SourceFile {
|
|
194
|
+
source_with_format(path, "javascript", content)
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
fn source_with_format(path: &str, format: &str, content: &str) -> SourceFile {
|
|
198
|
+
SourceFile {
|
|
199
|
+
source_id: path.to_string(),
|
|
200
|
+
format: format.to_string(),
|
|
201
|
+
content: content.to_string(),
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
fn clone_with_lines(
|
|
206
|
+
format: &str,
|
|
207
|
+
source_a: &str,
|
|
208
|
+
start_a: usize,
|
|
209
|
+
end_a: usize,
|
|
210
|
+
source_b: &str,
|
|
211
|
+
start_b: usize,
|
|
212
|
+
end_b: usize,
|
|
213
|
+
) -> CloneMatch {
|
|
214
|
+
CloneMatch {
|
|
215
|
+
format: format.to_string(),
|
|
216
|
+
duplication_a: fragment(source_a, start_a, end_a),
|
|
217
|
+
duplication_b: fragment(source_b, start_b, end_b),
|
|
218
|
+
tokens: 20,
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
fn fragment(source_id: &str, start: usize, end: usize) -> Fragment {
|
|
223
|
+
Fragment {
|
|
224
|
+
source_id: source_id.to_string(),
|
|
225
|
+
start: location(start, 1, start),
|
|
226
|
+
end: location(end, 1, end),
|
|
227
|
+
range: [start, end],
|
|
228
|
+
blame: None,
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
fn location(line: usize, column: usize, position: usize) -> Location {
|
|
233
|
+
Location {
|
|
234
|
+
line,
|
|
235
|
+
column,
|
|
236
|
+
position,
|
|
237
|
+
}
|
|
238
|
+
}
|
package/src/detector.rs
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
use std::collections::HashMap;
|
|
2
|
+
|
|
3
|
+
use rayon::prelude::*;
|
|
4
|
+
use rustc_hash::FxHashSet;
|
|
5
|
+
|
|
6
|
+
use crate::cli::Options;
|
|
7
|
+
use crate::files::SourceFile;
|
|
8
|
+
|
|
9
|
+
mod matching;
|
|
10
|
+
mod model;
|
|
11
|
+
mod prepare;
|
|
12
|
+
mod skip_local;
|
|
13
|
+
mod statistics;
|
|
14
|
+
mod store;
|
|
15
|
+
#[cfg(test)]
|
|
16
|
+
mod tests;
|
|
17
|
+
|
|
18
|
+
#[cfg(test)]
|
|
19
|
+
pub use model::FormatStatistic;
|
|
20
|
+
pub(crate) use model::PreparedSourceDraft;
|
|
21
|
+
pub use model::{
|
|
22
|
+
BlamedLine, BlamedLines, CloneMatch, DetectionResult, Fragment, SkippedClone, SourceSummary,
|
|
23
|
+
StatisticRow, Statistics,
|
|
24
|
+
};
|
|
25
|
+
pub use statistics::{Statistic, clone_lines};
|
|
26
|
+
pub use store::{MemoryStore, MemoryStoreError};
|
|
27
|
+
|
|
28
|
+
use matching::detect_format;
|
|
29
|
+
use model::{FormatId, PreparedSource, SourceId, TokenStream};
|
|
30
|
+
use prepare::{assign_formats, prepare_file_maps};
|
|
31
|
+
use statistics::{finalize_percentages, update_clone_statistics, update_source_statistics};
|
|
32
|
+
|
|
33
|
+
/// Incremental detector facade for native integrations.
|
|
34
|
+
///
|
|
35
|
+
/// For one-shot detection, prefer `detect_source_files` or
|
|
36
|
+
/// `detect_clones_and_statistics`. Use this type when an integration wants to
|
|
37
|
+
/// keep options and previously submitted in-memory sources together.
|
|
38
|
+
#[derive(Clone, Debug)]
|
|
39
|
+
pub struct Detector {
|
|
40
|
+
options: Options,
|
|
41
|
+
sources: Vec<SourceFile>,
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
impl Detector {
|
|
45
|
+
/// Create an empty detector with the provided options.
|
|
46
|
+
pub fn new(options: Options) -> Self {
|
|
47
|
+
Self {
|
|
48
|
+
options,
|
|
49
|
+
sources: Vec::new(),
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/// Create a detector preloaded with in-memory sources.
|
|
54
|
+
pub fn with_sources(options: Options, sources: Vec<SourceFile>) -> Self {
|
|
55
|
+
Self { options, sources }
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/// Return detector options.
|
|
59
|
+
pub fn options(&self) -> &Options {
|
|
60
|
+
&self.options
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/// Mutably access detector options.
|
|
64
|
+
pub fn options_mut(&mut self) -> &mut Options {
|
|
65
|
+
&mut self.options
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/// Return sources currently held by this detector.
|
|
69
|
+
pub fn sources(&self) -> &[SourceFile] {
|
|
70
|
+
&self.sources
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/// Remove all remembered sources.
|
|
74
|
+
pub fn clear(&mut self) {
|
|
75
|
+
self.sources.clear();
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/// Add one source and return clones involving that new source.
|
|
79
|
+
pub fn detect(
|
|
80
|
+
&mut self,
|
|
81
|
+
source_id: impl Into<String>,
|
|
82
|
+
text: impl Into<String>,
|
|
83
|
+
format: impl Into<String>,
|
|
84
|
+
) -> Vec<CloneMatch> {
|
|
85
|
+
self.detect_source_file(SourceFile {
|
|
86
|
+
source_id: source_id.into(),
|
|
87
|
+
format: format.into(),
|
|
88
|
+
content: text.into(),
|
|
89
|
+
})
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/// Add one prepared source and return clones involving that new source.
|
|
93
|
+
pub fn detect_source_file(&mut self, source: SourceFile) -> Vec<CloneMatch> {
|
|
94
|
+
let source_id = source.source_id.clone();
|
|
95
|
+
self.sources.push(source);
|
|
96
|
+
let result = detect(self.sources.clone(), &self.options);
|
|
97
|
+
result
|
|
98
|
+
.clones
|
|
99
|
+
.into_iter()
|
|
100
|
+
.filter(|clone| {
|
|
101
|
+
clone.duplication_a.source_id == source_id
|
|
102
|
+
|| clone.duplication_b.source_id == source_id
|
|
103
|
+
})
|
|
104
|
+
.collect()
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/// Run one-shot detection against the provided prepared sources.
|
|
108
|
+
pub fn detect_files(&self, files: Vec<SourceFile>) -> DetectionResult {
|
|
109
|
+
detect(files, &self.options)
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
pub fn detect(files: Vec<SourceFile>, options: &Options) -> DetectionResult {
|
|
114
|
+
detect_prepared_drafts(prepare_source_drafts(files, options), options)
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
pub(crate) fn prepare_source_drafts(
|
|
118
|
+
files: Vec<SourceFile>,
|
|
119
|
+
options: &Options,
|
|
120
|
+
) -> Vec<PreparedSourceDraft> {
|
|
121
|
+
files
|
|
122
|
+
.into_par_iter()
|
|
123
|
+
.map(|file| prepare_file_maps(file, options))
|
|
124
|
+
.collect::<Vec<_>>()
|
|
125
|
+
.into_iter()
|
|
126
|
+
.flatten()
|
|
127
|
+
.collect::<Vec<_>>()
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
pub(crate) fn detect_prepared_drafts(
|
|
131
|
+
prepared_drafts: Vec<PreparedSourceDraft>,
|
|
132
|
+
options: &Options,
|
|
133
|
+
) -> DetectionResult {
|
|
134
|
+
let (format_ids, format_names) = assign_formats(&prepared_drafts);
|
|
135
|
+
let prepared_files = prepared_drafts
|
|
136
|
+
.into_iter()
|
|
137
|
+
.enumerate()
|
|
138
|
+
.map(|(idx, draft)| PreparedSource {
|
|
139
|
+
meta: draft.meta,
|
|
140
|
+
stream: TokenStream {
|
|
141
|
+
source_id: SourceId(idx),
|
|
142
|
+
format_id: format_ids[idx],
|
|
143
|
+
hashes: draft.hashes,
|
|
144
|
+
spans: draft.spans,
|
|
145
|
+
},
|
|
146
|
+
})
|
|
147
|
+
.collect::<Vec<_>>();
|
|
148
|
+
|
|
149
|
+
let mut statistics = Statistics::default();
|
|
150
|
+
let mut sources = Vec::new();
|
|
151
|
+
let mut source_contents = HashMap::new();
|
|
152
|
+
let mut source_indices_by_format = vec![Vec::new(); format_names.len()];
|
|
153
|
+
let include_source_contents = options
|
|
154
|
+
.reporters
|
|
155
|
+
.iter()
|
|
156
|
+
.any(|reporter| matches!(reporter.as_str(), "json" | "xml" | "html" | "consoleFull"));
|
|
157
|
+
|
|
158
|
+
for (idx, prepared) in prepared_files.iter().enumerate() {
|
|
159
|
+
if prepared.stream.spans.is_empty() {
|
|
160
|
+
continue;
|
|
161
|
+
}
|
|
162
|
+
update_source_statistics(
|
|
163
|
+
&mut statistics,
|
|
164
|
+
&prepared.meta.source_id,
|
|
165
|
+
&prepared.meta.format,
|
|
166
|
+
prepared.meta.lines,
|
|
167
|
+
prepared.meta.tokens,
|
|
168
|
+
);
|
|
169
|
+
sources.push(SourceSummary {
|
|
170
|
+
path: prepared.meta.source_id.clone(),
|
|
171
|
+
format: prepared.meta.format.clone(),
|
|
172
|
+
lines: prepared.meta.lines,
|
|
173
|
+
tokens: prepared.meta.tokens,
|
|
174
|
+
});
|
|
175
|
+
if include_source_contents {
|
|
176
|
+
source_contents.insert(
|
|
177
|
+
prepared.meta.source_id.clone(),
|
|
178
|
+
prepared.meta.content.clone(),
|
|
179
|
+
);
|
|
180
|
+
}
|
|
181
|
+
source_indices_by_format[prepared.stream.format_id.0].push(idx);
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
let format_results = source_indices_by_format
|
|
185
|
+
.par_iter()
|
|
186
|
+
.enumerate()
|
|
187
|
+
.map(|(format_id, source_indices)| {
|
|
188
|
+
detect_format(
|
|
189
|
+
FormatId(format_id),
|
|
190
|
+
source_indices,
|
|
191
|
+
&prepared_files,
|
|
192
|
+
&format_names,
|
|
193
|
+
options,
|
|
194
|
+
)
|
|
195
|
+
})
|
|
196
|
+
.collect::<Vec<_>>();
|
|
197
|
+
|
|
198
|
+
let mut clones = Vec::new();
|
|
199
|
+
let mut skipped_clones = Vec::new();
|
|
200
|
+
for format_result in format_results {
|
|
201
|
+
clones.extend(format_result.clones);
|
|
202
|
+
skipped_clones.extend(format_result.skipped_clones);
|
|
203
|
+
}
|
|
204
|
+
dedup_exact_clones(&mut clones);
|
|
205
|
+
for clone in &clones {
|
|
206
|
+
update_clone_statistics(&mut statistics, clone);
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
finalize_percentages(&mut statistics);
|
|
210
|
+
|
|
211
|
+
DetectionResult {
|
|
212
|
+
clones,
|
|
213
|
+
skipped_clones,
|
|
214
|
+
statistics,
|
|
215
|
+
sources,
|
|
216
|
+
source_contents,
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
fn dedup_exact_clones(clones: &mut Vec<CloneMatch>) {
|
|
221
|
+
let mut seen = FxHashSet::default();
|
|
222
|
+
clones.retain(|clone| seen.insert(CloneDedupKey::from(clone)));
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
#[derive(Hash, Eq, PartialEq)]
|
|
226
|
+
struct CloneDedupKey {
|
|
227
|
+
format: String,
|
|
228
|
+
duplication_a: FragmentDedupKey,
|
|
229
|
+
duplication_b: FragmentDedupKey,
|
|
230
|
+
tokens: usize,
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
impl From<&CloneMatch> for CloneDedupKey {
|
|
234
|
+
fn from(clone: &CloneMatch) -> Self {
|
|
235
|
+
Self {
|
|
236
|
+
format: clone.format.clone(),
|
|
237
|
+
duplication_a: FragmentDedupKey::from(&clone.duplication_a),
|
|
238
|
+
duplication_b: FragmentDedupKey::from(&clone.duplication_b),
|
|
239
|
+
tokens: clone.tokens,
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
/// Hashable identity of one side of a clone.
///
/// Holds the source id, the start and end line/column pairs, and the
/// two-element `range`.
#[derive(PartialEq, Eq, Hash)]
struct FragmentDedupKey {
    /// Id of the source the fragment belongs to.
    source_id: String,
    /// Line of the fragment start.
    start_line: usize,
    /// Column of the fragment start.
    start_column: usize,
    /// Line of the fragment end.
    end_line: usize,
    /// Column of the fragment end.
    end_column: usize,
    /// The fragment's `range` pair, copied as-is.
    range: [usize; 2],
}
|
|
253
|
+
|
|
254
|
+
impl From<&Fragment> for FragmentDedupKey {
|
|
255
|
+
fn from(fragment: &Fragment) -> Self {
|
|
256
|
+
Self {
|
|
257
|
+
source_id: fragment.source_id.clone(),
|
|
258
|
+
start_line: fragment.start.line,
|
|
259
|
+
start_column: fragment.start.column,
|
|
260
|
+
end_line: fragment.end.line,
|
|
261
|
+
end_column: fragment.end.column,
|
|
262
|
+
range: fragment.range,
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
}
|