@lamentis/naome 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +2 -2
- package/README.md +108 -47
- package/bin/naome.js +16 -1
- package/crates/naome-cli/Cargo.toml +1 -1
- package/crates/naome-cli/src/dispatcher.rs +6 -2
- package/crates/naome-cli/src/main.rs +35 -23
- package/crates/naome-cli/src/quality_commands.rs +230 -11
- package/crates/naome-cli/src/workflow_commands.rs +21 -1
- package/crates/naome-core/Cargo.toml +1 -1
- package/crates/naome-core/src/git.rs +4 -2
- package/crates/naome-core/src/install_plan.rs +2 -0
- package/crates/naome-core/src/lib.rs +11 -7
- package/crates/naome-core/src/quality/baseline.rs +8 -0
- package/crates/naome-core/src/quality/cache.rs +153 -0
- package/crates/naome-core/src/quality/checks/duplicate_blocks.rs +25 -11
- package/crates/naome-core/src/quality/checks/near_duplicates.rs +4 -2
- package/crates/naome-core/src/quality/checks.rs +7 -8
- package/crates/naome-core/src/quality/cleanup.rs +36 -3
- package/crates/naome-core/src/quality/mod.rs +57 -9
- package/crates/naome-core/src/quality/scanner/analysis/normalize.rs +78 -0
- package/crates/naome-core/src/quality/scanner/analysis.rs +160 -0
- package/crates/naome-core/src/quality/scanner/repo_paths.rs +39 -3
- package/crates/naome-core/src/quality/scanner.rs +193 -220
- package/crates/naome-core/src/quality/semantic/checks.rs +134 -0
- package/crates/naome-core/src/quality/semantic/extract.rs +158 -0
- package/crates/naome-core/src/quality/semantic/model.rs +85 -0
- package/crates/naome-core/src/quality/semantic/route.rs +52 -0
- package/crates/naome-core/src/quality/semantic.rs +68 -0
- package/crates/naome-core/src/quality/structure/checks/directory.rs +9 -19
- package/crates/naome-core/src/quality/structure/checks.rs +1 -1
- package/crates/naome-core/src/quality/structure/classify.rs +52 -0
- package/crates/naome-core/src/quality/structure/mod.rs +2 -2
- package/crates/naome-core/src/quality/structure/model.rs +8 -1
- package/crates/naome-core/src/quality/types.rs +40 -2
- package/crates/naome-core/src/route/builtin_checks.rs +1 -15
- package/crates/naome-core/src/workflow/doctor.rs +144 -0
- package/crates/naome-core/src/workflow/mod.rs +2 -0
- package/crates/naome-core/src/workflow/mutation.rs +1 -2
- package/crates/naome-core/tests/install_plan.rs +2 -0
- package/crates/naome-core/tests/quality.rs +14 -5
- package/crates/naome-core/tests/quality_performance.rs +231 -0
- package/crates/naome-core/tests/quality_structure_policy.rs +19 -0
- package/crates/naome-core/tests/route_user_diff.rs +10 -6
- package/crates/naome-core/tests/semantic_legacy.rs +140 -0
- package/crates/naome-core/tests/workflow_doctor.rs +24 -0
- package/crates/naome-core/tests/workflow_policy.rs +6 -1
- package/installer/git-boundary.js +1 -0
- package/native/darwin-arm64/naome +0 -0
- package/native/linux-x64/naome +0 -0
- package/package.json +1 -1
- package/templates/naome-root/.naome/bin/check-harness-health.js +2 -2
- package/templates/naome-root/.naome/bin/check-task-state.js +2 -2
- package/templates/naome-root/.naome/bin/naome.js +11 -4
- package/templates/naome-root/.naome/manifest.json +2 -2
- package/templates/naome-root/.naomeignore +1 -0
- package/templates/naome-root/docs/naome/agent-workflow.md +16 -14
- package/templates/naome-root/docs/naome/repository-quality.md +63 -4
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
mod analysis;
|
|
1
2
|
mod repo_paths;
|
|
2
3
|
|
|
3
4
|
use std::collections::{HashMap, HashSet};
|
|
@@ -7,13 +8,15 @@ use std::path::Path;
|
|
|
7
8
|
use sha2::{Digest, Sha256};
|
|
8
9
|
|
|
9
10
|
use crate::{git, models::NaomeError, paths};
|
|
10
|
-
use repo_paths::added_lines_by_path;
|
|
11
11
|
pub(crate) use repo_paths::collect_repo_paths;
|
|
12
|
+
use repo_paths::{added_lines_by_path, tracked_blob_hashes};
|
|
12
13
|
|
|
14
|
+
use super::cache::QualityCache;
|
|
13
15
|
use super::types::{
|
|
14
16
|
default_generated_paths, default_ignored_paths, QualityLimits, QualityMode,
|
|
15
17
|
RepositoryQualityConfig,
|
|
16
18
|
};
|
|
19
|
+
use analysis::analyze_repo_file;
|
|
17
20
|
|
|
18
21
|
#[derive(Debug, Clone)]
|
|
19
22
|
pub struct QualityContext {
|
|
@@ -23,6 +26,11 @@ pub struct QualityContext {
|
|
|
23
26
|
pub repo_paths: Vec<String>,
|
|
24
27
|
pub target_paths: HashSet<String>,
|
|
25
28
|
pub files: Vec<FileAnalysis>,
|
|
29
|
+
pub comparison_files: Vec<FileAnalysis>,
|
|
30
|
+
pub truncated: bool,
|
|
31
|
+
pub reason_codes: Vec<String>,
|
|
32
|
+
pub cache_hits: usize,
|
|
33
|
+
pub cache_misses: usize,
|
|
26
34
|
}
|
|
27
35
|
|
|
28
36
|
impl QualityContext {
|
|
@@ -30,8 +38,12 @@ impl QualityContext {
|
|
|
30
38
|
self.files.iter().map(|file| file.path.clone()).collect()
|
|
31
39
|
}
|
|
32
40
|
|
|
41
|
+
pub fn comparison_candidate_files(&self) -> impl Iterator<Item = &FileAnalysis> {
|
|
42
|
+
self.files.iter().chain(self.comparison_files.iter())
|
|
43
|
+
}
|
|
44
|
+
|
|
33
45
|
pub fn applies_to(&self, path: &str) -> bool {
|
|
34
|
-
self.mode
|
|
46
|
+
!self.mode.is_changed() || self.target_paths.contains(path)
|
|
35
47
|
}
|
|
36
48
|
|
|
37
49
|
pub fn check_applies_to(&self, check_id: &str, path: &str) -> bool {
|
|
@@ -43,22 +55,23 @@ impl QualityContext {
|
|
|
43
55
|
}
|
|
44
56
|
}
|
|
45
57
|
|
|
46
|
-
/// Per-file result of the repository scan.
/// Serialize/Deserialize support persisting analyses between runs
/// (presumably via the quality cache in `cache.rs` — confirm there).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct FileAnalysis {
    // Repository-relative path of the analyzed file.
    pub path: String,
    // Total number of lines in the file.
    pub line_count: usize,
    // Lines attributed to this file in the current change set
    // (0 when no diff information is available).
    pub added_lines: usize,
    // NOTE(review): appears to hold the file's lines as read, before
    // normalization — confirm population in `analysis.rs`.
    pub raw_lines: Vec<String>,
    // Lines after normalization, keyed to their original line numbers.
    pub normalized_lines: Vec<NormalizedLine>,
    // Symbols (e.g. enums, impls) detected in the file.
    pub symbols: Vec<SymbolAnalysis>,
}
|
|
54
67
|
|
|
55
|
-
/// One normalized source line paired with the line number it came from
/// in the original file.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct NormalizedLine {
    // Line number of this line in the original (pre-normalization) file.
    pub line_number: usize,
    // The normalized text of the line.
    pub value: String,
}
|
|
60
73
|
|
|
61
|
-
#[derive(Debug, Clone)]
|
|
74
|
+
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
|
62
75
|
pub struct SymbolAnalysis {
|
|
63
76
|
pub kind: String,
|
|
64
77
|
pub name: String,
|
|
@@ -85,36 +98,41 @@ pub fn scan_repository(
|
|
|
85
98
|
whole_repo_paths.sort();
|
|
86
99
|
whole_repo_paths.dedup();
|
|
87
100
|
let scan_paths = match mode {
|
|
88
|
-
QualityMode::
|
|
89
|
-
QualityMode::Report => whole_repo_paths.clone(),
|
|
101
|
+
QualityMode::ChangedFast => changed_paths.clone(),
|
|
102
|
+
QualityMode::Report | QualityMode::DeepReport => whole_repo_paths.clone(),
|
|
90
103
|
};
|
|
91
|
-
let added_lines =
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
104
|
+
let added_lines = if mode.is_changed() {
|
|
105
|
+
added_lines_by_path(root, &target_paths)?
|
|
106
|
+
} else {
|
|
107
|
+
HashMap::new()
|
|
108
|
+
};
|
|
109
|
+
let comparison_added_lines = HashMap::new();
|
|
110
|
+
let blob_hashes = tracked_blob_hashes(root).unwrap_or_default();
|
|
111
|
+
let cache = QualityCache::new(root, &config);
|
|
112
|
+
let ignore_patterns = CompiledPatternSet::new(ignore_patterns(root, &config));
|
|
113
|
+
let mut state = ScanState::default();
|
|
114
|
+
let mut files = analyze_primary_paths(
|
|
115
|
+
root,
|
|
116
|
+
mode,
|
|
117
|
+
&scan_paths,
|
|
118
|
+
&added_lines,
|
|
119
|
+
&blob_hashes,
|
|
120
|
+
&cache,
|
|
121
|
+
&ignore_patterns,
|
|
122
|
+
&mut state,
|
|
123
|
+
);
|
|
124
|
+
let comparison_files = analyze_comparison_paths(
|
|
125
|
+
root,
|
|
126
|
+
mode,
|
|
127
|
+
&whole_repo_paths,
|
|
128
|
+
&target_paths,
|
|
129
|
+
&comparison_added_lines,
|
|
130
|
+
&blob_hashes,
|
|
131
|
+
&cache,
|
|
132
|
+
&ignore_patterns,
|
|
133
|
+
files.len(),
|
|
134
|
+
&mut state,
|
|
135
|
+
);
|
|
118
136
|
|
|
119
137
|
files.sort_by(|left, right| left.path.cmp(&right.path));
|
|
120
138
|
Ok(QualityContext {
|
|
@@ -124,215 +142,145 @@ pub fn scan_repository(
|
|
|
124
142
|
repo_paths: whole_repo_paths,
|
|
125
143
|
target_paths,
|
|
126
144
|
files,
|
|
145
|
+
comparison_files,
|
|
146
|
+
truncated: state.truncated,
|
|
147
|
+
reason_codes: state.reason_codes,
|
|
148
|
+
cache_hits: state.cache_hits,
|
|
149
|
+
cache_misses: state.cache_misses,
|
|
127
150
|
})
|
|
128
151
|
}
|
|
129
152
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
format!("sha256:{:x}", hasher.finalize())
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
fn analyze_repo_file(
|
|
140
|
-
root: &Path,
|
|
141
|
-
path: &str,
|
|
142
|
-
added_lines: &HashMap<String, usize>,
|
|
143
|
-
) -> Option<FileAnalysis> {
|
|
144
|
-
let full_path = root.join(path);
|
|
145
|
-
if !full_path.is_file() || is_binary_extension(path) {
|
|
146
|
-
return None;
|
|
147
|
-
}
|
|
148
|
-
let content = fs::read_to_string(&full_path).ok()?;
|
|
149
|
-
Some(analyze_file(
|
|
150
|
-
path,
|
|
151
|
-
&content,
|
|
152
|
-
added_lines.get(path).copied().unwrap_or(0),
|
|
153
|
-
))
|
|
153
|
+
#[derive(Default)]
|
|
154
|
+
struct ScanState {
|
|
155
|
+
cache_hits: usize,
|
|
156
|
+
cache_misses: usize,
|
|
157
|
+
truncated: bool,
|
|
158
|
+
reason_codes: Vec<String>,
|
|
154
159
|
}
|
|
155
160
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
.map(|(line_number, value)| NormalizedLine { line_number, value })
|
|
163
|
-
.collect::<Vec<_>>();
|
|
164
|
-
let symbols = detect_symbols(&lines);
|
|
165
|
-
FileAnalysis {
|
|
166
|
-
path: path.to_string(),
|
|
167
|
-
line_count: lines.len(),
|
|
168
|
-
added_lines,
|
|
169
|
-
normalized_lines,
|
|
170
|
-
symbols,
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
fn detect_symbols(lines: &[&str]) -> Vec<SymbolAnalysis> {
|
|
175
|
-
let mut starts = Vec::new();
|
|
176
|
-
for (index, line) in lines.iter().enumerate() {
|
|
177
|
-
let indent = indentation(line);
|
|
178
|
-
if let Some((kind, name)) = symbol_start(line.trim()) {
|
|
179
|
-
starts.push((index, indent, kind, name));
|
|
161
|
+
impl ScanState {
|
|
162
|
+
fn record_cache_result(&mut self, cache_hit: bool) {
|
|
163
|
+
if cache_hit {
|
|
164
|
+
self.cache_hits += 1;
|
|
165
|
+
} else {
|
|
166
|
+
self.cache_misses += 1;
|
|
180
167
|
}
|
|
181
168
|
}
|
|
182
169
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
.iter()
|
|
187
|
-
.skip(position + 1)
|
|
188
|
-
.find(|(_, next_indent, _, _)| next_indent <= indent)
|
|
189
|
-
.map(|(next_index, _, _, _)| next_index.saturating_sub(1))
|
|
190
|
-
.unwrap_or_else(|| lines.len().saturating_sub(1));
|
|
191
|
-
let normalized_body = lines[*start_index..=end_index]
|
|
192
|
-
.iter()
|
|
193
|
-
.filter_map(|line| normalize_line(line))
|
|
194
|
-
.collect::<Vec<_>>();
|
|
195
|
-
let tokens = normalized_body
|
|
196
|
-
.iter()
|
|
197
|
-
.flat_map(|line| token_set(line))
|
|
198
|
-
.collect::<HashSet<_>>();
|
|
199
|
-
symbols.push(SymbolAnalysis {
|
|
200
|
-
kind: kind.clone(),
|
|
201
|
-
name: name.clone(),
|
|
202
|
-
start_line: start_index + 1,
|
|
203
|
-
end_line: end_index + 1,
|
|
204
|
-
indent: *indent,
|
|
205
|
-
tokens,
|
|
206
|
-
});
|
|
170
|
+
fn truncate(&mut self, reason: &str) {
|
|
171
|
+
self.truncated = true;
|
|
172
|
+
add_reason(&mut self.reason_codes, reason);
|
|
207
173
|
}
|
|
208
|
-
symbols
|
|
209
174
|
}
|
|
210
175
|
|
|
211
|
-
fn
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
(
|
|
224
|
-
|
|
225
|
-
("enum", "pub enum "),
|
|
226
|
-
("impl", "impl "),
|
|
227
|
-
];
|
|
228
|
-
for (kind, prefix) in candidates {
|
|
229
|
-
if let Some(rest) = trimmed.strip_prefix(prefix) {
|
|
230
|
-
return Some((kind.to_string(), symbol_name(rest)));
|
|
176
|
+
fn analyze_primary_paths(
|
|
177
|
+
root: &Path,
|
|
178
|
+
mode: QualityMode,
|
|
179
|
+
paths: &[String],
|
|
180
|
+
added_lines: &HashMap<String, usize>,
|
|
181
|
+
blob_hashes: &HashMap<String, String>,
|
|
182
|
+
cache: &QualityCache,
|
|
183
|
+
ignore_patterns: &CompiledPatternSet,
|
|
184
|
+
state: &mut ScanState,
|
|
185
|
+
) -> Vec<FileAnalysis> {
|
|
186
|
+
let mut files = Vec::new();
|
|
187
|
+
for path in paths {
|
|
188
|
+
if ignore_patterns.matches(path) {
|
|
189
|
+
continue;
|
|
231
190
|
}
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
191
|
+
if files.len() >= max_scanned_files(mode) {
|
|
192
|
+
state.truncate("max_scanned_files");
|
|
193
|
+
break;
|
|
194
|
+
}
|
|
195
|
+
if file_exceeds_budget(root, path, mode) {
|
|
196
|
+
state.truncate("max_file_bytes");
|
|
197
|
+
continue;
|
|
198
|
+
}
|
|
199
|
+
if let Some((file, cache_hit)) =
|
|
200
|
+
analyze_repo_file(root, path, added_lines, blob_hashes, cache, !mode.is_changed())
|
|
201
|
+
{
|
|
202
|
+
state.record_cache_result(cache_hit);
|
|
203
|
+
files.push(file);
|
|
239
204
|
}
|
|
240
205
|
}
|
|
241
|
-
|
|
206
|
+
files
|
|
242
207
|
}
|
|
243
208
|
|
|
244
|
-
fn
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
{
|
|
259
|
-
return None;
|
|
209
|
+
fn analyze_comparison_paths(
|
|
210
|
+
root: &Path,
|
|
211
|
+
mode: QualityMode,
|
|
212
|
+
paths: &[String],
|
|
213
|
+
target_paths: &HashSet<String>,
|
|
214
|
+
added_lines: &HashMap<String, usize>,
|
|
215
|
+
blob_hashes: &HashMap<String, String>,
|
|
216
|
+
cache: &QualityCache,
|
|
217
|
+
ignore_patterns: &CompiledPatternSet,
|
|
218
|
+
primary_count: usize,
|
|
219
|
+
state: &mut ScanState,
|
|
220
|
+
) -> Vec<FileAnalysis> {
|
|
221
|
+
if !mode.is_changed() {
|
|
222
|
+
return Vec::new();
|
|
260
223
|
}
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
let mut quote = '\0';
|
|
265
|
-
let mut previous_space = false;
|
|
266
|
-
for character in trimmed.chars() {
|
|
267
|
-
if in_string {
|
|
268
|
-
if character == quote {
|
|
269
|
-
in_string = false;
|
|
270
|
-
normalized.push('S');
|
|
271
|
-
previous_space = false;
|
|
272
|
-
}
|
|
224
|
+
let mut files = Vec::new();
|
|
225
|
+
for path in paths {
|
|
226
|
+
if target_paths.contains(path) || ignore_patterns.matches(path) {
|
|
273
227
|
continue;
|
|
274
228
|
}
|
|
275
|
-
if
|
|
276
|
-
|
|
277
|
-
quote = character;
|
|
229
|
+
if file_exceeds_budget(root, path, mode) {
|
|
230
|
+
state.truncate("max_file_bytes");
|
|
278
231
|
continue;
|
|
279
232
|
}
|
|
280
|
-
let
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
normalized.push(next);
|
|
290
|
-
}
|
|
291
|
-
previous_space = true;
|
|
292
|
-
} else {
|
|
293
|
-
normalized.push(next);
|
|
294
|
-
previous_space = false;
|
|
233
|
+
if let Some((file, cache_hit)) =
|
|
234
|
+
analyze_repo_file(root, path, added_lines, blob_hashes, cache, true)
|
|
235
|
+
{
|
|
236
|
+
state.record_cache_result(cache_hit);
|
|
237
|
+
files.push(file);
|
|
238
|
+
}
|
|
239
|
+
if primary_count.saturating_add(files.len()) >= max_scanned_files(mode) {
|
|
240
|
+
state.truncate("max_scanned_files");
|
|
241
|
+
break;
|
|
295
242
|
}
|
|
296
243
|
}
|
|
297
|
-
|
|
298
|
-
|
|
244
|
+
files.sort_by(|left, right| left.path.cmp(&right.path));
|
|
245
|
+
files.dedup_by(|left, right| left.path == right.path);
|
|
246
|
+
files
|
|
299
247
|
}
|
|
300
248
|
|
|
301
|
-
fn
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
249
|
+
pub fn stable_fingerprint(parts: &[&str]) -> String {
|
|
250
|
+
let mut hasher = Sha256::new();
|
|
251
|
+
for part in parts {
|
|
252
|
+
hasher.update(part.as_bytes());
|
|
253
|
+
hasher.update(b"\0");
|
|
254
|
+
}
|
|
255
|
+
format!("sha256:{:x}", hasher.finalize())
|
|
307
256
|
}
|
|
308
257
|
|
|
309
|
-
fn
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
&& value.chars().filter(|character| *character == '"').count() >= 2
|
|
258
|
+
fn max_scanned_files(mode: QualityMode) -> usize {
|
|
259
|
+
match mode {
|
|
260
|
+
QualityMode::ChangedFast => 2_000,
|
|
261
|
+
QualityMode::Report => 5_000,
|
|
262
|
+
QualityMode::DeepReport => usize::MAX,
|
|
263
|
+
}
|
|
316
264
|
}
|
|
317
265
|
|
|
318
|
-
fn
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
266
|
+
fn max_file_bytes(mode: QualityMode) -> u64 {
|
|
267
|
+
match mode {
|
|
268
|
+
QualityMode::ChangedFast => 512 * 1024,
|
|
269
|
+
QualityMode::Report => 1024 * 1024,
|
|
270
|
+
QualityMode::DeepReport => 8 * 1024 * 1024,
|
|
271
|
+
}
|
|
322
272
|
}
|
|
323
273
|
|
|
324
|
-
fn
|
|
325
|
-
|
|
326
|
-
.
|
|
327
|
-
|
|
328
|
-
.collect()
|
|
274
|
+
fn file_exceeds_budget(root: &Path, path: &str, mode: QualityMode) -> bool {
|
|
275
|
+
fs::metadata(root.join(path)).map_or(false, |metadata| {
|
|
276
|
+
metadata.is_file() && metadata.len() > max_file_bytes(mode)
|
|
277
|
+
})
|
|
329
278
|
}
|
|
330
279
|
|
|
331
|
-
fn
|
|
332
|
-
|
|
333
|
-
.
|
|
334
|
-
|
|
335
|
-
.sum()
|
|
280
|
+
/// Append `reason` to the reason-code list unless it is already present,
/// keeping the list duplicate-free while preserving insertion order.
fn add_reason(reason_codes: &mut Vec<String>, reason: &str) {
    let already_recorded = reason_codes.iter().any(|existing| existing == reason);
    if !already_recorded {
        reason_codes.push(reason.to_string());
    }
}
|
|
337
285
|
|
|
338
286
|
fn ignore_patterns(root: &Path, config: &RepositoryQualityConfig) -> Vec<String> {
|
|
@@ -364,16 +312,41 @@ fn read_naomeignore_patterns(root: &Path) -> Vec<String> {
|
|
|
364
312
|
.collect()
|
|
365
313
|
}
|
|
366
314
|
|
|
367
|
-
|
|
368
|
-
|
|
315
|
+
struct CompiledPatternSet {
|
|
316
|
+
exact: HashSet<String>,
|
|
317
|
+
prefixes: Vec<String>,
|
|
318
|
+
wildcards: Vec<String>,
|
|
369
319
|
}
|
|
370
320
|
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
321
|
+
impl CompiledPatternSet {
|
|
322
|
+
fn new(patterns: Vec<String>) -> Self {
|
|
323
|
+
let mut exact = HashSet::new();
|
|
324
|
+
let mut prefixes = Vec::new();
|
|
325
|
+
let mut wildcards = Vec::new();
|
|
326
|
+
for pattern in patterns {
|
|
327
|
+
let normalized = pattern.trim_start_matches("./").replace('\\', "/");
|
|
328
|
+
if normalized.ends_with("/**") && !normalized[..normalized.len() - 3].contains('*') {
|
|
329
|
+
prefixes.push(normalized.trim_end_matches("**").to_string());
|
|
330
|
+
} else if normalized.contains('*') || normalized.contains('?') {
|
|
331
|
+
wildcards.push(normalized);
|
|
332
|
+
} else {
|
|
333
|
+
exact.insert(normalized);
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
prefixes.sort();
|
|
337
|
+
prefixes.dedup();
|
|
338
|
+
wildcards.sort();
|
|
339
|
+
wildcards.dedup();
|
|
340
|
+
Self {
|
|
341
|
+
exact,
|
|
342
|
+
prefixes,
|
|
343
|
+
wildcards,
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
fn matches(&self, path: &str) -> bool {
|
|
348
|
+
self.exact.contains(path)
|
|
349
|
+
|| self.prefixes.iter().any(|prefix| path.starts_with(prefix))
|
|
350
|
+
|| paths::matches_any(path, &self.wildcards)
|
|
351
|
+
}
|
|
379
352
|
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
use std::collections::{BTreeMap, BTreeSet};
|
|
2
|
+
|
|
3
|
+
use super::model::{ObjectCandidate, SemanticFinding};
|
|
4
|
+
use super::route::{cleanup_route, finding_mode};
|
|
5
|
+
use crate::quality::scanner::{stable_fingerprint, QualityContext};
|
|
6
|
+
|
|
7
|
+
pub(super) fn copied_config_findings(
|
|
8
|
+
context: &QualityContext,
|
|
9
|
+
candidates: &[ObjectCandidate],
|
|
10
|
+
) -> Vec<SemanticFinding> {
|
|
11
|
+
let mut by_shape: BTreeMap<&str, Vec<&ObjectCandidate>> = BTreeMap::new();
|
|
12
|
+
for candidate in candidates
|
|
13
|
+
.iter()
|
|
14
|
+
.filter(|candidate| candidate.keys.len() >= 4 && candidate.line_count >= 8)
|
|
15
|
+
.filter(|candidate| has_config_shape_signal(&candidate.keys))
|
|
16
|
+
{
|
|
17
|
+
by_shape
|
|
18
|
+
.entry(candidate.shape_hash.as_str())
|
|
19
|
+
.or_default()
|
|
20
|
+
.push(candidate);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
by_shape
|
|
24
|
+
.into_values()
|
|
25
|
+
.filter(|group| group.len() >= 2)
|
|
26
|
+
.filter(|group| group_applies_to_mode(context, group))
|
|
27
|
+
.map(|group| {
|
|
28
|
+
let occurrences = group
|
|
29
|
+
.iter()
|
|
30
|
+
.map(|candidate| candidate.occurrence())
|
|
31
|
+
.collect::<Vec<_>>();
|
|
32
|
+
let primary = &occurrences[0];
|
|
33
|
+
let id = stable_fingerprint(&[
|
|
34
|
+
"semantic",
|
|
35
|
+
"copied-config-object",
|
|
36
|
+
&primary.shape_hash,
|
|
37
|
+
&primary.path,
|
|
38
|
+
]);
|
|
39
|
+
SemanticFinding {
|
|
40
|
+
id,
|
|
41
|
+
kind: "copied-config-object".to_string(),
|
|
42
|
+
confidence: 0.92,
|
|
43
|
+
severity: "medium".to_string(),
|
|
44
|
+
mode: finding_mode(context).to_string(),
|
|
45
|
+
summary: format!(
|
|
46
|
+
"Same config-like object shape appears in {} locations.",
|
|
47
|
+
occurrences.len()
|
|
48
|
+
),
|
|
49
|
+
cleanup_route: cleanup_route(
|
|
50
|
+
"Extract shared fixture builder",
|
|
51
|
+
&occurrences,
|
|
52
|
+
"Create a shared fixture or builder for the repeated object shape.",
|
|
53
|
+
),
|
|
54
|
+
occurrences,
|
|
55
|
+
}
|
|
56
|
+
})
|
|
57
|
+
.collect()
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
pub(super) fn inline_legacy_fixture_findings(
|
|
61
|
+
context: &QualityContext,
|
|
62
|
+
candidates: &[ObjectCandidate],
|
|
63
|
+
) -> Vec<SemanticFinding> {
|
|
64
|
+
candidates
|
|
65
|
+
.iter()
|
|
66
|
+
.filter(|candidate| context.applies_to(&candidate.path))
|
|
67
|
+
.filter(|candidate| candidate.in_test_context)
|
|
68
|
+
.filter(|candidate| !is_shared_fixture_factory(candidate))
|
|
69
|
+
.filter(|candidate| candidate.line_count >= 8)
|
|
70
|
+
.filter(|candidate| has_legacy_fixture_signal(&candidate.keys))
|
|
71
|
+
.map(|candidate| {
|
|
72
|
+
let occurrence = candidate.occurrence();
|
|
73
|
+
let id = stable_fingerprint(&[
|
|
74
|
+
"semantic",
|
|
75
|
+
"inline-legacy-fixture",
|
|
76
|
+
&occurrence.shape_hash,
|
|
77
|
+
&occurrence.path,
|
|
78
|
+
&occurrence.start_line.to_string(),
|
|
79
|
+
]);
|
|
80
|
+
SemanticFinding {
|
|
81
|
+
id,
|
|
82
|
+
kind: "inline-legacy-fixture".to_string(),
|
|
83
|
+
confidence: 0.86,
|
|
84
|
+
severity: "medium".to_string(),
|
|
85
|
+
mode: finding_mode(context).to_string(),
|
|
86
|
+
summary: "Large schema/version fixture is inline in a test or support context."
|
|
87
|
+
.to_string(),
|
|
88
|
+
cleanup_route: cleanup_route(
|
|
89
|
+
"Extract compatibility fixture",
|
|
90
|
+
std::slice::from_ref(&occurrence),
|
|
91
|
+
"Move the inline legacy fixture into a shared test fixture factory and keep version-specific differences explicit.",
|
|
92
|
+
),
|
|
93
|
+
occurrences: vec![occurrence],
|
|
94
|
+
}
|
|
95
|
+
})
|
|
96
|
+
.collect()
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
/// True when the key set contains at least one marker key typical of a
/// configuration-shaped object.
fn has_config_shape_signal(keys: &BTreeSet<String>) -> bool {
    const CONFIG_MARKERS: [&str; 6] = [
        "schema",
        "version",
        "status",
        "enabledadapters",
        "disabledchecks",
        "limits",
    ];
    CONFIG_MARKERS.iter().any(|marker| keys.contains(*marker))
}
|
|
107
|
+
|
|
108
|
+
/// True when the key set matches one of the key pairs characteristic of
/// a legacy/versioned fixture object.
fn has_legacy_fixture_signal(keys: &BTreeSet<String>) -> bool {
    let has = |key: &str| keys.contains(key);
    (has("schema") && has("version"))
        || (has("version") && has("status"))
        || (has("fromversion") && has("toversion"))
}
|
|
113
|
+
|
|
114
|
+
fn is_shared_fixture_factory(candidate: &ObjectCandidate) -> bool {
|
|
115
|
+
let Some(symbol) = &candidate.symbol else {
|
|
116
|
+
return false;
|
|
117
|
+
};
|
|
118
|
+
let normalized = symbol.to_ascii_lowercase();
|
|
119
|
+
[
|
|
120
|
+
"builder", "contract", "factory", "fixture", "make", "template",
|
|
121
|
+
]
|
|
122
|
+
.iter()
|
|
123
|
+
.any(|marker| normalized.contains(marker))
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
/// Decide whether a duplicate group is in scope for the current mode:
/// deep mode reports every group; changed mode only reports groups that
/// touch at least one changed path; other modes report nothing here.
fn group_applies_to_mode(context: &QualityContext, group: &[&ObjectCandidate]) -> bool {
    if context.mode.is_deep() {
        return true;
    }
    if !context.mode.is_changed() {
        return false;
    }
    group
        .iter()
        .any(|candidate| context.applies_to(&candidate.path))
}
|