@lamentis/naome 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/Cargo.lock +2 -2
  2. package/README.md +108 -47
  3. package/bin/naome.js +16 -1
  4. package/crates/naome-cli/Cargo.toml +1 -1
  5. package/crates/naome-cli/src/dispatcher.rs +6 -2
  6. package/crates/naome-cli/src/main.rs +35 -23
  7. package/crates/naome-cli/src/quality_commands.rs +230 -11
  8. package/crates/naome-cli/src/workflow_commands.rs +21 -1
  9. package/crates/naome-core/Cargo.toml +1 -1
  10. package/crates/naome-core/src/git.rs +4 -2
  11. package/crates/naome-core/src/install_plan.rs +2 -0
  12. package/crates/naome-core/src/lib.rs +11 -7
  13. package/crates/naome-core/src/quality/baseline.rs +8 -0
  14. package/crates/naome-core/src/quality/cache.rs +153 -0
  15. package/crates/naome-core/src/quality/checks/duplicate_blocks.rs +25 -11
  16. package/crates/naome-core/src/quality/checks/near_duplicates.rs +4 -2
  17. package/crates/naome-core/src/quality/checks.rs +7 -8
  18. package/crates/naome-core/src/quality/cleanup.rs +36 -3
  19. package/crates/naome-core/src/quality/mod.rs +57 -9
  20. package/crates/naome-core/src/quality/scanner/analysis/normalize.rs +78 -0
  21. package/crates/naome-core/src/quality/scanner/analysis.rs +160 -0
  22. package/crates/naome-core/src/quality/scanner/repo_paths.rs +39 -3
  23. package/crates/naome-core/src/quality/scanner.rs +193 -220
  24. package/crates/naome-core/src/quality/semantic/checks.rs +134 -0
  25. package/crates/naome-core/src/quality/semantic/extract.rs +158 -0
  26. package/crates/naome-core/src/quality/semantic/model.rs +85 -0
  27. package/crates/naome-core/src/quality/semantic/route.rs +52 -0
  28. package/crates/naome-core/src/quality/semantic.rs +68 -0
  29. package/crates/naome-core/src/quality/structure/checks/directory.rs +9 -19
  30. package/crates/naome-core/src/quality/structure/checks.rs +1 -1
  31. package/crates/naome-core/src/quality/structure/classify.rs +52 -0
  32. package/crates/naome-core/src/quality/structure/mod.rs +2 -2
  33. package/crates/naome-core/src/quality/structure/model.rs +8 -1
  34. package/crates/naome-core/src/quality/types.rs +40 -2
  35. package/crates/naome-core/src/route/builtin_checks.rs +1 -15
  36. package/crates/naome-core/src/workflow/doctor.rs +144 -0
  37. package/crates/naome-core/src/workflow/mod.rs +2 -0
  38. package/crates/naome-core/src/workflow/mutation.rs +1 -2
  39. package/crates/naome-core/tests/install_plan.rs +2 -0
  40. package/crates/naome-core/tests/quality.rs +14 -5
  41. package/crates/naome-core/tests/quality_performance.rs +231 -0
  42. package/crates/naome-core/tests/quality_structure_policy.rs +19 -0
  43. package/crates/naome-core/tests/route_user_diff.rs +10 -6
  44. package/crates/naome-core/tests/semantic_legacy.rs +140 -0
  45. package/crates/naome-core/tests/workflow_doctor.rs +24 -0
  46. package/crates/naome-core/tests/workflow_policy.rs +6 -1
  47. package/installer/git-boundary.js +1 -0
  48. package/native/darwin-arm64/naome +0 -0
  49. package/native/linux-x64/naome +0 -0
  50. package/package.json +1 -1
  51. package/templates/naome-root/.naome/bin/check-harness-health.js +2 -2
  52. package/templates/naome-root/.naome/bin/check-task-state.js +2 -2
  53. package/templates/naome-root/.naome/bin/naome.js +11 -4
  54. package/templates/naome-root/.naome/manifest.json +2 -2
  55. package/templates/naome-root/.naomeignore +1 -0
  56. package/templates/naome-root/docs/naome/agent-workflow.md +16 -14
  57. package/templates/naome-root/docs/naome/repository-quality.md +63 -4
@@ -1,3 +1,4 @@
1
+ mod analysis;
1
2
  mod repo_paths;
2
3
 
3
4
  use std::collections::{HashMap, HashSet};
@@ -7,13 +8,15 @@ use std::path::Path;
7
8
  use sha2::{Digest, Sha256};
8
9
 
9
10
  use crate::{git, models::NaomeError, paths};
10
- use repo_paths::added_lines_by_path;
11
11
  pub(crate) use repo_paths::collect_repo_paths;
12
+ use repo_paths::{added_lines_by_path, tracked_blob_hashes};
12
13
 
14
+ use super::cache::QualityCache;
13
15
  use super::types::{
14
16
  default_generated_paths, default_ignored_paths, QualityLimits, QualityMode,
15
17
  RepositoryQualityConfig,
16
18
  };
19
+ use analysis::analyze_repo_file;
17
20
 
18
21
  #[derive(Debug, Clone)]
19
22
  pub struct QualityContext {
@@ -23,6 +26,11 @@ pub struct QualityContext {
23
26
  pub repo_paths: Vec<String>,
24
27
  pub target_paths: HashSet<String>,
25
28
  pub files: Vec<FileAnalysis>,
29
+ pub comparison_files: Vec<FileAnalysis>,
30
+ pub truncated: bool,
31
+ pub reason_codes: Vec<String>,
32
+ pub cache_hits: usize,
33
+ pub cache_misses: usize,
26
34
  }
27
35
 
28
36
  impl QualityContext {
@@ -30,8 +38,12 @@ impl QualityContext {
30
38
  self.files.iter().map(|file| file.path.clone()).collect()
31
39
  }
32
40
 
41
+ pub fn comparison_candidate_files(&self) -> impl Iterator<Item = &FileAnalysis> {
42
+ self.files.iter().chain(self.comparison_files.iter())
43
+ }
44
+
33
45
  pub fn applies_to(&self, path: &str) -> bool {
34
- self.mode == QualityMode::Report || self.target_paths.contains(path)
46
+ !self.mode.is_changed() || self.target_paths.contains(path)
35
47
  }
36
48
 
37
49
  pub fn check_applies_to(&self, check_id: &str, path: &str) -> bool {
@@ -43,22 +55,23 @@ impl QualityContext {
43
55
  }
44
56
  }
45
57
 
46
- #[derive(Debug, Clone)]
58
+ #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
47
59
  pub struct FileAnalysis {
48
60
  pub path: String,
49
61
  pub line_count: usize,
50
62
  pub added_lines: usize,
63
+ pub raw_lines: Vec<String>,
51
64
  pub normalized_lines: Vec<NormalizedLine>,
52
65
  pub symbols: Vec<SymbolAnalysis>,
53
66
  }
54
67
 
55
- #[derive(Debug, Clone)]
68
+ #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
56
69
  pub struct NormalizedLine {
57
70
  pub line_number: usize,
58
71
  pub value: String,
59
72
  }
60
73
 
61
- #[derive(Debug, Clone)]
74
+ #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
62
75
  pub struct SymbolAnalysis {
63
76
  pub kind: String,
64
77
  pub name: String,
@@ -85,36 +98,41 @@ pub fn scan_repository(
85
98
  whole_repo_paths.sort();
86
99
  whole_repo_paths.dedup();
87
100
  let scan_paths = match mode {
88
- QualityMode::Changed => changed_paths.clone(),
89
- QualityMode::Report => whole_repo_paths.clone(),
101
+ QualityMode::ChangedFast => changed_paths.clone(),
102
+ QualityMode::Report | QualityMode::DeepReport => whole_repo_paths.clone(),
90
103
  };
91
- let added_lines = added_lines_by_path(root)?;
92
- let ignore_patterns = ignore_patterns(root, &config);
93
- let mut files = Vec::new();
94
-
95
- for path in scan_paths {
96
- if should_skip_path(&path, &ignore_patterns) {
97
- continue;
98
- }
99
- if let Some(file) = analyze_repo_file(root, &path, &added_lines) {
100
- files.push(file);
101
- }
102
- }
103
-
104
- if mode == QualityMode::Changed {
105
- let scanned = files
106
- .iter()
107
- .map(|file| file.path.clone())
108
- .collect::<HashSet<_>>();
109
- for path in &whole_repo_paths {
110
- if scanned.contains(path) || should_skip_path(path, &ignore_patterns) {
111
- continue;
112
- }
113
- if let Some(file) = analyze_repo_file(root, path, &added_lines) {
114
- files.push(file);
115
- }
116
- }
117
- }
104
+ let added_lines = if mode.is_changed() {
105
+ added_lines_by_path(root, &target_paths)?
106
+ } else {
107
+ HashMap::new()
108
+ };
109
+ let comparison_added_lines = HashMap::new();
110
+ let blob_hashes = tracked_blob_hashes(root).unwrap_or_default();
111
+ let cache = QualityCache::new(root, &config);
112
+ let ignore_patterns = CompiledPatternSet::new(ignore_patterns(root, &config));
113
+ let mut state = ScanState::default();
114
+ let mut files = analyze_primary_paths(
115
+ root,
116
+ mode,
117
+ &scan_paths,
118
+ &added_lines,
119
+ &blob_hashes,
120
+ &cache,
121
+ &ignore_patterns,
122
+ &mut state,
123
+ );
124
+ let comparison_files = analyze_comparison_paths(
125
+ root,
126
+ mode,
127
+ &whole_repo_paths,
128
+ &target_paths,
129
+ &comparison_added_lines,
130
+ &blob_hashes,
131
+ &cache,
132
+ &ignore_patterns,
133
+ files.len(),
134
+ &mut state,
135
+ );
118
136
 
119
137
  files.sort_by(|left, right| left.path.cmp(&right.path));
120
138
  Ok(QualityContext {
@@ -124,215 +142,145 @@ pub fn scan_repository(
124
142
  repo_paths: whole_repo_paths,
125
143
  target_paths,
126
144
  files,
145
+ comparison_files,
146
+ truncated: state.truncated,
147
+ reason_codes: state.reason_codes,
148
+ cache_hits: state.cache_hits,
149
+ cache_misses: state.cache_misses,
127
150
  })
128
151
  }
129
152
 
130
- pub fn stable_fingerprint(parts: &[&str]) -> String {
131
- let mut hasher = Sha256::new();
132
- for part in parts {
133
- hasher.update(part.as_bytes());
134
- hasher.update(b"\0");
135
- }
136
- format!("sha256:{:x}", hasher.finalize())
137
- }
138
-
139
- fn analyze_repo_file(
140
- root: &Path,
141
- path: &str,
142
- added_lines: &HashMap<String, usize>,
143
- ) -> Option<FileAnalysis> {
144
- let full_path = root.join(path);
145
- if !full_path.is_file() || is_binary_extension(path) {
146
- return None;
147
- }
148
- let content = fs::read_to_string(&full_path).ok()?;
149
- Some(analyze_file(
150
- path,
151
- &content,
152
- added_lines.get(path).copied().unwrap_or(0),
153
- ))
153
+ #[derive(Default)]
154
+ struct ScanState {
155
+ cache_hits: usize,
156
+ cache_misses: usize,
157
+ truncated: bool,
158
+ reason_codes: Vec<String>,
154
159
  }
155
160
 
156
- fn analyze_file(path: &str, content: &str, added_lines: usize) -> FileAnalysis {
157
- let lines = content.lines().collect::<Vec<_>>();
158
- let normalized_lines = lines
159
- .iter()
160
- .enumerate()
161
- .filter_map(|(index, line)| normalize_line(line).map(|value| (index + 1, value)))
162
- .map(|(line_number, value)| NormalizedLine { line_number, value })
163
- .collect::<Vec<_>>();
164
- let symbols = detect_symbols(&lines);
165
- FileAnalysis {
166
- path: path.to_string(),
167
- line_count: lines.len(),
168
- added_lines,
169
- normalized_lines,
170
- symbols,
171
- }
172
- }
173
-
174
- fn detect_symbols(lines: &[&str]) -> Vec<SymbolAnalysis> {
175
- let mut starts = Vec::new();
176
- for (index, line) in lines.iter().enumerate() {
177
- let indent = indentation(line);
178
- if let Some((kind, name)) = symbol_start(line.trim()) {
179
- starts.push((index, indent, kind, name));
161
+ impl ScanState {
162
+ fn record_cache_result(&mut self, cache_hit: bool) {
163
+ if cache_hit {
164
+ self.cache_hits += 1;
165
+ } else {
166
+ self.cache_misses += 1;
180
167
  }
181
168
  }
182
169
 
183
- let mut symbols = Vec::new();
184
- for (position, (start_index, indent, kind, name)) in starts.iter().enumerate() {
185
- let end_index = starts
186
- .iter()
187
- .skip(position + 1)
188
- .find(|(_, next_indent, _, _)| next_indent <= indent)
189
- .map(|(next_index, _, _, _)| next_index.saturating_sub(1))
190
- .unwrap_or_else(|| lines.len().saturating_sub(1));
191
- let normalized_body = lines[*start_index..=end_index]
192
- .iter()
193
- .filter_map(|line| normalize_line(line))
194
- .collect::<Vec<_>>();
195
- let tokens = normalized_body
196
- .iter()
197
- .flat_map(|line| token_set(line))
198
- .collect::<HashSet<_>>();
199
- symbols.push(SymbolAnalysis {
200
- kind: kind.clone(),
201
- name: name.clone(),
202
- start_line: start_index + 1,
203
- end_line: end_index + 1,
204
- indent: *indent,
205
- tokens,
206
- });
170
+ fn truncate(&mut self, reason: &str) {
171
+ self.truncated = true;
172
+ add_reason(&mut self.reason_codes, reason);
207
173
  }
208
- symbols
209
174
  }
210
175
 
211
- fn symbol_start(trimmed: &str) -> Option<(String, String)> {
212
- let candidates = [
213
- ("function", "function "),
214
- ("function", "export function "),
215
- ("function", "async function "),
216
- ("function", "export async function "),
217
- ("function", "def "),
218
- ("function", "fn "),
219
- ("function", "pub fn "),
220
- ("function", "func "),
221
- ("class", "class "),
222
- ("struct", "struct "),
223
- ("struct", "pub struct "),
224
- ("enum", "enum "),
225
- ("enum", "pub enum "),
226
- ("impl", "impl "),
227
- ];
228
- for (kind, prefix) in candidates {
229
- if let Some(rest) = trimmed.strip_prefix(prefix) {
230
- return Some((kind.to_string(), symbol_name(rest)));
176
+ fn analyze_primary_paths(
177
+ root: &Path,
178
+ mode: QualityMode,
179
+ paths: &[String],
180
+ added_lines: &HashMap<String, usize>,
181
+ blob_hashes: &HashMap<String, String>,
182
+ cache: &QualityCache,
183
+ ignore_patterns: &CompiledPatternSet,
184
+ state: &mut ScanState,
185
+ ) -> Vec<FileAnalysis> {
186
+ let mut files = Vec::new();
187
+ for path in paths {
188
+ if ignore_patterns.matches(path) {
189
+ continue;
231
190
  }
232
- }
233
- for prefix in ["const ", "let ", "export const ", "export let "] {
234
- if let Some(rest) = trimmed.strip_prefix(prefix) {
235
- if trimmed.contains("=>") || trimmed.contains("function") || trimmed.contains("React.")
236
- {
237
- return Some(("function".to_string(), symbol_name(rest)));
238
- }
191
+ if files.len() >= max_scanned_files(mode) {
192
+ state.truncate("max_scanned_files");
193
+ break;
194
+ }
195
+ if file_exceeds_budget(root, path, mode) {
196
+ state.truncate("max_file_bytes");
197
+ continue;
198
+ }
199
+ if let Some((file, cache_hit)) =
200
+ analyze_repo_file(root, path, added_lines, blob_hashes, cache, !mode.is_changed())
201
+ {
202
+ state.record_cache_result(cache_hit);
203
+ files.push(file);
239
204
  }
240
205
  }
241
- None
206
+ files
242
207
  }
243
208
 
244
- fn symbol_name(rest: &str) -> String {
245
- rest.chars()
246
- .take_while(|character| character.is_ascii_alphanumeric() || *character == '_')
247
- .collect::<String>()
248
- .trim_matches('_')
249
- .to_string()
250
- }
251
-
252
- fn normalize_line(line: &str) -> Option<String> {
253
- let trimmed = line.trim();
254
- if trimmed.is_empty()
255
- || is_comment_only(trimmed)
256
- || is_string_list_item(trimmed)
257
- || is_generated_hash_mapping(trimmed)
258
- {
259
- return None;
209
+ fn analyze_comparison_paths(
210
+ root: &Path,
211
+ mode: QualityMode,
212
+ paths: &[String],
213
+ target_paths: &HashSet<String>,
214
+ added_lines: &HashMap<String, usize>,
215
+ blob_hashes: &HashMap<String, String>,
216
+ cache: &QualityCache,
217
+ ignore_patterns: &CompiledPatternSet,
218
+ primary_count: usize,
219
+ state: &mut ScanState,
220
+ ) -> Vec<FileAnalysis> {
221
+ if !mode.is_changed() {
222
+ return Vec::new();
260
223
  }
261
-
262
- let mut normalized = String::new();
263
- let mut in_string = false;
264
- let mut quote = '\0';
265
- let mut previous_space = false;
266
- for character in trimmed.chars() {
267
- if in_string {
268
- if character == quote {
269
- in_string = false;
270
- normalized.push('S');
271
- previous_space = false;
272
- }
224
+ let mut files = Vec::new();
225
+ for path in paths {
226
+ if target_paths.contains(path) || ignore_patterns.matches(path) {
273
227
  continue;
274
228
  }
275
- if character == '"' || character == '\'' || character == '`' {
276
- in_string = true;
277
- quote = character;
229
+ if file_exceeds_budget(root, path, mode) {
230
+ state.truncate("max_file_bytes");
278
231
  continue;
279
232
  }
280
- let next = if character.is_ascii_digit() {
281
- '0'
282
- } else if character.is_whitespace() {
283
- ' '
284
- } else {
285
- character.to_ascii_lowercase()
286
- };
287
- if next == ' ' {
288
- if !previous_space {
289
- normalized.push(next);
290
- }
291
- previous_space = true;
292
- } else {
293
- normalized.push(next);
294
- previous_space = false;
233
+ if let Some((file, cache_hit)) =
234
+ analyze_repo_file(root, path, added_lines, blob_hashes, cache, true)
235
+ {
236
+ state.record_cache_result(cache_hit);
237
+ files.push(file);
238
+ }
239
+ if primary_count.saturating_add(files.len()) >= max_scanned_files(mode) {
240
+ state.truncate("max_scanned_files");
241
+ break;
295
242
  }
296
243
  }
297
- let value = normalized.trim().to_string();
298
- (!value.is_empty()).then_some(value)
244
+ files.sort_by(|left, right| left.path.cmp(&right.path));
245
+ files.dedup_by(|left, right| left.path == right.path);
246
+ files
299
247
  }
300
248
 
301
- fn is_comment_only(trimmed: &str) -> bool {
302
- trimmed.starts_with("//")
303
- || trimmed.starts_with('#')
304
- || trimmed.starts_with("/*")
305
- || trimmed.starts_with('*')
306
- || trimmed.starts_with("--")
249
+ pub fn stable_fingerprint(parts: &[&str]) -> String {
250
+ let mut hasher = Sha256::new();
251
+ for part in parts {
252
+ hasher.update(part.as_bytes());
253
+ hasher.update(b"\0");
254
+ }
255
+ format!("sha256:{:x}", hasher.finalize())
307
256
  }
308
257
 
309
- fn is_generated_hash_mapping(trimmed: &str) -> bool {
310
- let Some((key, value)) = trimmed.split_once(':') else {
311
- return false;
312
- };
313
- key.trim_start().starts_with('"')
314
- && value.trim_start().starts_with("\"sha256:")
315
- && value.chars().filter(|character| *character == '"').count() >= 2
258
+ fn max_scanned_files(mode: QualityMode) -> usize {
259
+ match mode {
260
+ QualityMode::ChangedFast => 2_000,
261
+ QualityMode::Report => 5_000,
262
+ QualityMode::DeepReport => usize::MAX,
263
+ }
316
264
  }
317
265
 
318
- fn is_string_list_item(trimmed: &str) -> bool {
319
- let value = trimmed.trim_end_matches(',');
320
- (value.starts_with('"') && value.ends_with('"'))
321
- || (value.starts_with('\'') && value.ends_with('\''))
266
+ fn max_file_bytes(mode: QualityMode) -> u64 {
267
+ match mode {
268
+ QualityMode::ChangedFast => 512 * 1024,
269
+ QualityMode::Report => 1024 * 1024,
270
+ QualityMode::DeepReport => 8 * 1024 * 1024,
271
+ }
322
272
  }
323
273
 
324
- fn token_set(line: &str) -> Vec<String> {
325
- line.split(|character: char| !character.is_ascii_alphanumeric() && character != '_')
326
- .filter(|token| token.len() > 1)
327
- .map(ToString::to_string)
328
- .collect()
274
+ fn file_exceeds_budget(root: &Path, path: &str, mode: QualityMode) -> bool {
275
+ fs::metadata(root.join(path)).map_or(false, |metadata| {
276
+ metadata.is_file() && metadata.len() > max_file_bytes(mode)
277
+ })
329
278
  }
330
279
 
331
- fn indentation(line: &str) -> usize {
332
- line.chars()
333
- .take_while(|character| character.is_whitespace())
334
- .map(|character| if character == '\t' { 2 } else { 1 })
335
- .sum()
280
+ fn add_reason(reason_codes: &mut Vec<String>, reason: &str) {
281
+ if !reason_codes.iter().any(|existing| existing == reason) {
282
+ reason_codes.push(reason.to_string());
283
+ }
336
284
  }
337
285
 
338
286
  fn ignore_patterns(root: &Path, config: &RepositoryQualityConfig) -> Vec<String> {
@@ -364,16 +312,41 @@ fn read_naomeignore_patterns(root: &Path) -> Vec<String> {
364
312
  .collect()
365
313
  }
366
314
 
367
- fn should_skip_path(path: &str, patterns: &[String]) -> bool {
368
- paths::matches_any(path, patterns)
315
+ struct CompiledPatternSet {
316
+ exact: HashSet<String>,
317
+ prefixes: Vec<String>,
318
+ wildcards: Vec<String>,
369
319
  }
370
320
 
371
- fn is_binary_extension(path: &str) -> bool {
372
- let lower = path.to_ascii_lowercase();
373
- [
374
- ".png", ".jpg", ".jpeg", ".gif", ".webp", ".ico", ".pdf", ".zip", ".gz", ".tgz", ".wasm",
375
- ".dylib", ".so", ".dll", ".exe", ".bin",
376
- ]
377
- .iter()
378
- .any(|extension| lower.ends_with(extension))
321
+ impl CompiledPatternSet {
322
+ fn new(patterns: Vec<String>) -> Self {
323
+ let mut exact = HashSet::new();
324
+ let mut prefixes = Vec::new();
325
+ let mut wildcards = Vec::new();
326
+ for pattern in patterns {
327
+ let normalized = pattern.trim_start_matches("./").replace('\\', "/");
328
+ if normalized.ends_with("/**") && !normalized[..normalized.len() - 3].contains('*') {
329
+ prefixes.push(normalized.trim_end_matches("**").to_string());
330
+ } else if normalized.contains('*') || normalized.contains('?') {
331
+ wildcards.push(normalized);
332
+ } else {
333
+ exact.insert(normalized);
334
+ }
335
+ }
336
+ prefixes.sort();
337
+ prefixes.dedup();
338
+ wildcards.sort();
339
+ wildcards.dedup();
340
+ Self {
341
+ exact,
342
+ prefixes,
343
+ wildcards,
344
+ }
345
+ }
346
+
347
+ fn matches(&self, path: &str) -> bool {
348
+ self.exact.contains(path)
349
+ || self.prefixes.iter().any(|prefix| path.starts_with(prefix))
350
+ || paths::matches_any(path, &self.wildcards)
351
+ }
379
352
  }
@@ -0,0 +1,134 @@
1
+ use std::collections::{BTreeMap, BTreeSet};
2
+
3
+ use super::model::{ObjectCandidate, SemanticFinding};
4
+ use super::route::{cleanup_route, finding_mode};
5
+ use crate::quality::scanner::{stable_fingerprint, QualityContext};
6
+
7
+ pub(super) fn copied_config_findings(
8
+ context: &QualityContext,
9
+ candidates: &[ObjectCandidate],
10
+ ) -> Vec<SemanticFinding> {
11
+ let mut by_shape: BTreeMap<&str, Vec<&ObjectCandidate>> = BTreeMap::new();
12
+ for candidate in candidates
13
+ .iter()
14
+ .filter(|candidate| candidate.keys.len() >= 4 && candidate.line_count >= 8)
15
+ .filter(|candidate| has_config_shape_signal(&candidate.keys))
16
+ {
17
+ by_shape
18
+ .entry(candidate.shape_hash.as_str())
19
+ .or_default()
20
+ .push(candidate);
21
+ }
22
+
23
+ by_shape
24
+ .into_values()
25
+ .filter(|group| group.len() >= 2)
26
+ .filter(|group| group_applies_to_mode(context, group))
27
+ .map(|group| {
28
+ let occurrences = group
29
+ .iter()
30
+ .map(|candidate| candidate.occurrence())
31
+ .collect::<Vec<_>>();
32
+ let primary = &occurrences[0];
33
+ let id = stable_fingerprint(&[
34
+ "semantic",
35
+ "copied-config-object",
36
+ &primary.shape_hash,
37
+ &primary.path,
38
+ ]);
39
+ SemanticFinding {
40
+ id,
41
+ kind: "copied-config-object".to_string(),
42
+ confidence: 0.92,
43
+ severity: "medium".to_string(),
44
+ mode: finding_mode(context).to_string(),
45
+ summary: format!(
46
+ "Same config-like object shape appears in {} locations.",
47
+ occurrences.len()
48
+ ),
49
+ cleanup_route: cleanup_route(
50
+ "Extract shared fixture builder",
51
+ &occurrences,
52
+ "Create a shared fixture or builder for the repeated object shape.",
53
+ ),
54
+ occurrences,
55
+ }
56
+ })
57
+ .collect()
58
+ }
59
+
60
+ pub(super) fn inline_legacy_fixture_findings(
61
+ context: &QualityContext,
62
+ candidates: &[ObjectCandidate],
63
+ ) -> Vec<SemanticFinding> {
64
+ candidates
65
+ .iter()
66
+ .filter(|candidate| context.applies_to(&candidate.path))
67
+ .filter(|candidate| candidate.in_test_context)
68
+ .filter(|candidate| !is_shared_fixture_factory(candidate))
69
+ .filter(|candidate| candidate.line_count >= 8)
70
+ .filter(|candidate| has_legacy_fixture_signal(&candidate.keys))
71
+ .map(|candidate| {
72
+ let occurrence = candidate.occurrence();
73
+ let id = stable_fingerprint(&[
74
+ "semantic",
75
+ "inline-legacy-fixture",
76
+ &occurrence.shape_hash,
77
+ &occurrence.path,
78
+ &occurrence.start_line.to_string(),
79
+ ]);
80
+ SemanticFinding {
81
+ id,
82
+ kind: "inline-legacy-fixture".to_string(),
83
+ confidence: 0.86,
84
+ severity: "medium".to_string(),
85
+ mode: finding_mode(context).to_string(),
86
+ summary: "Large schema/version fixture is inline in a test or support context."
87
+ .to_string(),
88
+ cleanup_route: cleanup_route(
89
+ "Extract compatibility fixture",
90
+ std::slice::from_ref(&occurrence),
91
+ "Move the inline legacy fixture into a shared test fixture factory and keep version-specific differences explicit.",
92
+ ),
93
+ occurrences: vec![occurrence],
94
+ }
95
+ })
96
+ .collect()
97
+ }
98
+
99
+ fn has_config_shape_signal(keys: &BTreeSet<String>) -> bool {
100
+ keys.contains("schema")
101
+ || keys.contains("version")
102
+ || keys.contains("status")
103
+ || keys.contains("enabledadapters")
104
+ || keys.contains("disabledchecks")
105
+ || keys.contains("limits")
106
+ }
107
+
108
+ fn has_legacy_fixture_signal(keys: &BTreeSet<String>) -> bool {
109
+ (keys.contains("schema") && keys.contains("version"))
110
+ || (keys.contains("version") && keys.contains("status"))
111
+ || (keys.contains("fromversion") && keys.contains("toversion"))
112
+ }
113
+
114
+ fn is_shared_fixture_factory(candidate: &ObjectCandidate) -> bool {
115
+ let Some(symbol) = &candidate.symbol else {
116
+ return false;
117
+ };
118
+ let normalized = symbol.to_ascii_lowercase();
119
+ [
120
+ "builder", "contract", "factory", "fixture", "make", "template",
121
+ ]
122
+ .iter()
123
+ .any(|marker| normalized.contains(marker))
124
+ }
125
+
126
+ fn group_applies_to_mode(context: &QualityContext, group: &[&ObjectCandidate]) -> bool {
127
+ if context.mode.is_deep() {
128
+ return true;
129
+ }
130
+ context.mode.is_changed()
131
+ && group
132
+ .iter()
133
+ .any(|candidate| context.applies_to(&candidate.path))
134
+ }