@lamentis/naome 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/Cargo.lock +2 -2
  2. package/README.md +108 -47
  3. package/bin/naome-node.js +2 -1579
  4. package/bin/naome.js +34 -5
  5. package/crates/naome-cli/Cargo.toml +1 -1
  6. package/crates/naome-cli/src/dispatcher.rs +7 -2
  7. package/crates/naome-cli/src/main.rs +37 -22
  8. package/crates/naome-cli/src/quality_commands.rs +317 -10
  9. package/crates/naome-cli/src/workflow_commands.rs +21 -1
  10. package/crates/naome-core/Cargo.toml +1 -1
  11. package/crates/naome-core/src/decision/checks.rs +64 -0
  12. package/crates/naome-core/src/decision/idle.rs +67 -0
  13. package/crates/naome-core/src/decision/json.rs +36 -0
  14. package/crates/naome-core/src/decision/states.rs +165 -0
  15. package/crates/naome-core/src/decision.rs +131 -353
  16. package/crates/naome-core/src/git.rs +4 -2
  17. package/crates/naome-core/src/install_plan.rs +4 -0
  18. package/crates/naome-core/src/lib.rs +12 -6
  19. package/crates/naome-core/src/paths.rs +3 -1
  20. package/crates/naome-core/src/quality/adapter_support.rs +89 -0
  21. package/crates/naome-core/src/quality/adapters.rs +20 -67
  22. package/crates/naome-core/src/quality/baseline.rs +8 -0
  23. package/crates/naome-core/src/quality/cache.rs +153 -0
  24. package/crates/naome-core/src/quality/checks/duplicate_blocks.rs +25 -11
  25. package/crates/naome-core/src/quality/checks/near_duplicates.rs +4 -2
  26. package/crates/naome-core/src/quality/checks.rs +7 -8
  27. package/crates/naome-core/src/quality/cleanup.rs +48 -3
  28. package/crates/naome-core/src/quality/config.rs +8 -15
  29. package/crates/naome-core/src/quality/config_support.rs +24 -0
  30. package/crates/naome-core/src/quality/mod.rs +72 -6
  31. package/crates/naome-core/src/quality/scanner/analysis/normalize.rs +78 -0
  32. package/crates/naome-core/src/quality/scanner/analysis.rs +160 -0
  33. package/crates/naome-core/src/quality/scanner/repo_paths.rs +39 -3
  34. package/crates/naome-core/src/quality/scanner.rs +200 -215
  35. package/crates/naome-core/src/quality/semantic/checks.rs +134 -0
  36. package/crates/naome-core/src/quality/semantic/extract.rs +158 -0
  37. package/crates/naome-core/src/quality/semantic/model.rs +85 -0
  38. package/crates/naome-core/src/quality/semantic/route.rs +52 -0
  39. package/crates/naome-core/src/quality/semantic.rs +68 -0
  40. package/crates/naome-core/src/quality/structure/adapters.rs +84 -0
  41. package/crates/naome-core/src/quality/structure/checks/basic.rs +153 -0
  42. package/crates/naome-core/src/quality/structure/checks/directory.rs +134 -0
  43. package/crates/naome-core/src/quality/structure/checks/pairing.rs +63 -0
  44. package/crates/naome-core/src/quality/structure/checks.rs +124 -0
  45. package/crates/naome-core/src/quality/structure/classify/roles.rs +188 -0
  46. package/crates/naome-core/src/quality/structure/classify.rs +146 -0
  47. package/crates/naome-core/src/quality/structure/config.rs +89 -0
  48. package/crates/naome-core/src/quality/structure/defaults.rs +107 -0
  49. package/crates/naome-core/src/quality/structure/mod.rs +77 -0
  50. package/crates/naome-core/src/quality/structure/model.rs +131 -0
  51. package/crates/naome-core/src/quality/types.rs +43 -2
  52. package/crates/naome-core/src/route/builtin_checks.rs +141 -0
  53. package/crates/naome-core/src/route/builtin_context.rs +73 -0
  54. package/crates/naome-core/src/route/builtin_integrity.rs +49 -0
  55. package/crates/naome-core/src/route/builtin_require.rs +40 -0
  56. package/crates/naome-core/src/route/context.rs +180 -0
  57. package/crates/naome-core/src/route/execution.rs +96 -0
  58. package/crates/naome-core/src/route/execution_baselines.rs +146 -0
  59. package/crates/naome-core/src/route/execution_support.rs +57 -0
  60. package/crates/naome-core/src/route/execution_tasks.rs +71 -0
  61. package/crates/naome-core/src/route/git_ops.rs +72 -0
  62. package/crates/naome-core/src/route/quality_gate.rs +73 -0
  63. package/crates/naome-core/src/route/quality_gate_config.rs +126 -0
  64. package/crates/naome-core/src/route/quality_gate_snapshot.rs +69 -0
  65. package/crates/naome-core/src/route/worktree.rs +75 -0
  66. package/crates/naome-core/src/route/worktree_files.rs +32 -0
  67. package/crates/naome-core/src/route/worktree_plan.rs +131 -0
  68. package/crates/naome-core/src/route.rs +44 -1217
  69. package/crates/naome-core/src/verification.rs +1 -0
  70. package/crates/naome-core/src/workflow/doctor.rs +144 -0
  71. package/crates/naome-core/src/workflow/mod.rs +2 -0
  72. package/crates/naome-core/src/workflow/mutation.rs +1 -2
  73. package/crates/naome-core/tests/decision.rs +24 -118
  74. package/crates/naome-core/tests/harness_health.rs +2 -0
  75. package/crates/naome-core/tests/install_plan.rs +2 -0
  76. package/crates/naome-core/tests/quality.rs +26 -123
  77. package/crates/naome-core/tests/quality_performance.rs +231 -0
  78. package/crates/naome-core/tests/quality_structure.rs +116 -0
  79. package/crates/naome-core/tests/quality_structure_adapters.rs +98 -0
  80. package/crates/naome-core/tests/quality_structure_policy.rs +144 -0
  81. package/crates/naome-core/tests/quality_structure_support/mod.rs +249 -0
  82. package/crates/naome-core/tests/repo_support/mod.rs +16 -0
  83. package/crates/naome-core/tests/repo_support/repo.rs +113 -0
  84. package/crates/naome-core/tests/repo_support/repo_factories.rs +99 -0
  85. package/crates/naome-core/tests/repo_support/repo_helpers.rs +123 -0
  86. package/crates/naome-core/tests/repo_support/routes.rs +81 -0
  87. package/crates/naome-core/tests/repo_support/verification.rs +168 -0
  88. package/crates/naome-core/tests/repo_support/verification_values.rs +135 -0
  89. package/crates/naome-core/tests/route.rs +1 -1376
  90. package/crates/naome-core/tests/route_baseline.rs +86 -0
  91. package/crates/naome-core/tests/route_completion.rs +141 -0
  92. package/crates/naome-core/tests/route_harness_refresh.rs +135 -0
  93. package/crates/naome-core/tests/route_user_diff.rs +202 -0
  94. package/crates/naome-core/tests/route_worktree.rs +54 -0
  95. package/crates/naome-core/tests/semantic_legacy.rs +140 -0
  96. package/crates/naome-core/tests/task_state.rs +60 -432
  97. package/crates/naome-core/tests/task_state_compact_support/repo.rs +1 -1
  98. package/crates/naome-core/tests/task_state_support/mod.rs +163 -0
  99. package/crates/naome-core/tests/task_state_support/states.rs +84 -0
  100. package/crates/naome-core/tests/verification.rs +4 -45
  101. package/crates/naome-core/tests/verification_contract.rs +22 -78
  102. package/crates/naome-core/tests/workflow_doctor.rs +24 -0
  103. package/crates/naome-core/tests/workflow_policy.rs +6 -1
  104. package/crates/naome-core/tests/workflow_support/mod.rs +1 -1
  105. package/installer/agents.js +90 -0
  106. package/installer/context.js +67 -0
  107. package/installer/filesystem.js +166 -0
  108. package/installer/flows.js +84 -0
  109. package/installer/git-boundary.js +171 -0
  110. package/installer/git-hook-content.js +36 -0
  111. package/installer/git-hooks.js +134 -0
  112. package/installer/git-local.js +2 -0
  113. package/installer/git-shared.js +35 -0
  114. package/installer/harness-file-ops.js +140 -0
  115. package/installer/harness-files.js +56 -0
  116. package/installer/harness-verification.js +123 -0
  117. package/installer/install-plan.js +66 -0
  118. package/installer/main.js +25 -0
  119. package/installer/manifest-state.js +167 -0
  120. package/installer/native-build.js +24 -0
  121. package/installer/native-format.js +6 -0
  122. package/installer/native.js +162 -0
  123. package/installer/output.js +131 -0
  124. package/installer/version.js +32 -0
  125. package/native/darwin-arm64/naome +0 -0
  126. package/native/linux-x64/naome +0 -0
  127. package/package.json +2 -1
  128. package/templates/naome-root/.naome/bin/check-harness-health.js +3 -3
  129. package/templates/naome-root/.naome/bin/check-task-state.js +3 -3
  130. package/templates/naome-root/.naome/bin/naome.js +32 -21
  131. package/templates/naome-root/.naome/manifest.json +5 -3
  132. package/templates/naome-root/.naome/repository-structure.json +90 -0
  133. package/templates/naome-root/.naome/verification.json +1 -0
  134. package/templates/naome-root/.naomeignore +1 -0
  135. package/templates/naome-root/docs/naome/agent-workflow.md +16 -14
  136. package/templates/naome-root/docs/naome/index.md +4 -3
  137. package/templates/naome-root/docs/naome/repository-quality.md +66 -4
  138. package/templates/naome-root/docs/naome/repository-structure.md +51 -0
  139. package/templates/naome-root/docs/naome/testing.md +2 -1
@@ -1,3 +1,4 @@
1
+ mod analysis;
1
2
  mod repo_paths;
2
3
 
3
4
  use std::collections::{HashMap, HashSet};
@@ -7,21 +8,29 @@ use std::path::Path;
7
8
  use sha2::{Digest, Sha256};
8
9
 
9
10
  use crate::{git, models::NaomeError, paths};
10
- use repo_paths::added_lines_by_path;
11
11
  pub(crate) use repo_paths::collect_repo_paths;
12
+ use repo_paths::{added_lines_by_path, tracked_blob_hashes};
12
13
 
14
+ use super::cache::QualityCache;
13
15
  use super::types::{
14
16
  default_generated_paths, default_ignored_paths, QualityLimits, QualityMode,
15
17
  RepositoryQualityConfig,
16
18
  };
19
+ use analysis::analyze_repo_file;
17
20
 
18
21
  #[derive(Debug, Clone)]
19
22
  pub struct QualityContext {
20
23
  pub mode: QualityMode,
21
24
  pub config: RepositoryQualityConfig,
22
25
  pub changed_paths: Vec<String>,
26
+ pub repo_paths: Vec<String>,
23
27
  pub target_paths: HashSet<String>,
24
28
  pub files: Vec<FileAnalysis>,
29
+ pub comparison_files: Vec<FileAnalysis>,
30
+ pub truncated: bool,
31
+ pub reason_codes: Vec<String>,
32
+ pub cache_hits: usize,
33
+ pub cache_misses: usize,
25
34
  }
26
35
 
27
36
  impl QualityContext {
@@ -29,8 +38,12 @@ impl QualityContext {
29
38
  self.files.iter().map(|file| file.path.clone()).collect()
30
39
  }
31
40
 
41
+ pub fn comparison_candidate_files(&self) -> impl Iterator<Item = &FileAnalysis> {
42
+ self.files.iter().chain(self.comparison_files.iter())
43
+ }
44
+
32
45
  pub fn applies_to(&self, path: &str) -> bool {
33
- self.mode == QualityMode::Report || self.target_paths.contains(path)
46
+ !self.mode.is_changed() || self.target_paths.contains(path)
34
47
  }
35
48
 
36
49
  pub fn check_applies_to(&self, check_id: &str, path: &str) -> bool {
@@ -42,22 +55,23 @@ impl QualityContext {
42
55
  }
43
56
  }
44
57
 
45
- #[derive(Debug, Clone)]
58
+ #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
46
59
  pub struct FileAnalysis {
47
60
  pub path: String,
48
61
  pub line_count: usize,
49
62
  pub added_lines: usize,
63
+ pub raw_lines: Vec<String>,
50
64
  pub normalized_lines: Vec<NormalizedLine>,
51
65
  pub symbols: Vec<SymbolAnalysis>,
52
66
  }
53
67
 
54
- #[derive(Debug, Clone)]
68
+ #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
55
69
  pub struct NormalizedLine {
56
70
  pub line_number: usize,
57
71
  pub value: String,
58
72
  }
59
73
 
60
- #[derive(Debug, Clone)]
74
+ #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
61
75
  pub struct SymbolAnalysis {
62
76
  pub kind: String,
63
77
  pub name: String,
@@ -80,247 +94,193 @@ pub fn scan_repository(
80
94
  ) -> Result<QualityContext, NaomeError> {
81
95
  let changed_paths = git::changed_paths(root)?;
82
96
  let target_paths = changed_paths.iter().cloned().collect::<HashSet<_>>();
97
+ let mut whole_repo_paths = collect_repo_paths(root)?;
98
+ whole_repo_paths.sort();
99
+ whole_repo_paths.dedup();
83
100
  let scan_paths = match mode {
84
- QualityMode::Changed => changed_paths.clone(),
85
- QualityMode::Report => collect_repo_paths(root)?,
101
+ QualityMode::ChangedFast => changed_paths.clone(),
102
+ QualityMode::Report | QualityMode::DeepReport => whole_repo_paths.clone(),
86
103
  };
87
- let added_lines = added_lines_by_path(root)?;
88
- let ignore_patterns = ignore_patterns(root, &config);
89
- let mut files = Vec::new();
90
-
91
- for path in scan_paths {
92
- if should_skip_path(&path, &ignore_patterns) {
93
- continue;
94
- }
95
- if let Some(file) = analyze_repo_file(root, &path, &added_lines) {
96
- files.push(file);
97
- }
98
- }
99
-
100
- if mode == QualityMode::Changed {
101
- let mut whole_repo_paths = collect_repo_paths(root)?;
102
- whole_repo_paths.sort();
103
- whole_repo_paths.dedup();
104
- let scanned = files
105
- .iter()
106
- .map(|file| file.path.clone())
107
- .collect::<HashSet<_>>();
108
- for path in whole_repo_paths {
109
- if scanned.contains(&path) || should_skip_path(&path, &ignore_patterns) {
110
- continue;
111
- }
112
- if let Some(file) = analyze_repo_file(root, &path, &added_lines) {
113
- files.push(file);
114
- }
115
- }
116
- }
104
+ let added_lines = if mode.is_changed() {
105
+ added_lines_by_path(root, &target_paths)?
106
+ } else {
107
+ HashMap::new()
108
+ };
109
+ let comparison_added_lines = HashMap::new();
110
+ let blob_hashes = tracked_blob_hashes(root).unwrap_or_default();
111
+ let cache = QualityCache::new(root, &config);
112
+ let ignore_patterns = CompiledPatternSet::new(ignore_patterns(root, &config));
113
+ let mut state = ScanState::default();
114
+ let mut files = analyze_primary_paths(
115
+ root,
116
+ mode,
117
+ &scan_paths,
118
+ &added_lines,
119
+ &blob_hashes,
120
+ &cache,
121
+ &ignore_patterns,
122
+ &mut state,
123
+ );
124
+ let comparison_files = analyze_comparison_paths(
125
+ root,
126
+ mode,
127
+ &whole_repo_paths,
128
+ &target_paths,
129
+ &comparison_added_lines,
130
+ &blob_hashes,
131
+ &cache,
132
+ &ignore_patterns,
133
+ files.len(),
134
+ &mut state,
135
+ );
117
136
 
118
137
  files.sort_by(|left, right| left.path.cmp(&right.path));
119
138
  Ok(QualityContext {
120
139
  mode,
121
140
  config,
122
141
  changed_paths,
142
+ repo_paths: whole_repo_paths,
123
143
  target_paths,
124
144
  files,
145
+ comparison_files,
146
+ truncated: state.truncated,
147
+ reason_codes: state.reason_codes,
148
+ cache_hits: state.cache_hits,
149
+ cache_misses: state.cache_misses,
125
150
  })
126
151
  }
127
152
 
128
- pub fn stable_fingerprint(parts: &[&str]) -> String {
129
- let mut hasher = Sha256::new();
130
- for part in parts {
131
- hasher.update(part.as_bytes());
132
- hasher.update(b"\0");
133
- }
134
- format!("sha256:{:x}", hasher.finalize())
153
+ #[derive(Default)]
154
+ struct ScanState {
155
+ cache_hits: usize,
156
+ cache_misses: usize,
157
+ truncated: bool,
158
+ reason_codes: Vec<String>,
135
159
  }
136
160
 
137
- fn analyze_repo_file(
138
- root: &Path,
139
- path: &str,
140
- added_lines: &HashMap<String, usize>,
141
- ) -> Option<FileAnalysis> {
142
- let full_path = root.join(path);
143
- if !full_path.is_file() || is_binary_extension(path) {
144
- return None;
145
- }
146
- let content = fs::read_to_string(&full_path).ok()?;
147
- Some(analyze_file(
148
- path,
149
- &content,
150
- added_lines.get(path).copied().unwrap_or(0),
151
- ))
152
- }
153
-
154
- fn analyze_file(path: &str, content: &str, added_lines: usize) -> FileAnalysis {
155
- let lines = content.lines().collect::<Vec<_>>();
156
- let normalized_lines = lines
157
- .iter()
158
- .enumerate()
159
- .filter_map(|(index, line)| normalize_line(line).map(|value| (index + 1, value)))
160
- .map(|(line_number, value)| NormalizedLine { line_number, value })
161
- .collect::<Vec<_>>();
162
- let symbols = detect_symbols(&lines);
163
- FileAnalysis {
164
- path: path.to_string(),
165
- line_count: lines.len(),
166
- added_lines,
167
- normalized_lines,
168
- symbols,
169
- }
170
- }
171
-
172
- fn detect_symbols(lines: &[&str]) -> Vec<SymbolAnalysis> {
173
- let mut starts = Vec::new();
174
- for (index, line) in lines.iter().enumerate() {
175
- let indent = indentation(line);
176
- if let Some((kind, name)) = symbol_start(line.trim()) {
177
- starts.push((index, indent, kind, name));
161
+ impl ScanState {
162
+ fn record_cache_result(&mut self, cache_hit: bool) {
163
+ if cache_hit {
164
+ self.cache_hits += 1;
165
+ } else {
166
+ self.cache_misses += 1;
178
167
  }
179
168
  }
180
169
 
181
- let mut symbols = Vec::new();
182
- for (position, (start_index, indent, kind, name)) in starts.iter().enumerate() {
183
- let end_index = starts
184
- .iter()
185
- .skip(position + 1)
186
- .find(|(_, next_indent, _, _)| next_indent <= indent)
187
- .map(|(next_index, _, _, _)| next_index.saturating_sub(1))
188
- .unwrap_or_else(|| lines.len().saturating_sub(1));
189
- let normalized_body = lines[*start_index..=end_index]
190
- .iter()
191
- .filter_map(|line| normalize_line(line))
192
- .collect::<Vec<_>>();
193
- let tokens = normalized_body
194
- .iter()
195
- .flat_map(|line| token_set(line))
196
- .collect::<HashSet<_>>();
197
- symbols.push(SymbolAnalysis {
198
- kind: kind.clone(),
199
- name: name.clone(),
200
- start_line: start_index + 1,
201
- end_line: end_index + 1,
202
- indent: *indent,
203
- tokens,
204
- });
170
+ fn truncate(&mut self, reason: &str) {
171
+ self.truncated = true;
172
+ add_reason(&mut self.reason_codes, reason);
205
173
  }
206
- symbols
207
174
  }
208
175
 
209
- fn symbol_start(trimmed: &str) -> Option<(String, String)> {
210
- let candidates = [
211
- ("function", "function "),
212
- ("function", "export function "),
213
- ("function", "async function "),
214
- ("function", "export async function "),
215
- ("function", "def "),
216
- ("function", "fn "),
217
- ("function", "pub fn "),
218
- ("function", "func "),
219
- ("class", "class "),
220
- ("struct", "struct "),
221
- ("struct", "pub struct "),
222
- ("enum", "enum "),
223
- ("enum", "pub enum "),
224
- ("impl", "impl "),
225
- ];
226
- for (kind, prefix) in candidates {
227
- if let Some(rest) = trimmed.strip_prefix(prefix) {
228
- return Some((kind.to_string(), symbol_name(rest)));
176
+ fn analyze_primary_paths(
177
+ root: &Path,
178
+ mode: QualityMode,
179
+ paths: &[String],
180
+ added_lines: &HashMap<String, usize>,
181
+ blob_hashes: &HashMap<String, String>,
182
+ cache: &QualityCache,
183
+ ignore_patterns: &CompiledPatternSet,
184
+ state: &mut ScanState,
185
+ ) -> Vec<FileAnalysis> {
186
+ let mut files = Vec::new();
187
+ for path in paths {
188
+ if ignore_patterns.matches(path) {
189
+ continue;
229
190
  }
230
- }
231
- for prefix in ["const ", "let ", "export const ", "export let "] {
232
- if let Some(rest) = trimmed.strip_prefix(prefix) {
233
- if trimmed.contains("=>") || trimmed.contains("function") || trimmed.contains("React.")
234
- {
235
- return Some(("function".to_string(), symbol_name(rest)));
236
- }
191
+ if files.len() >= max_scanned_files(mode) {
192
+ state.truncate("max_scanned_files");
193
+ break;
194
+ }
195
+ if file_exceeds_budget(root, path, mode) {
196
+ state.truncate("max_file_bytes");
197
+ continue;
198
+ }
199
+ if let Some((file, cache_hit)) =
200
+ analyze_repo_file(root, path, added_lines, blob_hashes, cache, !mode.is_changed())
201
+ {
202
+ state.record_cache_result(cache_hit);
203
+ files.push(file);
237
204
  }
238
205
  }
239
- None
240
- }
241
-
242
- fn symbol_name(rest: &str) -> String {
243
- rest.chars()
244
- .take_while(|character| character.is_ascii_alphanumeric() || *character == '_')
245
- .collect::<String>()
246
- .trim_matches('_')
247
- .to_string()
206
+ files
248
207
  }
249
208
 
250
- fn normalize_line(line: &str) -> Option<String> {
251
- let trimmed = line.trim();
252
- if trimmed.is_empty() || is_comment_only(trimmed) || is_generated_hash_mapping(trimmed) {
253
- return None;
209
+ fn analyze_comparison_paths(
210
+ root: &Path,
211
+ mode: QualityMode,
212
+ paths: &[String],
213
+ target_paths: &HashSet<String>,
214
+ added_lines: &HashMap<String, usize>,
215
+ blob_hashes: &HashMap<String, String>,
216
+ cache: &QualityCache,
217
+ ignore_patterns: &CompiledPatternSet,
218
+ primary_count: usize,
219
+ state: &mut ScanState,
220
+ ) -> Vec<FileAnalysis> {
221
+ if !mode.is_changed() {
222
+ return Vec::new();
254
223
  }
255
-
256
- let mut normalized = String::new();
257
- let mut in_string = false;
258
- let mut quote = '\0';
259
- let mut previous_space = false;
260
- for character in trimmed.chars() {
261
- if in_string {
262
- if character == quote {
263
- in_string = false;
264
- normalized.push('S');
265
- previous_space = false;
266
- }
224
+ let mut files = Vec::new();
225
+ for path in paths {
226
+ if target_paths.contains(path) || ignore_patterns.matches(path) {
267
227
  continue;
268
228
  }
269
- if character == '"' || character == '\'' || character == '`' {
270
- in_string = true;
271
- quote = character;
229
+ if file_exceeds_budget(root, path, mode) {
230
+ state.truncate("max_file_bytes");
272
231
  continue;
273
232
  }
274
- let next = if character.is_ascii_digit() {
275
- '0'
276
- } else if character.is_whitespace() {
277
- ' '
278
- } else {
279
- character.to_ascii_lowercase()
280
- };
281
- if next == ' ' {
282
- if !previous_space {
283
- normalized.push(next);
284
- }
285
- previous_space = true;
286
- } else {
287
- normalized.push(next);
288
- previous_space = false;
233
+ if let Some((file, cache_hit)) =
234
+ analyze_repo_file(root, path, added_lines, blob_hashes, cache, true)
235
+ {
236
+ state.record_cache_result(cache_hit);
237
+ files.push(file);
238
+ }
239
+ if primary_count.saturating_add(files.len()) >= max_scanned_files(mode) {
240
+ state.truncate("max_scanned_files");
241
+ break;
289
242
  }
290
243
  }
291
- let value = normalized.trim().to_string();
292
- (!value.is_empty()).then_some(value)
244
+ files.sort_by(|left, right| left.path.cmp(&right.path));
245
+ files.dedup_by(|left, right| left.path == right.path);
246
+ files
247
+ }
248
+
249
+ pub fn stable_fingerprint(parts: &[&str]) -> String {
250
+ let mut hasher = Sha256::new();
251
+ for part in parts {
252
+ hasher.update(part.as_bytes());
253
+ hasher.update(b"\0");
254
+ }
255
+ format!("sha256:{:x}", hasher.finalize())
293
256
  }
294
257
 
295
- fn is_comment_only(trimmed: &str) -> bool {
296
- trimmed.starts_with("//")
297
- || trimmed.starts_with('#')
298
- || trimmed.starts_with("/*")
299
- || trimmed.starts_with('*')
300
- || trimmed.starts_with("--")
258
+ fn max_scanned_files(mode: QualityMode) -> usize {
259
+ match mode {
260
+ QualityMode::ChangedFast => 2_000,
261
+ QualityMode::Report => 5_000,
262
+ QualityMode::DeepReport => usize::MAX,
263
+ }
301
264
  }
302
265
 
303
- fn is_generated_hash_mapping(trimmed: &str) -> bool {
304
- let Some((key, value)) = trimmed.split_once(':') else {
305
- return false;
306
- };
307
- key.trim_start().starts_with('"')
308
- && value.trim_start().starts_with("\"sha256:")
309
- && value.chars().filter(|character| *character == '"').count() >= 2
266
+ fn max_file_bytes(mode: QualityMode) -> u64 {
267
+ match mode {
268
+ QualityMode::ChangedFast => 512 * 1024,
269
+ QualityMode::Report => 1024 * 1024,
270
+ QualityMode::DeepReport => 8 * 1024 * 1024,
271
+ }
310
272
  }
311
273
 
312
- fn token_set(line: &str) -> Vec<String> {
313
- line.split(|character: char| !character.is_ascii_alphanumeric() && character != '_')
314
- .filter(|token| token.len() > 1)
315
- .map(ToString::to_string)
316
- .collect()
274
+ fn file_exceeds_budget(root: &Path, path: &str, mode: QualityMode) -> bool {
275
+ fs::metadata(root.join(path)).map_or(false, |metadata| {
276
+ metadata.is_file() && metadata.len() > max_file_bytes(mode)
277
+ })
317
278
  }
318
279
 
319
- fn indentation(line: &str) -> usize {
320
- line.chars()
321
- .take_while(|character| character.is_whitespace())
322
- .map(|character| if character == '\t' { 2 } else { 1 })
323
- .sum()
280
+ fn add_reason(reason_codes: &mut Vec<String>, reason: &str) {
281
+ if !reason_codes.iter().any(|existing| existing == reason) {
282
+ reason_codes.push(reason.to_string());
283
+ }
324
284
  }
325
285
 
326
286
  fn ignore_patterns(root: &Path, config: &RepositoryQualityConfig) -> Vec<String> {
@@ -352,16 +312,41 @@ fn read_naomeignore_patterns(root: &Path) -> Vec<String> {
352
312
  .collect()
353
313
  }
354
314
 
355
- fn should_skip_path(path: &str, patterns: &[String]) -> bool {
356
- paths::matches_any(path, patterns)
315
+ struct CompiledPatternSet {
316
+ exact: HashSet<String>,
317
+ prefixes: Vec<String>,
318
+ wildcards: Vec<String>,
357
319
  }
358
320
 
359
- fn is_binary_extension(path: &str) -> bool {
360
- let lower = path.to_ascii_lowercase();
361
- [
362
- ".png", ".jpg", ".jpeg", ".gif", ".webp", ".ico", ".pdf", ".zip", ".gz", ".tgz", ".wasm",
363
- ".dylib", ".so", ".dll", ".exe", ".bin",
364
- ]
365
- .iter()
366
- .any(|extension| lower.ends_with(extension))
321
+ impl CompiledPatternSet {
322
+ fn new(patterns: Vec<String>) -> Self {
323
+ let mut exact = HashSet::new();
324
+ let mut prefixes = Vec::new();
325
+ let mut wildcards = Vec::new();
326
+ for pattern in patterns {
327
+ let normalized = pattern.trim_start_matches("./").replace('\\', "/");
328
+ if normalized.ends_with("/**") && !normalized[..normalized.len() - 3].contains('*') {
329
+ prefixes.push(normalized.trim_end_matches("**").to_string());
330
+ } else if normalized.contains('*') || normalized.contains('?') {
331
+ wildcards.push(normalized);
332
+ } else {
333
+ exact.insert(normalized);
334
+ }
335
+ }
336
+ prefixes.sort();
337
+ prefixes.dedup();
338
+ wildcards.sort();
339
+ wildcards.dedup();
340
+ Self {
341
+ exact,
342
+ prefixes,
343
+ wildcards,
344
+ }
345
+ }
346
+
347
+ fn matches(&self, path: &str) -> bool {
348
+ self.exact.contains(path)
349
+ || self.prefixes.iter().any(|prefix| path.starts_with(prefix))
350
+ || paths::matches_any(path, &self.wildcards)
351
+ }
367
352
  }
@@ -0,0 +1,134 @@
1
+ use std::collections::{BTreeMap, BTreeSet};
2
+
3
+ use super::model::{ObjectCandidate, SemanticFinding};
4
+ use super::route::{cleanup_route, finding_mode};
5
+ use crate::quality::scanner::{stable_fingerprint, QualityContext};
6
+
7
+ pub(super) fn copied_config_findings(
8
+ context: &QualityContext,
9
+ candidates: &[ObjectCandidate],
10
+ ) -> Vec<SemanticFinding> {
11
+ let mut by_shape: BTreeMap<&str, Vec<&ObjectCandidate>> = BTreeMap::new();
12
+ for candidate in candidates
13
+ .iter()
14
+ .filter(|candidate| candidate.keys.len() >= 4 && candidate.line_count >= 8)
15
+ .filter(|candidate| has_config_shape_signal(&candidate.keys))
16
+ {
17
+ by_shape
18
+ .entry(candidate.shape_hash.as_str())
19
+ .or_default()
20
+ .push(candidate);
21
+ }
22
+
23
+ by_shape
24
+ .into_values()
25
+ .filter(|group| group.len() >= 2)
26
+ .filter(|group| group_applies_to_mode(context, group))
27
+ .map(|group| {
28
+ let occurrences = group
29
+ .iter()
30
+ .map(|candidate| candidate.occurrence())
31
+ .collect::<Vec<_>>();
32
+ let primary = &occurrences[0];
33
+ let id = stable_fingerprint(&[
34
+ "semantic",
35
+ "copied-config-object",
36
+ &primary.shape_hash,
37
+ &primary.path,
38
+ ]);
39
+ SemanticFinding {
40
+ id,
41
+ kind: "copied-config-object".to_string(),
42
+ confidence: 0.92,
43
+ severity: "medium".to_string(),
44
+ mode: finding_mode(context).to_string(),
45
+ summary: format!(
46
+ "Same config-like object shape appears in {} locations.",
47
+ occurrences.len()
48
+ ),
49
+ cleanup_route: cleanup_route(
50
+ "Extract shared fixture builder",
51
+ &occurrences,
52
+ "Create a shared fixture or builder for the repeated object shape.",
53
+ ),
54
+ occurrences,
55
+ }
56
+ })
57
+ .collect()
58
+ }
59
+
60
+ pub(super) fn inline_legacy_fixture_findings(
61
+ context: &QualityContext,
62
+ candidates: &[ObjectCandidate],
63
+ ) -> Vec<SemanticFinding> {
64
+ candidates
65
+ .iter()
66
+ .filter(|candidate| context.applies_to(&candidate.path))
67
+ .filter(|candidate| candidate.in_test_context)
68
+ .filter(|candidate| !is_shared_fixture_factory(candidate))
69
+ .filter(|candidate| candidate.line_count >= 8)
70
+ .filter(|candidate| has_legacy_fixture_signal(&candidate.keys))
71
+ .map(|candidate| {
72
+ let occurrence = candidate.occurrence();
73
+ let id = stable_fingerprint(&[
74
+ "semantic",
75
+ "inline-legacy-fixture",
76
+ &occurrence.shape_hash,
77
+ &occurrence.path,
78
+ &occurrence.start_line.to_string(),
79
+ ]);
80
+ SemanticFinding {
81
+ id,
82
+ kind: "inline-legacy-fixture".to_string(),
83
+ confidence: 0.86,
84
+ severity: "medium".to_string(),
85
+ mode: finding_mode(context).to_string(),
86
+ summary: "Large schema/version fixture is inline in a test or support context."
87
+ .to_string(),
88
+ cleanup_route: cleanup_route(
89
+ "Extract compatibility fixture",
90
+ std::slice::from_ref(&occurrence),
91
+ "Move the inline legacy fixture into a shared test fixture factory and keep version-specific differences explicit.",
92
+ ),
93
+ occurrences: vec![occurrence],
94
+ }
95
+ })
96
+ .collect()
97
+ }
98
+
99
+ fn has_config_shape_signal(keys: &BTreeSet<String>) -> bool {
100
+ keys.contains("schema")
101
+ || keys.contains("version")
102
+ || keys.contains("status")
103
+ || keys.contains("enabledadapters")
104
+ || keys.contains("disabledchecks")
105
+ || keys.contains("limits")
106
+ }
107
+
108
+ fn has_legacy_fixture_signal(keys: &BTreeSet<String>) -> bool {
109
+ (keys.contains("schema") && keys.contains("version"))
110
+ || (keys.contains("version") && keys.contains("status"))
111
+ || (keys.contains("fromversion") && keys.contains("toversion"))
112
+ }
113
+
114
+ fn is_shared_fixture_factory(candidate: &ObjectCandidate) -> bool {
115
+ let Some(symbol) = &candidate.symbol else {
116
+ return false;
117
+ };
118
+ let normalized = symbol.to_ascii_lowercase();
119
+ [
120
+ "builder", "contract", "factory", "fixture", "make", "template",
121
+ ]
122
+ .iter()
123
+ .any(|marker| normalized.contains(marker))
124
+ }
125
+
126
+ fn group_applies_to_mode(context: &QualityContext, group: &[&ObjectCandidate]) -> bool {
127
+ if context.mode.is_deep() {
128
+ return true;
129
+ }
130
+ context.mode.is_changed()
131
+ && group
132
+ .iter()
133
+ .any(|candidate| context.applies_to(&candidate.path))
134
+ }