@lamentis/naome 1.2.1 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. package/Cargo.lock +2 -2
  2. package/README.md +117 -47
  3. package/bin/naome.js +65 -12
  4. package/crates/naome-cli/Cargo.toml +1 -1
  5. package/crates/naome-cli/src/context_commands.rs +47 -0
  6. package/crates/naome-cli/src/dispatcher.rs +12 -2
  7. package/crates/naome-cli/src/main.rs +78 -29
  8. package/crates/naome-cli/src/quality_commands.rs +238 -34
  9. package/crates/naome-cli/src/quality_output.rs +34 -0
  10. package/crates/naome-cli/src/quality_reconcile_command.rs +45 -0
  11. package/crates/naome-cli/src/repository_model_commands.rs +84 -0
  12. package/crates/naome-cli/src/task_commands.rs +62 -0
  13. package/crates/naome-cli/src/workflow_commands.rs +120 -3
  14. package/crates/naome-core/Cargo.toml +1 -1
  15. package/crates/naome-core/src/context/helpers.rs +75 -0
  16. package/crates/naome-core/src/context/select.rs +134 -0
  17. package/crates/naome-core/src/context/types.rs +43 -0
  18. package/crates/naome-core/src/context.rs +6 -0
  19. package/crates/naome-core/src/decision/states.rs +1 -1
  20. package/crates/naome-core/src/decision.rs +4 -1
  21. package/crates/naome-core/src/git.rs +4 -2
  22. package/crates/naome-core/src/install_plan.rs +20 -0
  23. package/crates/naome-core/src/journal.rs +2 -7
  24. package/crates/naome-core/src/lib.rs +35 -8
  25. package/crates/naome-core/src/quality/adapter_ios.rs +131 -0
  26. package/crates/naome-core/src/quality/adapter_support.rs +67 -0
  27. package/crates/naome-core/src/quality/adapters.rs +81 -18
  28. package/crates/naome-core/src/quality/baseline.rs +8 -0
  29. package/crates/naome-core/src/quality/cache.rs +151 -0
  30. package/crates/naome-core/src/quality/checks/duplicate_blocks.rs +19 -8
  31. package/crates/naome-core/src/quality/checks/near_duplicates.rs +4 -2
  32. package/crates/naome-core/src/quality/checks.rs +7 -8
  33. package/crates/naome-core/src/quality/cleanup.rs +36 -3
  34. package/crates/naome-core/src/quality/config.rs +21 -3
  35. package/crates/naome-core/src/quality/mod.rs +189 -10
  36. package/crates/naome-core/src/quality/reconcile.rs +138 -0
  37. package/crates/naome-core/src/quality/reconcile_anchors.rs +64 -0
  38. package/crates/naome-core/src/quality/scanner/analysis/normalize.rs +78 -0
  39. package/crates/naome-core/src/quality/scanner/analysis.rs +175 -0
  40. package/crates/naome-core/src/quality/scanner/repo_paths.rs +39 -3
  41. package/crates/naome-core/src/quality/scanner.rs +235 -217
  42. package/crates/naome-core/src/quality/semantic/checks.rs +151 -0
  43. package/crates/naome-core/src/quality/semantic/extract.rs +158 -0
  44. package/crates/naome-core/src/quality/semantic/model.rs +85 -0
  45. package/crates/naome-core/src/quality/semantic/route.rs +52 -0
  46. package/crates/naome-core/src/quality/semantic.rs +68 -0
  47. package/crates/naome-core/src/quality/structure/adapter_ios.rs +149 -0
  48. package/crates/naome-core/src/quality/structure/adapters.rs +60 -42
  49. package/crates/naome-core/src/quality/structure/checks/directory.rs +13 -21
  50. package/crates/naome-core/src/quality/structure/checks.rs +1 -1
  51. package/crates/naome-core/src/quality/structure/classify/roles.rs +51 -5
  52. package/crates/naome-core/src/quality/structure/classify.rs +52 -0
  53. package/crates/naome-core/src/quality/structure/config.rs +24 -3
  54. package/crates/naome-core/src/quality/structure/mod.rs +5 -2
  55. package/crates/naome-core/src/quality/structure/model.rs +8 -1
  56. package/crates/naome-core/src/quality/types.rs +59 -2
  57. package/crates/naome-core/src/repository_model/detect.rs +188 -0
  58. package/crates/naome-core/src/repository_model/explain.rs +121 -0
  59. package/crates/naome-core/src/repository_model/path_scan.rs +67 -0
  60. package/crates/naome-core/src/repository_model/path_support.rs +59 -0
  61. package/crates/naome-core/src/repository_model/types.rs +152 -0
  62. package/crates/naome-core/src/repository_model/world.rs +48 -0
  63. package/crates/naome-core/src/repository_model/world_adapters.rs +145 -0
  64. package/crates/naome-core/src/repository_model/world_path_facts.rs +55 -0
  65. package/crates/naome-core/src/repository_model/world_paths.rs +168 -0
  66. package/crates/naome-core/src/repository_model.rs +164 -0
  67. package/crates/naome-core/src/route/builtin_checks.rs +41 -16
  68. package/crates/naome-core/src/task_ledger/import.rs +142 -0
  69. package/crates/naome-core/src/task_ledger/model.rs +13 -0
  70. package/crates/naome-core/src/task_ledger/proof_record.rs +52 -0
  71. package/crates/naome-core/src/task_ledger/read.rs +118 -0
  72. package/crates/naome-core/src/task_ledger/render.rs +55 -0
  73. package/crates/naome-core/src/task_ledger/write.rs +38 -0
  74. package/crates/naome-core/src/task_ledger.rs +48 -0
  75. package/crates/naome-core/src/task_state/api.rs +4 -2
  76. package/crates/naome-core/src/task_state/completed_refresh.rs +5 -16
  77. package/crates/naome-core/src/task_state/diff.rs +2 -2
  78. package/crates/naome-core/src/task_state/evidence.rs +8 -3
  79. package/crates/naome-core/src/task_state/mod.rs +1 -1
  80. package/crates/naome-core/src/task_state/progress.rs +13 -0
  81. package/crates/naome-core/src/task_state/proof_model.rs +8 -8
  82. package/crates/naome-core/src/task_state/repair.rs +2 -2
  83. package/crates/naome-core/src/task_state/task_diff_api.rs +9 -18
  84. package/crates/naome-core/src/task_state/types.rs +24 -0
  85. package/crates/naome-core/src/verification.rs +29 -18
  86. package/crates/naome-core/src/workflow/agent/capability.rs +194 -0
  87. package/crates/naome-core/src/workflow/agent/context_delta.rs +42 -0
  88. package/crates/naome-core/src/workflow/agent/decision.rs +32 -0
  89. package/crates/naome-core/src/workflow/agent/execution.rs +80 -0
  90. package/crates/naome-core/src/workflow/agent/proof.rs +24 -0
  91. package/crates/naome-core/src/workflow/agent/support.rs +58 -0
  92. package/crates/naome-core/src/workflow/agent/watchdog.rs +47 -0
  93. package/crates/naome-core/src/workflow/agent.rs +34 -0
  94. package/crates/naome-core/src/workflow/agent_types.rs +105 -0
  95. package/crates/naome-core/src/workflow/doctor.rs +183 -0
  96. package/crates/naome-core/src/workflow/mod.rs +13 -0
  97. package/crates/naome-core/src/workflow/mutation.rs +1 -2
  98. package/crates/naome-core/src/workflow/output.rs +8 -2
  99. package/crates/naome-core/src/workflow/phase_inference.rs +1 -1
  100. package/crates/naome-core/tests/context.rs +99 -0
  101. package/crates/naome-core/tests/harness_health.rs +4 -0
  102. package/crates/naome-core/tests/install_plan.rs +14 -0
  103. package/crates/naome-core/tests/quality.rs +190 -5
  104. package/crates/naome-core/tests/quality_performance.rs +268 -0
  105. package/crates/naome-core/tests/quality_structure_adapters.rs +39 -0
  106. package/crates/naome-core/tests/quality_structure_policy.rs +19 -0
  107. package/crates/naome-core/tests/repo_support/mod.rs +5 -1
  108. package/crates/naome-core/tests/repo_support/verification_values.rs +55 -0
  109. package/crates/naome-core/tests/repository_model.rs +281 -0
  110. package/crates/naome-core/tests/route_user_diff.rs +59 -7
  111. package/crates/naome-core/tests/semantic_legacy.rs +174 -0
  112. package/crates/naome-core/tests/task_ledger.rs +328 -0
  113. package/crates/naome-core/tests/task_state.rs +28 -0
  114. package/crates/naome-core/tests/verification.rs +29 -36
  115. package/crates/naome-core/tests/workflow_agent.rs +233 -0
  116. package/crates/naome-core/tests/workflow_agent_support/mod.rs +159 -0
  117. package/crates/naome-core/tests/workflow_doctor.rs +45 -0
  118. package/crates/naome-core/tests/workflow_policy.rs +6 -1
  119. package/installer/codex-hooks.js +121 -0
  120. package/installer/context.js +10 -0
  121. package/installer/filesystem.js +4 -0
  122. package/installer/flows.js +8 -4
  123. package/installer/git-boundary.js +1 -0
  124. package/installer/harness-files.js +6 -0
  125. package/installer/install-plan.js +4 -0
  126. package/installer/main.js +1 -1
  127. package/installer/native.js +1 -1
  128. package/native/darwin-arm64/naome +0 -0
  129. package/native/linux-x64/naome +0 -0
  130. package/package.json +1 -1
  131. package/templates/naome-root/.codex/config.toml +2 -0
  132. package/templates/naome-root/.codex/hooks.json +70 -0
  133. package/templates/naome-root/.naome/bin/check-harness-health.js +8 -6
  134. package/templates/naome-root/.naome/bin/check-task-state.js +12 -7
  135. package/templates/naome-root/.naome/bin/codex-hook-io.js +122 -0
  136. package/templates/naome-root/.naome/bin/codex-hook-policy.js +180 -0
  137. package/templates/naome-root/.naome/bin/codex-hook-runtime.js +174 -0
  138. package/templates/naome-root/.naome/bin/codex-hook.js +6 -0
  139. package/templates/naome-root/.naome/bin/naome.js +45 -7
  140. package/templates/naome-root/.naome/manifest.json +12 -6
  141. package/templates/naome-root/.naome/repository-model.json +6 -0
  142. package/templates/naome-root/.naome/repository-quality.json +3 -1
  143. package/templates/naome-root/.naome/verification.json +15 -1
  144. package/templates/naome-root/.naomeignore +1 -0
  145. package/templates/naome-root/AGENTS.md +38 -83
  146. package/templates/naome-root/docs/naome/agent-workflow.md +66 -28
  147. package/templates/naome-root/docs/naome/codex-hooks.md +82 -0
  148. package/templates/naome-root/docs/naome/context-economy.md +73 -0
  149. package/templates/naome-root/docs/naome/first-run.md +25 -14
  150. package/templates/naome-root/docs/naome/index.md +18 -10
  151. package/templates/naome-root/docs/naome/repository-model.md +92 -0
  152. package/templates/naome-root/docs/naome/repository-quality.md +104 -5
  153. package/templates/naome-root/docs/naome/repository-structure.md +10 -3
  154. package/templates/naome-root/docs/naome/task-ledger.md +71 -0
  155. package/templates/naome-root/docs/naome/testing.md +16 -3
@@ -1,3 +1,4 @@
1
+ mod analysis;
1
2
  mod repo_paths;
2
3
 
3
4
  use std::collections::{HashMap, HashSet};
@@ -7,13 +8,15 @@ use std::path::Path;
7
8
  use sha2::{Digest, Sha256};
8
9
 
9
10
  use crate::{git, models::NaomeError, paths};
10
- use repo_paths::added_lines_by_path;
11
11
  pub(crate) use repo_paths::collect_repo_paths;
12
+ use repo_paths::{added_lines_by_path, tracked_blob_hashes};
12
13
 
14
+ use super::cache::QualityCache;
13
15
  use super::types::{
14
16
  default_generated_paths, default_ignored_paths, QualityLimits, QualityMode,
15
17
  RepositoryQualityConfig,
16
18
  };
19
+ use analysis::analyze_repo_file;
17
20
 
18
21
  #[derive(Debug, Clone)]
19
22
  pub struct QualityContext {
@@ -23,6 +26,11 @@ pub struct QualityContext {
23
26
  pub repo_paths: Vec<String>,
24
27
  pub target_paths: HashSet<String>,
25
28
  pub files: Vec<FileAnalysis>,
29
+ pub comparison_files: Vec<FileAnalysis>,
30
+ pub truncated: bool,
31
+ pub reason_codes: Vec<String>,
32
+ pub cache_hits: usize,
33
+ pub cache_misses: usize,
26
34
  }
27
35
 
28
36
  impl QualityContext {
@@ -30,8 +38,12 @@ impl QualityContext {
30
38
  self.files.iter().map(|file| file.path.clone()).collect()
31
39
  }
32
40
 
41
+ pub fn comparison_candidate_files(&self) -> impl Iterator<Item = &FileAnalysis> {
42
+ self.files.iter().chain(self.comparison_files.iter())
43
+ }
44
+
33
45
  pub fn applies_to(&self, path: &str) -> bool {
34
- self.mode == QualityMode::Report || self.target_paths.contains(path)
46
+ !self.mode.is_changed() || self.target_paths.contains(path)
35
47
  }
36
48
 
37
49
  pub fn check_applies_to(&self, check_id: &str, path: &str) -> bool {
@@ -43,22 +55,23 @@ impl QualityContext {
43
55
  }
44
56
  }
45
57
 
46
- #[derive(Debug, Clone)]
58
+ #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
47
59
  pub struct FileAnalysis {
48
60
  pub path: String,
49
61
  pub line_count: usize,
50
62
  pub added_lines: usize,
63
+ pub raw_lines: Vec<String>,
51
64
  pub normalized_lines: Vec<NormalizedLine>,
52
65
  pub symbols: Vec<SymbolAnalysis>,
53
66
  }
54
67
 
55
- #[derive(Debug, Clone)]
68
+ #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
56
69
  pub struct NormalizedLine {
57
70
  pub line_number: usize,
58
71
  pub value: String,
59
72
  }
60
73
 
61
- #[derive(Debug, Clone)]
74
+ #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
62
75
  pub struct SymbolAnalysis {
63
76
  pub kind: String,
64
77
  pub name: String,
@@ -80,41 +93,74 @@ pub fn scan_repository(
80
93
  config: RepositoryQualityConfig,
81
94
  ) -> Result<QualityContext, NaomeError> {
82
95
  let changed_paths = git::changed_paths(root)?;
96
+ scan_repository_with_targets(root, mode, config, changed_paths, true)
97
+ }
98
+
99
+ pub fn scan_repository_paths(
100
+ root: &Path,
101
+ config: RepositoryQualityConfig,
102
+ paths: &[String],
103
+ ) -> Result<QualityContext, NaomeError> {
104
+ let mut target_paths = paths
105
+ .iter()
106
+ .map(|path| normalize_target_path(path))
107
+ .collect::<Result<Vec<_>, _>>()?;
108
+ target_paths.sort();
109
+ target_paths.dedup();
110
+ scan_repository_with_targets(root, QualityMode::PathScoped, config, target_paths, false)
111
+ }
112
+
113
+ fn scan_repository_with_targets(
114
+ root: &Path,
115
+ mode: QualityMode,
116
+ config: RepositoryQualityConfig,
117
+ changed_paths: Vec<String>,
118
+ include_comparison_files: bool,
119
+ ) -> Result<QualityContext, NaomeError> {
83
120
  let target_paths = changed_paths.iter().cloned().collect::<HashSet<_>>();
84
121
  let mut whole_repo_paths = collect_repo_paths(root)?;
85
122
  whole_repo_paths.sort();
86
123
  whole_repo_paths.dedup();
87
124
  let scan_paths = match mode {
88
- QualityMode::Changed => changed_paths.clone(),
89
- QualityMode::Report => whole_repo_paths.clone(),
125
+ QualityMode::ChangedFast | QualityMode::PathScoped => changed_paths.clone(),
126
+ QualityMode::Report | QualityMode::DeepReport => whole_repo_paths.clone(),
127
+ };
128
+ let added_lines = if mode.is_changed() {
129
+ added_lines_by_path(root, &target_paths)?
130
+ } else {
131
+ HashMap::new()
132
+ };
133
+ let comparison_added_lines = HashMap::new();
134
+ let blob_hashes = tracked_blob_hashes(root).unwrap_or_default();
135
+ let cache = QualityCache::new(root, &config);
136
+ let ignore_patterns = CompiledPatternSet::new(ignore_patterns(root, &config));
137
+ let mut state = ScanState::default();
138
+ let mut files = analyze_primary_paths(
139
+ root,
140
+ mode,
141
+ &scan_paths,
142
+ &added_lines,
143
+ &blob_hashes,
144
+ &cache,
145
+ &ignore_patterns,
146
+ &mut state,
147
+ );
148
+ let comparison_files = if include_comparison_files {
149
+ analyze_comparison_paths(
150
+ root,
151
+ mode,
152
+ &whole_repo_paths,
153
+ &target_paths,
154
+ &comparison_added_lines,
155
+ &blob_hashes,
156
+ &cache,
157
+ &ignore_patterns,
158
+ files.len(),
159
+ &mut state,
160
+ )
161
+ } else {
162
+ Vec::new()
90
163
  };
91
- let added_lines = added_lines_by_path(root)?;
92
- let ignore_patterns = ignore_patterns(root, &config);
93
- let mut files = Vec::new();
94
-
95
- for path in scan_paths {
96
- if should_skip_path(&path, &ignore_patterns) {
97
- continue;
98
- }
99
- if let Some(file) = analyze_repo_file(root, &path, &added_lines) {
100
- files.push(file);
101
- }
102
- }
103
-
104
- if mode == QualityMode::Changed {
105
- let scanned = files
106
- .iter()
107
- .map(|file| file.path.clone())
108
- .collect::<HashSet<_>>();
109
- for path in &whole_repo_paths {
110
- if scanned.contains(path) || should_skip_path(path, &ignore_patterns) {
111
- continue;
112
- }
113
- if let Some(file) = analyze_repo_file(root, path, &added_lines) {
114
- files.push(file);
115
- }
116
- }
117
- }
118
164
 
119
165
  files.sort_by(|left, right| left.path.cmp(&right.path));
120
166
  Ok(QualityContext {
@@ -124,215 +170,162 @@ pub fn scan_repository(
124
170
  repo_paths: whole_repo_paths,
125
171
  target_paths,
126
172
  files,
173
+ comparison_files,
174
+ truncated: state.truncated,
175
+ reason_codes: state.reason_codes,
176
+ cache_hits: state.cache_hits,
177
+ cache_misses: state.cache_misses,
127
178
  })
128
179
  }
129
180
 
130
- pub fn stable_fingerprint(parts: &[&str]) -> String {
131
- let mut hasher = Sha256::new();
132
- for part in parts {
133
- hasher.update(part.as_bytes());
134
- hasher.update(b"\0");
181
+ fn normalize_target_path(path: &str) -> Result<String, NaomeError> {
182
+ let normalized = path.trim_start_matches("./").replace('\\', "/");
183
+ let has_parent_segment = normalized.split('/').any(|segment| segment == "..");
184
+ if normalized.is_empty() || normalized.starts_with('/') || has_parent_segment {
185
+ return Err(NaomeError::new(format!(
186
+ "quality path must be a repository-relative path: {path}"
187
+ )));
135
188
  }
136
- format!("sha256:{:x}", hasher.finalize())
189
+ Ok(normalized)
137
190
  }
138
191
 
139
- fn analyze_repo_file(
140
- root: &Path,
141
- path: &str,
142
- added_lines: &HashMap<String, usize>,
143
- ) -> Option<FileAnalysis> {
144
- let full_path = root.join(path);
145
- if !full_path.is_file() || is_binary_extension(path) {
146
- return None;
147
- }
148
- let content = fs::read_to_string(&full_path).ok()?;
149
- Some(analyze_file(
150
- path,
151
- &content,
152
- added_lines.get(path).copied().unwrap_or(0),
153
- ))
192
+ #[derive(Default)]
193
+ struct ScanState {
194
+ cache_hits: usize,
195
+ cache_misses: usize,
196
+ truncated: bool,
197
+ reason_codes: Vec<String>,
154
198
  }
155
199
 
156
- fn analyze_file(path: &str, content: &str, added_lines: usize) -> FileAnalysis {
157
- let lines = content.lines().collect::<Vec<_>>();
158
- let normalized_lines = lines
159
- .iter()
160
- .enumerate()
161
- .filter_map(|(index, line)| normalize_line(line).map(|value| (index + 1, value)))
162
- .map(|(line_number, value)| NormalizedLine { line_number, value })
163
- .collect::<Vec<_>>();
164
- let symbols = detect_symbols(&lines);
165
- FileAnalysis {
166
- path: path.to_string(),
167
- line_count: lines.len(),
168
- added_lines,
169
- normalized_lines,
170
- symbols,
171
- }
172
- }
173
-
174
- fn detect_symbols(lines: &[&str]) -> Vec<SymbolAnalysis> {
175
- let mut starts = Vec::new();
176
- for (index, line) in lines.iter().enumerate() {
177
- let indent = indentation(line);
178
- if let Some((kind, name)) = symbol_start(line.trim()) {
179
- starts.push((index, indent, kind, name));
200
+ impl ScanState {
201
+ fn record_cache_result(&mut self, cache_hit: bool) {
202
+ if cache_hit {
203
+ self.cache_hits += 1;
204
+ } else {
205
+ self.cache_misses += 1;
180
206
  }
181
207
  }
182
208
 
183
- let mut symbols = Vec::new();
184
- for (position, (start_index, indent, kind, name)) in starts.iter().enumerate() {
185
- let end_index = starts
186
- .iter()
187
- .skip(position + 1)
188
- .find(|(_, next_indent, _, _)| next_indent <= indent)
189
- .map(|(next_index, _, _, _)| next_index.saturating_sub(1))
190
- .unwrap_or_else(|| lines.len().saturating_sub(1));
191
- let normalized_body = lines[*start_index..=end_index]
192
- .iter()
193
- .filter_map(|line| normalize_line(line))
194
- .collect::<Vec<_>>();
195
- let tokens = normalized_body
196
- .iter()
197
- .flat_map(|line| token_set(line))
198
- .collect::<HashSet<_>>();
199
- symbols.push(SymbolAnalysis {
200
- kind: kind.clone(),
201
- name: name.clone(),
202
- start_line: start_index + 1,
203
- end_line: end_index + 1,
204
- indent: *indent,
205
- tokens,
206
- });
209
+ fn truncate(&mut self, reason: &str) {
210
+ self.truncated = true;
211
+ add_reason(&mut self.reason_codes, reason);
207
212
  }
208
- symbols
209
213
  }
210
214
 
211
- fn symbol_start(trimmed: &str) -> Option<(String, String)> {
212
- let candidates = [
213
- ("function", "function "),
214
- ("function", "export function "),
215
- ("function", "async function "),
216
- ("function", "export async function "),
217
- ("function", "def "),
218
- ("function", "fn "),
219
- ("function", "pub fn "),
220
- ("function", "func "),
221
- ("class", "class "),
222
- ("struct", "struct "),
223
- ("struct", "pub struct "),
224
- ("enum", "enum "),
225
- ("enum", "pub enum "),
226
- ("impl", "impl "),
227
- ];
228
- for (kind, prefix) in candidates {
229
- if let Some(rest) = trimmed.strip_prefix(prefix) {
230
- return Some((kind.to_string(), symbol_name(rest)));
215
+ fn analyze_primary_paths(
216
+ root: &Path,
217
+ mode: QualityMode,
218
+ paths: &[String],
219
+ added_lines: &HashMap<String, usize>,
220
+ blob_hashes: &HashMap<String, String>,
221
+ cache: &QualityCache,
222
+ ignore_patterns: &CompiledPatternSet,
223
+ state: &mut ScanState,
224
+ ) -> Vec<FileAnalysis> {
225
+ let mut files = Vec::new();
226
+ for path in paths {
227
+ if ignore_patterns.matches(path) {
228
+ continue;
231
229
  }
232
- }
233
- for prefix in ["const ", "let ", "export const ", "export let "] {
234
- if let Some(rest) = trimmed.strip_prefix(prefix) {
235
- if trimmed.contains("=>") || trimmed.contains("function") || trimmed.contains("React.")
236
- {
237
- return Some(("function".to_string(), symbol_name(rest)));
238
- }
230
+ if files.len() >= max_scanned_files(mode) {
231
+ state.truncate("max_scanned_files");
232
+ break;
233
+ }
234
+ if file_exceeds_budget(root, path, mode) {
235
+ state.truncate("max_file_bytes");
236
+ continue;
237
+ }
238
+ if let Some((file, cache_hit)) = analyze_repo_file(
239
+ root,
240
+ path,
241
+ added_lines,
242
+ blob_hashes,
243
+ cache,
244
+ !mode.is_changed(),
245
+ ) {
246
+ state.record_cache_result(cache_hit);
247
+ files.push(file);
239
248
  }
240
249
  }
241
- None
242
- }
243
-
244
- fn symbol_name(rest: &str) -> String {
245
- rest.chars()
246
- .take_while(|character| character.is_ascii_alphanumeric() || *character == '_')
247
- .collect::<String>()
248
- .trim_matches('_')
249
- .to_string()
250
+ files
250
251
  }
251
252
 
252
- fn normalize_line(line: &str) -> Option<String> {
253
- let trimmed = line.trim();
254
- if trimmed.is_empty()
255
- || is_comment_only(trimmed)
256
- || is_string_list_item(trimmed)
257
- || is_generated_hash_mapping(trimmed)
258
- {
259
- return None;
253
+ fn analyze_comparison_paths(
254
+ root: &Path,
255
+ mode: QualityMode,
256
+ paths: &[String],
257
+ target_paths: &HashSet<String>,
258
+ added_lines: &HashMap<String, usize>,
259
+ blob_hashes: &HashMap<String, String>,
260
+ cache: &QualityCache,
261
+ ignore_patterns: &CompiledPatternSet,
262
+ primary_count: usize,
263
+ state: &mut ScanState,
264
+ ) -> Vec<FileAnalysis> {
265
+ if !mode.is_changed() {
266
+ return Vec::new();
260
267
  }
261
-
262
- let mut normalized = String::new();
263
- let mut in_string = false;
264
- let mut quote = '\0';
265
- let mut previous_space = false;
266
- for character in trimmed.chars() {
267
- if in_string {
268
- if character == quote {
269
- in_string = false;
270
- normalized.push('S');
271
- previous_space = false;
272
- }
268
+ let mut files = Vec::new();
269
+ for path in paths {
270
+ if target_paths.contains(path) || ignore_patterns.matches(path) {
273
271
  continue;
274
272
  }
275
- if character == '"' || character == '\'' || character == '`' {
276
- in_string = true;
277
- quote = character;
273
+ if file_exceeds_budget(root, path, mode) {
274
+ state.truncate("max_file_bytes");
278
275
  continue;
279
276
  }
280
- let next = if character.is_ascii_digit() {
281
- '0'
282
- } else if character.is_whitespace() {
283
- ' '
284
- } else {
285
- character.to_ascii_lowercase()
286
- };
287
- if next == ' ' {
288
- if !previous_space {
289
- normalized.push(next);
290
- }
291
- previous_space = true;
292
- } else {
293
- normalized.push(next);
294
- previous_space = false;
277
+ if let Some((file, cache_hit)) =
278
+ analyze_repo_file(root, path, added_lines, blob_hashes, cache, true)
279
+ {
280
+ state.record_cache_result(cache_hit);
281
+ files.push(file);
282
+ }
283
+ if primary_count.saturating_add(files.len()) >= max_scanned_files(mode) {
284
+ state.truncate("max_scanned_files");
285
+ break;
295
286
  }
296
287
  }
297
- let value = normalized.trim().to_string();
298
- (!value.is_empty()).then_some(value)
288
+ files.sort_by(|left, right| left.path.cmp(&right.path));
289
+ files.dedup_by(|left, right| left.path == right.path);
290
+ files
299
291
  }
300
292
 
301
- fn is_comment_only(trimmed: &str) -> bool {
302
- trimmed.starts_with("//")
303
- || trimmed.starts_with('#')
304
- || trimmed.starts_with("/*")
305
- || trimmed.starts_with('*')
306
- || trimmed.starts_with("--")
293
+ pub fn stable_fingerprint(parts: &[&str]) -> String {
294
+ let mut hasher = Sha256::new();
295
+ for part in parts {
296
+ hasher.update(part.as_bytes());
297
+ hasher.update(b"\0");
298
+ }
299
+ format!("sha256:{:x}", hasher.finalize())
307
300
  }
308
301
 
309
- fn is_generated_hash_mapping(trimmed: &str) -> bool {
310
- let Some((key, value)) = trimmed.split_once(':') else {
311
- return false;
312
- };
313
- key.trim_start().starts_with('"')
314
- && value.trim_start().starts_with("\"sha256:")
315
- && value.chars().filter(|character| *character == '"').count() >= 2
302
+ fn max_scanned_files(mode: QualityMode) -> usize {
303
+ match mode {
304
+ QualityMode::ChangedFast => 2_000,
305
+ QualityMode::PathScoped => 128,
306
+ QualityMode::Report => 5_000,
307
+ QualityMode::DeepReport => usize::MAX,
308
+ }
316
309
  }
317
310
 
318
- fn is_string_list_item(trimmed: &str) -> bool {
319
- let value = trimmed.trim_end_matches(',');
320
- (value.starts_with('"') && value.ends_with('"'))
321
- || (value.starts_with('\'') && value.ends_with('\''))
311
+ fn max_file_bytes(mode: QualityMode) -> u64 {
312
+ match mode {
313
+ QualityMode::ChangedFast | QualityMode::PathScoped => 512 * 1024,
314
+ QualityMode::Report => 1024 * 1024,
315
+ QualityMode::DeepReport => 8 * 1024 * 1024,
316
+ }
322
317
  }
323
318
 
324
- fn token_set(line: &str) -> Vec<String> {
325
- line.split(|character: char| !character.is_ascii_alphanumeric() && character != '_')
326
- .filter(|token| token.len() > 1)
327
- .map(ToString::to_string)
328
- .collect()
319
+ fn file_exceeds_budget(root: &Path, path: &str, mode: QualityMode) -> bool {
320
+ fs::metadata(root.join(path)).map_or(false, |metadata| {
321
+ metadata.is_file() && metadata.len() > max_file_bytes(mode)
322
+ })
329
323
  }
330
324
 
331
- fn indentation(line: &str) -> usize {
332
- line.chars()
333
- .take_while(|character| character.is_whitespace())
334
- .map(|character| if character == '\t' { 2 } else { 1 })
335
- .sum()
325
+ fn add_reason(reason_codes: &mut Vec<String>, reason: &str) {
326
+ if !reason_codes.iter().any(|existing| existing == reason) {
327
+ reason_codes.push(reason.to_string());
328
+ }
336
329
  }
337
330
 
338
331
  fn ignore_patterns(root: &Path, config: &RepositoryQualityConfig) -> Vec<String> {
@@ -364,16 +357,41 @@ fn read_naomeignore_patterns(root: &Path) -> Vec<String> {
364
357
  .collect()
365
358
  }
366
359
 
367
- fn should_skip_path(path: &str, patterns: &[String]) -> bool {
368
- paths::matches_any(path, patterns)
360
+ struct CompiledPatternSet {
361
+ exact: HashSet<String>,
362
+ prefixes: Vec<String>,
363
+ wildcards: Vec<String>,
369
364
  }
370
365
 
371
- fn is_binary_extension(path: &str) -> bool {
372
- let lower = path.to_ascii_lowercase();
373
- [
374
- ".png", ".jpg", ".jpeg", ".gif", ".webp", ".ico", ".pdf", ".zip", ".gz", ".tgz", ".wasm",
375
- ".dylib", ".so", ".dll", ".exe", ".bin",
376
- ]
377
- .iter()
378
- .any(|extension| lower.ends_with(extension))
366
+ impl CompiledPatternSet {
367
+ fn new(patterns: Vec<String>) -> Self {
368
+ let mut exact = HashSet::new();
369
+ let mut prefixes = Vec::new();
370
+ let mut wildcards = Vec::new();
371
+ for pattern in patterns {
372
+ let normalized = pattern.trim_start_matches("./").replace('\\', "/");
373
+ if normalized.ends_with("/**") && !normalized[..normalized.len() - 3].contains('*') {
374
+ prefixes.push(normalized.trim_end_matches("**").to_string());
375
+ } else if normalized.contains('*') || normalized.contains('?') {
376
+ wildcards.push(normalized);
377
+ } else {
378
+ exact.insert(normalized);
379
+ }
380
+ }
381
+ prefixes.sort();
382
+ prefixes.dedup();
383
+ wildcards.sort();
384
+ wildcards.dedup();
385
+ Self {
386
+ exact,
387
+ prefixes,
388
+ wildcards,
389
+ }
390
+ }
391
+
392
+ fn matches(&self, path: &str) -> bool {
393
+ self.exact.contains(path)
394
+ || self.prefixes.iter().any(|prefix| path.starts_with(prefix))
395
+ || paths::matches_any(path, &self.wildcards)
396
+ }
379
397
  }