@lamentis/naome 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/Cargo.lock +2 -2
  2. package/Cargo.toml +1 -1
  3. package/LICENSE +180 -21
  4. package/README.md +49 -6
  5. package/bin/naome-node.js +44 -4
  6. package/bin/naome.js +54 -16
  7. package/crates/naome-cli/Cargo.toml +1 -1
  8. package/crates/naome-cli/src/check_commands.rs +135 -0
  9. package/crates/naome-cli/src/cli_args.rs +5 -0
  10. package/crates/naome-cli/src/dispatcher.rs +36 -0
  11. package/crates/naome-cli/src/install_bridge.rs +83 -0
  12. package/crates/naome-cli/src/main.rs +57 -341
  13. package/crates/naome-cli/src/prompt_commands.rs +68 -0
  14. package/crates/naome-cli/src/quality_commands.rs +141 -0
  15. package/crates/naome-cli/src/simple_commands.rs +53 -0
  16. package/crates/naome-cli/src/workflow_commands.rs +153 -0
  17. package/crates/naome-core/Cargo.toml +1 -1
  18. package/crates/naome-core/src/harness_health/integrity.rs +96 -0
  19. package/crates/naome-core/src/harness_health.rs +14 -126
  20. package/crates/naome-core/src/install_plan.rs +3 -0
  21. package/crates/naome-core/src/intent/classifier.rs +171 -0
  22. package/crates/naome-core/src/intent/envelope.rs +108 -0
  23. package/crates/naome-core/src/intent/legacy.rs +138 -0
  24. package/crates/naome-core/src/intent/legacy_response.rs +76 -0
  25. package/crates/naome-core/src/intent/model.rs +71 -0
  26. package/crates/naome-core/src/intent/patterns.rs +170 -0
  27. package/crates/naome-core/src/intent/resolver.rs +162 -0
  28. package/crates/naome-core/src/intent/resolver_active.rs +17 -0
  29. package/crates/naome-core/src/intent/resolver_baseline.rs +55 -0
  30. package/crates/naome-core/src/intent/resolver_catalog.rs +167 -0
  31. package/crates/naome-core/src/intent/resolver_policy.rs +72 -0
  32. package/crates/naome-core/src/intent/resolver_shared.rs +55 -0
  33. package/crates/naome-core/src/intent/risk.rs +40 -0
  34. package/crates/naome-core/src/intent/segment.rs +170 -0
  35. package/crates/naome-core/src/intent.rs +64 -879
  36. package/crates/naome-core/src/journal.rs +9 -20
  37. package/crates/naome-core/src/lib.rs +13 -0
  38. package/crates/naome-core/src/quality/adapters.rs +178 -0
  39. package/crates/naome-core/src/quality/baseline.rs +75 -0
  40. package/crates/naome-core/src/quality/checks/duplicate_blocks.rs +175 -0
  41. package/crates/naome-core/src/quality/checks/near_duplicates.rs +130 -0
  42. package/crates/naome-core/src/quality/checks.rs +228 -0
  43. package/crates/naome-core/src/quality/cleanup.rs +72 -0
  44. package/crates/naome-core/src/quality/config.rs +109 -0
  45. package/crates/naome-core/src/quality/mod.rs +90 -0
  46. package/crates/naome-core/src/quality/scanner/repo_paths.rs +103 -0
  47. package/crates/naome-core/src/quality/scanner.rs +367 -0
  48. package/crates/naome-core/src/quality/types.rs +289 -0
  49. package/crates/naome-core/src/route.rs +292 -17
  50. package/crates/naome-core/src/task_state/admission.rs +63 -0
  51. package/crates/naome-core/src/task_state/admission_proof.rs +72 -0
  52. package/crates/naome-core/src/task_state/api.rs +130 -0
  53. package/crates/naome-core/src/task_state/commit_gate.rs +138 -0
  54. package/crates/naome-core/src/task_state/compact_proof.rs +160 -0
  55. package/crates/naome-core/src/task_state/completed_refresh.rs +89 -0
  56. package/crates/naome-core/src/task_state/completion.rs +72 -0
  57. package/crates/naome-core/src/task_state/deleted_paths.rs +47 -0
  58. package/crates/naome-core/src/task_state/diff.rs +95 -0
  59. package/crates/naome-core/src/task_state/evidence.rs +154 -0
  60. package/crates/naome-core/src/task_state/git_io.rs +86 -0
  61. package/crates/naome-core/src/task_state/git_parse.rs +86 -0
  62. package/crates/naome-core/src/task_state/git_refs.rs +37 -0
  63. package/crates/naome-core/src/task_state/human_review_state.rs +31 -0
  64. package/crates/naome-core/src/task_state/mod.rs +38 -0
  65. package/crates/naome-core/src/task_state/process_guard.rs +40 -0
  66. package/crates/naome-core/src/task_state/progress.rs +123 -0
  67. package/crates/naome-core/src/task_state/proof.rs +139 -0
  68. package/crates/naome-core/src/task_state/proof_entry.rs +66 -0
  69. package/crates/naome-core/src/task_state/proof_model.rs +70 -0
  70. package/crates/naome-core/src/task_state/proof_sources.rs +76 -0
  71. package/crates/naome-core/src/task_state/push_gate.rs +49 -0
  72. package/crates/naome-core/src/task_state/reconcile.rs +7 -0
  73. package/crates/naome-core/src/task_state/repair.rs +168 -0
  74. package/crates/naome-core/src/task_state/shape.rs +117 -0
  75. package/crates/naome-core/src/task_state/task_diff_api.rs +170 -0
  76. package/crates/naome-core/src/task_state/task_records.rs +131 -0
  77. package/crates/naome-core/src/task_state/task_references.rs +126 -0
  78. package/crates/naome-core/src/task_state/types.rs +87 -0
  79. package/crates/naome-core/src/task_state/util.rs +137 -0
  80. package/crates/naome-core/src/verification/render.rs +122 -0
  81. package/crates/naome-core/src/verification.rs +176 -58
  82. package/crates/naome-core/src/verification_contract.rs +49 -21
  83. package/crates/naome-core/src/workflow/integrity.rs +123 -0
  84. package/crates/naome-core/src/workflow/integrity_normalize.rs +7 -0
  85. package/crates/naome-core/src/workflow/integrity_support.rs +110 -0
  86. package/crates/naome-core/src/workflow/mod.rs +18 -0
  87. package/crates/naome-core/src/workflow/mutation.rs +68 -0
  88. package/crates/naome-core/src/workflow/output.rs +111 -0
  89. package/crates/naome-core/src/workflow/phase_inference.rs +73 -0
  90. package/crates/naome-core/src/workflow/phases.rs +169 -0
  91. package/crates/naome-core/src/workflow/policy.rs +156 -0
  92. package/crates/naome-core/src/workflow/processes.rs +91 -0
  93. package/crates/naome-core/src/workflow/types.rs +42 -0
  94. package/crates/naome-core/tests/harness_health.rs +3 -0
  95. package/crates/naome-core/tests/intent.rs +97 -792
  96. package/crates/naome-core/tests/intent_support/mod.rs +133 -0
  97. package/crates/naome-core/tests/intent_v2.rs +90 -0
  98. package/crates/naome-core/tests/quality.rs +425 -0
  99. package/crates/naome-core/tests/route.rs +221 -4
  100. package/crates/naome-core/tests/task_state.rs +3 -0
  101. package/crates/naome-core/tests/task_state_compact.rs +110 -0
  102. package/crates/naome-core/tests/task_state_compact_support/mod.rs +5 -0
  103. package/crates/naome-core/tests/task_state_compact_support/repo.rs +130 -0
  104. package/crates/naome-core/tests/task_state_compact_support/states.rs +151 -0
  105. package/crates/naome-core/tests/workflow_integrity.rs +85 -0
  106. package/crates/naome-core/tests/workflow_policy.rs +139 -0
  107. package/crates/naome-core/tests/workflow_support/mod.rs +194 -0
  108. package/native/darwin-arm64/naome +0 -0
  109. package/native/linux-x64/naome +0 -0
  110. package/package.json +2 -2
  111. package/templates/naome-root/.naome/bin/check-harness-health.js +66 -85
  112. package/templates/naome-root/.naome/bin/check-task-state.js +9 -10
  113. package/templates/naome-root/.naome/bin/naome.js +34 -63
  114. package/templates/naome-root/.naome/manifest.json +20 -18
  115. package/templates/naome-root/.naome/repository-quality-baseline.json +5 -0
  116. package/templates/naome-root/.naome/repository-quality.json +24 -0
  117. package/templates/naome-root/.naome/task-contract.schema.json +93 -11
  118. package/templates/naome-root/.naome/upgrade-state.json +1 -1
  119. package/templates/naome-root/.naome/verification.json +37 -0
  120. package/templates/naome-root/AGENTS.md +3 -0
  121. package/templates/naome-root/docs/naome/agent-workflow.md +25 -12
  122. package/templates/naome-root/docs/naome/execution.md +25 -21
  123. package/templates/naome-root/docs/naome/index.md +4 -3
  124. package/templates/naome-root/docs/naome/repository-quality.md +43 -0
  125. package/templates/naome-root/docs/naome/testing.md +12 -0
  126. package/crates/naome-core/src/task_state.rs +0 -2210
@@ -8,6 +8,7 @@ use serde::Serialize;
8
8
  use serde_json::Value;
9
9
 
10
10
  use crate::models::NaomeError;
11
+ use crate::task_state::canonical_proof_check_ids;
11
12
 
12
13
  const JOURNAL_PATH: &str = ".naome/task-journal.jsonl";
13
14
 
@@ -121,17 +122,7 @@ fn journal_contains(root: &Path, entry: &TaskJournalEntry) -> Result<bool, Naome
121
122
  }
122
123
 
123
124
  fn proof_summary(active_task: &Value) -> Vec<String> {
124
- active_task
125
- .get("proofResults")
126
- .and_then(Value::as_array)
127
- .map(|proofs| {
128
- proofs
129
- .iter()
130
- .filter_map(|proof| proof.get("checkId").and_then(Value::as_str))
131
- .map(ToString::to_string)
132
- .collect()
133
- })
134
- .unwrap_or_default()
125
+ canonical_proof_check_ids(active_task)
135
126
  }
136
127
 
137
128
  fn read_json(root: &Path, relative_path: &str) -> Result<Value, NaomeError> {
@@ -148,16 +139,14 @@ fn string_at(value: &Value, key: &str) -> Option<String> {
148
139
  }
149
140
 
150
141
  fn git_head(root: &Path) -> Result<Option<String>, NaomeError> {
151
- let output = Command::new("git")
152
- .args(["rev-parse", "HEAD"])
153
- .current_dir(root)
154
- .output()?;
155
- if !output.status.success() {
156
- return Ok(None);
142
+ let mut command = Command::new("git");
143
+ command.current_dir(root).arg("rev-parse").arg("HEAD");
144
+ let output = command.output()?;
145
+ if output.status.success() {
146
+ let head = String::from_utf8_lossy(&output.stdout).trim().to_string();
147
+ return Ok(Some(head));
157
148
  }
158
- Ok(Some(
159
- String::from_utf8_lossy(&output.stdout).trim().to_string(),
160
- ))
149
+ Ok(None)
161
150
  }
162
151
 
163
152
  fn timestamp_now() -> String {
@@ -6,10 +6,12 @@ mod intent;
6
6
  mod journal;
7
7
  mod models;
8
8
  mod paths;
9
+ mod quality;
9
10
  mod route;
10
11
  mod task_state;
11
12
  mod verification;
12
13
  mod verification_contract;
14
+ mod workflow;
13
15
 
14
16
  pub use decision::{evaluate_decision, format_decision, EvaluationOptions};
15
17
  pub use harness_health::{validate_harness_health, HarnessHealthOptions};
@@ -17,6 +19,11 @@ pub use install_plan::{install_plan, InstallPlan};
17
19
  pub use intent::{evaluate_intent, format_intent, IntentDecision, PromptEvidence};
18
20
  pub use journal::{append_task_journal, TaskJournalEntry};
19
21
  pub use models::{Decision, NaomeError};
22
+ pub use quality::{
23
+ check_repository_quality, init_repository_quality, plan_quality_cleanup, route_quality_cleanup,
24
+ QualityCleanupPlan, QualityCleanupRoute, QualityCleanupTask, QualityInitResult, QualityMode,
25
+ QualityReport, QualitySummary, QualityViolation, RepositoryQualityConfig,
26
+ };
20
27
  pub use route::{evaluate_route, explain_route, ExplainDecision, RouteDecision, RouteOptions};
21
28
  pub use task_state::{
22
29
  completed_task_commit_paths, validate_task_state, TaskStateMode, TaskStateOptions,
@@ -24,3 +31,9 @@ pub use task_state::{
24
31
  };
25
32
  pub use verification::seed_builtin_verification_checks;
26
33
  pub use verification_contract::validate_verification_contract;
34
+ pub use workflow::{
35
+ classify_mutations, refresh_integrity, safe_rg_args, summarize_command_output,
36
+ tracked_process_report, validate_read_boundaries, validate_search_command,
37
+ verification_phase_plan, CommandCheckResult, CommandOutputSummary, IntegrityRefreshReport,
38
+ MutationClassification, ProcessReport, ReadActivity, VerificationPhasePlan, WorkflowFinding,
39
+ };
@@ -0,0 +1,178 @@
1
+ use std::collections::HashSet;
2
+
3
+ use crate::models::NaomeError;
4
+
5
+ use super::types::{QualityLimitOverrides, QualityPathRule, RepositoryQualityConfig};
6
+
7
+ pub(crate) struct QualityAdapter {
8
+ pub id: &'static str,
9
+ pub generated_paths: &'static [&'static str],
10
+ detect: fn(&RepoSignals<'_>) -> bool,
11
+ path_rules: fn() -> Vec<QualityPathRule>,
12
+ }
13
+
14
/// Borrowed view over repository paths used by the adapter detectors.
struct RepoSignals<'a> {
    paths: &'a [String],
}

impl RepoSignals<'_> {
    /// Returns `true` when some path is exactly `expected` or ends with `/expected`.
    fn has_manifest(&self, expected: &str) -> bool {
        self.paths.iter().any(|path| {
            // Whatever precedes the manifest name must be empty (root-level file)
            // or end at a directory separator (nested file).
            path.strip_suffix(expected)
                .map_or(false, |prefix| prefix.is_empty() || prefix.ends_with('/'))
        })
    }

    /// Returns `true` when any path ends with any of the given suffixes.
    fn has_extension(&self, extensions: &[&str]) -> bool {
        extensions
            .iter()
            .any(|extension| self.paths.iter().any(|path| path.ends_with(*extension)))
    }
}
31
+ pub(crate) fn detected_adapter_ids(paths: &[String]) -> Vec<String> {
32
+ let signals = RepoSignals { paths };
33
+ registry()
34
+ .iter()
35
+ .filter(|adapter| (adapter.detect)(&signals))
36
+ .map(|adapter| adapter.id.to_string())
37
+ .collect()
38
+ }
39
+ pub(crate) fn apply_enabled_adapters(
40
+ mut config: RepositoryQualityConfig,
41
+ ) -> Result<RepositoryQualityConfig, NaomeError> {
42
+ let mut seen = HashSet::new();
43
+ let local_path_rules = std::mem::take(&mut config.path_rules);
44
+
45
+ for adapter_id in config.enabled_adapters.clone() {
46
+ if !seen.insert(adapter_id.clone()) {
47
+ return Err(NaomeError::new(format!(
48
+ ".naome/repository-quality.json enabledAdapters contains duplicate adapter '{adapter_id}'."
49
+ )));
50
+ }
51
+ let adapter = adapter_by_id(&adapter_id)?;
52
+ extend_unique(&mut config.generated_paths, adapter.generated_paths);
53
+ config.path_rules.extend((adapter.path_rules)());
54
+ }
55
+
56
+ config.path_rules.extend(local_path_rules);
57
+ Ok(config)
58
+ }
59
+ pub(crate) fn validate_adapter_ids(ids: &[String]) -> Result<(), NaomeError> {
60
+ let mut seen = HashSet::new();
61
+ for adapter_id in ids {
62
+ if !seen.insert(adapter_id) {
63
+ return Err(NaomeError::new(format!(
64
+ ".naome/repository-quality.json enabledAdapters contains duplicate adapter '{adapter_id}'."
65
+ )));
66
+ }
67
+ adapter_by_id(adapter_id)?;
68
+ }
69
+ Ok(())
70
+ }
71
+ fn adapter_by_id(id: &str) -> Result<&'static QualityAdapter, NaomeError> {
72
+ registry()
73
+ .iter()
74
+ .find(|adapter| adapter.id == id)
75
+ .ok_or_else(|| {
76
+ NaomeError::new(format!(
77
+ ".naome/repository-quality.json enabledAdapters contains unknown adapter '{id}'."
78
+ ))
79
+ })
80
+ }
81
+
82
+ fn registry() -> &'static [QualityAdapter] {
83
+ &[
84
+ QualityAdapter {
85
+ id: "rust",
86
+ generated_paths: &[],
87
+ detect: detects_rust,
88
+ path_rules: rust_path_rules,
89
+ },
90
+ QualityAdapter {
91
+ id: "javascript-typescript",
92
+ generated_paths: &["coverage/**", "**/coverage/**", ".next/**", "**/.next/**"],
93
+ detect: detects_javascript_typescript,
94
+ path_rules: javascript_typescript_path_rules,
95
+ },
96
+ ]
97
+ }
98
+
99
+ fn detects_rust(signals: &RepoSignals<'_>) -> bool {
100
+ signals.has_manifest("Cargo.toml") || signals.has_extension(&[".rs"])
101
+ }
102
+
103
+ fn detects_javascript_typescript(signals: &RepoSignals<'_>) -> bool {
104
+ signals.has_manifest("package.json")
105
+ || signals.has_extension(&[".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"])
106
+ }
107
+
108
+ fn rust_path_rules() -> Vec<QualityPathRule> {
109
+ vec![path_rule(
110
+ "rust-tests",
111
+ &["**/tests/*.rs", "**/tests/**/*.rs"],
112
+ test_file_limits(80),
113
+ &[],
114
+ )]
115
+ }
116
+
117
+ fn javascript_typescript_path_rules() -> Vec<QualityPathRule> {
118
+ vec![QualityPathRule {
119
+ id: "javascript-typescript-tests".to_string(),
120
+ paths: javascript_typescript_test_paths(),
121
+ limits: test_file_limits(80),
122
+ disabled_checks: Vec::new(),
123
+ }]
124
+ }
125
+
126
/// Builds the glob list that identifies JavaScript/TypeScript test files.
///
/// Order: `**/*.{test,spec}.<ext>` globs, then `**/{test,tests}/**/*.<ext>`
/// globs, then the literal `scripts/*.test.js`.
fn javascript_typescript_test_paths() -> Vec<String> {
    let extensions = ["js", "jsx", "ts", "tsx"];

    let by_suffix = ["test", "spec"].iter().flat_map(|marker| {
        extensions
            .iter()
            .map(move |extension| format!("**/*.{marker}.{extension}"))
    });
    let by_directory = ["test", "tests"].iter().flat_map(|directory| {
        extensions
            .iter()
            .map(move |extension| format!("**/{directory}/**/*.{extension}"))
    });

    let mut globs: Vec<String> = by_suffix.chain(by_directory).collect();
    globs.push("scripts/*.test.js".to_string());
    globs
}
141
+
142
+ fn test_file_limits(max_top_level_symbols: usize) -> QualityLimitOverrides {
143
+ QualityLimitOverrides {
144
+ max_file_lines: Some(650),
145
+ max_diff_added_lines: Some(220),
146
+ max_function_lines: Some(140),
147
+ max_top_level_symbols: Some(max_top_level_symbols),
148
+ duplicate_block_lines: Some(14),
149
+ near_duplicate_similarity: Some(0.96),
150
+ ..QualityLimitOverrides::default()
151
+ }
152
+ }
153
+
154
+ fn path_rule(
155
+ id: &str,
156
+ paths: &[&str],
157
+ limits: QualityLimitOverrides,
158
+ disabled_checks: &[&str],
159
+ ) -> QualityPathRule {
160
+ QualityPathRule {
161
+ id: id.to_string(),
162
+ paths: string_list(paths),
163
+ limits,
164
+ disabled_checks: string_list(disabled_checks),
165
+ }
166
+ }
167
+
168
/// Copies a slice of string slices into a vector of owned strings, preserving order.
fn string_list(values: &[&str]) -> Vec<String> {
    let mut owned = Vec::with_capacity(values.len());
    for value in values {
        owned.push(String::from(*value));
    }
    owned
}
171
+
172
/// Appends each entry of `values` to `target` unless an equal string is
/// already present, including entries added earlier in the same call.
fn extend_unique(target: &mut Vec<String>, values: &[&str]) {
    for candidate in values.iter().copied() {
        let already_present = target.iter().any(|existing| existing == candidate);
        if already_present {
            continue;
        }
        target.push(candidate.to_owned());
    }
}
@@ -0,0 +1,75 @@
1
+ use std::collections::HashSet;
2
+ use std::fs;
3
+ use std::path::Path;
4
+
5
+ use serde::{Deserialize, Serialize};
6
+
7
+ use crate::models::NaomeError;
8
+
9
+ use super::types::QualityViolation;
10
+
11
+ const BASELINE_RELATIVE_PATH: &str = ".naome/repository-quality-baseline.json";
12
+
13
+ #[derive(Debug, Clone, Serialize, Deserialize)]
14
+ #[serde(rename_all = "camelCase")]
15
+ struct QualityBaseline {
16
+ schema: String,
17
+ version: u32,
18
+ violations: Vec<QualityBaselineEntry>,
19
+ }
20
+
21
+ #[derive(Debug, Clone, Serialize, Deserialize)]
22
+ #[serde(rename_all = "camelCase")]
23
+ struct QualityBaselineEntry {
24
+ fingerprint: String,
25
+ check_id: String,
26
+ path: String,
27
+ message: String,
28
+ }
29
+
30
+ pub fn baseline_relative_path() -> &'static str {
31
+ BASELINE_RELATIVE_PATH
32
+ }
33
+
34
+ pub fn read_baseline_fingerprints(root: &Path) -> Result<HashSet<String>, NaomeError> {
35
+ let path = root.join(BASELINE_RELATIVE_PATH);
36
+ if !path.is_file() {
37
+ return Ok(HashSet::new());
38
+ }
39
+
40
+ let baseline: QualityBaseline = serde_json::from_str(&fs::read_to_string(path)?)?;
41
+ Ok(baseline
42
+ .violations
43
+ .into_iter()
44
+ .map(|violation| violation.fingerprint)
45
+ .collect())
46
+ }
47
+
48
+ pub fn write_baseline(root: &Path, violations: &[QualityViolation]) -> Result<bool, NaomeError> {
49
+ let path = root.join(BASELINE_RELATIVE_PATH);
50
+ if let Some(parent) = path.parent() {
51
+ fs::create_dir_all(parent)?;
52
+ }
53
+
54
+ let entries = violations
55
+ .iter()
56
+ .map(|violation| QualityBaselineEntry {
57
+ fingerprint: violation.fingerprint.clone(),
58
+ check_id: violation.check_id.clone(),
59
+ path: violation.path.clone(),
60
+ message: violation.message.clone(),
61
+ })
62
+ .collect();
63
+ let baseline = QualityBaseline {
64
+ schema: "naome.repository-quality-baseline.v1".to_string(),
65
+ version: 1,
66
+ violations: entries,
67
+ };
68
+ let content = serde_json::to_string_pretty(&baseline)?;
69
+ let next = format!("{content}\n");
70
+ let changed = fs::read_to_string(&path).map_or(true, |current| current != next);
71
+ if changed {
72
+ fs::write(path, next)?;
73
+ }
74
+ Ok(changed)
75
+ }
@@ -0,0 +1,175 @@
1
+ use std::collections::{HashMap, HashSet};
2
+
3
+ use super::super::scanner::{stable_fingerprint, QualityContext};
4
+ use super::super::types::QualityViolation;
5
+ use super::{is_code_like_path, QualityCheck};
6
+
7
+ pub(super) struct DuplicateBlockCheck;
8
+
9
+ impl QualityCheck for DuplicateBlockCheck {
10
+ fn id(&self) -> &'static str {
11
+ "duplicate-blocks"
12
+ }
13
+
14
+ fn evaluate(&self, context: &QualityContext, violations: &mut Vec<QualityViolation>) {
15
+ let mut occurrences: HashMap<String, Vec<DuplicateOccurrence>> = HashMap::new();
16
+ for file in context.files.iter().filter(|file| {
17
+ is_code_like_path(&file.path)
18
+ && context.config.check_enabled_for_path(self.id(), &file.path)
19
+ }) {
20
+ let window = context.limits_for(&file.path).duplicate_block_lines;
21
+ if file.normalized_lines.len() < window {
22
+ continue;
23
+ }
24
+ for lines in file.normalized_lines.windows(window) {
25
+ let joined = lines
26
+ .iter()
27
+ .map(|line| line.value.as_str())
28
+ .collect::<Vec<_>>()
29
+ .join("\n");
30
+ let fingerprint = stable_fingerprint(&[self.id(), &joined]);
31
+ occurrences
32
+ .entry(fingerprint.clone())
33
+ .or_default()
34
+ .push(DuplicateOccurrence {
35
+ path: file.path.clone(),
36
+ line: lines[0].line_number,
37
+ window,
38
+ fingerprint,
39
+ });
40
+ }
41
+ }
42
+
43
+ let mut emitted = HashSet::new();
44
+ let mut candidates = Vec::new();
45
+ for group in occurrences.values() {
46
+ for occurrence in group {
47
+ if !context.check_applies_to(self.id(), &occurrence.path)
48
+ || !is_code_like_path(&occurrence.path)
49
+ {
50
+ continue;
51
+ }
52
+ let related_paths = related_duplicate_paths(group, occurrence);
53
+ if related_paths.is_empty() {
54
+ continue;
55
+ }
56
+ let key = format!(
57
+ "{}:{}:{}",
58
+ occurrence.path, occurrence.line, occurrence.fingerprint
59
+ );
60
+ if emitted.insert(key) {
61
+ candidates.push(DuplicateCandidate {
62
+ path: occurrence.path.clone(),
63
+ line: occurrence.line,
64
+ window: occurrence.window,
65
+ related_paths,
66
+ fingerprint: occurrence.fingerprint.clone(),
67
+ });
68
+ }
69
+ }
70
+ }
71
+ emit_duplicate_regions(self.id(), candidates, violations);
72
+ }
73
+ }
74
+
75
/// One fingerprinted window of normalized lines found in a file.
#[derive(Debug, Clone)]
struct DuplicateOccurrence {
    /// Path of the file that contains the window.
    path: String,
    /// Line number of the window's first normalized line.
    line: usize,
    /// Number of normalized lines in the window.
    window: usize,
    /// Stable fingerprint of the check id and the window's joined text.
    fingerprint: String,
}
82
+
83
/// A duplicate occurrence selected for reporting, together with the files
/// it duplicates.
#[derive(Debug, Clone)]
struct DuplicateCandidate {
    /// Path of the file that contains the repeated region.
    path: String,
    /// Line number where the region starts.
    line: usize,
    /// Length of the region in lines; grows when `emit_duplicate_regions`
    /// merges adjacent candidates.
    window: usize,
    /// Paths of the related occurrences, as returned by `related_duplicate_paths`.
    related_paths: Vec<String>,
    /// Fingerprint of the occurrence this candidate was created from.
    fingerprint: String,
}
91
+
92
+ fn emit_duplicate_regions(
93
+ check_id: &str,
94
+ mut candidates: Vec<DuplicateCandidate>,
95
+ violations: &mut Vec<QualityViolation>,
96
+ ) {
97
+ candidates.sort_by(|left, right| {
98
+ left.path
99
+ .cmp(&right.path)
100
+ .then(left.related_paths.cmp(&right.related_paths))
101
+ .then(left.line.cmp(&right.line))
102
+ .then(left.window.cmp(&right.window))
103
+ .then(left.fingerprint.cmp(&right.fingerprint))
104
+ });
105
+
106
+ let mut regions: Vec<DuplicateCandidate> = Vec::new();
107
+ for candidate in candidates {
108
+ if let Some(previous) = regions.last_mut() {
109
+ if previous.path == candidate.path
110
+ && previous.related_paths == candidate.related_paths
111
+ && candidate.line <= previous.line.saturating_add(previous.window)
112
+ {
113
+ previous.window = previous
114
+ .window
115
+ .max(candidate.line.saturating_sub(previous.line) + candidate.window);
116
+ continue;
117
+ }
118
+ }
119
+ regions.push(candidate);
120
+ }
121
+
122
+ for region in regions {
123
+ violations.push(QualityViolation {
124
+ check_id: check_id.to_string(),
125
+ severity: "blocking".to_string(),
126
+ path: region.path.clone(),
127
+ line: Some(region.line),
128
+ message: format!(
129
+ "{} repeats a normalized code region already present in {}.",
130
+ region.path,
131
+ region.related_paths.join(", ")
132
+ ),
133
+ value: Some(region.window as f64),
134
+ limit: Some(region.window as f64),
135
+ fingerprint: stable_fingerprint(&[
136
+ check_id,
137
+ &region.path,
138
+ &region.line.to_string(),
139
+ &region.window.to_string(),
140
+ &region.related_paths.join("\0"),
141
+ &region.fingerprint,
142
+ ]),
143
+ related_paths: region.related_paths,
144
+ baseline: false,
145
+ });
146
+ }
147
+ }
148
+
149
+ fn related_duplicate_paths(
150
+ group: &[DuplicateOccurrence],
151
+ occurrence: &DuplicateOccurrence,
152
+ ) -> Vec<String> {
153
+ let mut related = group
154
+ .iter()
155
+ .filter(|other| duplicate_occurrences_are_related(occurrence, other))
156
+ .map(|other| other.path.clone())
157
+ .collect::<HashSet<_>>()
158
+ .into_iter()
159
+ .collect::<Vec<_>>();
160
+ related.sort();
161
+ related
162
+ }
163
+
164
+ fn duplicate_occurrences_are_related(
165
+ occurrence: &DuplicateOccurrence,
166
+ other: &DuplicateOccurrence,
167
+ ) -> bool {
168
+ if occurrence.path != other.path {
169
+ return true;
170
+ }
171
+ if occurrence.line == other.line {
172
+ return false;
173
+ }
174
+ other.line.saturating_add(other.window).saturating_sub(1) < occurrence.line
175
+ }
@@ -0,0 +1,130 @@
1
+ use std::collections::HashSet;
2
+
3
+ use super::super::scanner::{FileAnalysis, QualityContext, SymbolAnalysis};
4
+ use super::super::types::QualityViolation;
5
+ use super::{is_code_like_path, violation, QualityCheck};
6
+
7
+ pub(super) struct NearDuplicateFunctionCheck;
8
+
9
+ impl QualityCheck for NearDuplicateFunctionCheck {
10
+ fn id(&self) -> &'static str {
11
+ "near-duplicate-functions"
12
+ }
13
+
14
+ fn evaluate(&self, context: &QualityContext, violations: &mut Vec<QualityViolation>) {
15
+ let symbols = collect_function_occurrences(context, self.id());
16
+ let mut emitted = HashSet::new();
17
+
18
+ for (index, left) in symbols.iter().enumerate() {
19
+ if !context.applies_to(&left.file.path) {
20
+ continue;
21
+ }
22
+ for right in symbols.iter().skip(index + 1) {
23
+ if same_symbol(left, right)
24
+ || symbols_have_parent_child_relationship(left.symbol, right.symbol)
25
+ || !context
26
+ .config
27
+ .check_enabled_for_path(self.id(), &right.file.path)
28
+ {
29
+ continue;
30
+ }
31
+ let threshold = duplicate_threshold(context, left.file, right.file);
32
+ let similarity = jaccard(&left.symbol.tokens, &right.symbol.tokens);
33
+ if similarity < threshold {
34
+ continue;
35
+ }
36
+ let key = format!(
37
+ "{}:{}:{}:{}",
38
+ left.file.path,
39
+ left.symbol.start_line,
40
+ right.file.path,
41
+ right.symbol.start_line
42
+ );
43
+ if emitted.insert(key) {
44
+ emit_near_duplicate(self.id(), left, right, similarity, threshold, violations);
45
+ }
46
+ }
47
+ }
48
+ }
49
+ }
50
+
51
+ #[derive(Debug, Clone, Copy)]
52
+ struct FunctionOccurrence<'a> {
53
+ file: &'a FileAnalysis,
54
+ symbol: &'a SymbolAnalysis,
55
+ }
56
+
57
+ fn collect_function_occurrences<'a>(
58
+ context: &'a QualityContext,
59
+ check_id: &str,
60
+ ) -> Vec<FunctionOccurrence<'a>> {
61
+ context
62
+ .files
63
+ .iter()
64
+ .filter(|file| {
65
+ is_code_like_path(&file.path)
66
+ && context.config.check_enabled_for_path(check_id, &file.path)
67
+ })
68
+ .flat_map(|file| {
69
+ file.symbols
70
+ .iter()
71
+ .map(move |symbol| FunctionOccurrence { file, symbol })
72
+ })
73
+ .filter(|occurrence| {
74
+ occurrence.symbol.kind == "function" && occurrence.symbol.tokens.len() >= 12
75
+ })
76
+ .collect()
77
+ }
78
+
79
+ fn emit_near_duplicate(
80
+ check_id: &str,
81
+ left: &FunctionOccurrence<'_>,
82
+ right: &FunctionOccurrence<'_>,
83
+ similarity: f64,
84
+ threshold: f64,
85
+ violations: &mut Vec<QualityViolation>,
86
+ ) {
87
+ violations.push(violation(
88
+ check_id,
89
+ &left.file.path,
90
+ Some(left.symbol.start_line),
91
+ format!(
92
+ "{} {} is {:.0}% similar to {} {} in {}.",
93
+ left.symbol.kind,
94
+ left.symbol.name,
95
+ similarity * 100.0,
96
+ right.symbol.kind,
97
+ right.symbol.name,
98
+ right.file.path
99
+ ),
100
+ Some(similarity),
101
+ Some(threshold),
102
+ vec![right.file.path.clone()],
103
+ ));
104
+ }
105
+
106
+ fn same_symbol(left: &FunctionOccurrence<'_>, right: &FunctionOccurrence<'_>) -> bool {
107
+ left.file.path == right.file.path && left.symbol.start_line == right.symbol.start_line
108
+ }
109
+
110
+ fn duplicate_threshold(context: &QualityContext, left: &FileAnalysis, right: &FileAnalysis) -> f64 {
111
+ context
112
+ .limits_for(&left.path)
113
+ .near_duplicate_similarity
114
+ .max(context.limits_for(&right.path).near_duplicate_similarity)
115
+ }
116
+
117
+ fn symbols_have_parent_child_relationship(left: &SymbolAnalysis, right: &SymbolAnalysis) -> bool {
118
+ (left.start_line < right.start_line && left.end_line >= right.end_line)
119
+ || (right.start_line < left.start_line && right.end_line >= left.end_line)
120
+ }
121
+
122
/// Jaccard similarity of two token sets: `|left ∩ right| / |left ∪ right|`.
///
/// Returns `0.0` when both sets are empty.
fn jaccard(left: &HashSet<String>, right: &HashSet<String>) -> f64 {
    let intersection = left.intersection(right).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|, which avoids walking both sets a second time.
    let union = left.len() + right.len() - intersection;
    if union == 0 {
        0.0
    } else {
        intersection as f64 / union as f64
    }
}