@lamentis/naome 1.1.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +2 -2
- package/Cargo.toml +1 -1
- package/LICENSE +180 -21
- package/README.md +49 -6
- package/bin/naome.js +54 -16
- package/crates/naome-cli/Cargo.toml +1 -1
- package/crates/naome-cli/src/check_commands.rs +135 -0
- package/crates/naome-cli/src/cli_args.rs +5 -0
- package/crates/naome-cli/src/dispatcher.rs +36 -0
- package/crates/naome-cli/src/install_bridge.rs +83 -0
- package/crates/naome-cli/src/main.rs +57 -341
- package/crates/naome-cli/src/prompt_commands.rs +68 -0
- package/crates/naome-cli/src/quality_commands.rs +141 -0
- package/crates/naome-cli/src/simple_commands.rs +53 -0
- package/crates/naome-cli/src/workflow_commands.rs +153 -0
- package/crates/naome-core/Cargo.toml +1 -1
- package/crates/naome-core/src/harness_health/integrity.rs +96 -0
- package/crates/naome-core/src/harness_health.rs +14 -126
- package/crates/naome-core/src/install_plan.rs +3 -0
- package/crates/naome-core/src/intent/classifier.rs +171 -0
- package/crates/naome-core/src/intent/envelope.rs +108 -0
- package/crates/naome-core/src/intent/legacy.rs +138 -0
- package/crates/naome-core/src/intent/legacy_response.rs +76 -0
- package/crates/naome-core/src/intent/model.rs +71 -0
- package/crates/naome-core/src/intent/patterns.rs +170 -0
- package/crates/naome-core/src/intent/resolver.rs +162 -0
- package/crates/naome-core/src/intent/resolver_active.rs +17 -0
- package/crates/naome-core/src/intent/resolver_baseline.rs +55 -0
- package/crates/naome-core/src/intent/resolver_catalog.rs +167 -0
- package/crates/naome-core/src/intent/resolver_policy.rs +72 -0
- package/crates/naome-core/src/intent/resolver_shared.rs +55 -0
- package/crates/naome-core/src/intent/risk.rs +40 -0
- package/crates/naome-core/src/intent/segment.rs +170 -0
- package/crates/naome-core/src/intent.rs +64 -879
- package/crates/naome-core/src/journal.rs +9 -20
- package/crates/naome-core/src/lib.rs +13 -0
- package/crates/naome-core/src/quality/adapters.rs +178 -0
- package/crates/naome-core/src/quality/baseline.rs +75 -0
- package/crates/naome-core/src/quality/checks/duplicate_blocks.rs +175 -0
- package/crates/naome-core/src/quality/checks/near_duplicates.rs +130 -0
- package/crates/naome-core/src/quality/checks.rs +228 -0
- package/crates/naome-core/src/quality/cleanup.rs +72 -0
- package/crates/naome-core/src/quality/config.rs +109 -0
- package/crates/naome-core/src/quality/mod.rs +90 -0
- package/crates/naome-core/src/quality/scanner/repo_paths.rs +103 -0
- package/crates/naome-core/src/quality/scanner.rs +367 -0
- package/crates/naome-core/src/quality/types.rs +289 -0
- package/crates/naome-core/src/route.rs +62 -0
- package/crates/naome-core/src/task_state/admission.rs +63 -0
- package/crates/naome-core/src/task_state/admission_proof.rs +72 -0
- package/crates/naome-core/src/task_state/api.rs +130 -0
- package/crates/naome-core/src/task_state/commit_gate.rs +138 -0
- package/crates/naome-core/src/task_state/compact_proof.rs +160 -0
- package/crates/naome-core/src/task_state/completed_refresh.rs +89 -0
- package/crates/naome-core/src/task_state/completion.rs +72 -0
- package/crates/naome-core/src/task_state/deleted_paths.rs +47 -0
- package/crates/naome-core/src/task_state/diff.rs +95 -0
- package/crates/naome-core/src/task_state/evidence.rs +154 -0
- package/crates/naome-core/src/task_state/git_io.rs +86 -0
- package/crates/naome-core/src/task_state/git_parse.rs +86 -0
- package/crates/naome-core/src/task_state/git_refs.rs +37 -0
- package/crates/naome-core/src/task_state/human_review_state.rs +31 -0
- package/crates/naome-core/src/task_state/mod.rs +38 -0
- package/crates/naome-core/src/task_state/process_guard.rs +40 -0
- package/crates/naome-core/src/task_state/progress.rs +123 -0
- package/crates/naome-core/src/task_state/proof.rs +139 -0
- package/crates/naome-core/src/task_state/proof_entry.rs +66 -0
- package/crates/naome-core/src/task_state/proof_model.rs +70 -0
- package/crates/naome-core/src/task_state/proof_sources.rs +76 -0
- package/crates/naome-core/src/task_state/push_gate.rs +49 -0
- package/crates/naome-core/src/task_state/reconcile.rs +7 -0
- package/crates/naome-core/src/task_state/repair.rs +168 -0
- package/crates/naome-core/src/task_state/shape.rs +117 -0
- package/crates/naome-core/src/task_state/task_diff_api.rs +170 -0
- package/crates/naome-core/src/task_state/task_records.rs +131 -0
- package/crates/naome-core/src/task_state/task_references.rs +126 -0
- package/crates/naome-core/src/task_state/types.rs +87 -0
- package/crates/naome-core/src/task_state/util.rs +137 -0
- package/crates/naome-core/src/verification/render.rs +122 -0
- package/crates/naome-core/src/verification.rs +176 -58
- package/crates/naome-core/src/verification_contract.rs +49 -21
- package/crates/naome-core/src/workflow/integrity.rs +123 -0
- package/crates/naome-core/src/workflow/integrity_normalize.rs +7 -0
- package/crates/naome-core/src/workflow/integrity_support.rs +110 -0
- package/crates/naome-core/src/workflow/mod.rs +18 -0
- package/crates/naome-core/src/workflow/mutation.rs +68 -0
- package/crates/naome-core/src/workflow/output.rs +111 -0
- package/crates/naome-core/src/workflow/phase_inference.rs +73 -0
- package/crates/naome-core/src/workflow/phases.rs +169 -0
- package/crates/naome-core/src/workflow/policy.rs +156 -0
- package/crates/naome-core/src/workflow/processes.rs +91 -0
- package/crates/naome-core/src/workflow/types.rs +42 -0
- package/crates/naome-core/tests/harness_health.rs +3 -0
- package/crates/naome-core/tests/intent.rs +97 -792
- package/crates/naome-core/tests/intent_support/mod.rs +133 -0
- package/crates/naome-core/tests/intent_v2.rs +90 -0
- package/crates/naome-core/tests/quality.rs +425 -0
- package/crates/naome-core/tests/route.rs +88 -188
- package/crates/naome-core/tests/task_state.rs +3 -0
- package/crates/naome-core/tests/task_state_compact.rs +110 -0
- package/crates/naome-core/tests/task_state_compact_support/mod.rs +5 -0
- package/crates/naome-core/tests/task_state_compact_support/repo.rs +130 -0
- package/crates/naome-core/tests/task_state_compact_support/states.rs +151 -0
- package/crates/naome-core/tests/workflow_integrity.rs +85 -0
- package/crates/naome-core/tests/workflow_policy.rs +139 -0
- package/crates/naome-core/tests/workflow_support/mod.rs +194 -0
- package/native/darwin-arm64/naome +0 -0
- package/native/linux-x64/naome +0 -0
- package/package.json +2 -2
- package/templates/naome-root/.naome/bin/check-harness-health.js +66 -85
- package/templates/naome-root/.naome/bin/check-task-state.js +9 -10
- package/templates/naome-root/.naome/bin/naome.js +34 -63
- package/templates/naome-root/.naome/manifest.json +20 -18
- package/templates/naome-root/.naome/repository-quality-baseline.json +5 -0
- package/templates/naome-root/.naome/repository-quality.json +24 -0
- package/templates/naome-root/.naome/task-contract.schema.json +93 -11
- package/templates/naome-root/.naome/upgrade-state.json +1 -1
- package/templates/naome-root/.naome/verification.json +37 -0
- package/templates/naome-root/AGENTS.md +3 -0
- package/templates/naome-root/docs/naome/agent-workflow.md +25 -12
- package/templates/naome-root/docs/naome/execution.md +25 -21
- package/templates/naome-root/docs/naome/index.md +4 -3
- package/templates/naome-root/docs/naome/repository-quality.md +43 -0
- package/templates/naome-root/docs/naome/testing.md +12 -0
- package/crates/naome-core/src/task_state.rs +0 -2210
|
@@ -8,6 +8,7 @@ use serde::Serialize;
|
|
|
8
8
|
use serde_json::Value;
|
|
9
9
|
|
|
10
10
|
use crate::models::NaomeError;
|
|
11
|
+
use crate::task_state::canonical_proof_check_ids;
|
|
11
12
|
|
|
12
13
|
const JOURNAL_PATH: &str = ".naome/task-journal.jsonl";
|
|
13
14
|
|
|
@@ -121,17 +122,7 @@ fn journal_contains(root: &Path, entry: &TaskJournalEntry) -> Result<bool, Naome
|
|
|
121
122
|
}
|
|
122
123
|
|
|
123
124
|
fn proof_summary(active_task: &Value) -> Vec<String> {
|
|
124
|
-
active_task
|
|
125
|
-
.get("proofResults")
|
|
126
|
-
.and_then(Value::as_array)
|
|
127
|
-
.map(|proofs| {
|
|
128
|
-
proofs
|
|
129
|
-
.iter()
|
|
130
|
-
.filter_map(|proof| proof.get("checkId").and_then(Value::as_str))
|
|
131
|
-
.map(ToString::to_string)
|
|
132
|
-
.collect()
|
|
133
|
-
})
|
|
134
|
-
.unwrap_or_default()
|
|
125
|
+
canonical_proof_check_ids(active_task)
|
|
135
126
|
}
|
|
136
127
|
|
|
137
128
|
fn read_json(root: &Path, relative_path: &str) -> Result<Value, NaomeError> {
|
|
@@ -148,16 +139,14 @@ fn string_at(value: &Value, key: &str) -> Option<String> {
|
|
|
148
139
|
}
|
|
149
140
|
|
|
150
141
|
fn git_head(root: &Path) -> Result<Option<String>, NaomeError> {
|
|
151
|
-
let
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
return Ok(
|
|
142
|
+
let mut command = Command::new("git");
|
|
143
|
+
command.current_dir(root).arg("rev-parse").arg("HEAD");
|
|
144
|
+
let output = command.output()?;
|
|
145
|
+
if output.status.success() {
|
|
146
|
+
let head = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
147
|
+
return Ok(Some(head));
|
|
157
148
|
}
|
|
158
|
-
Ok(
|
|
159
|
-
String::from_utf8_lossy(&output.stdout).trim().to_string(),
|
|
160
|
-
))
|
|
149
|
+
Ok(None)
|
|
161
150
|
}
|
|
162
151
|
|
|
163
152
|
fn timestamp_now() -> String {
|
|
@@ -6,10 +6,12 @@ mod intent;
|
|
|
6
6
|
mod journal;
|
|
7
7
|
mod models;
|
|
8
8
|
mod paths;
|
|
9
|
+
mod quality;
|
|
9
10
|
mod route;
|
|
10
11
|
mod task_state;
|
|
11
12
|
mod verification;
|
|
12
13
|
mod verification_contract;
|
|
14
|
+
mod workflow;
|
|
13
15
|
|
|
14
16
|
pub use decision::{evaluate_decision, format_decision, EvaluationOptions};
|
|
15
17
|
pub use harness_health::{validate_harness_health, HarnessHealthOptions};
|
|
@@ -17,6 +19,11 @@ pub use install_plan::{install_plan, InstallPlan};
|
|
|
17
19
|
pub use intent::{evaluate_intent, format_intent, IntentDecision, PromptEvidence};
|
|
18
20
|
pub use journal::{append_task_journal, TaskJournalEntry};
|
|
19
21
|
pub use models::{Decision, NaomeError};
|
|
22
|
+
pub use quality::{
|
|
23
|
+
check_repository_quality, init_repository_quality, plan_quality_cleanup, route_quality_cleanup,
|
|
24
|
+
QualityCleanupPlan, QualityCleanupRoute, QualityCleanupTask, QualityInitResult, QualityMode,
|
|
25
|
+
QualityReport, QualitySummary, QualityViolation, RepositoryQualityConfig,
|
|
26
|
+
};
|
|
20
27
|
pub use route::{evaluate_route, explain_route, ExplainDecision, RouteDecision, RouteOptions};
|
|
21
28
|
pub use task_state::{
|
|
22
29
|
completed_task_commit_paths, validate_task_state, TaskStateMode, TaskStateOptions,
|
|
@@ -24,3 +31,9 @@ pub use task_state::{
|
|
|
24
31
|
};
|
|
25
32
|
pub use verification::seed_builtin_verification_checks;
|
|
26
33
|
pub use verification_contract::validate_verification_contract;
|
|
34
|
+
pub use workflow::{
|
|
35
|
+
classify_mutations, refresh_integrity, safe_rg_args, summarize_command_output,
|
|
36
|
+
tracked_process_report, validate_read_boundaries, validate_search_command,
|
|
37
|
+
verification_phase_plan, CommandCheckResult, CommandOutputSummary, IntegrityRefreshReport,
|
|
38
|
+
MutationClassification, ProcessReport, ReadActivity, VerificationPhasePlan, WorkflowFinding,
|
|
39
|
+
};
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
use std::collections::HashSet;
|
|
2
|
+
|
|
3
|
+
use crate::models::NaomeError;
|
|
4
|
+
|
|
5
|
+
use super::types::{QualityLimitOverrides, QualityPathRule, RepositoryQualityConfig};
|
|
6
|
+
|
|
7
|
+
/// An ecosystem adapter that contributes defaults to the repository quality configuration.
pub(crate) struct QualityAdapter {
    /// Identifier matched against the `enabledAdapters` entries of the quality config.
    pub id: &'static str,
    /// Path patterns merged into the config's generated paths when the adapter is enabled.
    pub generated_paths: &'static [&'static str],
    /// Predicate deciding whether the adapter is detected for a set of repository paths.
    detect: fn(&RepoSignals<'_>) -> bool,
    /// Factory for the path rules appended to the config when the adapter is enabled.
    path_rules: fn() -> Vec<QualityPathRule>,
}
|
|
13
|
+
|
|
14
|
+
/// Read-only view over repository paths used by the adapter detectors.
struct RepoSignals<'a> {
    paths: &'a [String],
}

impl RepoSignals<'_> {
    /// Returns `true` when some path is exactly `expected` or ends with `/expected`.
    fn has_manifest(&self, expected: &str) -> bool {
        self.paths.iter().any(|candidate| {
            // Stripping the file name leaves either nothing (root-level manifest)
            // or a directory prefix that ends with a separator (nested manifest).
            matches!(
                candidate.strip_suffix(expected),
                Some(prefix) if prefix.is_empty() || prefix.ends_with('/')
            )
        })
    }

    /// Returns `true` when some path ends with any of the given suffixes.
    fn has_extension(&self, extensions: &[&str]) -> bool {
        for candidate in self.paths {
            for suffix in extensions {
                if candidate.ends_with(*suffix) {
                    return true;
                }
            }
        }
        false
    }
}
|
|
31
|
+
pub(crate) fn detected_adapter_ids(paths: &[String]) -> Vec<String> {
|
|
32
|
+
let signals = RepoSignals { paths };
|
|
33
|
+
registry()
|
|
34
|
+
.iter()
|
|
35
|
+
.filter(|adapter| (adapter.detect)(&signals))
|
|
36
|
+
.map(|adapter| adapter.id.to_string())
|
|
37
|
+
.collect()
|
|
38
|
+
}
|
|
39
|
+
pub(crate) fn apply_enabled_adapters(
|
|
40
|
+
mut config: RepositoryQualityConfig,
|
|
41
|
+
) -> Result<RepositoryQualityConfig, NaomeError> {
|
|
42
|
+
let mut seen = HashSet::new();
|
|
43
|
+
let local_path_rules = std::mem::take(&mut config.path_rules);
|
|
44
|
+
|
|
45
|
+
for adapter_id in config.enabled_adapters.clone() {
|
|
46
|
+
if !seen.insert(adapter_id.clone()) {
|
|
47
|
+
return Err(NaomeError::new(format!(
|
|
48
|
+
".naome/repository-quality.json enabledAdapters contains duplicate adapter '{adapter_id}'."
|
|
49
|
+
)));
|
|
50
|
+
}
|
|
51
|
+
let adapter = adapter_by_id(&adapter_id)?;
|
|
52
|
+
extend_unique(&mut config.generated_paths, adapter.generated_paths);
|
|
53
|
+
config.path_rules.extend((adapter.path_rules)());
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
config.path_rules.extend(local_path_rules);
|
|
57
|
+
Ok(config)
|
|
58
|
+
}
|
|
59
|
+
pub(crate) fn validate_adapter_ids(ids: &[String]) -> Result<(), NaomeError> {
|
|
60
|
+
let mut seen = HashSet::new();
|
|
61
|
+
for adapter_id in ids {
|
|
62
|
+
if !seen.insert(adapter_id) {
|
|
63
|
+
return Err(NaomeError::new(format!(
|
|
64
|
+
".naome/repository-quality.json enabledAdapters contains duplicate adapter '{adapter_id}'."
|
|
65
|
+
)));
|
|
66
|
+
}
|
|
67
|
+
adapter_by_id(adapter_id)?;
|
|
68
|
+
}
|
|
69
|
+
Ok(())
|
|
70
|
+
}
|
|
71
|
+
fn adapter_by_id(id: &str) -> Result<&'static QualityAdapter, NaomeError> {
|
|
72
|
+
registry()
|
|
73
|
+
.iter()
|
|
74
|
+
.find(|adapter| adapter.id == id)
|
|
75
|
+
.ok_or_else(|| {
|
|
76
|
+
NaomeError::new(format!(
|
|
77
|
+
".naome/repository-quality.json enabledAdapters contains unknown adapter '{id}'."
|
|
78
|
+
))
|
|
79
|
+
})
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
fn registry() -> &'static [QualityAdapter] {
|
|
83
|
+
&[
|
|
84
|
+
QualityAdapter {
|
|
85
|
+
id: "rust",
|
|
86
|
+
generated_paths: &[],
|
|
87
|
+
detect: detects_rust,
|
|
88
|
+
path_rules: rust_path_rules,
|
|
89
|
+
},
|
|
90
|
+
QualityAdapter {
|
|
91
|
+
id: "javascript-typescript",
|
|
92
|
+
generated_paths: &["coverage/**", "**/coverage/**", ".next/**", "**/.next/**"],
|
|
93
|
+
detect: detects_javascript_typescript,
|
|
94
|
+
path_rules: javascript_typescript_path_rules,
|
|
95
|
+
},
|
|
96
|
+
]
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
fn detects_rust(signals: &RepoSignals<'_>) -> bool {
|
|
100
|
+
signals.has_manifest("Cargo.toml") || signals.has_extension(&[".rs"])
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
fn detects_javascript_typescript(signals: &RepoSignals<'_>) -> bool {
|
|
104
|
+
signals.has_manifest("package.json")
|
|
105
|
+
|| signals.has_extension(&[".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"])
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
fn rust_path_rules() -> Vec<QualityPathRule> {
|
|
109
|
+
vec![path_rule(
|
|
110
|
+
"rust-tests",
|
|
111
|
+
&["**/tests/*.rs", "**/tests/**/*.rs"],
|
|
112
|
+
test_file_limits(80),
|
|
113
|
+
&[],
|
|
114
|
+
)]
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
fn javascript_typescript_path_rules() -> Vec<QualityPathRule> {
|
|
118
|
+
vec![QualityPathRule {
|
|
119
|
+
id: "javascript-typescript-tests".to_string(),
|
|
120
|
+
paths: javascript_typescript_test_paths(),
|
|
121
|
+
limits: test_file_limits(80),
|
|
122
|
+
disabled_checks: Vec::new(),
|
|
123
|
+
}]
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
/// Builds the path patterns that identify JavaScript/TypeScript test files.
///
/// The list contains, in order: `*.test.*` and `*.spec.*` file patterns, then
/// `test/` and `tests/` directory patterns (each for js, jsx, ts, tsx), and
/// finally `scripts/*.test.js`.
fn javascript_typescript_test_paths() -> Vec<String> {
    const EXTENSIONS: &[&str] = &["js", "jsx", "ts", "tsx"];
    const MARKERS: &[&str] = &["test", "spec"];
    const DIRECTORIES: &[&str] = &["test", "tests"];

    let mut paths: Vec<String> = MARKERS
        .iter()
        .flat_map(|marker| {
            EXTENSIONS
                .iter()
                .map(move |extension| format!("**/*.{marker}.{extension}"))
        })
        .collect();
    paths.extend(DIRECTORIES.iter().flat_map(|directory| {
        EXTENSIONS
            .iter()
            .map(move |extension| format!("**/{directory}/**/*.{extension}"))
    }));
    paths.push(String::from("scripts/*.test.js"));
    paths
}
|
|
141
|
+
|
|
142
|
+
fn test_file_limits(max_top_level_symbols: usize) -> QualityLimitOverrides {
|
|
143
|
+
QualityLimitOverrides {
|
|
144
|
+
max_file_lines: Some(650),
|
|
145
|
+
max_diff_added_lines: Some(220),
|
|
146
|
+
max_function_lines: Some(140),
|
|
147
|
+
max_top_level_symbols: Some(max_top_level_symbols),
|
|
148
|
+
duplicate_block_lines: Some(14),
|
|
149
|
+
near_duplicate_similarity: Some(0.96),
|
|
150
|
+
..QualityLimitOverrides::default()
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
fn path_rule(
|
|
155
|
+
id: &str,
|
|
156
|
+
paths: &[&str],
|
|
157
|
+
limits: QualityLimitOverrides,
|
|
158
|
+
disabled_checks: &[&str],
|
|
159
|
+
) -> QualityPathRule {
|
|
160
|
+
QualityPathRule {
|
|
161
|
+
id: id.to_string(),
|
|
162
|
+
paths: string_list(paths),
|
|
163
|
+
limits,
|
|
164
|
+
disabled_checks: string_list(disabled_checks),
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
/// Converts a slice of string slices into owned `String`s, preserving order.
fn string_list(values: &[&str]) -> Vec<String> {
    let mut owned = Vec::with_capacity(values.len());
    for value in values {
        owned.push(String::from(*value));
    }
    owned
}
|
|
171
|
+
|
|
172
|
+
/// Appends each entry of `values` to `target` unless an equal string is already
/// present, including entries appended earlier by the same call.
fn extend_unique(target: &mut Vec<String>, values: &[&str]) {
    for &candidate in values {
        let already_present = target
            .iter()
            .any(|existing| existing.as_str() == candidate);
        if already_present {
            continue;
        }
        target.push(candidate.to_owned());
    }
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
use std::collections::HashSet;
|
|
2
|
+
use std::fs;
|
|
3
|
+
use std::path::Path;
|
|
4
|
+
|
|
5
|
+
use serde::{Deserialize, Serialize};
|
|
6
|
+
|
|
7
|
+
use crate::models::NaomeError;
|
|
8
|
+
|
|
9
|
+
use super::types::QualityViolation;
|
|
10
|
+
|
|
11
|
+
/// Location of the quality baseline file; joined onto the repository root before use.
const BASELINE_RELATIVE_PATH: &str = ".naome/repository-quality-baseline.json";
|
|
12
|
+
|
|
13
|
+
/// Serialized shape of the repository quality baseline file (camelCase keys).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct QualityBaseline {
    /// Schema identifier; `write_baseline` emits `naome.repository-quality-baseline.v1`.
    schema: String,
    /// Format version; `write_baseline` emits `1`.
    version: u32,
    /// Violations recorded in the baseline.
    violations: Vec<QualityBaselineEntry>,
}
|
|
20
|
+
|
|
21
|
+
/// One recorded violation inside the baseline file (camelCase keys).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct QualityBaselineEntry {
    /// Fingerprint of the violation; the only field `read_baseline_fingerprints` returns.
    fingerprint: String,
    /// Id of the check that produced the violation.
    check_id: String,
    /// Path the violation was reported for.
    path: String,
    /// Violation message copied from the report.
    message: String,
}
|
|
29
|
+
|
|
30
|
+
/// Returns the baseline file path relative to the repository root.
pub fn baseline_relative_path() -> &'static str {
    BASELINE_RELATIVE_PATH
}
|
|
33
|
+
|
|
34
|
+
pub fn read_baseline_fingerprints(root: &Path) -> Result<HashSet<String>, NaomeError> {
|
|
35
|
+
let path = root.join(BASELINE_RELATIVE_PATH);
|
|
36
|
+
if !path.is_file() {
|
|
37
|
+
return Ok(HashSet::new());
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
let baseline: QualityBaseline = serde_json::from_str(&fs::read_to_string(path)?)?;
|
|
41
|
+
Ok(baseline
|
|
42
|
+
.violations
|
|
43
|
+
.into_iter()
|
|
44
|
+
.map(|violation| violation.fingerprint)
|
|
45
|
+
.collect())
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
pub fn write_baseline(root: &Path, violations: &[QualityViolation]) -> Result<bool, NaomeError> {
|
|
49
|
+
let path = root.join(BASELINE_RELATIVE_PATH);
|
|
50
|
+
if let Some(parent) = path.parent() {
|
|
51
|
+
fs::create_dir_all(parent)?;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
let entries = violations
|
|
55
|
+
.iter()
|
|
56
|
+
.map(|violation| QualityBaselineEntry {
|
|
57
|
+
fingerprint: violation.fingerprint.clone(),
|
|
58
|
+
check_id: violation.check_id.clone(),
|
|
59
|
+
path: violation.path.clone(),
|
|
60
|
+
message: violation.message.clone(),
|
|
61
|
+
})
|
|
62
|
+
.collect();
|
|
63
|
+
let baseline = QualityBaseline {
|
|
64
|
+
schema: "naome.repository-quality-baseline.v1".to_string(),
|
|
65
|
+
version: 1,
|
|
66
|
+
violations: entries,
|
|
67
|
+
};
|
|
68
|
+
let content = serde_json::to_string_pretty(&baseline)?;
|
|
69
|
+
let next = format!("{content}\n");
|
|
70
|
+
let changed = fs::read_to_string(&path).map_or(true, |current| current != next);
|
|
71
|
+
if changed {
|
|
72
|
+
fs::write(path, next)?;
|
|
73
|
+
}
|
|
74
|
+
Ok(changed)
|
|
75
|
+
}
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
use std::collections::{HashMap, HashSet};
|
|
2
|
+
|
|
3
|
+
use super::super::scanner::{stable_fingerprint, QualityContext};
|
|
4
|
+
use super::super::types::QualityViolation;
|
|
5
|
+
use super::{is_code_like_path, QualityCheck};
|
|
6
|
+
|
|
7
|
+
/// Quality check with id `duplicate-blocks` that reports repeated normalized code regions.
pub(super) struct DuplicateBlockCheck;
|
|
8
|
+
|
|
9
|
+
impl QualityCheck for DuplicateBlockCheck {
|
|
10
|
+
fn id(&self) -> &'static str {
|
|
11
|
+
"duplicate-blocks"
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
fn evaluate(&self, context: &QualityContext, violations: &mut Vec<QualityViolation>) {
|
|
15
|
+
let mut occurrences: HashMap<String, Vec<DuplicateOccurrence>> = HashMap::new();
|
|
16
|
+
for file in context.files.iter().filter(|file| {
|
|
17
|
+
is_code_like_path(&file.path)
|
|
18
|
+
&& context.config.check_enabled_for_path(self.id(), &file.path)
|
|
19
|
+
}) {
|
|
20
|
+
let window = context.limits_for(&file.path).duplicate_block_lines;
|
|
21
|
+
if file.normalized_lines.len() < window {
|
|
22
|
+
continue;
|
|
23
|
+
}
|
|
24
|
+
for lines in file.normalized_lines.windows(window) {
|
|
25
|
+
let joined = lines
|
|
26
|
+
.iter()
|
|
27
|
+
.map(|line| line.value.as_str())
|
|
28
|
+
.collect::<Vec<_>>()
|
|
29
|
+
.join("\n");
|
|
30
|
+
let fingerprint = stable_fingerprint(&[self.id(), &joined]);
|
|
31
|
+
occurrences
|
|
32
|
+
.entry(fingerprint.clone())
|
|
33
|
+
.or_default()
|
|
34
|
+
.push(DuplicateOccurrence {
|
|
35
|
+
path: file.path.clone(),
|
|
36
|
+
line: lines[0].line_number,
|
|
37
|
+
window,
|
|
38
|
+
fingerprint,
|
|
39
|
+
});
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
let mut emitted = HashSet::new();
|
|
44
|
+
let mut candidates = Vec::new();
|
|
45
|
+
for group in occurrences.values() {
|
|
46
|
+
for occurrence in group {
|
|
47
|
+
if !context.check_applies_to(self.id(), &occurrence.path)
|
|
48
|
+
|| !is_code_like_path(&occurrence.path)
|
|
49
|
+
{
|
|
50
|
+
continue;
|
|
51
|
+
}
|
|
52
|
+
let related_paths = related_duplicate_paths(group, occurrence);
|
|
53
|
+
if related_paths.is_empty() {
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
let key = format!(
|
|
57
|
+
"{}:{}:{}",
|
|
58
|
+
occurrence.path, occurrence.line, occurrence.fingerprint
|
|
59
|
+
);
|
|
60
|
+
if emitted.insert(key) {
|
|
61
|
+
candidates.push(DuplicateCandidate {
|
|
62
|
+
path: occurrence.path.clone(),
|
|
63
|
+
line: occurrence.line,
|
|
64
|
+
window: occurrence.window,
|
|
65
|
+
related_paths,
|
|
66
|
+
fingerprint: occurrence.fingerprint.clone(),
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
emit_duplicate_regions(self.id(), candidates, violations);
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/// One sliding window of normalized lines, recorded under its content fingerprint.
#[derive(Debug, Clone)]
struct DuplicateOccurrence {
    /// Path of the file containing the window.
    path: String,
    /// Line number of the window's first normalized line.
    line: usize,
    /// Number of normalized lines in the window.
    window: usize,
    /// Fingerprint computed from the check id and the window's joined lines.
    fingerprint: String,
}
|
|
82
|
+
|
|
83
|
+
/// A duplicate occurrence that has related occurrences and may be reported.
#[derive(Debug, Clone)]
struct DuplicateCandidate {
    /// Path of the file containing the duplicated window.
    path: String,
    /// Line number where the window (or merged region) starts.
    line: usize,
    /// Window length; grows when adjacent candidates are merged into one region.
    window: usize,
    /// Sorted, de-duplicated paths of the related occurrences.
    related_paths: Vec<String>,
    /// Content fingerprint of the originating occurrence.
    fingerprint: String,
}
|
|
91
|
+
|
|
92
|
+
fn emit_duplicate_regions(
|
|
93
|
+
check_id: &str,
|
|
94
|
+
mut candidates: Vec<DuplicateCandidate>,
|
|
95
|
+
violations: &mut Vec<QualityViolation>,
|
|
96
|
+
) {
|
|
97
|
+
candidates.sort_by(|left, right| {
|
|
98
|
+
left.path
|
|
99
|
+
.cmp(&right.path)
|
|
100
|
+
.then(left.related_paths.cmp(&right.related_paths))
|
|
101
|
+
.then(left.line.cmp(&right.line))
|
|
102
|
+
.then(left.window.cmp(&right.window))
|
|
103
|
+
.then(left.fingerprint.cmp(&right.fingerprint))
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
let mut regions: Vec<DuplicateCandidate> = Vec::new();
|
|
107
|
+
for candidate in candidates {
|
|
108
|
+
if let Some(previous) = regions.last_mut() {
|
|
109
|
+
if previous.path == candidate.path
|
|
110
|
+
&& previous.related_paths == candidate.related_paths
|
|
111
|
+
&& candidate.line <= previous.line.saturating_add(previous.window)
|
|
112
|
+
{
|
|
113
|
+
previous.window = previous
|
|
114
|
+
.window
|
|
115
|
+
.max(candidate.line.saturating_sub(previous.line) + candidate.window);
|
|
116
|
+
continue;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
regions.push(candidate);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
for region in regions {
|
|
123
|
+
violations.push(QualityViolation {
|
|
124
|
+
check_id: check_id.to_string(),
|
|
125
|
+
severity: "blocking".to_string(),
|
|
126
|
+
path: region.path.clone(),
|
|
127
|
+
line: Some(region.line),
|
|
128
|
+
message: format!(
|
|
129
|
+
"{} repeats a normalized code region already present in {}.",
|
|
130
|
+
region.path,
|
|
131
|
+
region.related_paths.join(", ")
|
|
132
|
+
),
|
|
133
|
+
value: Some(region.window as f64),
|
|
134
|
+
limit: Some(region.window as f64),
|
|
135
|
+
fingerprint: stable_fingerprint(&[
|
|
136
|
+
check_id,
|
|
137
|
+
®ion.path,
|
|
138
|
+
®ion.line.to_string(),
|
|
139
|
+
®ion.window.to_string(),
|
|
140
|
+
®ion.related_paths.join("\0"),
|
|
141
|
+
®ion.fingerprint,
|
|
142
|
+
]),
|
|
143
|
+
related_paths: region.related_paths,
|
|
144
|
+
baseline: false,
|
|
145
|
+
});
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
fn related_duplicate_paths(
|
|
150
|
+
group: &[DuplicateOccurrence],
|
|
151
|
+
occurrence: &DuplicateOccurrence,
|
|
152
|
+
) -> Vec<String> {
|
|
153
|
+
let mut related = group
|
|
154
|
+
.iter()
|
|
155
|
+
.filter(|other| duplicate_occurrences_are_related(occurrence, other))
|
|
156
|
+
.map(|other| other.path.clone())
|
|
157
|
+
.collect::<HashSet<_>>()
|
|
158
|
+
.into_iter()
|
|
159
|
+
.collect::<Vec<_>>();
|
|
160
|
+
related.sort();
|
|
161
|
+
related
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
fn duplicate_occurrences_are_related(
|
|
165
|
+
occurrence: &DuplicateOccurrence,
|
|
166
|
+
other: &DuplicateOccurrence,
|
|
167
|
+
) -> bool {
|
|
168
|
+
if occurrence.path != other.path {
|
|
169
|
+
return true;
|
|
170
|
+
}
|
|
171
|
+
if occurrence.line == other.line {
|
|
172
|
+
return false;
|
|
173
|
+
}
|
|
174
|
+
other.line.saturating_add(other.window).saturating_sub(1) < occurrence.line
|
|
175
|
+
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
use std::collections::HashSet;
|
|
2
|
+
|
|
3
|
+
use super::super::scanner::{FileAnalysis, QualityContext, SymbolAnalysis};
|
|
4
|
+
use super::super::types::QualityViolation;
|
|
5
|
+
use super::{is_code_like_path, violation, QualityCheck};
|
|
6
|
+
|
|
7
|
+
/// Quality check with id `near-duplicate-functions` that compares function token sets.
pub(super) struct NearDuplicateFunctionCheck;
|
|
8
|
+
|
|
9
|
+
impl QualityCheck for NearDuplicateFunctionCheck {
|
|
10
|
+
fn id(&self) -> &'static str {
|
|
11
|
+
"near-duplicate-functions"
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
fn evaluate(&self, context: &QualityContext, violations: &mut Vec<QualityViolation>) {
|
|
15
|
+
let symbols = collect_function_occurrences(context, self.id());
|
|
16
|
+
let mut emitted = HashSet::new();
|
|
17
|
+
|
|
18
|
+
for (index, left) in symbols.iter().enumerate() {
|
|
19
|
+
if !context.applies_to(&left.file.path) {
|
|
20
|
+
continue;
|
|
21
|
+
}
|
|
22
|
+
for right in symbols.iter().skip(index + 1) {
|
|
23
|
+
if same_symbol(left, right)
|
|
24
|
+
|| symbols_have_parent_child_relationship(left.symbol, right.symbol)
|
|
25
|
+
|| !context
|
|
26
|
+
.config
|
|
27
|
+
.check_enabled_for_path(self.id(), &right.file.path)
|
|
28
|
+
{
|
|
29
|
+
continue;
|
|
30
|
+
}
|
|
31
|
+
let threshold = duplicate_threshold(context, left.file, right.file);
|
|
32
|
+
let similarity = jaccard(&left.symbol.tokens, &right.symbol.tokens);
|
|
33
|
+
if similarity < threshold {
|
|
34
|
+
continue;
|
|
35
|
+
}
|
|
36
|
+
let key = format!(
|
|
37
|
+
"{}:{}:{}:{}",
|
|
38
|
+
left.file.path,
|
|
39
|
+
left.symbol.start_line,
|
|
40
|
+
right.file.path,
|
|
41
|
+
right.symbol.start_line
|
|
42
|
+
);
|
|
43
|
+
if emitted.insert(key) {
|
|
44
|
+
emit_near_duplicate(self.id(), left, right, similarity, threshold, violations);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/// A function symbol paired with the file analysis it was found in.
#[derive(Debug, Clone, Copy)]
struct FunctionOccurrence<'a> {
    /// File that contains the symbol.
    file: &'a FileAnalysis,
    /// The function symbol itself.
    symbol: &'a SymbolAnalysis,
}
|
|
56
|
+
|
|
57
|
+
fn collect_function_occurrences<'a>(
|
|
58
|
+
context: &'a QualityContext,
|
|
59
|
+
check_id: &str,
|
|
60
|
+
) -> Vec<FunctionOccurrence<'a>> {
|
|
61
|
+
context
|
|
62
|
+
.files
|
|
63
|
+
.iter()
|
|
64
|
+
.filter(|file| {
|
|
65
|
+
is_code_like_path(&file.path)
|
|
66
|
+
&& context.config.check_enabled_for_path(check_id, &file.path)
|
|
67
|
+
})
|
|
68
|
+
.flat_map(|file| {
|
|
69
|
+
file.symbols
|
|
70
|
+
.iter()
|
|
71
|
+
.map(move |symbol| FunctionOccurrence { file, symbol })
|
|
72
|
+
})
|
|
73
|
+
.filter(|occurrence| {
|
|
74
|
+
occurrence.symbol.kind == "function" && occurrence.symbol.tokens.len() >= 12
|
|
75
|
+
})
|
|
76
|
+
.collect()
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
fn emit_near_duplicate(
|
|
80
|
+
check_id: &str,
|
|
81
|
+
left: &FunctionOccurrence<'_>,
|
|
82
|
+
right: &FunctionOccurrence<'_>,
|
|
83
|
+
similarity: f64,
|
|
84
|
+
threshold: f64,
|
|
85
|
+
violations: &mut Vec<QualityViolation>,
|
|
86
|
+
) {
|
|
87
|
+
violations.push(violation(
|
|
88
|
+
check_id,
|
|
89
|
+
&left.file.path,
|
|
90
|
+
Some(left.symbol.start_line),
|
|
91
|
+
format!(
|
|
92
|
+
"{} {} is {:.0}% similar to {} {} in {}.",
|
|
93
|
+
left.symbol.kind,
|
|
94
|
+
left.symbol.name,
|
|
95
|
+
similarity * 100.0,
|
|
96
|
+
right.symbol.kind,
|
|
97
|
+
right.symbol.name,
|
|
98
|
+
right.file.path
|
|
99
|
+
),
|
|
100
|
+
Some(similarity),
|
|
101
|
+
Some(threshold),
|
|
102
|
+
vec![right.file.path.clone()],
|
|
103
|
+
));
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/// Two occurrences denote the same symbol when both file path and start line match.
fn same_symbol(left: &FunctionOccurrence<'_>, right: &FunctionOccurrence<'_>) -> bool {
    if left.file.path != right.file.path {
        return false;
    }
    left.symbol.start_line == right.symbol.start_line
}
|
|
109
|
+
|
|
110
|
+
/// Returns the stricter (larger) near-duplicate similarity limit of the two files.
fn duplicate_threshold(context: &QualityContext, left: &FileAnalysis, right: &FileAnalysis) -> f64 {
    let left_limit = context.limits_for(&left.path).near_duplicate_similarity;
    let right_limit = context.limits_for(&right.path).near_duplicate_similarity;
    left_limit.max(right_limit)
}
|
|
116
|
+
|
|
117
|
+
/// Returns `true` when one symbol's line range encloses the other's: it starts
/// strictly earlier and ends at the same line or later.
fn symbols_have_parent_child_relationship(left: &SymbolAnalysis, right: &SymbolAnalysis) -> bool {
    let encloses = |outer: &SymbolAnalysis, inner: &SymbolAnalysis| {
        outer.start_line < inner.start_line && outer.end_line >= inner.end_line
    };
    encloses(left, right) || encloses(right, left)
}
|
|
121
|
+
|
|
122
|
+
/// Jaccard similarity of two token sets: shared tokens divided by distinct
/// tokens overall. Two empty sets yield `0.0`.
fn jaccard(left: &HashSet<String>, right: &HashSet<String>) -> f64 {
    let shared = left.iter().filter(|token| right.contains(*token)).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let total = left.len() + right.len() - shared;
    if total == 0 {
        return 0.0;
    }
    shared as f64 / total as f64
}
|