npm - terramend - Versions diffs - 0.2.0 → 0.2.1 - Mend

terramend 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

package/dist/agents/claudePretoolGate.d.ts +2 -2
package/dist/cli.mjs +16554 -8100
package/dist/index.js +13484 -5037
package/dist/internal.js +75 -11
package/dist/mcp/assess.d.ts +86 -0
package/dist/mcp/changeSummary.d.ts +50 -0
package/dist/mcp/crosswalk.d.ts +5 -0
package/dist/mcp/localContext.d.ts +1 -1
package/dist/mcp/terraform/evidence.d.ts +99 -0
package/dist/mcp/terraform/scanners.d.ts +38 -3
package/dist/mcp/terraform/types.d.ts +16 -0
package/dist/mcp/terraform/verification.d.ts +74 -0
package/dist/mcp/terraform.d.ts +4 -0
package/dist/modes.d.ts +1 -1
package/dist/toolState.d.ts +1 -0
package/dist/utils/moduleFetch.d.ts +42 -0
package/dist/utils/payload.d.ts +4 -0
package/dist/utils/remediationCommand.d.ts +3 -0
package/dist/utils/terraformMcp.d.ts +2 -2
package/dist/utils/terramendConfig.d.ts +51 -0
package/dist/utils/toolLicensing.d.ts +56 -0
package/dist/utils/toolSelection.d.ts +72 -0
package/package.json +9 -8
package/src/agents/claudePretoolGate.ts +3 -3
package/src/mcp/assess.test.ts +135 -0
package/src/mcp/assess.ts +341 -0
package/src/mcp/changeSummary.test.ts +94 -0
package/src/mcp/changeSummary.ts +145 -0
package/src/mcp/crosswalk.ts +15 -1
package/src/mcp/guardrails.ts +11 -6
package/src/mcp/localContext.ts +7 -0
package/src/mcp/localServer.test.ts +2 -0
package/src/mcp/localServer.ts +14 -0
package/src/mcp/server.ts +6 -0
package/src/mcp/terraform/evidence.test.ts +72 -0
package/src/mcp/terraform/evidence.ts +187 -0
package/src/mcp/terraform/scanners.ts +86 -9
package/src/mcp/terraform/tools.test.ts +96 -1
package/src/mcp/terraform/tools.ts +115 -32
package/src/mcp/terraform/types.ts +24 -0
package/src/mcp/terraform/verification.test.ts +85 -0
package/src/mcp/terraform/verification.ts +133 -0
package/src/mcp/terraform.test.ts +108 -0
package/src/mcp/terraform.ts +4 -0
package/src/modes.test.ts +9 -1
package/src/modes.ts +81 -11
package/src/toolState.ts +6 -0
package/src/utils/moduleFetch.test.ts +68 -0
package/src/utils/moduleFetch.ts +86 -0
package/src/utils/payload.test.ts +66 -1
package/src/utils/payload.ts +39 -11
package/src/utils/remediationCommand.test.ts +32 -0
package/src/utils/remediationCommand.ts +11 -0
package/src/utils/terraformMcp.ts +6 -5
package/src/utils/terramendConfig.test.ts +98 -0
package/src/utils/terramendConfig.ts +143 -0
package/src/utils/toolLicensing.test.ts +54 -0
package/src/utils/toolLicensing.ts +103 -0
package/src/utils/toolSelection.test.ts +140 -0
package/src/utils/toolSelection.ts +231 -0

package/src/mcp/terraform/tools.ts CHANGED Viewed

@@ -44,6 +44,8 @@ import {
   collectProviderRequirements,
   computeRegressions,
   computeRemediationVerdict,
+  partitionByKey,
+  regressionIdsByKey,
   runScanners,
   scanFmt,
   scanTflint,
@@ -51,16 +53,20 @@ import {
 } from "#app/mcp/terraform/scanners";
 import {
   type Concern,
+  concernKeyOf,
   dedupe,
   isTerraformConcern,
   resolveRoots,
   run,
   SEVERITY_RANK,
   type Severity,
+  skipped,
   skipResult,
   sortConcerns,
 } from "#app/mcp/terraform/types";
 import { log } from "#app/utils/cli";
+import { resolveModuleFetchEnv } from "#app/utils/moduleFetch";
+import { resolveToolSelection } from "#app/utils/toolSelection";
 export const TerraformScanParams = type({
   "scan_scope?": type("'full' | 'diff'").describe(
@@ -98,7 +104,18 @@ export function TerraformScanTool(ctx: LocalToolContext) {
       const minRank = SEVERITY_RANK[threshold];
       const scope = scan_scope ?? ctx.payload.scanScope ?? "full";
-      const outcomes = runScanners(cwd);
+      // §1.5 — the unified tool selection (licence gate + allow/deny) and the
+      // optional module-fetch credential, both derived from the run payload so
+      // the scan and its ✗→✓ verification re-scan see the identical toolchain.
+      const selection = resolveToolSelection(ctx.payload);
+      const terraformEnv = resolveModuleFetchEnv(ctx.payload);
+      if (selection.unknownTokens.length > 0) {
+        log.warning(
+          `» tools_enabled: ignoring unrecognised tool(s) [${selection.unknownTokens.join(", ")}]`,
+        );
+      }
+      const outcomes = runScanners(cwd, { selection, terraformEnv });
       // diff scope: keep only concerns in Terraform files changed vs the base.
       let scopeNote: string | undefined;
@@ -115,12 +132,14 @@ export function TerraformScanTool(ctx: LocalToolContext) {
           ? true
           : changed.has(c.location.file.replace(/\\/g, "/").replace(/^\.\//, ""));
-      // §1.4 baseline: the full, severity-unfiltered concern-id set, captured
-      // BEFORE any fix and computed identically to verify's `current` set so the
-      // later regression diff (current − baseline) is apples-to-apples.
-      ctx.toolState.baselineConcernIds = dedupe(outcomes.flatMap((o) => o.concerns)).map(
-        (c) => c.id,
-      );
+      // §1.4 baseline: the full, severity-unfiltered concern set, captured BEFORE
+      // any fix and computed identically to verify's `current` set so the later
+      // regression diff is apples-to-apples. We store BOTH the line-pinned ids and
+      // the line-independent keys (concernKeyOf) — verify diffs on the keys so a
+      // line-shifting fix can't fabricate a resolution or a regression.
+      const fullBaseline = dedupe(outcomes.flatMap((o) => o.concerns));
+      ctx.toolState.baselineConcernIds = fullBaseline.map((c) => c.id);
+      ctx.toolState.baselineConcernKeys = fullBaseline.map((c) => concernKeyOf(c));
       const all = sortConcerns(dedupe(outcomes.flatMap((o) => o.concerns)))
         .filter(isTerraformConcern)
@@ -183,6 +202,12 @@ export function TerraformScanTool(ctx: LocalToolContext) {
         grouping,
         scanners_ran: ran,
         scanners_skipped: skippedScanners,
+        // §1.5 — licence-aware tool posture: which non-permissive tools are off
+        // for want of an explicit opt-in, and which were explicitly disabled.
+        tool_selection: {
+          licence_gated: selection.gated,
+          disabled: selection.disabled,
+        },
         summary: { total: all.length, groups: groups.length, by_severity },
         groups: groups.map((g) => ({ ...g, doc_urls: docUrlsForGroup(g, all) })),
         batch_plan: batchPlan,
@@ -218,9 +243,20 @@ export function TerraformValidateTool(ctx: LocalToolContext) {
     parameters: TerraformValidateParams,
     execute: execute(async () => {
       const cwd = ctx.payload.cwd ?? process.cwd();
+      // §1.5 — honour the same licence gate + module-fetch credential as
+      // terraform_scan: tflint (MPL-2.0) runs only when opted in; validate's init
+      // gets the credential so a private cross-repo module resolves.
+      const selection = resolveToolSelection(ctx.payload);
+      const terraformEnv = resolveModuleFetchEnv(ctx.payload);
       // `terraform validate` runs per-root (multi-root aware); fmt + tflint are
       // recursive over the whole tree.
-      const checks = [scanFmt(cwd), scanValidate(cwd), scanTflint(cwd)];
+      const checks = [
+        scanFmt(cwd),
+        scanValidate(cwd, terraformEnv),
+        selection.enabled("tflint")
+          ? scanTflint(cwd)
+          : skipped("tflint", selection.offReason("tflint") ?? "disabled by tools_enabled"),
+      ];
       const remaining = sortConcerns(dedupe(checks.flatMap((c) => c.concerns)));
       const ran = checks.filter((c) => c.ran).map((c) => c.source);
       // count of roots where terraform ran but `validate -json` couldn't be
@@ -294,22 +330,58 @@ export function TerraformVerifyRemediationTool(ctx: LocalToolContext) {
     parameters: TerraformVerifyRemediationParams,
     execute: execute(async ({ concern_ids }) => {
       const cwd = ctx.payload.cwd ?? process.cwd();
-      const outcomes = runScanners(cwd);
-      const currentIds = dedupe(outcomes.flatMap((o) => o.concerns)).map((c) => c.id);
-      const current = new Set(currentIds);
-      const verdict = computeRemediationVerdict(concern_ids, current);
-      // §1.4 — concern ids the fix INTRODUCED (present now, absent from the
-      // pre-fix baseline). Only computable when terraform_scan captured a
-      // baseline this run; absent that, regressions are reported as unknown
-      // rather than falsely empty.
-      const baseline = ctx.toolState.baselineConcernIds;
-      const regressions = baseline ? computeRegressions(baseline, currentIds) : [];
-      const regressionsKnown = baseline !== undefined;
+      // §1.5 — re-scan with the SAME selection + module-fetch credential the
+      // scan used, so a licence-gated tool is consistently off across baseline +
+      // verification and the ✗→✓ partition stays apples-to-apples.
+      const outcomes = runScanners(cwd, {
+        selection: resolveToolSelection(ctx.payload),
+        terraformEnv: resolveModuleFetchEnv(ctx.payload),
+      });
+      const currentConcerns = dedupe(outcomes.flatMap((o) => o.concerns));
+      const currentIds = currentConcerns.map((c) => c.id);
+      // line-INDEPENDENT keys: verify on (source|rule|file), not the line-pinned
+      // id, so a fix that shifts lines (almost every fix) can't make an unfixed
+      // concern look resolved nor a pre-existing one look like a regression.
+      const currentKeys = new Set(currentConcerns.map((c) => concernKeyOf(c)));
+      // Map each requested id → its key via the original scan's concerns
+      // (lastScanConcerns). Ids we can't key-map (verify called without a prior
+      // scan this run — e.g. a bare findings.json) fall back to exact-id matching.
+      const keyById = new Map(
+        (ctx.toolState.lastScanConcerns ?? []).map((c) => [c.id, concernKeyOf(c)] as const),
+      );
+      const keyed: { id: string; key: string }[] = [];
+      const unkeyed: string[] = [];
+      for (const id of concern_ids) {
+        const key = keyById.get(id);
+        if (key !== undefined) keyed.push({ id, key });
+        else unkeyed.push(id);
+      }
+      const keyedVerdict = partitionByKey(keyed, currentKeys);
+      const fallbackVerdict = computeRemediationVerdict(unkeyed, new Set(currentIds));
+      const resolved = [...keyedVerdict.resolved, ...fallbackVerdict.resolved];
+      const remaining = [...keyedVerdict.remaining, ...fallbackVerdict.remaining];
+      const verified = remaining.length === 0;
+      // §1.4 — concerns the fix INTRODUCED, on the SAME line-independent key basis
+      // (a pre-existing concern that merely shifted lines is NOT a regression).
+      // Prefer baseline keys; fall back to the legacy raw-id diff only when keys
+      // weren't captured (no scan this run) — then report as unknown if neither is.
+      const baselineKeys = ctx.toolState.baselineConcernKeys;
+      const baselineIds = ctx.toolState.baselineConcernIds;
+      const regressions = baselineKeys
+        ? regressionIdsByKey(
+            currentConcerns.map((c) => ({ id: c.id, key: concernKeyOf(c) })),
+            new Set(baselineKeys),
+          )
+        : baselineIds
+          ? computeRegressions(baselineIds, currentIds)
+          : [];
+      const regressionsKnown = baselineKeys !== undefined || baselineIds !== undefined;
       // §5.19 — deterministic confidence from the evidence on hand.
       const confidence = computeConfidence({
-        verified: verdict.verified,
+        verified,
         regressionCount: regressions.length,
         idempotent: ctx.toolState.lastIdempotent,
         blastTier: ctx.toolState.lastBlastTier,
@@ -318,17 +390,17 @@ export function TerraformVerifyRemediationTool(ctx: LocalToolContext) {
       const ran = outcomes.filter((o) => o.ran).map((o) => o.source);
       log.info(
-        `» terraform_verify_remediation: ${verdict.resolved.length}/${concern_ids.length} resolved` +
-          ` (${verdict.remaining.length} still present` +
+        `» terraform_verify_remediation: ${resolved.length}/${concern_ids.length} resolved` +
+          ` (${remaining.length} still present` +
           (regressionsKnown ? `, ${regressions.length} regression(s)` : "") +
           `) — confidence: ${confidence.level} — from [${ran.join(", ")}]`,
       );
       return toolOk({
-        verified: verdict.verified,
-        resolved_count: verdict.resolved.length,
-        remaining_count: verdict.remaining.length,
-        resolved: verdict.resolved,
-        remaining: verdict.remaining,
+        verified,
+        resolved_count: resolved.length,
+        remaining_count: remaining.length,
+        resolved,
+        remaining,
         // §1.4 regression guard
         has_regressions: regressions.length > 0,
         regressions,
@@ -449,7 +521,12 @@ export function TerraformEmitSarifTool(ctx: LocalToolContext) {
       const configured = ctx.payload.severityThreshold as Severity | undefined;
       const threshold: Severity = severity_threshold ?? configured ?? "low";
       const minRank = SEVERITY_RANK[threshold];
-      const outcomes = runScanners(cwd);
+      // §1.5 — same selection + module-fetch credential as terraform_scan, so the
+      // SARIF report mirrors exactly what a scan reports (no extra gated tools).
+      const outcomes = runScanners(cwd, {
+        selection: resolveToolSelection(ctx.payload),
+        terraformEnv: resolveModuleFetchEnv(ctx.payload),
+      });
       const concerns = sortConcerns(dedupe(outcomes.flatMap((o) => o.concerns)))
         .filter(isTerraformConcern)
         .filter((c) => SEVERITY_RANK[c.severity] >= minRank);
@@ -640,7 +717,10 @@ export function TerraformPlanTool(ctx: LocalToolContext) {
           "no cloud credentials detected — terraform plan needs provider/backend access; skipped (add AWS/Azure/GCP creds or an OIDC role to enable it)",
         );
       }
-      const creds = collectCloudCredentials();
+      // §1.5 — fold the optional module-fetch credential into the plan env so
+      // `terraform init` can resolve a private cross-repo `git::` module. The
+      // GIT_CONFIG_* keys never collide with the cloud creds.
+      const creds = { ...collectCloudCredentials(), ...(resolveModuleFetchEnv(ctx.payload) ?? {}) };
       // multi-root: plan EACH root (hepcare: terraform/ + terraform/core/) and
       // aggregate. resolveRoots falls back to [cwd] for a single-root repo, so
@@ -816,8 +896,11 @@ export function ReadFindingsTool(ctx: LocalToolContext) {
       const minRank = SEVERITY_RANK[threshold];
       // §1.4 baseline — same role as terraform_scan's, so a regression check
-      // after a reviewer-sourced fix has a baseline to diff against.
-      ctx.toolState.baselineConcernIds = dedupe(parsed).map((c) => c.id);
+      // after a reviewer-sourced fix has a baseline to diff against. Store ids +
+      // line-independent keys (verify diffs on the keys).
+      const fullBaseline = dedupe(parsed);
+      ctx.toolState.baselineConcernIds = fullBaseline.map((c) => c.id);
+      ctx.toolState.baselineConcernKeys = fullBaseline.map((c) => concernKeyOf(c));
       const all = sortConcerns(dedupe(parsed))
         .filter(isTerraformConcern)

package/src/mcp/terraform/types.ts CHANGED Viewed

@@ -79,6 +79,30 @@ export function concernId(
     .slice(0, 12);
 }
+/**
+ * A LINE-INDEPENDENT identity for a concern — which rule fires in which file,
+ * ignoring the exact line. Two instances of the same rule in the same file at
+ * different lines share a key.
+ *
+ * The full content `id` keys on the line so it's unique per instance (right for
+ * SARIF alerts + branch naming), but that makes it UNSTABLE under a fix: almost
+ * every fix adds or removes lines, shifting every concern below it to a new line
+ * → a new id. If ✗→✓ verification compared raw ids, a shifted-but-unfixed concern
+ * would look RESOLVED (old id gone) and simultaneously look like a REGRESSION
+ * (new id appeared) — a false attestation either way. `terraform_verify_remediation`
+ * compares on this key instead, so a line shift can't fabricate a resolution or a
+ * regression. Derived identically to `id` minus the line (same bare-rule
+ * normalization) so keys match across the original scan and the re-scan.
+ *
+ * Consequence of dropping the line: several instances of one rule in one file
+ * collapse to a single key, so that key only disappears from a set of current
+ * keys once EVERY instance of the rule in that file is gone.
+ *
+ * @param c - the concern's `source`, `rule_id` and `location`; of the location
+ *   only `location.file` is read, `location.line` is ignored.
+ * @returns the first 12 hex characters of the SHA-1 digest of
+ *   `source|bareRule|file`, where `bareRule` is `rule_id` with a leading
+ *   `<source>:` prefix removed when present.
+ */
+export function concernKeyOf(c: Pick<Concern, "source" | "rule_id" | "location">): string {
+  const prefix = `${c.source}:`;
+  const bareRule = c.rule_id.startsWith(prefix) ? c.rule_id.slice(prefix.length) : c.rule_id;
+  return createHash("sha1")
+    .update(`${c.source}|${bareRule}|${c.location.file}`)
+    .digest("hex")
+    .slice(0, 12);
+}
 /**
  * Normalize a scanner-reported path to a repo-relative POSIX path. Each scanner
  * reports the file differently — tflint gives `main.tf` (relative), trivy a

package/src/mcp/terraform/verification.test.ts ADDED Viewed

@@ -0,0 +1,85 @@
+import { describe, expect, it } from "vitest";
+import type { ScannerOutcome } from "#app/mcp/terraform/types";
+import {
+  buildVerificationSummary,
+  concernVerificationStatus,
+  VERIFICATION_STATUS_LABEL,
+  VERIFICATION_STATUSES,
+} from "#app/mcp/terraform/verification";
+describe("the five-status taxonomy", () => {
+  it("has exactly the five statuses, each with a legend", () => {
+    expect([...VERIFICATION_STATUSES]).toEqual([ // array equality, so the order is pinned too
+      "pass",
+      "fail",
+      "not-applicable",
+      "inconclusive",
+      "not-code-verifiable",
+    ]);
+    for (const s of VERIFICATION_STATUSES) {
+      expect(VERIFICATION_STATUS_LABEL[s]).toBeTruthy(); // every status needs a non-empty legend
+    }
+  });
+});
+describe("concernVerificationStatus", () => {
+  it("a code-verified violation is `fail`", () => {
+    const v = concernVerificationStatus({
+      rule_id: "trivy:AVD-AWS-0088",
+      evidence: "S3 bucket is unencrypted at rest",
+    });
+    expect(v.status).toBe("fail");
+    expect(v.reason).toBeUndefined(); // a plain `fail` carries no reason
+  });
+  it("a human-decision concern is `not-code-verifiable` with a reason", () => {
+    const v = concernVerificationStatus({
+      rule_id: "checkov:CKV_AWS_1",
+      evidence: "IAM policy uses a wildcard * action",
+    });
+    expect(v.status).toBe("not-code-verifiable");
+    expect(v.reason).toMatch(/human decision/i); // only the wording is matched, not the full text
+  });
+});
+describe("buildVerificationSummary", () => {
+  const concerns = [
+    { id: "a", rule_id: "trivy:AVD-AWS-0088", evidence: "unencrypted at rest" }, // expected: fail
+    { id: "b", rule_id: "checkov:CKV_AWS_1", evidence: "least-privilege wildcard policy" }, // expected: not-code-verifiable
+  ];
+  const outcomes: ScannerOutcome[] = [ // two scanners that ran, one that was skipped
+    { source: "trivy", ran: true, concerns: [] },
+    { source: "checkov", ran: true, concerns: [] },
+    {
+      source: "tflint",
+      ran: false,
+      skipped_reason: "licence-gated (TFLint, MPL-2.0)",
+      concerns: [],
+    },
+  ];
+  it("classifies each concern and counts fail vs not-code-verifiable", () => {
+    const s = buildVerificationSummary(concerns, outcomes);
+    expect(s.counts.fail).toBe(1);
+    expect(s.counts.not_code_verifiable).toBe(1);
+    expect(s.concerns).toEqual([ // same order as the input concerns
+      { id: "a", status: "fail" },
+      { id: "b", status: "not-code-verifiable", reason: expect.stringMatching(/human decision/i) },
+    ]);
+  });
+  it("partitions scanners into verified (ran) vs inconclusive (skipped)", () => {
+    const s = buildVerificationSummary(concerns, outcomes);
+    expect(s.coverage.verified).toEqual(["trivy", "checkov"]);
+    expect(s.coverage.inconclusive).toEqual([
+      { source: "tflint", reason: "licence-gated (TFLint, MPL-2.0)" },
+    ]);
+    expect(s.counts.inconclusive).toBe(1); // counts skipped scanners, not concerns
+  });
+  it("carries the honesty note (no silent pass)", () => {
+    const s = buildVerificationSummary([], []); // the note is present even with nothing to report
+    expect(s.note).toMatch(/not proof of compliance/i);
+    expect(s.concerns).toEqual([]);
+  });
+});

package/src/mcp/terraform/verification.ts ADDED Viewed

@@ -0,0 +1,133 @@
+import { classifyRefusal } from "#app/mcp/terraform/decisions";
+import type { Concern, ScannerOutcome } from "#app/mcp/terraform/types";
+/**
+ * Five-status verification taxonomy (the auditor-credibility win the evidence
+ * pack + crosswalk both lean on). The point is HONESTY: never let "no finding"
+ * read as "compliant", and never claim the engine proved something it cannot see
+ * from code. Every assessment statement carries exactly one of these:
+ *
+ *   - `pass`                — a check ran and code-verified compliance.
+ *   - `fail`                — a check ran and code-verified a violation.
+ *   - `not-applicable`      — the control does not apply to the resources present.
+ *   - `inconclusive`        — a relevant check did NOT run (gated / not installed /
+ *                             unparseable). A coverage gap, never silently a pass.
+ *   - `not-code-verifiable` — the control needs human / process evidence
+ *                             (governance, training, a key-policy decision); IaC
+ *                             scanning structurally cannot prove it either way.
+ *
+ * What this engine asserts today: `fail` and `not-code-verifiable` per concern,
+ * and `inconclusive` per scanner that didn't run. It deliberately does NOT
+ * fabricate `pass` / `not-applicable` for controls nothing fired on — absence of
+ * a finding is not proof, and over-claiming is exactly what costs credibility
+ * with an assessor. The two reserved statuses are part of the shared vocabulary
+ * for the evidence consumer (and a future full-framework crosswalk). Pure.
+ *
+ * The "two reserved statuses" are `pass` and `not-applicable`. The tuple is
+ * declared `as const` and the `VerificationStatus` union is derived from it by
+ * indexed access, so the type follows this array automatically.
+ */
+export const VERIFICATION_STATUSES = [
+  "pass",
+  "fail",
+  "not-applicable",
+  "inconclusive",
+  "not-code-verifiable",
+] as const;
+export type VerificationStatus = (typeof VERIFICATION_STATUSES)[number];
+/**
+ * one-line legend per status — for the report / evidence bundle. Typed as
+ * `Record<VerificationStatus, string>`, so every status in the taxonomy must
+ * have an entry here.
+ */
+export const VERIFICATION_STATUS_LABEL: Record<VerificationStatus, string> = {
+  pass: "a check ran and code-verified compliance",
+  fail: "a check ran and code-verified a violation",
+  "not-applicable": "the control does not apply to the resources present",
+  inconclusive: "a relevant check did not run — a coverage gap, not a pass",
+  "not-code-verifiable": "needs human / process evidence — IaC cannot prove it",
+};
+/** the statuses the engine asserts per concern (a concern is always one or the
+ * other — it fired, the only question is whether code can prove the fix).
+ * Narrowed from `VerificationStatus` with `Extract`, so it stays a subset of
+ * the five-status taxonomy. */
+export type ConcernVerificationStatus = Extract<VerificationStatus, "fail" | "not-code-verifiable">;
+/**
+ * Classify one concern: a code-verified violation (`fail`) — UNLESS its
+ * remediation is a human decision the engine can flag but not prove from code
+ * (IAM least-privilege, a KMS key policy, a real CIDR — the §29 refusal set), in
+ * which case it is `not-code-verifiable`. Pure.
+ *
+ * @param concern - only `rule_id` and `evidence` are required; the object is
+ *   handed unchanged to `classifyRefusal`, which makes the decision.
+ * @returns `{ status: "not-code-verifiable" }` when the refusal classifier says
+ *   to refuse — with `reason` attached only when the classifier supplied a
+ *   truthy one — otherwise `{ status: "fail" }` with no `reason` key at all.
+ */
+export function concernVerificationStatus(concern: Pick<Concern, "rule_id" | "evidence">): {
+  status: ConcernVerificationStatus;
+  reason?: string;
+} {
+  const refusal = classifyRefusal(concern);
+  if (refusal.refuse) {
+    return refusal.reason
+      ? { status: "not-code-verifiable", reason: refusal.reason }
+      : { status: "not-code-verifiable" };
+  }
+  return { status: "fail" };
+}
+export interface VerifiedConcern { // one concern's id paired with its verification status
+  id: string; // the classified concern's own id
+  status: ConcernVerificationStatus; // "fail" or "not-code-verifiable"
+  reason?: string; // set only when the classification supplied a reason
+}
+export interface VerificationSummary { // the roll-up returned by buildVerificationSummary
+  /** per-concern verification status, one entry per input concern, in input order. */
+  concerns: VerifiedConcern[];
+  counts: {
+    fail: number; // concerns whose status is "fail"
+    not_code_verifiable: number; // concerns whose status is "not-code-verifiable"
+    /** scanners that did not run (each is a coverage gap). */
+    inconclusive: number;
+  };
+  coverage: {
+    /** scanners that ran — their checks are code-verified for what they cover. */
+    verified: string[];
+    /** scanners that did NOT run — their checks are INCONCLUSIVE, never a pass. */
+    inconclusive: { source: string; reason: string }[];
+  };
+  /** the honesty caveat an assessor should read alongside the statuses. */
+  note: string;
+}
+const HONESTY_NOTE =
+  "Statuses are code-verified only. A scanner that did not run leaves its checks " +
+  "INCONCLUSIVE (a coverage gap, not a pass); controls needing human/process " +
+  "evidence are NOT-CODE-VERIFIABLE. Absence of a finding is not proof of compliance.";
+/**
+ * Roll a scan up into a verification summary: every concern classified
+ * (fail / not-code-verifiable) and every scanner partitioned into verified (ran)
+ * vs inconclusive (skipped — gated, not installed, or unparseable). Pure;
+ * `outcomes` is the raw `runScanners` result, `concerns` the deduped,
+ * Terraform-only set the assessment reports on.
+ *
+ * @param concerns - the concerns to classify; each needs `id`, `rule_id` and
+ *   `evidence`. Output order follows input order.
+ * @param outcomes - one entry per scanner. `ran` decides the partition; for a
+ *   scanner that did not run, a missing `skipped_reason` is reported as
+ *   "did not run".
+ * @returns the per-concern statuses, the fail / not-code-verifiable /
+ *   inconclusive counts, the scanner coverage split, and the fixed honesty
+ *   note. `counts.inconclusive` counts scanners, not concerns.
+ */
+export function buildVerificationSummary(
+  concerns: Pick<Concern, "id" | "rule_id" | "evidence">[],
+  outcomes: ScannerOutcome[],
+): VerificationSummary {
+  const verified: VerifiedConcern[] = concerns.map((c) => {
+    const v = concernVerificationStatus(c);
+    return v.reason // omit the `reason` key entirely rather than set it to undefined
+      ? { id: c.id, status: v.status, reason: v.reason }
+      : { id: c.id, status: v.status };
+  });
+  const verifiedTools: string[] = [];
+  const inconclusiveTools: { source: string; reason: string }[] = [];
+  for (const o of outcomes) {
+    if (o.ran) verifiedTools.push(o.source);
+    else inconclusiveTools.push({ source: o.source, reason: o.skipped_reason ?? "did not run" });
+  }
+  return {
+    concerns: verified,
+    counts: {
+      fail: verified.filter((c) => c.status === "fail").length,
+      not_code_verifiable: verified.filter((c) => c.status === "not-code-verifiable").length,
+      inconclusive: inconclusiveTools.length,
+    },
+    coverage: { verified: verifiedTools, inconclusive: inconclusiveTools },
+    note: HONESTY_NOTE,
+  };
+}

package/src/mcp/terraform.test.ts CHANGED Viewed

@@ -20,6 +20,7 @@ import {
   computeCostDelta,
   computeRegressions,
   computeRemediationVerdict,
+  concernKeyOf,
   groupConcerns,
   groupConcernsByRule,
   isPureMovePlan,
@@ -39,10 +40,12 @@ import {
   parseTflintOutput,
   parseTrivyOutput,
   parseValidateOutput,
+  partitionByKey,
   planBatches,
   preventiveControlFor,
   type RootPlan,
   rebaseConcern,
+  regressionIdsByKey,
   resolveRoots,
   resourceTypeOf,
   ruleDocUrl,
@@ -1235,6 +1238,111 @@ describe("computeRegressions (§1.4)", () => {
   });
 });
+describe("concernKeyOf (line-independent identity — verification integrity)", () => {
+  const at = (line: number | null): Pick<Concern, "source" | "rule_id" | "location"> => ({ // fixture: one checkov rule in main.tf, only the line varies
+    source: "checkov",
+    rule_id: "checkov:CKV_AWS_23",
+    location: { file: "main.tf", line },
+  });
+  it("is the SAME for the same rule+file at different lines (the core property)", () => {
+    // this is what makes ✗→✓ verification survive a line-shifting fix.
+    expect(concernKeyOf(at(76))).toBe(concernKeyOf(at(95)));
+    expect(concernKeyOf(at(76))).toBe(concernKeyOf(at(null))); // a missing line yields the same key too
+  });
+  it("differs when the rule or the file differs", () => {
+    expect(concernKeyOf(at(10))).not.toBe(
+      concernKeyOf({
+        source: "checkov",
+        rule_id: "checkov:CKV_AWS_8", // different rule, same file
+        location: { file: "main.tf", line: 10 },
+      }),
+    );
+    expect(concernKeyOf(at(10))).not.toBe(
+      concernKeyOf({
+        source: "checkov",
+        rule_id: "checkov:CKV_AWS_23",
+        location: { file: "vars.tf", line: 10 }, // same rule, different file
+      }),
+    );
+  });
+  it("ignores the `source:` rule prefix so it matches the id's bare-rule normalization", () => {
+    const withPrefix = concernKeyOf({
+      source: "trivy",
+      rule_id: "trivy:AVD-AWS-0130",
+      location: { file: "main.tf", line: 5 },
+    });
+    const bare = concernKeyOf({
+      source: "trivy",
+      rule_id: "AVD-AWS-0130",
+      location: { file: "main.tf", line: 5 },
+    });
+    expect(withPrefix).toBe(bare);
+  });
+});
+describe("partitionByKey (✗→✓ on line-independent keys)", () => {
+  it("marks a concern RESOLVED only when its key is gone from the re-scan", () => {
+    const v = partitionByKey(
+      [
+        { id: "id-imds", key: "k-imds" },
+        { id: "id-cidr", key: "k-cidr" },
+      ],
+      new Set(["k-cidr"]), // imds key gone (fixed), cidr key still present
+    );
+    expect(v.resolved).toEqual(["id-imds"]);
+    expect(v.remaining).toEqual(["id-cidr"]);
+    expect(v.verified).toBe(false); // one requested concern is still present
+  });
+  it("the regression-bug scenario: a line-SHIFTED unfixed concern is NOT falsely resolved", () => {
+    // The fix added lines above an unfixed concern, so its id changed (line 76 →
+    // 95) but its KEY (source|rule|file) is stable. Requesting the old id with the
+    // old key, and the re-scan still carrying that key, must report it REMAINING.
+    const v = partitionByKey([{ id: "id-line76", key: "k-cidr" }], new Set(["k-cidr"]));
+    expect(v.resolved).toEqual([]);
+    expect(v.remaining).toEqual(["id-line76"]); // reported under the id the caller asked about
+  });
+});
+describe("regressionIdsByKey (§1.4 on line-independent keys)", () => {
+  it("does NOT flag a pre-existing concern that merely shifted lines", () => {
+    // baseline had key k-cidr; after the fix it's still present (at a new line/id)
+    // — same key, so NOT a regression (the raw-id diff would have flagged it).
+    const regressions = regressionIdsByKey(
+      [{ id: "id-line95", key: "k-cidr" }],
+      new Set(["k-cidr"]),
+    );
+    expect(regressions).toEqual([]);
+  });
+  it("flags a genuinely new (rule, file) defect the fix introduced", () => {
+    const regressions = regressionIdsByKey(
+      [
+        { id: "id-old", key: "k-old" },
+        { id: "id-new", key: "k-new" },
+      ],
+      new Set(["k-old"]), // baseline knows only k-old
+    );
+    expect(regressions).toEqual(["id-new"]);
+  });
+  it("returns one representative id per new key (dedups by key) and sorts", () => {
+    const regressions = regressionIdsByKey(
+      [
+        { id: "id-b", key: "k-new" },
+        { id: "id-a", key: "k-new" },
+        { id: "id-z", key: "k-other" },
+      ],
+      new Set<string>(), // empty baseline: every current key counts as new
+    );
+    // one id per new key, sorted; k-new keeps its FIRST-seen id (id-b)
+    expect(regressions).toEqual(["id-b", "id-z"]);
+  });
+});
 describe("classifyAutonomy (§3.9)", () => {
   const c = (severity: Concern["severity"], category: Concern["category"]) => ({
     severity,

package/src/mcp/terraform.ts CHANGED Viewed

@@ -11,13 +11,17 @@
  *   findings  — reviewer findings + SARIF ingest/emit
  *   plan      — plan parsing + destroy/blast/stability/aggregation
  *   tools     — the MCP Tool factories + their *Params schemas
+ *   verification — the five-status taxonomy (fail / not-code-verifiable / …)
+ *   evidence  — the backend-free compliance evidence bundle + emitter
  */
 export * from "#app/mcp/terraform/cost";
 export * from "#app/mcp/terraform/currency";
 export * from "#app/mcp/terraform/decisions";
+export * from "#app/mcp/terraform/evidence";
 export * from "#app/mcp/terraform/findings";
 export * from "#app/mcp/terraform/plan";
 export * from "#app/mcp/terraform/scanners";
 export * from "#app/mcp/terraform/tools";
 export * from "#app/mcp/terraform/types";
+export * from "#app/mcp/terraform/verification";

package/src/modes.test.ts CHANGED Viewed

@@ -14,9 +14,11 @@ const EXPECTED_MODE_NAMES = [
   "AddressReviews",
   "Review",
   "IncrementalReview",
+  "SummarizePr",
   "Plan",
   "Fix",
   "ResolveConflicts",
+  "Assess",
   "Remediate",
   "RefreshRemediation",
   "GenerateTerraform",
@@ -96,7 +98,13 @@ describe("static mode exports", () => {
   });
   it("NON_COMMITTING_MODES only names real built-in modes", () => {
-    expect([...NON_COMMITTING_MODES].sort()).toEqual(["IncrementalReview", "Plan", "Review"]);
+    expect([...NON_COMMITTING_MODES].sort()).toEqual([
+      "Assess",
+      "IncrementalReview",
+      "Plan",
+      "Review",
+      "SummarizePr",
+    ]);
     for (const mode of NON_COMMITTING_MODES) {
       expect(BUILTIN_MODE_NAMES).toContain(mode);
     }