terramend 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/agents/claudePretoolGate.d.ts +2 -2
  2. package/dist/cli.mjs +16554 -8100
  3. package/dist/index.js +13484 -5037
  4. package/dist/internal.js +75 -11
  5. package/dist/mcp/assess.d.ts +86 -0
  6. package/dist/mcp/changeSummary.d.ts +50 -0
  7. package/dist/mcp/crosswalk.d.ts +5 -0
  8. package/dist/mcp/localContext.d.ts +1 -1
  9. package/dist/mcp/terraform/evidence.d.ts +99 -0
  10. package/dist/mcp/terraform/scanners.d.ts +38 -3
  11. package/dist/mcp/terraform/types.d.ts +16 -0
  12. package/dist/mcp/terraform/verification.d.ts +74 -0
  13. package/dist/mcp/terraform.d.ts +4 -0
  14. package/dist/modes.d.ts +1 -1
  15. package/dist/toolState.d.ts +1 -0
  16. package/dist/utils/moduleFetch.d.ts +42 -0
  17. package/dist/utils/payload.d.ts +4 -0
  18. package/dist/utils/remediationCommand.d.ts +3 -0
  19. package/dist/utils/terraformMcp.d.ts +2 -2
  20. package/dist/utils/terramendConfig.d.ts +51 -0
  21. package/dist/utils/toolLicensing.d.ts +56 -0
  22. package/dist/utils/toolSelection.d.ts +72 -0
  23. package/package.json +9 -8
  24. package/src/agents/claudePretoolGate.ts +3 -3
  25. package/src/mcp/assess.test.ts +135 -0
  26. package/src/mcp/assess.ts +341 -0
  27. package/src/mcp/changeSummary.test.ts +94 -0
  28. package/src/mcp/changeSummary.ts +145 -0
  29. package/src/mcp/crosswalk.ts +15 -1
  30. package/src/mcp/guardrails.ts +11 -6
  31. package/src/mcp/localContext.ts +7 -0
  32. package/src/mcp/localServer.test.ts +2 -0
  33. package/src/mcp/localServer.ts +14 -0
  34. package/src/mcp/server.ts +6 -0
  35. package/src/mcp/terraform/evidence.test.ts +72 -0
  36. package/src/mcp/terraform/evidence.ts +187 -0
  37. package/src/mcp/terraform/scanners.ts +86 -9
  38. package/src/mcp/terraform/tools.test.ts +96 -1
  39. package/src/mcp/terraform/tools.ts +115 -32
  40. package/src/mcp/terraform/types.ts +24 -0
  41. package/src/mcp/terraform/verification.test.ts +85 -0
  42. package/src/mcp/terraform/verification.ts +133 -0
  43. package/src/mcp/terraform.test.ts +108 -0
  44. package/src/mcp/terraform.ts +4 -0
  45. package/src/modes.test.ts +9 -1
  46. package/src/modes.ts +81 -11
  47. package/src/toolState.ts +6 -0
  48. package/src/utils/moduleFetch.test.ts +68 -0
  49. package/src/utils/moduleFetch.ts +86 -0
  50. package/src/utils/payload.test.ts +66 -1
  51. package/src/utils/payload.ts +39 -11
  52. package/src/utils/remediationCommand.test.ts +32 -0
  53. package/src/utils/remediationCommand.ts +11 -0
  54. package/src/utils/terraformMcp.ts +6 -5
  55. package/src/utils/terramendConfig.test.ts +98 -0
  56. package/src/utils/terramendConfig.ts +143 -0
  57. package/src/utils/toolLicensing.test.ts +54 -0
  58. package/src/utils/toolLicensing.ts +103 -0
  59. package/src/utils/toolSelection.test.ts +140 -0
  60. package/src/utils/toolSelection.ts +231 -0
@@ -44,6 +44,8 @@ import {
44
44
  collectProviderRequirements,
45
45
  computeRegressions,
46
46
  computeRemediationVerdict,
47
+ partitionByKey,
48
+ regressionIdsByKey,
47
49
  runScanners,
48
50
  scanFmt,
49
51
  scanTflint,
@@ -51,16 +53,20 @@ import {
51
53
  } from "#app/mcp/terraform/scanners";
52
54
  import {
53
55
  type Concern,
56
+ concernKeyOf,
54
57
  dedupe,
55
58
  isTerraformConcern,
56
59
  resolveRoots,
57
60
  run,
58
61
  SEVERITY_RANK,
59
62
  type Severity,
63
+ skipped,
60
64
  skipResult,
61
65
  sortConcerns,
62
66
  } from "#app/mcp/terraform/types";
63
67
  import { log } from "#app/utils/cli";
68
+ import { resolveModuleFetchEnv } from "#app/utils/moduleFetch";
69
+ import { resolveToolSelection } from "#app/utils/toolSelection";
64
70
 
65
71
  export const TerraformScanParams = type({
66
72
  "scan_scope?": type("'full' | 'diff'").describe(
@@ -98,7 +104,18 @@ export function TerraformScanTool(ctx: LocalToolContext) {
98
104
  const minRank = SEVERITY_RANK[threshold];
99
105
  const scope = scan_scope ?? ctx.payload.scanScope ?? "full";
100
106
 
101
- const outcomes = runScanners(cwd);
107
+ // §1.5 — the unified tool selection (licence gate + allow/deny) and the
108
+ // optional module-fetch credential, both derived from the run payload so
109
+ // the scan and its ✗→✓ verification re-scan see the identical toolchain.
110
+ const selection = resolveToolSelection(ctx.payload);
111
+ const terraformEnv = resolveModuleFetchEnv(ctx.payload);
112
+ if (selection.unknownTokens.length > 0) {
113
+ log.warning(
114
+ `» tools_enabled: ignoring unrecognised tool(s) [${selection.unknownTokens.join(", ")}]`,
115
+ );
116
+ }
117
+
118
+ const outcomes = runScanners(cwd, { selection, terraformEnv });
102
119
 
103
120
  // diff scope: keep only concerns in Terraform files changed vs the base.
104
121
  let scopeNote: string | undefined;
@@ -115,12 +132,14 @@ export function TerraformScanTool(ctx: LocalToolContext) {
115
132
  ? true
116
133
  : changed.has(c.location.file.replace(/\\/g, "/").replace(/^\.\//, ""));
117
134
 
118
- // §1.4 baseline: the full, severity-unfiltered concern-id set, captured
119
- // BEFORE any fix and computed identically to verify's `current` set so the
120
- // later regression diff (current − baseline) is apples-to-apples.
121
- ctx.toolState.baselineConcernIds = dedupe(outcomes.flatMap((o) => o.concerns)).map(
122
- (c) => c.id,
123
- );
135
+ // §1.4 baseline: the full, severity-unfiltered concern set, captured BEFORE
136
+ // any fix and computed identically to verify's `current` set so the later
137
+ // regression diff is apples-to-apples. We store BOTH the line-pinned ids and
138
+ // the line-independent keys (concernKeyOf) — verify diffs on the keys so a
139
+ // line-shifting fix can't fabricate a resolution or a regression.
140
+ const fullBaseline = dedupe(outcomes.flatMap((o) => o.concerns));
141
+ ctx.toolState.baselineConcernIds = fullBaseline.map((c) => c.id);
142
+ ctx.toolState.baselineConcernKeys = fullBaseline.map((c) => concernKeyOf(c));
124
143
 
125
144
  const all = sortConcerns(dedupe(outcomes.flatMap((o) => o.concerns)))
126
145
  .filter(isTerraformConcern)
@@ -183,6 +202,12 @@ export function TerraformScanTool(ctx: LocalToolContext) {
183
202
  grouping,
184
203
  scanners_ran: ran,
185
204
  scanners_skipped: skippedScanners,
205
+ // §1.5 — licence-aware tool posture: which non-permissive tools are off
206
+ // for want of an explicit opt-in, and which were explicitly disabled.
207
+ tool_selection: {
208
+ licence_gated: selection.gated,
209
+ disabled: selection.disabled,
210
+ },
186
211
  summary: { total: all.length, groups: groups.length, by_severity },
187
212
  groups: groups.map((g) => ({ ...g, doc_urls: docUrlsForGroup(g, all) })),
188
213
  batch_plan: batchPlan,
@@ -218,9 +243,20 @@ export function TerraformValidateTool(ctx: LocalToolContext) {
218
243
  parameters: TerraformValidateParams,
219
244
  execute: execute(async () => {
220
245
  const cwd = ctx.payload.cwd ?? process.cwd();
246
+ // §1.5 — honour the same licence gate + module-fetch credential as
247
+ // terraform_scan: tflint (MPL-2.0) runs only when opted in; validate's init
248
+ // gets the credential so a private cross-repo module resolves.
249
+ const selection = resolveToolSelection(ctx.payload);
250
+ const terraformEnv = resolveModuleFetchEnv(ctx.payload);
221
251
  // `terraform validate` runs per-root (multi-root aware); fmt + tflint are
222
252
  // recursive over the whole tree.
223
- const checks = [scanFmt(cwd), scanValidate(cwd), scanTflint(cwd)];
253
+ const checks = [
254
+ scanFmt(cwd),
255
+ scanValidate(cwd, terraformEnv),
256
+ selection.enabled("tflint")
257
+ ? scanTflint(cwd)
258
+ : skipped("tflint", selection.offReason("tflint") ?? "disabled by tools_enabled"),
259
+ ];
224
260
  const remaining = sortConcerns(dedupe(checks.flatMap((c) => c.concerns)));
225
261
  const ran = checks.filter((c) => c.ran).map((c) => c.source);
226
262
  // count of roots where terraform ran but `validate -json` couldn't be
@@ -294,22 +330,58 @@ export function TerraformVerifyRemediationTool(ctx: LocalToolContext) {
294
330
  parameters: TerraformVerifyRemediationParams,
295
331
  execute: execute(async ({ concern_ids }) => {
296
332
  const cwd = ctx.payload.cwd ?? process.cwd();
297
- const outcomes = runScanners(cwd);
298
- const currentIds = dedupe(outcomes.flatMap((o) => o.concerns)).map((c) => c.id);
299
- const current = new Set(currentIds);
300
- const verdict = computeRemediationVerdict(concern_ids, current);
301
-
302
- // §1.4 — concern ids the fix INTRODUCED (present now, absent from the
303
- // pre-fix baseline). Only computable when terraform_scan captured a
304
- // baseline this run; absent that, regressions are reported as unknown
305
- // rather than falsely empty.
306
- const baseline = ctx.toolState.baselineConcernIds;
307
- const regressions = baseline ? computeRegressions(baseline, currentIds) : [];
308
- const regressionsKnown = baseline !== undefined;
333
+ // §1.5 — re-scan with the SAME selection + module-fetch credential the
334
+ // scan used, so a licence-gated tool is consistently off across baseline +
335
+ // verification and the ✗→✓ partition stays apples-to-apples.
336
+ const outcomes = runScanners(cwd, {
337
+ selection: resolveToolSelection(ctx.payload),
338
+ terraformEnv: resolveModuleFetchEnv(ctx.payload),
339
+ });
340
+ const currentConcerns = dedupe(outcomes.flatMap((o) => o.concerns));
341
+ const currentIds = currentConcerns.map((c) => c.id);
342
+ // line-INDEPENDENT keys: verify on (source|rule|file), not the line-pinned
343
+ // id, so a fix that shifts lines (almost every fix) can't make an unfixed
344
+ // concern look resolved nor a pre-existing one look like a regression.
345
+ const currentKeys = new Set(currentConcerns.map((c) => concernKeyOf(c)));
346
+
347
+ // Map each requested id → its key via the original scan's concerns
348
+ // (lastScanConcerns). Ids we can't key-map (verify called without a prior
349
+ // scan this run — e.g. a bare findings.json) fall back to exact-id matching.
350
+ const keyById = new Map(
351
+ (ctx.toolState.lastScanConcerns ?? []).map((c) => [c.id, concernKeyOf(c)] as const),
352
+ );
353
+ const keyed: { id: string; key: string }[] = [];
354
+ const unkeyed: string[] = [];
355
+ for (const id of concern_ids) {
356
+ const key = keyById.get(id);
357
+ if (key !== undefined) keyed.push({ id, key });
358
+ else unkeyed.push(id);
359
+ }
360
+ const keyedVerdict = partitionByKey(keyed, currentKeys);
361
+ const fallbackVerdict = computeRemediationVerdict(unkeyed, new Set(currentIds));
362
+ const resolved = [...keyedVerdict.resolved, ...fallbackVerdict.resolved];
363
+ const remaining = [...keyedVerdict.remaining, ...fallbackVerdict.remaining];
364
+ const verified = remaining.length === 0;
365
+
366
+ // §1.4 — concerns the fix INTRODUCED, on the SAME line-independent key basis
367
+ // (a pre-existing concern that merely shifted lines is NOT a regression).
368
+ // Prefer baseline keys; fall back to the legacy raw-id diff only when keys
369
+ // weren't captured (no scan this run) — then report as unknown if neither is.
370
+ const baselineKeys = ctx.toolState.baselineConcernKeys;
371
+ const baselineIds = ctx.toolState.baselineConcernIds;
372
+ const regressions = baselineKeys
373
+ ? regressionIdsByKey(
374
+ currentConcerns.map((c) => ({ id: c.id, key: concernKeyOf(c) })),
375
+ new Set(baselineKeys),
376
+ )
377
+ : baselineIds
378
+ ? computeRegressions(baselineIds, currentIds)
379
+ : [];
380
+ const regressionsKnown = baselineKeys !== undefined || baselineIds !== undefined;
309
381
 
310
382
  // §5.19 — deterministic confidence from the evidence on hand.
311
383
  const confidence = computeConfidence({
312
- verified: verdict.verified,
384
+ verified,
313
385
  regressionCount: regressions.length,
314
386
  idempotent: ctx.toolState.lastIdempotent,
315
387
  blastTier: ctx.toolState.lastBlastTier,
@@ -318,17 +390,17 @@ export function TerraformVerifyRemediationTool(ctx: LocalToolContext) {
318
390
 
319
391
  const ran = outcomes.filter((o) => o.ran).map((o) => o.source);
320
392
  log.info(
321
- `» terraform_verify_remediation: ${verdict.resolved.length}/${concern_ids.length} resolved` +
322
- ` (${verdict.remaining.length} still present` +
393
+ `» terraform_verify_remediation: ${resolved.length}/${concern_ids.length} resolved` +
394
+ ` (${remaining.length} still present` +
323
395
  (regressionsKnown ? `, ${regressions.length} regression(s)` : "") +
324
396
  `) — confidence: ${confidence.level} — from [${ran.join(", ")}]`,
325
397
  );
326
398
  return toolOk({
327
- verified: verdict.verified,
328
- resolved_count: verdict.resolved.length,
329
- remaining_count: verdict.remaining.length,
330
- resolved: verdict.resolved,
331
- remaining: verdict.remaining,
399
+ verified,
400
+ resolved_count: resolved.length,
401
+ remaining_count: remaining.length,
402
+ resolved,
403
+ remaining,
332
404
  // §1.4 regression guard
333
405
  has_regressions: regressions.length > 0,
334
406
  regressions,
@@ -449,7 +521,12 @@ export function TerraformEmitSarifTool(ctx: LocalToolContext) {
449
521
  const configured = ctx.payload.severityThreshold as Severity | undefined;
450
522
  const threshold: Severity = severity_threshold ?? configured ?? "low";
451
523
  const minRank = SEVERITY_RANK[threshold];
452
- const outcomes = runScanners(cwd);
524
+ // §1.5 — same selection + module-fetch credential as terraform_scan, so the
525
+ // SARIF report mirrors exactly what a scan reports (no extra gated tools).
526
+ const outcomes = runScanners(cwd, {
527
+ selection: resolveToolSelection(ctx.payload),
528
+ terraformEnv: resolveModuleFetchEnv(ctx.payload),
529
+ });
453
530
  const concerns = sortConcerns(dedupe(outcomes.flatMap((o) => o.concerns)))
454
531
  .filter(isTerraformConcern)
455
532
  .filter((c) => SEVERITY_RANK[c.severity] >= minRank);
@@ -640,7 +717,10 @@ export function TerraformPlanTool(ctx: LocalToolContext) {
640
717
  "no cloud credentials detected — terraform plan needs provider/backend access; skipped (add AWS/Azure/GCP creds or an OIDC role to enable it)",
641
718
  );
642
719
  }
643
- const creds = collectCloudCredentials();
720
+ // §1.5 — fold the optional module-fetch credential into the plan env so
721
+ // `terraform init` can resolve a private cross-repo `git::` module. The
722
+ // GIT_CONFIG_* keys never collide with the cloud creds.
723
+ const creds = { ...collectCloudCredentials(), ...(resolveModuleFetchEnv(ctx.payload) ?? {}) };
644
724
 
645
725
  // multi-root: plan EACH root (hepcare: terraform/ + terraform/core/) and
646
726
  // aggregate. resolveRoots falls back to [cwd] for a single-root repo, so
@@ -816,8 +896,11 @@ export function ReadFindingsTool(ctx: LocalToolContext) {
816
896
  const minRank = SEVERITY_RANK[threshold];
817
897
 
818
898
  // §1.4 baseline — same role as terraform_scan's, so a regression check
819
- // after a reviewer-sourced fix has a baseline to diff against.
820
- ctx.toolState.baselineConcernIds = dedupe(parsed).map((c) => c.id);
899
+ // after a reviewer-sourced fix has a baseline to diff against. Store ids +
900
+ // line-independent keys (verify diffs on the keys).
901
+ const fullBaseline = dedupe(parsed);
902
+ ctx.toolState.baselineConcernIds = fullBaseline.map((c) => c.id);
903
+ ctx.toolState.baselineConcernKeys = fullBaseline.map((c) => concernKeyOf(c));
821
904
 
822
905
  const all = sortConcerns(dedupe(parsed))
823
906
  .filter(isTerraformConcern)
@@ -79,6 +79,30 @@ export function concernId(
79
79
  .slice(0, 12);
80
80
  }
81
81
 
82
/**
 * A LINE-INDEPENDENT identity for a concern — which rule fires in which file,
 * ignoring the exact line. Two instances of the same rule in the same file at
 * different lines share a key.
 *
 * The full content `id` keys on the line so it's unique per instance (right for
 * SARIF alerts + branch naming), but that makes it UNSTABLE under a fix: almost
 * every fix adds or removes lines, shifting every concern below it to a new line
 * → a new id. If ✗→✓ verification compared raw ids, a shifted-but-unfixed concern
 * would look RESOLVED (old id gone) and simultaneously look like a REGRESSION
 * (new id appeared) — a false attestation either way. `terraform_verify_remediation`
 * compares on this key instead, so a line shift can't fabricate a resolution or a
 * regression.
 *
 * The `source:` prefix is stripped from the rule id, so `trivy:AVD-…` and a bare
 * `AVD-…` from the same source produce the same key. The file path is
 * canonicalised before hashing (backslashes → `/`, one leading `./` dropped) so
 * that two spellings of the same path cannot yield two different keys; paths
 * that are already in that form hash exactly as before.
 *
 * @param c - the concern's `source`, `rule_id` and `location`; only
 *   `location.file` is read from the location.
 * @returns the first 12 hex characters of the SHA-1 of `source|bareRule|file`.
 */
export function concernKeyOf(c: Pick<Concern, "source" | "rule_id" | "location">): string {
  const prefix = `${c.source}:`;
  const bareRule = c.rule_id.startsWith(prefix) ? c.rule_id.slice(prefix.length) : c.rule_id;
  // Same path canonicalisation the diff-scope filter applies before comparing a
  // concern's file against the changed-file set.
  const file = c.location.file.replace(/\\/g, "/").replace(/^\.\//, "");
  return createHash("sha1")
    .update(`${c.source}|${bareRule}|${file}`)
    .digest("hex")
    .slice(0, 12);
}
105
+
82
106
  /**
83
107
  * Normalize a scanner-reported path to a repo-relative POSIX path. Each scanner
84
108
  * reports the file differently — tflint gives `main.tf` (relative), trivy a
@@ -0,0 +1,85 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import type { ScannerOutcome } from "#app/mcp/terraform/types";
3
+ import {
4
+ buildVerificationSummary,
5
+ concernVerificationStatus,
6
+ VERIFICATION_STATUS_LABEL,
7
+ VERIFICATION_STATUSES,
8
+ } from "#app/mcp/terraform/verification";
9
+
10
+ describe("the five-status taxonomy", () => {
11
+ it("has exactly the five statuses, each with a legend", () => {
12
+ expect([...VERIFICATION_STATUSES]).toEqual([
13
+ "pass",
14
+ "fail",
15
+ "not-applicable",
16
+ "inconclusive",
17
+ "not-code-verifiable",
18
+ ]);
19
+ for (const s of VERIFICATION_STATUSES) {
20
+ expect(VERIFICATION_STATUS_LABEL[s]).toBeTruthy();
21
+ }
22
+ });
23
+ });
24
+
25
+ describe("concernVerificationStatus", () => {
26
+ it("a code-verified violation is `fail`", () => {
27
+ const v = concernVerificationStatus({
28
+ rule_id: "trivy:AVD-AWS-0088",
29
+ evidence: "S3 bucket is unencrypted at rest",
30
+ });
31
+ expect(v.status).toBe("fail");
32
+ expect(v.reason).toBeUndefined();
33
+ });
34
+
35
+ it("a human-decision concern is `not-code-verifiable` with a reason", () => {
36
+ const v = concernVerificationStatus({
37
+ rule_id: "checkov:CKV_AWS_1",
38
+ evidence: "IAM policy uses a wildcard * action",
39
+ });
40
+ expect(v.status).toBe("not-code-verifiable");
41
+ expect(v.reason).toMatch(/human decision/i);
42
+ });
43
+ });
44
+
45
+ describe("buildVerificationSummary", () => {
46
+ const concerns = [
47
+ { id: "a", rule_id: "trivy:AVD-AWS-0088", evidence: "unencrypted at rest" },
48
+ { id: "b", rule_id: "checkov:CKV_AWS_1", evidence: "least-privilege wildcard policy" },
49
+ ];
50
+ const outcomes: ScannerOutcome[] = [
51
+ { source: "trivy", ran: true, concerns: [] },
52
+ { source: "checkov", ran: true, concerns: [] },
53
+ {
54
+ source: "tflint",
55
+ ran: false,
56
+ skipped_reason: "licence-gated (TFLint, MPL-2.0)",
57
+ concerns: [],
58
+ },
59
+ ];
60
+
61
+ it("classifies each concern and counts fail vs not-code-verifiable", () => {
62
+ const s = buildVerificationSummary(concerns, outcomes);
63
+ expect(s.counts.fail).toBe(1);
64
+ expect(s.counts.not_code_verifiable).toBe(1);
65
+ expect(s.concerns).toEqual([
66
+ { id: "a", status: "fail" },
67
+ { id: "b", status: "not-code-verifiable", reason: expect.stringMatching(/human decision/i) },
68
+ ]);
69
+ });
70
+
71
+ it("partitions scanners into verified (ran) vs inconclusive (skipped)", () => {
72
+ const s = buildVerificationSummary(concerns, outcomes);
73
+ expect(s.coverage.verified).toEqual(["trivy", "checkov"]);
74
+ expect(s.coverage.inconclusive).toEqual([
75
+ { source: "tflint", reason: "licence-gated (TFLint, MPL-2.0)" },
76
+ ]);
77
+ expect(s.counts.inconclusive).toBe(1);
78
+ });
79
+
80
+ it("carries the honesty note (no silent pass)", () => {
81
+ const s = buildVerificationSummary([], []);
82
+ expect(s.note).toMatch(/not proof of compliance/i);
83
+ expect(s.concerns).toEqual([]);
84
+ });
85
+ });
@@ -0,0 +1,133 @@
1
+ import { classifyRefusal } from "#app/mcp/terraform/decisions";
2
+ import type { Concern, ScannerOutcome } from "#app/mcp/terraform/types";
3
+
4
+ /**
5
+ * Five-status verification taxonomy (the auditor-credibility win the evidence
6
+ * pack + crosswalk both lean on). The point is HONESTY: never let "no finding"
7
+ * read as "compliant", and never claim the engine proved something it cannot see
8
+ * from code. Every assessment statement carries exactly one of these:
9
+ *
10
+ * - `pass` — a check ran and code-verified compliance.
11
+ * - `fail` — a check ran and code-verified a violation.
12
+ * - `not-applicable` — the control does not apply to the resources present.
13
+ * - `inconclusive` — a relevant check did NOT run (gated / not installed /
14
+ * unparseable). A coverage gap, never silently a pass.
15
+ * - `not-code-verifiable` — the control needs human / process evidence
16
+ * (governance, training, a key-policy decision); IaC
17
+ * scanning structurally cannot prove it either way.
18
+ *
19
+ * What this engine asserts today: `fail` and `not-code-verifiable` per concern,
20
+ * and `inconclusive` per scanner that didn't run. It deliberately does NOT
21
+ * fabricate `pass` / `not-applicable` for controls nothing fired on — absence of
22
+ * a finding is not proof, and over-claiming is exactly what costs credibility
23
+ * with an assessor. The two reserved statuses are part of the shared vocabulary
24
+ * for the evidence consumer (and a future full-framework crosswalk). Pure.
25
+ */
26
+
27
/**
 * The complete verification vocabulary, in canonical order. Declared `as const`
 * so the union type below is derived from the values rather than restated.
 */
export const VERIFICATION_STATUSES = [
  "pass",
  "fail",
  "not-applicable",
  "inconclusive",
  "not-code-verifiable",
] as const;

/** Union of every status literal listed in {@link VERIFICATION_STATUSES}. */
export type VerificationStatus = (typeof VERIFICATION_STATUSES)[number];

/** Short human-readable legend for each status, keyed by the status literal. */
export const VERIFICATION_STATUS_LABEL: Record<VerificationStatus, string> = {
  "pass": "a check ran and code-verified compliance",
  "fail": "a check ran and code-verified a violation",
  "not-applicable": "the control does not apply to the resources present",
  "inconclusive": "a relevant check did not run — a coverage gap, not a pass",
  "not-code-verifiable": "needs human / process evidence — IaC cannot prove it",
};

/**
 * The subset of statuses assigned to an individual concern: either the
 * violation is code-verified (`fail`) or it needs evidence code cannot supply
 * (`not-code-verifiable`).
 */
export type ConcernVerificationStatus = Extract<VerificationStatus, "fail" | "not-code-verifiable">;
48
+
49
/**
 * Assign a verification status to a single concern.
 *
 * The concern is handed to `classifyRefusal`; when that reports `refuse`, the
 * concern is `not-code-verifiable` (its remediation is a human decision the
 * engine can flag but not prove from code) and the refusal's reason is passed
 * through when one is present. Every other concern is a code-verified `fail`.
 *
 * @param concern - the concern's `rule_id` and `evidence`.
 * @returns the status, plus `reason` only when the refusal supplied a non-empty one.
 */
export function concernVerificationStatus(concern: Pick<Concern, "rule_id" | "evidence">): {
  status: ConcernVerificationStatus;
  reason?: string;
} {
  const verdict = classifyRefusal(concern);

  // Not in the refusal set: the scanner finding stands as a verified violation.
  if (!verdict.refuse) {
    return { status: "fail" };
  }

  // Omit the `reason` key entirely rather than emitting it empty/undefined.
  if (verdict.reason) {
    return { status: "not-code-verifiable", reason: verdict.reason };
  }
  return { status: "not-code-verifiable" };
}
67
+
68
/** The verification outcome recorded for one concern. */
export interface VerifiedConcern {
  /** id of the concern this outcome belongs to. */
  id: string;
  status: ConcernVerificationStatus;
  /** present only when the classification supplied a reason. */
  reason?: string;
}

/** Roll-up of a scan: per-concern statuses, tallies, and scanner coverage. */
export interface VerificationSummary {
  /** one entry per concern, carrying its verification status. */
  concerns: VerifiedConcern[];
  counts: {
    fail: number;
    not_code_verifiable: number;
    /** number of scanners that did not run — each one is a coverage gap. */
    inconclusive: number;
  };
  coverage: {
    /** sources of the scanners that ran. */
    verified: string[];
    /** scanners that did NOT run, with why — INCONCLUSIVE, never a pass. */
    inconclusive: Array<{ source: string; reason: string }>;
  };
  /** caveat to be read alongside the statuses. */
  note: string;
}

/** Fixed caveat attached to every summary. */
const HONESTY_NOTE = [
  "Statuses are code-verified only. A scanner that did not run leaves its checks ",
  "INCONCLUSIVE (a coverage gap, not a pass); controls needing human/process ",
  "evidence are NOT-CODE-VERIFIABLE. Absence of a finding is not proof of compliance.",
].join("");
97
+
98
/**
 * Build the verification summary for a scan.
 *
 * Each concern is classified with `concernVerificationStatus`, and each scanner
 * outcome is sorted into "ran" (its source is listed as verified) or "did not
 * run" (listed as inconclusive together with its `skipped_reason`, or the text
 * "did not run" when no reason was recorded). Input order is preserved in every
 * output list.
 *
 * @param concerns - the concerns to classify (`id`, `rule_id`, `evidence`).
 * @param outcomes - the scanner outcomes whose `ran` flag decides coverage.
 * @returns per-concern statuses, their counts, the coverage partition, and the
 *   fixed honesty note.
 */
export function buildVerificationSummary(
  concerns: Pick<Concern, "id" | "rule_id" | "evidence">[],
  outcomes: ScannerOutcome[],
): VerificationSummary {
  const classified: VerifiedConcern[] = [];
  let failCount = 0;
  let notCodeVerifiableCount = 0;

  for (const concern of concerns) {
    const outcome = concernVerificationStatus(concern);
    // Only carry `reason` when there is one, so the key is absent otherwise.
    classified.push(
      outcome.reason
        ? { id: concern.id, status: outcome.status, reason: outcome.reason }
        : { id: concern.id, status: outcome.status },
    );
    if (outcome.status === "fail") {
      failCount += 1;
    } else if (outcome.status === "not-code-verifiable") {
      notCodeVerifiableCount += 1;
    }
  }

  const ranSources: string[] = outcomes.filter((o) => o.ran).map((o) => o.source);
  const coverageGaps: { source: string; reason: string }[] = outcomes
    .filter((o) => !o.ran)
    .map((o) => ({ source: o.source, reason: o.skipped_reason ?? "did not run" }));

  return {
    concerns: classified,
    counts: {
      fail: failCount,
      not_code_verifiable: notCodeVerifiableCount,
      inconclusive: coverageGaps.length,
    },
    coverage: { verified: ranSources, inconclusive: coverageGaps },
    note: HONESTY_NOTE,
  };
}
@@ -20,6 +20,7 @@ import {
20
20
  computeCostDelta,
21
21
  computeRegressions,
22
22
  computeRemediationVerdict,
23
+ concernKeyOf,
23
24
  groupConcerns,
24
25
  groupConcernsByRule,
25
26
  isPureMovePlan,
@@ -39,10 +40,12 @@ import {
39
40
  parseTflintOutput,
40
41
  parseTrivyOutput,
41
42
  parseValidateOutput,
43
+ partitionByKey,
42
44
  planBatches,
43
45
  preventiveControlFor,
44
46
  type RootPlan,
45
47
  rebaseConcern,
48
+ regressionIdsByKey,
46
49
  resolveRoots,
47
50
  resourceTypeOf,
48
51
  ruleDocUrl,
@@ -1235,6 +1238,111 @@ describe("computeRegressions (§1.4)", () => {
1235
1238
  });
1236
1239
  });
1237
1240
 
1241
+ describe("concernKeyOf (line-independent identity — verification integrity)", () => {
1242
+ const at = (line: number | null): Pick<Concern, "source" | "rule_id" | "location"> => ({
1243
+ source: "checkov",
1244
+ rule_id: "checkov:CKV_AWS_23",
1245
+ location: { file: "main.tf", line },
1246
+ });
1247
+
1248
+ it("is the SAME for the same rule+file at different lines (the core property)", () => {
1249
+ // this is what makes ✗→✓ verification survive a line-shifting fix.
1250
+ expect(concernKeyOf(at(76))).toBe(concernKeyOf(at(95)));
1251
+ expect(concernKeyOf(at(76))).toBe(concernKeyOf(at(null)));
1252
+ });
1253
+
1254
+ it("differs when the rule or the file differs", () => {
1255
+ expect(concernKeyOf(at(10))).not.toBe(
1256
+ concernKeyOf({
1257
+ source: "checkov",
1258
+ rule_id: "checkov:CKV_AWS_8",
1259
+ location: { file: "main.tf", line: 10 },
1260
+ }),
1261
+ );
1262
+ expect(concernKeyOf(at(10))).not.toBe(
1263
+ concernKeyOf({
1264
+ source: "checkov",
1265
+ rule_id: "checkov:CKV_AWS_23",
1266
+ location: { file: "vars.tf", line: 10 },
1267
+ }),
1268
+ );
1269
+ });
1270
+
1271
+ it("ignores the `source:` rule prefix so it matches the id's bare-rule normalization", () => {
1272
+ const withPrefix = concernKeyOf({
1273
+ source: "trivy",
1274
+ rule_id: "trivy:AVD-AWS-0130",
1275
+ location: { file: "main.tf", line: 5 },
1276
+ });
1277
+ const bare = concernKeyOf({
1278
+ source: "trivy",
1279
+ rule_id: "AVD-AWS-0130",
1280
+ location: { file: "main.tf", line: 5 },
1281
+ });
1282
+ expect(withPrefix).toBe(bare);
1283
+ });
1284
+ });
1285
+
1286
+ describe("partitionByKey (✗→✓ on line-independent keys)", () => {
1287
+ it("marks a concern RESOLVED only when its key is gone from the re-scan", () => {
1288
+ const v = partitionByKey(
1289
+ [
1290
+ { id: "id-imds", key: "k-imds" },
1291
+ { id: "id-cidr", key: "k-cidr" },
1292
+ ],
1293
+ new Set(["k-cidr"]), // imds key gone (fixed), cidr key still present
1294
+ );
1295
+ expect(v.resolved).toEqual(["id-imds"]);
1296
+ expect(v.remaining).toEqual(["id-cidr"]);
1297
+ expect(v.verified).toBe(false);
1298
+ });
1299
+
1300
+ it("the regression-bug scenario: a line-SHIFTED unfixed concern is NOT falsely resolved", () => {
1301
+ // The fix added lines above an unfixed concern, so its id changed (line 76 →
1302
+ // 95) but its KEY (source|rule|file) is stable. Requesting the old id with the
1303
+ // old key, and the re-scan still carrying that key, must report it REMAINING.
1304
+ const v = partitionByKey([{ id: "id-line76", key: "k-cidr" }], new Set(["k-cidr"]));
1305
+ expect(v.resolved).toEqual([]);
1306
+ expect(v.remaining).toEqual(["id-line76"]);
1307
+ });
1308
+ });
1309
+
1310
+ describe("regressionIdsByKey (§1.4 on line-independent keys)", () => {
1311
+ it("does NOT flag a pre-existing concern that merely shifted lines", () => {
1312
+ // baseline had key k-cidr; after the fix it's still present (at a new line/id)
1313
+ // — same key, so NOT a regression (the raw-id diff would have flagged it).
1314
+ const regressions = regressionIdsByKey(
1315
+ [{ id: "id-line95", key: "k-cidr" }],
1316
+ new Set(["k-cidr"]),
1317
+ );
1318
+ expect(regressions).toEqual([]);
1319
+ });
1320
+
1321
+ it("flags a genuinely new (rule, file) defect the fix introduced", () => {
1322
+ const regressions = regressionIdsByKey(
1323
+ [
1324
+ { id: "id-old", key: "k-old" },
1325
+ { id: "id-new", key: "k-new" },
1326
+ ],
1327
+ new Set(["k-old"]),
1328
+ );
1329
+ expect(regressions).toEqual(["id-new"]);
1330
+ });
1331
+
1332
+ it("returns one representative id per new key (dedups by key) and sorts", () => {
1333
+ const regressions = regressionIdsByKey(
1334
+ [
1335
+ { id: "id-b", key: "k-new" },
1336
+ { id: "id-a", key: "k-new" },
1337
+ { id: "id-z", key: "k-other" },
1338
+ ],
1339
+ new Set<string>(),
1340
+ );
1341
+ // one id per new key, sorted; k-new keeps its FIRST-seen id (id-b)
1342
+ expect(regressions).toEqual(["id-b", "id-z"]);
1343
+ });
1344
+ });
1345
+
1238
1346
  describe("classifyAutonomy (§3.9)", () => {
1239
1347
  const c = (severity: Concern["severity"], category: Concern["category"]) => ({
1240
1348
  severity,
@@ -11,13 +11,17 @@
11
11
  * findings — reviewer findings + SARIF ingest/emit
12
12
  * plan — plan parsing + destroy/blast/stability/aggregation
13
13
  * tools — the MCP Tool factories + their *Params schemas
14
+ * verification — the five-status taxonomy (fail / not-code-verifiable / …)
15
+ * evidence — the backend-free compliance evidence bundle + emitter
14
16
  */
15
17
 
16
18
  export * from "#app/mcp/terraform/cost";
17
19
  export * from "#app/mcp/terraform/currency";
18
20
  export * from "#app/mcp/terraform/decisions";
21
+ export * from "#app/mcp/terraform/evidence";
19
22
  export * from "#app/mcp/terraform/findings";
20
23
  export * from "#app/mcp/terraform/plan";
21
24
  export * from "#app/mcp/terraform/scanners";
22
25
  export * from "#app/mcp/terraform/tools";
23
26
  export * from "#app/mcp/terraform/types";
27
+ export * from "#app/mcp/terraform/verification";
package/src/modes.test.ts CHANGED
@@ -14,9 +14,11 @@ const EXPECTED_MODE_NAMES = [
14
14
  "AddressReviews",
15
15
  "Review",
16
16
  "IncrementalReview",
17
+ "SummarizePr",
17
18
  "Plan",
18
19
  "Fix",
19
20
  "ResolveConflicts",
21
+ "Assess",
20
22
  "Remediate",
21
23
  "RefreshRemediation",
22
24
  "GenerateTerraform",
@@ -96,7 +98,13 @@ describe("static mode exports", () => {
96
98
  });
97
99
 
98
100
  it("NON_COMMITTING_MODES only names real built-in modes", () => {
99
- expect([...NON_COMMITTING_MODES].sort()).toEqual(["IncrementalReview", "Plan", "Review"]);
101
+ expect([...NON_COMMITTING_MODES].sort()).toEqual([
102
+ "Assess",
103
+ "IncrementalReview",
104
+ "Plan",
105
+ "Review",
106
+ "SummarizePr",
107
+ ]);
100
108
  for (const mode of NON_COMMITTING_MODES) {
101
109
  expect(BUILTIN_MODE_NAMES).toContain(mode);
102
110
  }