npm - @pugi/cli - Versions diffs - 0.1.0-beta.93 → 0.1.0-beta.95 - Mend

@pugi/cli 0.1.0-beta.93 → 0.1.0-beta.95

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/dist/commands/retro.js +210 -0
package/dist/core/diagnostics/probes/sandbox.js +65 -33
package/dist/core/engine/native-pugi.js +184 -10
package/dist/core/engine/tool-bridge.js +35 -0
package/dist/core/engine/verification-patterns.js +9 -9
package/dist/core/mcp/orchestrator-config.js +192 -0
package/dist/core/mcp/orchestrator-tools.js +147 -3
package/dist/core/pugi-gitignore.js +52 -0
package/dist/core/repl/engine-bridge.js +199 -0
package/dist/core/repl/session.js +395 -6
package/dist/core/repl/tool-route.js +382 -0
package/dist/core/retro/git-collector.js +251 -0
package/dist/core/retro/health-card.js +25 -0
package/dist/core/retro/metrics.js +342 -0
package/dist/core/retro/narrative.js +249 -0
package/dist/core/retro/plane-collector.js +274 -0
package/dist/core/retro/pr-issue-link.js +65 -0
package/dist/core/retro/types.js +16 -0
package/dist/core/sandboxing/adapter.js +29 -0
package/dist/core/sandboxing/index.js +49 -0
package/dist/core/sandboxing/none.js +19 -0
package/dist/core/sandboxing/seatbelt.js +183 -0
package/dist/core/session.js +27 -0
package/dist/core/settings.js +22 -0
package/dist/runtime/cli.js +167 -33
package/dist/runtime/commands/mcp.js +64 -8
package/dist/runtime/deprecation-warning.js +69 -0
package/dist/runtime/headless.js +8 -3
package/dist/runtime/stream-renderer.js +195 -0
package/dist/runtime/version.js +1 -1
package/dist/tui/agent-tree.js +11 -0
package/dist/tui/ask-user-question-chips.js +1 -1
package/dist/tui/multi-file-diff-approval.js +3 -3
package/dist/tui/repl-render.js +42 -0
package/package.json +2 -2

package/dist/commands/retro.js ADDED Viewed

@@ -0,0 +1,210 @@
+import { jsxs as _jsxs, jsx as _jsx } from "react/jsx-runtime";
+import { Box, Text, render } from 'ink';
+import { collectGitContext, countCommitsAheadOfBase, } from '../core/retro/git-collector.js';
+import { ensurePugiGitIgnore } from '../core/pugi-gitignore.js';
+import { computeMetrics } from '../core/retro/metrics.js';
+import { persistRetro } from '../core/retro/narrative.js';
+import { collectPlaneSlice, postRetroToPlane, resolvePlaneConfig, } from '../core/retro/plane-collector.js';
+import { enrichLinks } from '../core/retro/pr-issue-link.js';
+import { computeHealthCard } from '../core/retro/health-card.js';
+/** Parse a `<N>d` / `<N>h` token (e.g. `7d`, `14d`, `30d`, `24h`) into `{ days, label }`;
+ * hour tokens become fractional days (N / 24). Returns `undefined` for a missing, malformed, or non-positive token - no default is applied here.
+ */
+function parseDurationToken(token) {
+    if (!token)
+        return undefined;
+    const match = /^(\d+)(h|d)$/.exec(token); // the whole token must be digits followed by a single `h` or `d`
+    if (!match)
+        return undefined;
+    const value = Number.parseInt(match[1] ?? '0', 10);
+    const unit = match[2];
+    if (!Number.isFinite(value) || value <= 0)
+        return undefined;
+    const days = unit === 'h' ? value / 24 : value;
+    return { days, label: token }; // label echoes the raw token
+}
+function buildWindow(durationDays, label, now) { // Build the { since, until, label, days } window that ends at `now` and reaches back `durationDays` days.
+    const until = now;
+    const sinceMs = until.getTime() - durationDays * 24 * 60 * 60 * 1000;
+    const since = new Date(sinceMs);
+    // Midnight-align (local time) the lower bound to keep `--since` deterministic per day; this only moves `since` earlier, so the window can be longer than requested.
+    since.setHours(0, 0, 0, 0);
+    return { since, until, label, days: Math.max(1, Math.round(durationDays)) }; // `days` is a whole number of at least 1, so sub-day windows report 1
+}
+function buildPriorWindow(current) { // Build the comparison window that ends where `current` starts and reaches back `current.days` days.
+    const until = new Date(current.since.getTime()); // copy, so the two windows do not share a Date instance
+    const sinceMs = until.getTime() - current.days * 24 * 60 * 60 * 1000;
+    const since = new Date(sinceMs);
+    since.setHours(0, 0, 0, 0); // align the lower bound to local midnight
+    return { since, until, label: `prior ${current.label}`, days: current.days };
+}
+function parseRetroArgs(rawArgs, now) { // Parse the `pugi retro` arguments into { window, compare, enrichPlane, postPlane }.
+    const args = [...rawArgs];
+    const postPlane = args.includes('--post-plane');
+    const enrichPlane = args.includes('--plane') || postPlane; // --post-plane implies Plane enrichment
+    const positional = args.filter((a) => !a.startsWith('-')); // anything not starting with '-' counts as positional
+    let compare = false;
+    let durationToken;
+    if (positional[0] === 'compare') {
+        compare = true;
+        durationToken = positional[1];
+    }
+    else {
+        durationToken = positional[0];
+    }
+    const parsed = parseDurationToken(durationToken) ?? { days: 7, label: '7d' }; // a missing or invalid duration falls back to 7 days
+    return {
+        window: buildWindow(parsed.days, parsed.label, now),
+        compare,
+        enrichPlane,
+        postPlane,
+    };
+}
+function SummaryCard(props) { // Ink component: shows the retro metrics, optional Plane counts, and output paths in a column with only a left border.
+    const { persisted, metrics, plane, planePostUrl } = props;
+    return (_jsxs(Box, { flexDirection: "column", borderStyle: "single", borderRight: false, borderTop: false, borderBottom: false, paddingLeft: 1, children: [_jsxs(Text, { bold: true, children: ["pugi retro \u00B7 ", metrics.window.label] }), _jsxs(Text, { dimColor: true, children: ["Branch ", metrics.branch.current, " over ", metrics.branch.base] }), _jsxs(Text, { children: [metrics.commits.total, " commits \u00B7 +", metrics.loc.insertions, " / -", metrics.loc.deletions, " LOC \u00B7 ", metrics.activeDays, " active days"] }), _jsxs(Text, { children: ["Focus ", metrics.focus.score, "% on ", metrics.focus.topDir ?? 'n/a', " \u00B7 Streak ", metrics.streak.personalDays, "d personal / ", metrics.streak.teamDays, "d team"] }), metrics.shipOfTheWeek ? (_jsxs(Text, { children: ["Ship of the week: ", metrics.shipOfTheWeek.subject.slice(0, 60)] })) : null, plane ? (_jsxs(Text, { children: ["Plane: closed ", plane.closedIssues.length, " \u00B7 created ", plane.createdIssues.length, " \u00B7 oversized modules ", plane.oversizedModules.length] })) : null, _jsxs(Text, { dimColor: true, children: ["Markdown: ", persisted.markdownPath] }), _jsxs(Text, { dimColor: true, children: ["JSON:     ", persisted.jsonPath] }), planePostUrl ? _jsxs(Text, { children: ["Posted to Plane: ", planePostUrl] }) : null] }));
+}
+function renderSummary(props) { // Render the summary card once through Ink, then unmount it straight away.
+    const app = render(_jsx(SummaryCard, { ...props }));
+    app.unmount(); // NOTE(review): presumably so this one-shot command does not leave an Ink app mounted - confirm
+}
+export async function runRetroCommand(ctx) { // Entry point for `pugi retro`; returns exit code 2 outside a git workspace and 0 after a completed run.
+    const now = ctx.now ?? new Date();
+    const parsed = parseRetroArgs(ctx.args, now);
+    const gitCtx = await collectGitContext({ cwd: ctx.cwd, window: parsed.window });
+    if (!gitCtx.hasGit) {
+        const msg = 'pugi retro: not a git workspace - initialise git or cd into one.';
+        if (ctx.flags.json) {
+            ctx.io.write(`${JSON.stringify({ ok: false, error: 'no_git_workspace' })}\n`);
+        }
+        else {
+            ctx.io.writeError(msg);
+        }
+        return 2;
+    }
+    // Triple-review P1.2: before we write anything
+    // under `.pugi/retros/`, guarantee `.gitignore` covers `.pugi/`. Without
+    // this, the first customer run of `pugi retro` in a fresh repo would
+    // leave retros (and any future `.pugi/settings.json` secret store)
+    // tracked by git on the next `git add -A`. Idempotent.
+    //
+    // Round 2 P1 (2026-06-04): surface failure to stderr — a silent catch
+    // defeats the gate's purpose. If `.gitignore` is read-only or perms
+    // refuse, the operator must know retros may be tracked by git.
+    const gitIgnoreCreated = [];
+    const gitIgnoreSkipped = [];
+    try {
+        ensurePugiGitIgnore(ctx.cwd, gitIgnoreCreated, gitIgnoreSkipped);
+    }
+    catch (err) {
+        const reason = err instanceof Error ? err.message : String(err);
+        ctx.io.writeError(`pugi retro: could not update .gitignore (${reason}). ` +
+            `Manually add ".pugi/" to .gitignore so retros are not tracked.`);
+    }
+    const toBaseHeadCount = await countCommitsAheadOfBase(ctx.cwd, gitCtx.baseBranch, parsed.window.since);
+    const metrics = computeMetrics({
+        window: parsed.window,
+        currentBranch: gitCtx.currentBranch,
+        baseBranch: gitCtx.baseBranch,
+        toBaseHeadCount,
+        currentUserName: gitCtx.userName,
+        currentUserEmail: gitCtx.userEmail,
+        commits: gitCtx.commits,
+    });
+    let compare; // stays undefined unless the `compare` positional was given
+    if (parsed.compare) {
+        const priorWindow = buildPriorWindow(parsed.window);
+        const priorCtx = await collectGitContext({ cwd: ctx.cwd, window: priorWindow });
+        const priorAhead = await countCommitsAheadOfBase(ctx.cwd, gitCtx.baseBranch, priorWindow.since);
+        const priorMetrics = computeMetrics({
+            window: priorWindow,
+            currentBranch: gitCtx.currentBranch,
+            baseBranch: gitCtx.baseBranch,
+            toBaseHeadCount: priorAhead,
+            currentUserName: gitCtx.userName,
+            currentUserEmail: gitCtx.userEmail,
+            commits: priorCtx.commits,
+        });
+        compare = { current: metrics, prior: priorMetrics };
+    }
+    let plane; // stays undefined when Plane enrichment is off or fails
+    let planeUnavailableReason;
+    if (parsed.enrichPlane) {
+        const cfgResult = resolvePlaneConfig(ctx.cwd);
+        if (!cfgResult.ok) {
+            planeUnavailableReason = cfgResult.reason;
+        }
+        else {
+            try {
+                const slice = await collectPlaneSlice({
+                    config: cfgResult.config,
+                    since: parsed.window.since,
+                });
+                const links = enrichLinks(gitCtx.commits, slice.closedIssues.concat(slice.createdIssues));
+                const health = computeHealthCard(slice.modules);
+                plane = {
+                    ...slice,
+                    prToIssueLinks: links,
+                    oversizedModules: health.oversized,
+                };
+            }
+            catch (err) {
+                planeUnavailableReason = err instanceof Error ? err.message : String(err); // a Plane failure degrades the retro instead of aborting it
+            }
+        }
+    }
+    const persisted = persistRetro({
+        root: ctx.cwd,
+        metrics,
+        plane,
+        compare,
+        now,
+    });
+    let planePostUrl;
+    if (parsed.postPlane) {
+        if (!plane) {
+            ctx.io.writeError(`pugi retro --post-plane: Plane unavailable (${planeUnavailableReason ?? 'unknown'}).`);
+        }
+        else {
+            const cfgResult = resolvePlaneConfig(ctx.cwd);
+            if (cfgResult.ok) { // NOTE(review): a failed re-resolve is skipped without any message here
+                try {
+                    const result = await postRetroToPlane({
+                        config: cfgResult.config,
+                        markdown: persisted.markdown,
+                        sequence: persisted.sequence,
+                        dateLabel: persisted.dateLabel,
+                    });
+                    planePostUrl = result.url;
+                    if (result.alreadyExists && !ctx.flags.json) { // keep stdout a single JSON document in --json mode; the URL is still reported as planePostUrl
+                        ctx.io.write(`pugi retro: already exists at ${result.url}\n`);
+                    }
+                }
+                catch (err) {
+                    const msg = err instanceof Error ? err.message : String(err);
+                    ctx.io.writeError(`pugi retro --post-plane failed: ${msg}`);
+                }
+            }
+        }
+    }
+    if (ctx.flags.json) {
+        ctx.io.write(`${JSON.stringify({
+            ok: true,
+            markdownPath: persisted.markdownPath,
+            jsonPath: persisted.jsonPath,
+            sequence: persisted.sequence,
+            metrics,
+            plane: plane ?? null,
+            planePostUrl: planePostUrl ?? null,
+            planeUnavailableReason: planeUnavailableReason ?? null,
+        }, null, 2)}\n`);
+    }
+    else {
+        renderSummary({ persisted, metrics, plane, planePostUrl });
+        if (planeUnavailableReason && !plane) {
+            ctx.io.writeError(`pugi retro: Plane integration unavailable (${planeUnavailableReason}).`);
+        }
+    }
+    return 0;
+}
+//# sourceMappingURL=retro.js.map

package/dist/core/diagnostics/probes/sandbox.js CHANGED Viewed

@@ -1,40 +1,72 @@
 /**
  * SANDBOX probe — surfaces the current OS-level sandbox posture (
- * spec: sandbox-adapter.ts macOS Seatbelt / Linux Landlock / WSL2 detect).
+ * Trust Sprint item 6: macOS Seatbelt adapter wired; Linux Landlock
+ * and Docker variants still backlog).
  *
- * Pugi sandbox enforcement is tracked under task #5 (P0/L1+L16). Until
- * that lands, this probe reports the platform's available primitive and
- * a clear "not yet armed" warning so the operator sees the gap in
- * `pugi doctor` instead of assuming bash dispatches run jailed.
- *
- * When the sandbox does ship, the probe upgrade path:
- *  - Replace the static "not_armed" detail with a real config probe
- *    (read .pugi/settings.json::sandbox.mode, verify the OS primitive
- *    resolves, return ok when both line up).
- *  - Keep the same probe NAME so doctor output / spec assertions
- *    don't churn.
+ * Sources `bash.sandbox` from `.pugi/settings.json`, defaults to
+ * `none`. When set to `macOS-seatbelt` the probe verifies the OS
+ * primitive is callable and reports `ok` (armed) or `error`
+ * (configured-but-unavailable). When set to `none` the probe reports
+ * `warn` with the operator-readable reason "policy 'none' selected".
  */
-export function probeSandbox(_ctx) {
-    const platform = process.platform;
-    let availablePrimitive;
-    switch (platform) {
-        case 'darwin':
-            availablePrimitive = 'macOS Seatbelt (/usr/bin/sandbox-exec)';
-            break;
-        case 'linux':
-            availablePrimitive = 'Linux Landlock / nsjail (kernel-dependent)';
-            break;
-        case 'win32':
-            availablePrimitive = 'Windows AppContainer / Job Object';
-            break;
-        default:
-            availablePrimitive = `unknown platform ${platform}`;
+import { homedir } from 'node:os';
+import { loadSettings } from '../../settings.js';
+import { probeSandbox as probeSandboxAdapter } from '../../sandboxing/index.js';
+export function probeSandbox(ctx) { // SANDBOX probe: reports the posture of the configured `bash.sandbox` mode as a warn or error result.
+    const settings = loadSettings(ctx.cwd);
+    const configured = (settings.bash?.sandbox ?? 'none');
+    const home = ctx.home || homedir();
+    const extraWritePaths = [`${home}/.pugi`];
+    try {
+        const state = probeSandboxAdapter({
+            mode: configured,
+            workspaceRoot: ctx.cwd,
+            extraWritePaths,
+        });
+        if (state.armed) {
+            // Discipline-gap honesty (Trust Sprint thesis): the adapter
+            // probes ok, but spawn-wrap is NOT yet wired into the bash
+            // runner (that file is owned by another agent on PUGI-VERIFY-
+            // GATE). Reporting status=ok would overstate the posture — an
+            // operator reading 'armed' would assume their bash calls were
+            // jailed when they still run with full process privileges. We
+            // surface 'warn' with a precise reason instead and flip to 'ok'
+            // when the runner indirection lands.
+            return {
+                name: 'SANDBOX',
+                status: 'warn',
+                detail: `configured (mode=${state.mode}) but spawn-wrap not yet wired — bash dispatches still run with full process privileges. ` +
+                    `Adapter posture: ${state.details.join('; ')}`,
+                remediation: 'The seatbelt adapter is in-tree and exercised by tests; the bash runner indirection that consumes it lands in a follow-up. ' +
+                    'Bash classifier denylist + permission FSM remain in force in the meantime.',
+            };
+        }
+        // Not armed — distinguish "operator chose none" from "configured
+        // mode failed". The latter is an error; the former is a documented
+        // posture and stays a warning.
+        if (state.mode === 'none') {
+            return {
+                name: 'SANDBOX',
+                status: 'warn',
+                detail: `not armed: ${state.reason ?? 'mode none'}`,
+                remediation: 'Set `bash.sandbox = "macOS-seatbelt"` in .pugi/settings.json on macOS to enable workspace-scoped write isolation. ' +
+                    'Bash classifier denylist + permission FSM still apply.',
+            };
+        }
+        return {
+            name: 'SANDBOX',
+            status: 'error',
+            detail: `configured mode "${state.mode}" failed to arm: ${state.reason ?? 'unknown'}`,
+            remediation: 'Set `bash.sandbox` to a supported mode for this platform or remove the key to fall back to "none".',
+        };
+    }
+    catch (err) { // a thrown value need not be an Error, so its message is read defensively below
+        return {
+            name: 'SANDBOX',
+            status: 'error',
+            detail: `sandbox probe threw: ${err instanceof Error ? err.message : String(err)}`,
+            remediation: 'Remove the bash.sandbox key from .pugi/settings.json or set it to "none".',
+        };
     }
-    return {
-        name: 'SANDBOX',
-        status: 'warn',
-        detail: `OS primitive available: ${availablePrimitive}. Sandbox enforcement NOT yet armed (Pugi task #5 pending — bash tool currently runs с full process privileges).`,
-        remediation: 'Bash tool dispatches run unsandboxed today. Track progress on the OS-level sandbox adapter via the operator-trust roadmap. Until then, rely on the bash classifier denylist + permission FSM.',
-    };
 }
 //# sourceMappingURL=sandbox.js.map

package/dist/core/engine/native-pugi.js CHANGED Viewed

@@ -5,6 +5,7 @@ import { AsyncEventQueue, EngineEventEmitter, modelSupportsThinking, runEngineLo
 import { FileReadCache } from '../file-cache.js';
 import { loadSettings } from '../settings.js';
 import { openSession, recordToolCall, recordToolResult } from '../session.js';
+import { REGRESSION_DISPUTE_PHRASES } from './verification-patterns.js';
 import { prewarmRealDispatch } from '../subagents/dispatcher.js';
 import { resolveAutoCompactConfig, resolveBudget } from './budgets.js';
 import { maybeCompact } from './auto-compact.js';
@@ -936,15 +937,32 @@ export class NativePugiEngineAdapter {
                 return;
             }
             // Translate the loop outcome into an EngineResult.
-            // `aborted` maps to `blocked`
-            // because the operator chose the outcome, same shape as
-            // budget_exhausted / tool_refused.
-            const status = finalOutcome.status === 'completed'
+            // `aborted` maps to `blocked` because the operator chose the
+            // outcome, same shape as budget_exhausted / tool_refused.
+            //
+            // PUGI-VERIFY-GATE: the verification gate runs AFTER this
+            // base mapping. When the agent ran verification commands and
+            // any exited non-zero, the loop's `completed` collapses to
+            // `failed` (the agent's claim of "done" is unverified). When
+            // the loop `completed` but no verification command ever ran,
+            // we surface `needs_verification` (CLI exit 2) so the operator
+            // sees the missing signal instead of false confidence. The
+            // gate is non-negotiable per the contract: `done` is reserved
+            // for `verified: true` outcomes.
+            const baseStatus = finalOutcome.status === 'completed'
                 ? 'done'
                 : finalOutcome.status === 'failed'
                     ? 'failed'
                     : 'blocked';
-            const summaryPrefix = finalOutcome.status === 'completed'
+            const filesChangedList = Array.from(filesChanged).sort();
+            const verification = computeVerificationOutcome({
+                ledger: session.verificationLedger,
+                baseStatus,
+                finalText: finalOutcome.finalText,
+                filesChanged: filesChangedList,
+            });
+            const status = verification.status;
+            const summaryPrefix = status === 'done'
                 ? ''
                 : finalOutcome.status === 'budget_exhausted'
                     ? '[budget_exhausted] '
@@ -952,8 +970,11 @@ export class NativePugiEngineAdapter {
                         ? '[plan_mode_refused] '
                         : finalOutcome.status === 'aborted'
                             ? '[operator_aborted] '
-                            : '[failed] ';
-            const filesChangedList = Array.from(filesChanged).sort();
+                            : status === 'needs_verification'
+                                ? '[needs_verification] '
+                                : verification.unverifiedReason === 'verification_command_failed'
+                                    ? '[verification_failed] '
+                                    : '[failed] ';
             appendSessionMirror(sessionEventsPath, {
                 type: 'outcome',
                 status: finalOutcome.status,
@@ -1014,6 +1035,18 @@ export class NativePugiEngineAdapter {
             const synthesisedFromFiles = finalOutcome.finalText.trim() === '' && filesChangedList.length > 0
                 ? `Updated ${filesChangedList.length} file(s): ${filesChangedList.slice(0, 5).join(', ')}${filesChangedList.length > 5 ? ` (+${filesChangedList.length - 5} more)` : ''}`
                 : '';
+            // PUGI-VERIFY-GATE: thread verification state into the risks
+            // array so a consumer reading only the legacy fields still
+            // gets a human-readable summary of what was not verified.
+            const baseRisks = finalOutcome.status === 'completed' && status === 'done'
+                ? []
+                : [finalOutcome.reason ?? `outcome=${finalOutcome.status}`];
+            if (verification.unverifiedReason && status !== 'done') {
+                baseRisks.push(`unverified: ${verification.unverifiedReason}`);
+            }
+            if (verification.regressionOwnershipDispute) {
+                baseRisks.push('regression_ownership_dispute: agent disclaimed ownership of failing verification');
+            }
             yield {
                 type: 'result',
                 result: {
@@ -1022,9 +1055,7 @@ export class NativePugiEngineAdapter {
                     filesChanged: filesChangedList,
                     patchRefs: [],
                     testsRun: [],
-                    risks: finalOutcome.status === 'completed'
-                        ? []
-                        : [finalOutcome.reason ?? `outcome=${finalOutcome.status}`],
+                    risks: baseRisks,
                     eventRefs: [
                         `tool_calls=${finalOutcome.toolCallCount}`,
                         `turns=${finalOutcome.turnsUsed}`,
@@ -1039,7 +1070,22 @@ export class NativePugiEngineAdapter {
                         `session=${session.id}`,
                         `ctx=${ctx.sessionId}`,
                         `mirror=${sessionEventsPath}`,
+                        // PUGI-VERIFY-GATE: machine-readable verification echo so
+                        // downstream consumers (MCP wrapper, cabinet UI, audit
+                        // pipeline) can branch on the gate state without parsing
+                        // the new structured fields.
+                        `verified=${verification.verified}`,
+                        `verification_count=${verification.verificationCommands.length}`,
                     ],
+                    verified: verification.verified,
+                    verificationCommands: verification.verificationCommands,
+                    verificationFailures: verification.verificationFailures,
+                    ...(verification.unverifiedReason !== undefined
+                        ? { unverifiedReason: verification.unverifiedReason }
+                        : {}),
+                    ...(verification.regressionOwnershipDispute
+                        ? { regressionOwnershipDispute: true }
+                        : {}),
                 },
             };
         }
@@ -1439,4 +1485,132 @@ async function expandHierarchyWithImports(hierarchy, cwd) {
     }
     return out;
 }
+export function computeVerificationOutcome(input) { // Gate the engine's base status on the verification ledger; returns the final status plus verification details.
+    const { ledger, baseStatus, finalText, filesChanged } = input;
+    const verificationCommands = ledger.map((entry) => entry.command);
+    const failures = ledger
+        .filter((entry) => entry.exitCode !== 0)
+        .map((entry) => ({
+        command: entry.command,
+        exitCode: entry.exitCode,
+        tailStderr: entry.tailStderr,
+    }));
+    // Verification PASS only when at least one verification call ran AND
+    // no ledger entry exited non-zero. `anyFailed` spans the whole ledger,
+    // so a later passing retry does NOT clear an earlier failure.
+    // NOTE(review): this comment used to say only the final state matters - confirm which rule is intended.
+    const lastCall = ledger.length > 0 ? ledger[ledger.length - 1] : undefined;
+    const ranAny = ledger.length > 0;
+    const lastPassed = lastCall !== undefined && lastCall.exitCode === 0;
+    const anyFailed = failures.length > 0;
+    const verified = ranAny && lastPassed && !anyFailed; // with !anyFailed present, lastPassed is already implied whenever ranAny is true
+    // Status precedence:
+    //   verification_command_failed > base failure modes > needs_verification > done
+    // Override `baseStatus` ONLY when verification failed (the
+    // agent's loop may have ended `completed` while a test failed) OR
+    // when `baseStatus === 'done'` and no verification ran (the
+    // engine completed but produced no signal of correctness).
+    let status;
+    let unverifiedReason;
+    if (anyFailed) {
+        status = 'failed';
+        unverifiedReason = 'verification_command_failed';
+    }
+    else if (!ranAny && baseStatus === 'done') {
+        status = 'needs_verification';
+        unverifiedReason = 'no_verification_command_run';
+    }
+    else if (baseStatus !== 'done') {
+        status = baseStatus; // base failure modes pass through unchanged
+        if (!verified)
+            unverifiedReason = 'verification_inconclusive';
+    }
+    else {
+        status = 'done';
+    }
+    // Regression ownership dispute heuristic. Only meaningful when a
+    // verification command failed; keep the predicate simple and
+    // documented so a future reviewer can audit the false-positive
+    // surface.
+    let regressionOwnershipDispute = false;
+    if (anyFailed && filesChanged.length > 0 && finalText !== '') {
+        const lower = finalText.toLowerCase();
+        const disputed = REGRESSION_DISPUTE_PHRASES.some((phrase) => lower.includes(phrase));
+        if (disputed && agentTouchedFailingModule(filesChanged, failures)) {
+            regressionOwnershipDispute = true;
+        }
+    }
+    return {
+        status,
+        verified,
+        verificationCommands,
+        verificationFailures: failures,
+        ...(unverifiedReason !== undefined ? { unverifiedReason } : {}), // the key is omitted entirely when no reason applies
+        regressionOwnershipDispute,
+    };
+}
+/**
+ * Predicate: at least one mutated file resolves to the same module
+ * key as a path mentioned in any verification failure's stderr
+ * tail. The key is the file's full directory plus its basename
+ * with the extension and any `.test.` / `.spec.` infix removed, so
+ * the typical `src/foo.ts` ↔ `src/foo.test.ts` pairing matches
+ * without overfitting to one test runner's stack-trace format.
+ *
+ * Implementation: extract every path mention rooted at `src/`,
+ * `app/`, `apps/`, `test/`, `tests/`, `lib/` or `packages/` from the
+ * joined stderr tails, then check whether ANY mutated file shares a
+ * module key with ANY mentioned path. Returns false when there is
+ * no stderr text or no path mention.
+ */
+function agentTouchedFailingModule(filesChanged, failures) {
+    const stderrJoined = failures.map((f) => f.tailStderr).join('\n');
+    if (stderrJoined === '')
+        return false;
+    // Match common test-runner path shapes: `src/foo/bar.ts`,
+    // `apps/x/test/y.spec.ts`, `packages/z/baz.test.ts`. Not
+    // exhaustive — false negatives are acceptable here because the
+    // predicate's job is to FLAG dispute, not enforce it.
+    const pathMentions = new Set();
+    const pathRegex = /(?:^|[\s(])((?:src|app|apps|test|tests|lib|packages)\/[\w./-]+\.[a-zA-Z]+)/g; // a mention must start the text or follow whitespace or `(`
+    for (const match of stderrJoined.matchAll(pathRegex)) {
+        const captured = match[1];
+        if (typeof captured === 'string' && captured.length > 0) {
+            pathMentions.add(captured);
+        }
+    }
+    if (pathMentions.size === 0)
+        return false;
+    // Module key strips the trailing filename's extension (and any
+    // `.test.` / `.spec.` infix) so `src/existing.ts` and
+    // `src/existing.test.ts` resolve to the same key. Keep the full
+    // directory path plus the bare basename (no ext) — this catches
+    // the typical `foo.ts` ↔ `foo.test.ts` pairing in the same dir
+    // without overfitting to one test-runner convention.
+    const moduleKey = (p) => {
+        const segments = p.split('/').filter(Boolean);
+        if (segments.length === 0)
+            return '';
+        const lastIndex = segments.length - 1;
+        const bareLast = segments[lastIndex]
+            .replace(/\.(spec|test)\./, '.')
+            .replace(/\.[a-zA-Z][a-zA-Z0-9]*$/, '');
+        const dir = segments.slice(0, lastIndex).join('/');
+        return dir === '' ? bareLast : `${dir}/${bareLast}`;
+    };
+    const failingModuleKeys = new Set();
+    for (const mention of pathMentions) {
+        const key = moduleKey(mention);
+        if (key !== '')
+            failingModuleKeys.add(key);
+    }
+    if (failingModuleKeys.size === 0)
+        return false;
+    for (const file of filesChanged) {
+        const key = moduleKey(file); // NOTE(review): keys are compared verbatim, so a `./`-prefixed or absolute entry would never match a bare `src/...` mention - confirm the path form callers pass
+        if (failingModuleKeys.has(key))
+            return true;
+    }
+    return false;
+}
 //# sourceMappingURL=native-pugi.js.map

package/dist/core/engine/tool-bridge.js CHANGED Viewed

@@ -21,6 +21,8 @@ import { webFetchTool } from '../../tools/web-fetch.js';
 import { webSearchTool } from '../../tools/web-search.js';
 import { agentTool } from '../../tools/agent-tool.js';
 import { multiEdit } from '../../tools/multi-edit.js';
+import { recordVerificationCall } from '../session.js';
+import { detectVerificationCommand, tailStderr } from './verification-patterns.js';
 import { buildMcpToolDefs, defaultNonInteractiveMcpPrompt, dispatchMcpTool, MCP_TOOL_PREFIX, } from '../../tools/mcp-tool.js';
 import { firePostToolUseFailureChain } from '../hook-chains.js';
 import { buildDenialContext, DENIAL_REMINDER_THRESHOLD, } from '../denial-tracking/state.js';
@@ -1507,6 +1509,29 @@ function dispatchTool(name, args, ctx) {
                 session: ctx.session,
                 source: 'agent',
             });
+            // PUGI-VERIFY-GATE: tag verification commands and record them
+            // on the session ledger so the engine outcome assembler can
+            // gate the final `status` on test/lint/build pass. The check
+            // is pure — `detectVerificationCommand` matches the regex
+            // allowlist in `verification-patterns.ts`. Record BEFORE
+            // building the model-facing envelope so the ledger is durable
+            // even if the model stops the loop on this turn.
+            const detection = detectVerificationCommand(command);
+            const verificationFailed = detection.isVerification && result.exitCode !== 0;
+            if (detection.isVerification && detection.tool !== null) {
+                recordVerificationCall(ctx.session, {
+                    command,
+                    tool: detection.tool,
+                    exitCode: result.exitCode,
+                    tailStderr: tailStderr(
+                    // Prefer buffered stderr; fall back to redirect tail
+                    // when stdout/stderr lives on disk (`logPath` mode).
+                    result.stderr === '' && typeof result.tail === 'string'
+                        ? result.tail
+                        : result.stderr),
+                    timestamp: new Date().toISOString(),
+                });
+            }
             const parts = [
                 `exit=${result.exitCode}`,
                 result.stdout ? `stdout:\n${result.stdout}` : '',
@@ -1522,6 +1547,16 @@ function dispatchTool(name, args, ctx) {
                 parts.push('truncated=true');
             if (result.timedOut)
                 parts.push('timedOut=true');
+            // PUGI-VERIFY-GATE: when a verification command exited non-zero,
+            // tag the envelope so the model cannot honestly claim "tests
+            // pass" — and so the engine outcome assembler can scan the
+            // ledger and gate `done`. The stringified envelope keeps
+            // `exit=N` for legacy parsers; the new `verification.tool=` /
+            // `verification.ok=` lines surface the gate state explicitly.
+            if (detection.isVerification) {
+                parts.push(`verification.tool=${detection.tool}`);
+                parts.push(`verification.ok=${verificationFailed ? 'false' : 'true'}`);
+            }
             const body = parts.filter(Boolean).join('\n');
             return body || '(no output)';
         }

package/dist/core/engine/verification-patterns.js CHANGED Viewed

@@ -2,12 +2,12 @@
  * PUGI-VERIFY-GATE — verification command detection.
  *
  * Background: Codex dogfood 2026-06-04 surfaced a P0 trust failure
- * where the Pugi engine returned `status: done` + `exitCode: 0` even
- * after `npm test` exited non-zero on a regression the agent itself
- * had introduced. Root cause: no layer of the dispatch pipeline knew
- * which bash invocations were verification commands, so the engine
- * outcome had no way to gate the final status on test/lint/build
- * pass.
+ * where the Pugi engine returned `status: done` + `exitCode: 0`
+ * even after `npm test` exited non-zero on a regression the agent
+ * itself had introduced. Root cause: no layer of the dispatch
+ * pipeline knew which bash invocations were verification commands,
+ * so the engine outcome had no way to gate the final status on
+ * test/lint/build pass.
  *
  * This module is the deterministic, configurable allowlist of regex
  * patterns the engine uses to recognise verification commands at
@@ -110,7 +110,7 @@ export function extractCommandHead(component) {
             continue;
         }
         // env A=1 B=2 prefix (inline env assignments before the verb).
-        // We peel one token at a time so `FOO=bar BAZ=qux pnpm test` resolves to `pnpm test`.
+        // Peel one token at a time so `FOO=bar BAZ=qux pnpm test` resolves to `pnpm test`.
         const firstToken = head.split(/\s+/, 1)[0] ?? '';
         if (firstToken !== '' && ENV_ASSIGN.test(firstToken)) {
             head = head.slice(firstToken.length).trimStart();
@@ -162,8 +162,8 @@ export function detectVerificationCommand(cmd) {
  * downstream reviewer can decide whether to escalate.
  *
  * The list is case-insensitive at match time. Punctuation around the
- * phrase is allowed because `.test()` looks for the substring, not
- * word boundaries (an agent that writes "this is a pre-existing
+ * phrase is allowed because `.includes()` looks for the substring,
+ * not word boundaries (an agent that writes "this is a pre-existing
  * test bug" still trips the flag).
  */
 export const REGRESSION_DISPUTE_PHRASES = [