@pugi/cli 0.1.0-beta.93 → 0.1.0-beta.95
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/retro.js +210 -0
- package/dist/core/diagnostics/probes/sandbox.js +65 -33
- package/dist/core/engine/native-pugi.js +184 -10
- package/dist/core/engine/tool-bridge.js +35 -0
- package/dist/core/engine/verification-patterns.js +9 -9
- package/dist/core/mcp/orchestrator-config.js +192 -0
- package/dist/core/mcp/orchestrator-tools.js +147 -3
- package/dist/core/pugi-gitignore.js +52 -0
- package/dist/core/repl/engine-bridge.js +199 -0
- package/dist/core/repl/session.js +395 -6
- package/dist/core/repl/tool-route.js +382 -0
- package/dist/core/retro/git-collector.js +251 -0
- package/dist/core/retro/health-card.js +25 -0
- package/dist/core/retro/metrics.js +342 -0
- package/dist/core/retro/narrative.js +249 -0
- package/dist/core/retro/plane-collector.js +274 -0
- package/dist/core/retro/pr-issue-link.js +65 -0
- package/dist/core/retro/types.js +16 -0
- package/dist/core/sandboxing/adapter.js +29 -0
- package/dist/core/sandboxing/index.js +49 -0
- package/dist/core/sandboxing/none.js +19 -0
- package/dist/core/sandboxing/seatbelt.js +183 -0
- package/dist/core/session.js +27 -0
- package/dist/core/settings.js +22 -0
- package/dist/runtime/cli.js +167 -33
- package/dist/runtime/commands/mcp.js +64 -8
- package/dist/runtime/deprecation-warning.js +69 -0
- package/dist/runtime/headless.js +8 -3
- package/dist/runtime/stream-renderer.js +195 -0
- package/dist/runtime/version.js +1 -1
- package/dist/tui/agent-tree.js +11 -0
- package/dist/tui/ask-user-question-chips.js +1 -1
- package/dist/tui/multi-file-diff-approval.js +3 -3
- package/dist/tui/repl-render.js +42 -0
- package/package.json +2 -2
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
import { jsxs as _jsxs, jsx as _jsx } from "react/jsx-runtime";
|
|
2
|
+
import { Box, Text, render } from 'ink';
|
|
3
|
+
import { collectGitContext, countCommitsAheadOfBase, } from '../core/retro/git-collector.js';
|
|
4
|
+
import { ensurePugiGitIgnore } from '../core/pugi-gitignore.js';
|
|
5
|
+
import { computeMetrics } from '../core/retro/metrics.js';
|
|
6
|
+
import { persistRetro } from '../core/retro/narrative.js';
|
|
7
|
+
import { collectPlaneSlice, postRetroToPlane, resolvePlaneConfig, } from '../core/retro/plane-collector.js';
|
|
8
|
+
import { enrichLinks } from '../core/retro/pr-issue-link.js';
|
|
9
|
+
import { computeHealthCard } from '../core/retro/health-card.js';
|
|
10
|
+
/**
 * Parse a retro window token such as `7d`, `14d`, `30d` or `24h`.
 *
 * Hour tokens are converted to fractional days (`12h` -> 0.5).
 *
 * @param token Raw positional argument; may be undefined or empty.
 * @returns `{ days, label }` where `label` is the token as typed, or
 *   `undefined` when the token is missing, malformed, or not strictly
 *   positive. No default is applied here — the caller picks the fallback.
 */
function parseDurationToken(token) {
    const parts = token ? /^(\d+)(h|d)$/.exec(token) : null;
    if (parts === null) {
        return undefined;
    }
    const [, digits, unit] = parts;
    const amount = Number.parseInt(digits ?? '0', 10);
    // Rejects `0d` / `0h` as well as digit runs that overflow to Infinity.
    if (!(Number.isFinite(amount) && amount > 0)) {
        return undefined;
    }
    return { days: unit === 'h' ? amount / 24 : amount, label: token };
}
|
|
26
|
+
/**
 * Build the reporting window that ends at `now`.
 *
 * @param durationDays Window length in days (may be fractional).
 * @param label Human-readable label carried through unchanged.
 * @param now Upper bound of the window; returned as `until` (same object).
 * @returns `{ since, until, label, days }` where `since` is snapped to local
 *   midnight and `days` is the rounded length, never below 1.
 */
function buildWindow(durationDays, label, now) {
    const MS_PER_DAY = 24 * 60 * 60 * 1000;
    const since = new Date(now.getTime() - durationDays * MS_PER_DAY);
    // Snap the lower bound to local midnight so repeated runs on the same
    // day produce the same `since`.
    since.setHours(0, 0, 0, 0);
    const days = Math.max(1, Math.round(durationDays));
    return { since, until: now, label, days };
}
|
|
34
|
+
/**
 * Build the comparison window that immediately precedes `current`.
 *
 * The prior window ends where the current one starts and spans the same
 * number of calendar days.
 *
 * @param current Window produced by `buildWindow` (`since`, `label`, `days`).
 * @returns `{ since, until, label, days }` with `label` prefixed by `prior `.
 */
function buildPriorWindow(current) {
    const until = new Date(current.since.getTime());
    const since = new Date(until.getTime());
    // Step back by calendar days rather than `days * 24h`: across a
    // spring-forward DST change the fixed-hours subtraction lands at 23:00
    // of the day before, and the midnight snap below would then make the
    // prior window one day longer than the current one.
    since.setDate(since.getDate() - current.days);
    since.setHours(0, 0, 0, 0);
    return { since, until, label: `prior ${current.label}`, days: current.days };
}
|
|
41
|
+
/**
 * Interpret the raw `pugi retro` argument list.
 *
 * Recognised shapes: `[duration]`, `compare [duration]`, plus the flags
 * `--plane` and `--post-plane` (posting implies Plane enrichment). A missing
 * or unparseable duration falls back to `7d`.
 *
 * @param rawArgs Arguments following the `retro` verb.
 * @param now Reference instant used as the window's upper bound.
 * @returns `{ window, compare, enrichPlane, postPlane }`.
 */
function parseRetroArgs(rawArgs, now) {
    const argv = Array.from(rawArgs);
    const postPlane = argv.includes('--post-plane');
    const enrichPlane = postPlane || argv.includes('--plane');
    // Anything that does not start with `-` counts as positional.
    const [first, second] = argv.filter((arg) => !arg.startsWith('-'));
    const compare = first === 'compare';
    const durationToken = compare ? second : first;
    const { days, label } = parseDurationToken(durationToken) ?? { days: 7, label: '7d' };
    return {
        window: buildWindow(days, label, now),
        compare,
        enrichPlane,
        postPlane,
    };
}
|
|
63
|
+
/**
 * Ink component that prints the one-screen retro summary.
 *
 * Always shows the window label, branch, commit/LOC totals, focus and
 * streak, and the persisted file paths. The "Ship of the week", Plane
 * counters and "Posted to Plane" rows are rendered only when their data
 * is present.
 *
 * @param props `{ persisted, metrics, plane, planePostUrl }`.
 */
function SummaryCard(props) {
    const { persisted, metrics, plane, planePostUrl } = props;
    const boxProps = {
        flexDirection: "column",
        borderStyle: "single",
        borderRight: false,
        borderTop: false,
        borderBottom: false,
        paddingLeft: 1,
    };
    return (_jsxs(Box, {
        ...boxProps,
        children: [
            _jsxs(Text, { bold: true, children: ["pugi retro \u00B7 ", metrics.window.label] }),
            _jsxs(Text, {
                dimColor: true,
                children: ["Branch ", metrics.branch.current, " over ", metrics.branch.base],
            }),
            _jsxs(Text, {
                children: [
                    metrics.commits.total,
                    " commits \u00B7 +",
                    metrics.loc.insertions,
                    " / -",
                    metrics.loc.deletions,
                    " LOC \u00B7 ",
                    metrics.activeDays,
                    " active days",
                ],
            }),
            _jsxs(Text, {
                children: [
                    "Focus ",
                    metrics.focus.score,
                    "% on ",
                    metrics.focus.topDir ?? 'n/a',
                    " \u00B7 Streak ",
                    metrics.streak.personalDays,
                    "d personal / ",
                    metrics.streak.teamDays,
                    "d team",
                ],
            }),
            // Subject is clipped to 60 characters to keep the card on one line.
            metrics.shipOfTheWeek
                ? _jsxs(Text, {
                    children: ["Ship of the week: ", metrics.shipOfTheWeek.subject.slice(0, 60)],
                })
                : null,
            plane
                ? _jsxs(Text, {
                    children: [
                        "Plane: closed ",
                        plane.closedIssues.length,
                        " \u00B7 created ",
                        plane.createdIssues.length,
                        " \u00B7 oversized modules ",
                        plane.oversizedModules.length,
                    ],
                })
                : null,
            _jsxs(Text, { dimColor: true, children: ["Markdown: ", persisted.markdownPath] }),
            _jsxs(Text, { dimColor: true, children: ["JSON: ", persisted.jsonPath] }),
            planePostUrl
                ? _jsxs(Text, { children: ["Posted to Plane: ", planePostUrl] })
                : null,
        ],
    }));
}
|
|
67
|
+
/**
 * Print the summary card once: mount the Ink tree, then unmount right away
 * so the command does not keep an interactive renderer alive.
 *
 * @param props Props forwarded to `SummaryCard`.
 */
function renderSummary(props) {
    render(_jsx(SummaryCard, { ...props })).unmount();
}
|
|
71
|
+
/**
 * Entry point for `pugi retro`.
 *
 * Collects git activity for the requested window, optionally a prior window
 * (`compare`) and a Plane slice (`--plane` / `--post-plane`), persists the
 * retro via `persistRetro`, optionally posts it to Plane, and finally prints
 * either a JSON document (`ctx.flags.json`) or the Ink summary card.
 *
 * Plane problems are reported on stderr and never abort the command.
 *
 * @param ctx Command context. Reads `ctx.args`, `ctx.cwd`, `ctx.flags.json`,
 *   `ctx.io.write`, `ctx.io.writeError` and the optional `ctx.now` override.
 * @returns Exit code: `2` when `ctx.cwd` is not a git workspace, else `0`.
 */
export async function runRetroCommand(ctx) {
    const now = ctx.now ?? new Date();
    const parsed = parseRetroArgs(ctx.args, now);
    const gitCtx = await collectGitContext({ cwd: ctx.cwd, window: parsed.window });
    if (!gitCtx.hasGit) {
        const msg = 'pugi retro: not a git workspace - initialise git or cd into one.';
        if (ctx.flags.json) {
            ctx.io.write(`${JSON.stringify({ ok: false, error: 'no_git_workspace' })}\n`);
        }
        else {
            ctx.io.writeError(msg);
        }
        return 2;
    }
    // Before anything is written under `.pugi/retros/`, make sure
    // `.gitignore` covers `.pugi/`. Otherwise the first run in a fresh repo
    // would leave retros (and any future `.pugi/settings.json` secret store)
    // tracked by git on the next `git add -A`. Idempotent.
    //
    // A failure here is surfaced to stderr instead of being swallowed: if
    // `.gitignore` is read-only or permissions refuse, the operator must
    // know retros may end up tracked by git.
    const gitIgnoreCreated = [];
    const gitIgnoreSkipped = [];
    try {
        ensurePugiGitIgnore(ctx.cwd, gitIgnoreCreated, gitIgnoreSkipped);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        ctx.io.writeError(`pugi retro: could not update .gitignore (${reason}). ` +
            `Manually add ".pugi/" to .gitignore so retros are not tracked.`);
    }
    const toBaseHeadCount = await countCommitsAheadOfBase(ctx.cwd, gitCtx.baseBranch, parsed.window.since);
    const metrics = computeMetrics({
        window: parsed.window,
        currentBranch: gitCtx.currentBranch,
        baseBranch: gitCtx.baseBranch,
        toBaseHeadCount,
        currentUserName: gitCtx.userName,
        currentUserEmail: gitCtx.userEmail,
        commits: gitCtx.commits,
    });
    let compare;
    if (parsed.compare) {
        // Same branch/user identity as the current window; only the commit
        // set and the ahead-of-base count come from the prior window.
        const priorWindow = buildPriorWindow(parsed.window);
        const priorCtx = await collectGitContext({ cwd: ctx.cwd, window: priorWindow });
        const priorAhead = await countCommitsAheadOfBase(ctx.cwd, gitCtx.baseBranch, priorWindow.since);
        const priorMetrics = computeMetrics({
            window: priorWindow,
            currentBranch: gitCtx.currentBranch,
            baseBranch: gitCtx.baseBranch,
            toBaseHeadCount: priorAhead,
            currentUserName: gitCtx.userName,
            currentUserEmail: gitCtx.userEmail,
            commits: priorCtx.commits,
        });
        compare = { current: metrics, prior: priorMetrics };
    }
    let plane;
    // Resolved once and reused for posting, so the retro is posted with the
    // same configuration the slice was read with.
    let planeConfig;
    let planeUnavailableReason;
    if (parsed.enrichPlane) {
        const cfgResult = resolvePlaneConfig(ctx.cwd);
        if (!cfgResult.ok) {
            planeUnavailableReason = cfgResult.reason;
        }
        else {
            planeConfig = cfgResult.config;
            try {
                const slice = await collectPlaneSlice({
                    config: planeConfig,
                    since: parsed.window.since,
                });
                const links = enrichLinks(gitCtx.commits, slice.closedIssues.concat(slice.createdIssues));
                const health = computeHealthCard(slice.modules);
                plane = {
                    ...slice,
                    prToIssueLinks: links,
                    oversizedModules: health.oversized,
                };
            }
            catch (err) {
                planeUnavailableReason = err instanceof Error ? err.message : String(err);
            }
        }
    }
    const persisted = persistRetro({
        root: ctx.cwd,
        metrics,
        plane,
        compare,
        now,
    });
    let planePostUrl;
    if (parsed.postPlane) {
        // `plane` is only set after `planeConfig` resolved, so a defined
        // `plane` guarantees a usable config. Previously the config was
        // resolved a second time here and a failed second resolution
        // skipped the post without any message.
        if (!plane) {
            ctx.io.writeError(`pugi retro --post-plane: Plane unavailable (${planeUnavailableReason ?? 'unknown'}).`);
        }
        else {
            try {
                const result = await postRetroToPlane({
                    config: planeConfig,
                    markdown: persisted.markdown,
                    sequence: persisted.sequence,
                    dateLabel: persisted.dateLabel,
                });
                planePostUrl = result.url;
                if (result.alreadyExists) {
                    const notice = `pugi retro: already exists at ${result.url}`;
                    if (ctx.flags.json) {
                        // Keep stdout a single parseable JSON document.
                        ctx.io.writeError(notice);
                    }
                    else {
                        ctx.io.write(`${notice}\n`);
                    }
                }
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                ctx.io.writeError(`pugi retro --post-plane failed: ${msg}`);
            }
        }
    }
    if (ctx.flags.json) {
        ctx.io.write(`${JSON.stringify({
            ok: true,
            markdownPath: persisted.markdownPath,
            jsonPath: persisted.jsonPath,
            sequence: persisted.sequence,
            metrics,
            plane: plane ?? null,
            planePostUrl: planePostUrl ?? null,
            planeUnavailableReason: planeUnavailableReason ?? null,
        }, null, 2)}\n`);
    }
    else {
        renderSummary({ persisted, metrics, plane, planePostUrl });
        if (planeUnavailableReason && !plane) {
            ctx.io.writeError(`pugi retro: Plane integration unavailable (${planeUnavailableReason}).`);
        }
    }
    return 0;
}
|
|
210
|
+
//# sourceMappingURL=retro.js.map
|
|
@@ -1,40 +1,72 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* SANDBOX probe — surfaces the current OS-level sandbox posture (
|
|
3
|
-
*
|
|
3
|
+
* Trust Sprint item 6: macOS Seatbelt adapter wired; Linux Landlock
|
|
4
|
+
* and Docker variants still backlog).
|
|
4
5
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
* When the sandbox does ship, the probe upgrade path:
|
|
11
|
-
* - Replace the static "not_armed" detail with a real config probe
|
|
12
|
-
* (read .pugi/settings.json::sandbox.mode, verify the OS primitive
|
|
13
|
-
* resolves, return ok when both line up).
|
|
14
|
-
* - Keep the same probe NAME so doctor output / spec assertions
|
|
15
|
-
* don't churn.
|
|
6
|
+
* Sources `bash.sandbox` from `.pugi/settings.json`, defaults to
|
|
7
|
+
* `none`. When set to `macOS-seatbelt` the probe verifies the OS
|
|
8
|
+
* primitive is callable and reports `warn` (armed, but spawn-wrap is not yet wired into the bash runner) or `error`
|
|
9
|
+
* (configured-but-unavailable). When set to `none` the probe reports
|
|
10
|
+
* `warn` with the operator-readable reason "policy 'none' selected".
|
|
16
11
|
*/
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
12
|
+
import { homedir } from 'node:os';
|
|
13
|
+
import { loadSettings } from '../../settings.js';
|
|
14
|
+
import { probeSandbox as probeSandboxAdapter } from '../../sandboxing/index.js';
|
|
15
|
+
/**
 * SANDBOX doctor probe: report the configured bash sandbox posture.
 *
 * Reads `bash.sandbox` from the workspace settings (default `none`) and
 * asks the sandbox adapter whether that mode can be armed.
 *
 * @param ctx Probe context; reads `ctx.cwd` and the optional `ctx.home`
 *   (falls back to the OS home directory when empty or unset).
 * @returns `{ name: 'SANDBOX', status, detail, remediation }` where status is
 *   `warn` when the adapter arms or when mode is `none`, and `error` when
 *   the configured mode fails to arm or the adapter throws.
 */
export function probeSandbox(ctx) {
    const settings = loadSettings(ctx.cwd);
    const configured = (settings.bash?.sandbox ?? 'none');
    const home = ctx.home || homedir();
    const extraWritePaths = [`${home}/.pugi`];
    try {
        const state = probeSandboxAdapter({
            mode: configured,
            workspaceRoot: ctx.cwd,
            extraWritePaths,
        });
        if (state.armed) {
            // The adapter probes ok, but spawn-wrap is NOT yet wired into
            // the bash runner. Reporting status=ok would overstate the
            // posture — an operator reading 'armed' would assume their bash
            // calls were jailed when they still run with full process
            // privileges. Surface 'warn' with a precise reason instead and
            // flip to 'ok' when the runner indirection lands.
            return {
                name: 'SANDBOX',
                status: 'warn',
                detail: `configured (mode=${state.mode}) but spawn-wrap not yet wired — bash dispatches still run with full process privileges. ` +
                    `Adapter posture: ${state.details.join('; ')}`,
                remediation: 'The seatbelt adapter is in-tree and exercised by tests; the bash runner indirection that consumes it lands in a follow-up. ' +
                    'Bash classifier denylist + permission FSM remain in force in the meantime.',
            };
        }
        // Not armed — distinguish "operator chose none" from "configured
        // mode failed". The latter is an error; the former is a documented
        // posture and stays a warning.
        if (state.mode === 'none') {
            return {
                name: 'SANDBOX',
                status: 'warn',
                detail: `not armed: ${state.reason ?? 'mode none'}`,
                remediation: 'Set `bash.sandbox = "macOS-seatbelt"` in .pugi/settings.json on macOS to enable workspace-scoped write isolation. ' +
                    'Bash classifier denylist + permission FSM still apply.',
            };
        }
        return {
            name: 'SANDBOX',
            status: 'error',
            detail: `configured mode "${state.mode}" failed to arm: ${state.reason ?? 'unknown'}`,
            remediation: 'Set `bash.sandbox` to a supported mode for this platform or remove the key to fall back to "none".',
        };
    }
    catch (err) {
        // A thrown value is not guaranteed to be an Error; normalise it so
        // the detail never reads "undefined".
        const reason = err instanceof Error ? err.message : String(err);
        return {
            name: 'SANDBOX',
            status: 'error',
            detail: `sandbox probe threw: ${reason}`,
            remediation: 'Remove the bash.sandbox key from .pugi/settings.json or set it to "none".',
        };
    }
}
|
|
40
72
|
//# sourceMappingURL=sandbox.js.map
|
|
@@ -5,6 +5,7 @@ import { AsyncEventQueue, EngineEventEmitter, modelSupportsThinking, runEngineLo
|
|
|
5
5
|
import { FileReadCache } from '../file-cache.js';
|
|
6
6
|
import { loadSettings } from '../settings.js';
|
|
7
7
|
import { openSession, recordToolCall, recordToolResult } from '../session.js';
|
|
8
|
+
import { REGRESSION_DISPUTE_PHRASES } from './verification-patterns.js';
|
|
8
9
|
import { prewarmRealDispatch } from '../subagents/dispatcher.js';
|
|
9
10
|
import { resolveAutoCompactConfig, resolveBudget } from './budgets.js';
|
|
10
11
|
import { maybeCompact } from './auto-compact.js';
|
|
@@ -936,15 +937,32 @@ export class NativePugiEngineAdapter {
|
|
|
936
937
|
return;
|
|
937
938
|
}
|
|
938
939
|
// Translate the loop outcome into an EngineResult.
|
|
939
|
-
// `aborted` maps to `blocked`
|
|
940
|
-
//
|
|
941
|
-
//
|
|
942
|
-
|
|
940
|
+
// `aborted` maps to `blocked` because the operator chose the
|
|
941
|
+
// outcome, same shape as budget_exhausted / tool_refused.
|
|
942
|
+
//
|
|
943
|
+
// PUGI-VERIFY-GATE: the verification gate runs AFTER this
|
|
944
|
+
// base mapping. When the agent ran verification commands and
|
|
945
|
+
// any exited non-zero, the loop's `completed` collapses to
|
|
946
|
+
// `failed` (the agent's claim of "done" is unverified). When
|
|
947
|
+
// the loop `completed` but no verification command ever ran,
|
|
948
|
+
// we surface `needs_verification` (CLI exit 2) so the operator
|
|
949
|
+
// sees the missing signal instead of false confidence. The
|
|
950
|
+
// gate is non-negotiable per the contract: `done` is reserved
|
|
951
|
+
// for `verified: true` outcomes.
|
|
952
|
+
const baseStatus = finalOutcome.status === 'completed'
|
|
943
953
|
? 'done'
|
|
944
954
|
: finalOutcome.status === 'failed'
|
|
945
955
|
? 'failed'
|
|
946
956
|
: 'blocked';
|
|
947
|
-
const
|
|
957
|
+
const filesChangedList = Array.from(filesChanged).sort();
|
|
958
|
+
const verification = computeVerificationOutcome({
|
|
959
|
+
ledger: session.verificationLedger,
|
|
960
|
+
baseStatus,
|
|
961
|
+
finalText: finalOutcome.finalText,
|
|
962
|
+
filesChanged: filesChangedList,
|
|
963
|
+
});
|
|
964
|
+
const status = verification.status;
|
|
965
|
+
const summaryPrefix = status === 'done'
|
|
948
966
|
? ''
|
|
949
967
|
: finalOutcome.status === 'budget_exhausted'
|
|
950
968
|
? '[budget_exhausted] '
|
|
@@ -952,8 +970,11 @@ export class NativePugiEngineAdapter {
|
|
|
952
970
|
? '[plan_mode_refused] '
|
|
953
971
|
: finalOutcome.status === 'aborted'
|
|
954
972
|
? '[operator_aborted] '
|
|
955
|
-
:
|
|
956
|
-
|
|
973
|
+
: status === 'needs_verification'
|
|
974
|
+
? '[needs_verification] '
|
|
975
|
+
: verification.unverifiedReason === 'verification_command_failed'
|
|
976
|
+
? '[verification_failed] '
|
|
977
|
+
: '[failed] ';
|
|
957
978
|
appendSessionMirror(sessionEventsPath, {
|
|
958
979
|
type: 'outcome',
|
|
959
980
|
status: finalOutcome.status,
|
|
@@ -1014,6 +1035,18 @@ export class NativePugiEngineAdapter {
|
|
|
1014
1035
|
const synthesisedFromFiles = finalOutcome.finalText.trim() === '' && filesChangedList.length > 0
|
|
1015
1036
|
? `Updated ${filesChangedList.length} file(s): ${filesChangedList.slice(0, 5).join(', ')}${filesChangedList.length > 5 ? ` (+${filesChangedList.length - 5} more)` : ''}`
|
|
1016
1037
|
: '';
|
|
1038
|
+
// PUGI-VERIFY-GATE: thread verification state into the risks
|
|
1039
|
+
// array so a consumer reading only the legacy fields still
|
|
1040
|
+
// gets a human-readable summary of what was not verified.
|
|
1041
|
+
const baseRisks = finalOutcome.status === 'completed' && status === 'done'
|
|
1042
|
+
? []
|
|
1043
|
+
: [finalOutcome.reason ?? `outcome=${finalOutcome.status}`];
|
|
1044
|
+
if (verification.unverifiedReason && status !== 'done') {
|
|
1045
|
+
baseRisks.push(`unverified: ${verification.unverifiedReason}`);
|
|
1046
|
+
}
|
|
1047
|
+
if (verification.regressionOwnershipDispute) {
|
|
1048
|
+
baseRisks.push('regression_ownership_dispute: agent disclaimed ownership of failing verification');
|
|
1049
|
+
}
|
|
1017
1050
|
yield {
|
|
1018
1051
|
type: 'result',
|
|
1019
1052
|
result: {
|
|
@@ -1022,9 +1055,7 @@ export class NativePugiEngineAdapter {
|
|
|
1022
1055
|
filesChanged: filesChangedList,
|
|
1023
1056
|
patchRefs: [],
|
|
1024
1057
|
testsRun: [],
|
|
1025
|
-
risks:
|
|
1026
|
-
? []
|
|
1027
|
-
: [finalOutcome.reason ?? `outcome=${finalOutcome.status}`],
|
|
1058
|
+
risks: baseRisks,
|
|
1028
1059
|
eventRefs: [
|
|
1029
1060
|
`tool_calls=${finalOutcome.toolCallCount}`,
|
|
1030
1061
|
`turns=${finalOutcome.turnsUsed}`,
|
|
@@ -1039,7 +1070,22 @@ export class NativePugiEngineAdapter {
|
|
|
1039
1070
|
`session=${session.id}`,
|
|
1040
1071
|
`ctx=${ctx.sessionId}`,
|
|
1041
1072
|
`mirror=${sessionEventsPath}`,
|
|
1073
|
+
// PUGI-VERIFY-GATE: machine-readable verification echo so
|
|
1074
|
+
// downstream consumers (MCP wrapper, cabinet UI, audit
|
|
1075
|
+
// pipeline) can branch on the gate state without parsing
|
|
1076
|
+
// the new structured fields.
|
|
1077
|
+
`verified=${verification.verified}`,
|
|
1078
|
+
`verification_count=${verification.verificationCommands.length}`,
|
|
1042
1079
|
],
|
|
1080
|
+
verified: verification.verified,
|
|
1081
|
+
verificationCommands: verification.verificationCommands,
|
|
1082
|
+
verificationFailures: verification.verificationFailures,
|
|
1083
|
+
...(verification.unverifiedReason !== undefined
|
|
1084
|
+
? { unverifiedReason: verification.unverifiedReason }
|
|
1085
|
+
: {}),
|
|
1086
|
+
...(verification.regressionOwnershipDispute
|
|
1087
|
+
? { regressionOwnershipDispute: true }
|
|
1088
|
+
: {}),
|
|
1043
1089
|
},
|
|
1044
1090
|
};
|
|
1045
1091
|
}
|
|
@@ -1439,4 +1485,132 @@ async function expandHierarchyWithImports(hierarchy, cwd) {
|
|
|
1439
1485
|
}
|
|
1440
1486
|
return out;
|
|
1441
1487
|
}
|
|
1488
|
+
/**
 * Fold the session's verification ledger into the final engine status.
 *
 * Rules, in precedence order:
 *   1. Any ledger entry with a non-zero exit code -> `failed`
 *      (`unverifiedReason = 'verification_command_failed'`).
 *   2. `baseStatus === 'done'` with an empty ledger -> `needs_verification`
 *      (`unverifiedReason = 'no_verification_command_run'`).
 *   3. Any other non-`done` base status is passed through; when nothing
 *      verified it is tagged `verification_inconclusive`.
 *   4. Otherwise `done`.
 *
 * `verified` is true only when at least one verification ran and none
 * failed.
 *
 * NOTE(review): a single failing entry fails the run even when the same
 * command later passed on retry. The previous in-code comment described a
 * "most recent run wins" rule that the logic does not implement — confirm
 * which behaviour is intended.
 *
 * @param input `{ ledger, baseStatus, finalText, filesChanged }`.
 * @returns `{ status, verified, verificationCommands, verificationFailures,
 *   unverifiedReason?, regressionOwnershipDispute }`.
 */
export function computeVerificationOutcome(input) {
    const { ledger, baseStatus, finalText, filesChanged } = input;
    const verificationCommands = [];
    const verificationFailures = [];
    for (const { command, exitCode, tailStderr } of ledger) {
        verificationCommands.push(command);
        if (exitCode !== 0) {
            verificationFailures.push({ command, exitCode, tailStderr });
        }
    }
    const ranAny = ledger.length > 0;
    const anyFailed = verificationFailures.length > 0;
    // With no failures recorded, the last entry necessarily passed, so no
    // separate "last call passed" check is needed.
    const verified = ranAny && !anyFailed;
    let status = baseStatus;
    let unverifiedReason;
    if (anyFailed) {
        status = 'failed';
        unverifiedReason = 'verification_command_failed';
    }
    else if (baseStatus === 'done') {
        if (!ranAny) {
            status = 'needs_verification';
            unverifiedReason = 'no_verification_command_run';
        }
    }
    else if (!verified) {
        unverifiedReason = 'verification_inconclusive';
    }
    // Ownership-dispute heuristic: only evaluated when a verification
    // failed, the agent changed files, and it produced closing text. Flags
    // the case where that text contains a known disclaimer phrase while a
    // changed file maps to a module mentioned in the failing stderr.
    let regressionOwnershipDispute = false;
    if (anyFailed && filesChanged.length > 0 && finalText !== '') {
        const haystack = finalText.toLowerCase();
        regressionOwnershipDispute =
            REGRESSION_DISPUTE_PHRASES.some((phrase) => haystack.includes(phrase)) &&
                agentTouchedFailingModule(filesChanged, verificationFailures);
    }
    return {
        status,
        verified,
        verificationCommands,
        verificationFailures,
        ...(unverifiedReason !== undefined ? { unverifiedReason } : {}),
        regressionOwnershipDispute,
    };
}
|
|
1552
|
+
/**
 * Predicate: did the agent change a file belonging to a module that shows
 * up in the stderr tail of a failed verification?
 *
 * Two paths belong to the same module when they share a directory and a
 * base name once the extension and any `.test.` / `.spec.` infix are
 * removed, so `src/foo.ts` pairs with `src/foo.test.ts`.
 *
 * Only path mentions rooted at `src/`, `app/`, `apps/`, `test/`, `tests/`,
 * `lib/` or `packages/` and preceded by start-of-text, whitespace or `(`
 * are recognised. False negatives are accepted: the result only flags a
 * possible ownership dispute, it does not enforce anything.
 *
 * @param filesChanged Paths the agent mutated.
 * @param failures Failed verification entries; `tailStderr` is scanned.
 * @returns true when at least one changed file maps to a failing module.
 */
function agentTouchedFailingModule(filesChanged, failures) {
    const combinedStderr = failures.map((failure) => failure.tailStderr).join('\n');
    if (combinedStderr === '') {
        return false;
    }
    // `dir/stem` key: drop the `.test.` / `.spec.` infix first, then the
    // trailing extension, and re-attach the directory when there is one.
    const toModuleKey = (path) => {
        const parts = path.split('/').filter(Boolean);
        const fileName = parts.pop();
        if (fileName === undefined) {
            return '';
        }
        const stem = fileName
            .replace(/\.(spec|test)\./, '.')
            .replace(/\.[a-zA-Z][a-zA-Z0-9]*$/, '');
        return parts.length === 0 ? stem : `${parts.join('/')}/${stem}`;
    };
    const mentionPattern = /(?:^|[\s(])((?:src|app|apps|test|tests|lib|packages)\/[\w./-]+\.[a-zA-Z]+)/g;
    const failingKeys = new Set(Array.from(combinedStderr.matchAll(mentionPattern), (hit) => hit[1])
        .filter((mention) => typeof mention === 'string' && mention.length > 0)
        .map(toModuleKey)
        .filter((key) => key !== ''));
    if (failingKeys.size === 0) {
        return false;
    }
    return filesChanged.some((file) => failingKeys.has(toModuleKey(file)));
}
|
|
1442
1616
|
//# sourceMappingURL=native-pugi.js.map
|
|
@@ -21,6 +21,8 @@ import { webFetchTool } from '../../tools/web-fetch.js';
|
|
|
21
21
|
import { webSearchTool } from '../../tools/web-search.js';
|
|
22
22
|
import { agentTool } from '../../tools/agent-tool.js';
|
|
23
23
|
import { multiEdit } from '../../tools/multi-edit.js';
|
|
24
|
+
import { recordVerificationCall } from '../session.js';
|
|
25
|
+
import { detectVerificationCommand, tailStderr } from './verification-patterns.js';
|
|
24
26
|
import { buildMcpToolDefs, defaultNonInteractiveMcpPrompt, dispatchMcpTool, MCP_TOOL_PREFIX, } from '../../tools/mcp-tool.js';
|
|
25
27
|
import { firePostToolUseFailureChain } from '../hook-chains.js';
|
|
26
28
|
import { buildDenialContext, DENIAL_REMINDER_THRESHOLD, } from '../denial-tracking/state.js';
|
|
@@ -1507,6 +1509,29 @@ function dispatchTool(name, args, ctx) {
|
|
|
1507
1509
|
session: ctx.session,
|
|
1508
1510
|
source: 'agent',
|
|
1509
1511
|
});
|
|
1512
|
+
// PUGI-VERIFY-GATE: tag verification commands and record them
|
|
1513
|
+
// on the session ledger so the engine outcome assembler can
|
|
1514
|
+
// gate the final `status` on test/lint/build pass. The check
|
|
1515
|
+
// is pure — `detectVerificationCommand` matches the regex
|
|
1516
|
+
// allowlist in `verification-patterns.ts`. Record BEFORE
|
|
1517
|
+
// building the model-facing envelope so the ledger is durable
|
|
1518
|
+
// even if the model stops the loop on this turn.
|
|
1519
|
+
const detection = detectVerificationCommand(command);
|
|
1520
|
+
const verificationFailed = detection.isVerification && result.exitCode !== 0;
|
|
1521
|
+
if (detection.isVerification && detection.tool !== null) {
|
|
1522
|
+
recordVerificationCall(ctx.session, {
|
|
1523
|
+
command,
|
|
1524
|
+
tool: detection.tool,
|
|
1525
|
+
exitCode: result.exitCode,
|
|
1526
|
+
tailStderr: tailStderr(
|
|
1527
|
+
// Prefer buffered stderr; fall back to redirect tail
|
|
1528
|
+
// when stdout/stderr lives on disk (`logPath` mode).
|
|
1529
|
+
result.stderr === '' && typeof result.tail === 'string'
|
|
1530
|
+
? result.tail
|
|
1531
|
+
: result.stderr),
|
|
1532
|
+
timestamp: new Date().toISOString(),
|
|
1533
|
+
});
|
|
1534
|
+
}
|
|
1510
1535
|
const parts = [
|
|
1511
1536
|
`exit=${result.exitCode}`,
|
|
1512
1537
|
result.stdout ? `stdout:\n${result.stdout}` : '',
|
|
@@ -1522,6 +1547,16 @@ function dispatchTool(name, args, ctx) {
|
|
|
1522
1547
|
parts.push('truncated=true');
|
|
1523
1548
|
if (result.timedOut)
|
|
1524
1549
|
parts.push('timedOut=true');
|
|
1550
|
+
// PUGI-VERIFY-GATE: when a verification command exited non-zero,
|
|
1551
|
+
// tag the envelope so the model cannot honestly claim "tests
|
|
1552
|
+
// pass" — and so the engine outcome assembler can scan the
|
|
1553
|
+
// ledger and gate `done`. The stringified envelope keeps
|
|
1554
|
+
// `exit=N` for legacy parsers; the new `verification.tool=` /
|
|
1555
|
+
// `verification.ok=` lines surface the gate state explicitly.
|
|
1556
|
+
if (detection.isVerification) {
|
|
1557
|
+
parts.push(`verification.tool=${detection.tool}`);
|
|
1558
|
+
parts.push(`verification.ok=${verificationFailed ? 'false' : 'true'}`);
|
|
1559
|
+
}
|
|
1525
1560
|
const body = parts.filter(Boolean).join('\n');
|
|
1526
1561
|
return body || '(no output)';
|
|
1527
1562
|
}
|
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
* PUGI-VERIFY-GATE — verification command detection.
|
|
3
3
|
*
|
|
4
4
|
* Background: Codex dogfood 2026-06-04 surfaced a P0 trust failure
|
|
5
|
-
* where the Pugi engine returned `status: done` + `exitCode: 0`
|
|
6
|
-
* after `npm test` exited non-zero on a regression the agent
|
|
7
|
-
* had introduced. Root cause: no layer of the dispatch
|
|
8
|
-
* which bash invocations were verification commands,
|
|
9
|
-
* outcome had no way to gate the final status on
|
|
10
|
-
* pass.
|
|
5
|
+
* where the Pugi engine returned `status: done` + `exitCode: 0`
|
|
6
|
+
* even after `npm test` exited non-zero on a regression the agent
|
|
7
|
+
* itself had introduced. Root cause: no layer of the dispatch
|
|
8
|
+
* pipeline knew which bash invocations were verification commands,
|
|
9
|
+
* so the engine outcome had no way to gate the final status on
|
|
10
|
+
* test/lint/build pass.
|
|
11
11
|
*
|
|
12
12
|
* This module is the deterministic, configurable allowlist of regex
|
|
13
13
|
* patterns the engine uses to recognise verification commands at
|
|
@@ -110,7 +110,7 @@ export function extractCommandHead(component) {
|
|
|
110
110
|
continue;
|
|
111
111
|
}
|
|
112
112
|
// env A=1 B=2 prefix (inline env assignments before the verb).
|
|
113
|
-
//
|
|
113
|
+
// Peel one token at a time so `FOO=bar BAZ=qux pnpm test` resolves to `pnpm test`.
|
|
114
114
|
const firstToken = head.split(/\s+/, 1)[0] ?? '';
|
|
115
115
|
if (firstToken !== '' && ENV_ASSIGN.test(firstToken)) {
|
|
116
116
|
head = head.slice(firstToken.length).trimStart();
|
|
@@ -162,8 +162,8 @@ export function detectVerificationCommand(cmd) {
|
|
|
162
162
|
* downstream reviewer can decide whether to escalate.
|
|
163
163
|
*
|
|
164
164
|
* The list is case-insensitive at match time. Punctuation around the
|
|
165
|
-
* phrase is allowed because `.
|
|
166
|
-
* word boundaries (an agent that writes "this is a pre-existing
|
|
165
|
+
* phrase is allowed because `.includes()` looks for the substring,
|
|
166
|
+
* not word boundaries (an agent that writes "this is a pre-existing
|
|
167
167
|
* test bug" still trips the flag).
|
|
168
168
|
*/
|
|
169
169
|
export const REGRESSION_DISPUTE_PHRASES = [
|