opencode-goal-mode 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +180 -0
- package/README.md +158 -52
- package/agents/goal-api-reviewer.md +0 -2
- package/agents/goal-architect.md +0 -2
- package/agents/goal-commentator.md +0 -2
- package/agents/goal-completion-guard.md +0 -2
- package/agents/goal-coordinator.md +0 -2
- package/agents/goal-data-reviewer.md +0 -2
- package/agents/goal-deep-researcher.md +0 -2
- package/agents/goal-diff-reviewer.md +0 -2
- package/agents/goal-doc-reviewer.md +0 -2
- package/agents/goal-doc-writer.md +0 -2
- package/agents/goal-explorer.md +9 -8
- package/agents/goal-final-auditor.md +0 -2
- package/agents/goal-implementer.md +0 -2
- package/agents/goal-mapper.md +0 -2
- package/agents/goal-ops-reviewer.md +0 -2
- package/agents/goal-perf-reviewer.md +0 -2
- package/agents/goal-planner.md +10 -5
- package/agents/goal-prompt-auditor.md +0 -2
- package/agents/goal-quality-gate.md +0 -2
- package/agents/goal-researcher.md +8 -7
- package/agents/goal-reviewer.md +0 -2
- package/agents/goal-security-reviewer.md +0 -2
- package/agents/goal-test-reviewer.md +0 -2
- package/agents/goal-ux-reviewer.md +0 -2
- package/agents/goal-verifier.md +0 -2
- package/agents/goal-web-researcher.md +0 -2
- package/agents/goal.md +9 -8
- package/package.json +13 -9
- package/plugins/goal-guard/agents.js +132 -0
- package/plugins/goal-guard/completion.js +64 -0
- package/plugins/goal-guard/config.js +87 -0
- package/plugins/goal-guard/events.js +65 -0
- package/plugins/goal-guard/gates.js +85 -0
- package/plugins/goal-guard/logger.js +36 -0
- package/plugins/goal-guard/persistence.js +122 -0
- package/plugins/goal-guard/shell.js +1159 -0
- package/plugins/goal-guard/state.js +182 -0
- package/plugins/goal-guard/summary.js +46 -0
- package/plugins/goal-guard/system.js +43 -0
- package/plugins/goal-guard/tools.js +129 -0
- package/plugins/goal-guard/verdicts.js +87 -0
- package/plugins/goal-guard.js +267 -379
- package/plugins/package.json +3 -0
- package/scripts/install.mjs +170 -36
- package/docs/research-report.md +0 -37
- package/scripts/check-npm-publish-ready.mjs +0 -54
- package/scripts/validate-opencode-config.mjs +0 -82
- package/tests/agents.test.mjs +0 -70
- package/tests/commands.test.mjs +0 -23
- package/tests/helpers.mjs +0 -23
- package/tests/install.test.mjs +0 -64
- package/tests/plugin.test.mjs +0 -195
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-session guard state and the store that owns it.
|
|
3
|
+
*
|
|
4
|
+
* Two correctness fixes versus the original design live here:
|
|
5
|
+
*
|
|
6
|
+
* 1. A monotonic `seq` counter (project-scoped) orders every state-changing
|
|
7
|
+
* event. Staleness ("is this review newer than the latest edit?") is
|
|
8
|
+
* decided by comparing seq numbers, not millisecond ISO strings, so two
|
|
9
|
+
* events in the same millisecond can never tie and a review can never be
|
|
10
|
+
* accepted as fresh against an edit it did not actually post-date.
|
|
11
|
+
*
|
|
12
|
+
* 2. The store is created PER PLUGIN INSTANCE (closure state), not as a module
|
|
13
|
+
* global, so concurrent OpenCode projects can no longer cross-contaminate
|
|
14
|
+
* each other's verdicts and dirty flags. Eviction is true LRU by last-touch
|
|
15
|
+
* time and PREFERS inactive sessions: an active session is only evicted when
|
|
16
|
+
* every tracked session is active and the cap is exceeded, in which case the
|
|
17
|
+
* least-recently-touched active one is dropped.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
 * Build a brand-new guard record for a single session.
 *
 * @param {string} [nowIso] ISO timestamp stored as `createdAt`/`updatedAt`.
 *   When omitted (or falsy) the Unix epoch is used instead.
 * @returns {object} A fresh state record. Every array/object field is a new
 *   instance, so two records never share a mutable container.
 */
export function createState(nowIso) {
  const stamp = nowIso ? nowIso : new Date(0).toISOString();
  const record = {};

  // Goal identity and activation.
  record.active = false;
  record.goalText = "";
  record.contract = null;
  record.stickyGates = [];

  // Dirty tracking.
  record.dirty = false;
  record.dirtyReasons = [];
  record.changedFiles = [];
  record.reviewCycles = 0;

  // Sequence numbers and timestamps of the latest edit/verification/review.
  record.lastEditSeq = 0;
  record.lastVerificationSeq = 0;
  record.lastReviewSeq = 0;
  record.lastEditAt = null;
  record.lastReviewAt = null;
  record.lastVerificationAt = null;

  // Review verdicts and verification evidence.
  record.verdicts = [];
  record.evidence = [];
  record.latestVerdict = {};
  record.currentAgent = undefined;

  // Completion bookkeeping.
  record.completedBlocked = 0;
  record.completionRejections = [];
  record.verificationSeen = false;
  record.lastCompletionRejectAt = null;

  // Lifecycle metadata; `touchedAt` starts at 0 until the store touches it.
  record.createdAt = stamp;
  record.updatedAt = stamp;
  record.touchedAt = 0;

  return record;
}
|
|
51
|
+
|
|
52
|
+
// Field names of a freshly created state record; reviveState copies only
// these keys from persisted data.
const KNOWN_FIELDS = Object.keys(createState());
|
|
53
|
+
|
|
54
|
+
/**
 * Rebuild a state object from persisted JSON, dropping unknown fields.
 *
 * Starts from a fresh `createState()` record and copies across only the keys
 * listed in `KNOWN_FIELDS`, then repairs any field whose persisted shape does
 * not match the default shape so later code can rely on it.
 *
 * @param {unknown} raw Parsed persisted state (may be corrupt or missing).
 * @returns {object} A state record with every known field present.
 */
function reviveState(raw) {
  const base = createState();
  if (!raw || typeof raw !== "object") return base;

  for (const field of KNOWN_FIELDS) {
    if (raw[field] !== undefined) base[field] = raw[field];
  }

  // Every field that createState() initialises as an array must stay one;
  // `stickyGates` is included so a corrupt snapshot cannot smuggle in a
  // non-array value.
  const arrayFields = [
    "stickyGates",
    "dirtyReasons",
    "changedFiles",
    "verdicts",
    "evidence",
    "completionRejections",
  ];
  for (const arrField of arrayFields) {
    if (!Array.isArray(base[arrField])) base[arrField] = [];
  }

  // Counters and sequence numbers are compared and incremented numerically,
  // so a persisted string such as "3" would turn `+= 1` into concatenation.
  const numberFields = [
    "reviewCycles",
    "lastEditSeq",
    "lastVerificationSeq",
    "lastReviewSeq",
    "completedBlocked",
    "touchedAt",
  ];
  for (const numField of numberFields) {
    if (typeof base[numField] !== "number" || !Number.isFinite(base[numField])) base[numField] = 0;
  }

  // `latestVerdict` is a plain agent -> verdict dictionary; arrays also report
  // typeof "object", so they are rejected explicitly.
  const latest = base.latestVerdict;
  if (!latest || typeof latest !== "object" || Array.isArray(latest)) base.latestVerdict = {};

  return base;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Create a guard store: a per-instance map of session key -> state record
 * plus the monotonic counters used to order events and rank eviction.
 *
 * @param {object} opts
 * @param {number} [opts.maxSessions=200] Cap on tracked sessions.
 * @param {number} [opts.ttlMs=0] Idle TTL in ms (0 disables).
 * @param {() => number} [opts.clock] Monotonic-ish wall clock for touch/TTL.
 * @returns {object} Store handle exposing `sessions` (the live Map),
 *   `stateFor`, `nowIso`, `nextSeq`, `seqValue`, `snapshot`, `restore`,
 *   `size` and `clear`.
 */
export function createStore({ maxSessions = 200, ttlMs = 0, clock = () => Date.now() } = {}) {
  const sessions = new Map();
  let seq = 0;
  let touchCounter = 0;

  const nowIso = () => new Date(clock()).toISOString();
  const nextSeq = () => (seq += 1);

  // Single source of truth for session keys, shared by stateFor and restore
  // so a restored session is always reachable through stateFor.
  const keyFor = (sessionID) => String(sessionID || "default").trim() || "default";

  /**
   * Delete the session with the lowest eviction rank.
   * Inactive sessions always rank below active ones; within a group the
   * least-recently-touched session ranks lowest.
   * @returns {boolean} false when there was nothing to evict.
   */
  function evictOldest() {
    let oldestKey = null;
    let oldest = Infinity;
    for (const [key, st] of sessions) {
      // Prefer evicting inactive sessions; only evict active ones if nothing else.
      const rank = (st.active ? Number.MAX_SAFE_INTEGER / 2 : 0) + (st.touchedAt || 0);
      if (rank < oldest) {
        oldest = rank;
        oldestKey = key;
      }
    }
    if (oldestKey === null) return false;
    sessions.delete(oldestKey);
    return true;
  }

  /** Make room for one new session: TTL sweep first, then LRU eviction. */
  function evictIfNeeded() {
    // Drop TTL-expired idle sessions first (active sessions are never TTL-evicted).
    if (ttlMs > 0) {
      const cutoff = clock() - ttlMs;
      for (const [key, st] of sessions) {
        if (st.touchedWall !== undefined && st.touchedWall < cutoff && !st.active) {
          sessions.delete(key);
        }
      }
    }
    // `>=` because the caller is about to add one more session.
    while (sessions.size >= maxSessions) {
      if (!evictOldest()) break;
    }
  }

  /** Stamp a record with the next LRU tick and the current wall time. */
  function touch(st) {
    st.touchedAt = (touchCounter += 1);
    st.touchedWall = clock();
  }

  /**
   * Return (creating if necessary) the state record for a session and mark it
   * as just used. Falsy or blank ids map to the "default" session.
   */
  function stateFor(sessionID) {
    const key = keyFor(sessionID);
    let st = sessions.get(key);
    if (!st) {
      evictIfNeeded();
      st = createState(nowIso());
      sessions.set(key, st);
    }
    touch(st);
    return st;
  }

  /** Serialisable view of the store; session records are live references. */
  function snapshot() {
    return {
      version: 1,
      seq,
      touchCounter,
      sessions: Array.from(sessions.entries()),
    };
  }

  /**
   * Merge a previously taken snapshot into this store. Counters only ever
   * move forward; malformed input and malformed entries are ignored.
   */
  function restore(data) {
    if (!data || typeof data !== "object") return;
    if (Number.isFinite(data.seq)) seq = Math.max(seq, data.seq);
    if (Number.isFinite(data.touchCounter)) touchCounter = Math.max(touchCounter, data.touchCounter);
    if (Array.isArray(data.sessions)) {
      const wall = clock();
      for (const entry of data.sessions) {
        if (!Array.isArray(entry) || entry.length !== 2) continue;
        const [key, raw] = entry;
        const st = reviveState(raw);
        // Seed a wall time so restored sessions are subject to TTL eviction
        // (otherwise undefined touchedWall makes them immortal).
        if (st.touchedWall === undefined) st.touchedWall = wall;
        // Keep the LRU clock ahead of every restored tick; otherwise a
        // freshly touched session could rank as older than a restored one.
        if (Number.isFinite(st.touchedAt)) touchCounter = Math.max(touchCounter, st.touchedAt);
        sessions.set(keyFor(key), st);
      }
    }
    // Restoring a snapshot must respect the configured cap, or a persisted
    // oversized store would exceed maxSessions forever (and a later add could
    // evict a live active session in one burst).
    while (sessions.size > maxSessions) {
      if (!evictOldest()) break;
    }
  }

  return {
    sessions,
    stateFor,
    nowIso,
    nextSeq,
    seqValue: () => seq,
    snapshot,
    restore,
    size: () => sessions.size,
    clear: () => sessions.clear(),
  };
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Human-readable summaries of guard state, used in compaction context, block
|
|
3
|
+
* messages, and the `goal_status` tool. Kept pure and dependency-light.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { requiredGates, missingGates } from "./gates.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Render a one-line, semicolon-separated summary of a session's guard state.
 *
 * @param {object} state Session state record.
 * @param {object} config Guard configuration, forwarded to `missingGates`.
 * @returns {string} `key=value` pairs joined by "; "; empty lists print "none".
 */
export function summarizeState(state, config) {
  const orNone = (text) => text || "none";

  // Only the eight most recent verdicts are shown, as `agent:verdict`.
  const recentVerdicts = state.verdicts.slice(-8).map((v) => `${v.agent}:${v.verdict}`);
  const verdictSummary = orNone(recentVerdicts.join(", "));

  const parts = [];
  parts.push(`active=${Boolean(state.active)}`);
  parts.push(`dirty=${Boolean(state.dirty)}`);
  parts.push(`reviewCycles=${state.reviewCycles}`);
  parts.push(`lastEditSeq=${state.lastEditSeq || 0}`);
  parts.push(`lastReviewSeq=${state.lastReviewSeq || 0}`);
  parts.push(`recentVerdicts=${verdictSummary}`);
  parts.push(`missingGates=${orNone(missingGates(state, config).join(" "))}`);
  // Only the five most recent dirty reasons are shown.
  parts.push(`dirtyReasons=${orNone(state.dirtyReasons.slice(-5).join(" | "))}`);
  return parts.join("; ");
}
|
|
25
|
+
|
|
26
|
+
/**
 * Structured status object for the goal_status tool / diagnostics.
 *
 * @param {object} state Session state record.
 * @param {object} config Guard configuration, forwarded to the gate helpers.
 * @returns {object} Plain report: activity/dirty flags, review-cycle count,
 *   required/passing/missing gates, verification info, the last 50 changed
 *   files, the contract, and `completionAllowed` (active with no missing gate).
 */
export function statusReport(state, config) {
  const required = requiredGates(state, config);
  const missing = missingGates(state, config);
  const missingSet = new Set(missing);
  const isActive = Boolean(state.active);

  const report = {};
  report.active = isActive;
  report.dirty = Boolean(state.dirty);
  report.reviewCycles = state.reviewCycles;
  report.requiredGates = required;
  // A required gate passes exactly when it is not reported missing.
  report.passingGates = required.filter((gate) => !missingSet.has(gate));
  report.missingGates = missing;
  report.verificationSeen = Boolean(state.verificationSeen);
  report.lastEditAt = state.lastEditAt;
  report.lastReviewAt = state.lastReviewAt;
  report.lastVerificationAt = state.lastVerificationAt;
  report.evidenceCount = state.evidence.length;
  report.changedFiles = state.changedFiles.slice(-50);
  report.contract = state.contract;
  report.completionAllowed = isActive && missing.length === 0;
  return report;
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Builds the live Goal Guard state block injected into the system prompt via
|
|
3
|
+
* `experimental.chat.system.transform`. This makes the guard's enforcement
|
|
4
|
+
* legible to the model on every turn — it always knows the recorded review
|
|
5
|
+
* cycle count, which gates are missing, and whether completion is currently
|
|
6
|
+
* allowed — turning the guard from a silent blocker into an active steering
|
|
7
|
+
* signal. Only emitted for active goal sessions.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { statusReport } from "./summary.js";
|
|
11
|
+
|
|
12
|
+
/** Join list items with ", ", or return "none" when the list is empty. */
function bullet(list) {
  if (!list.length) return "none";
  return list.join(", ");
}
|
|
15
|
+
|
|
16
|
+
/**
 * Build the Goal Guard markdown block for the system prompt.
 *
 * @param {object|null|undefined} state Session state record.
 * @param {object} config Guard configuration; `completionMarker` is quoted in
 *   the completion line.
 * @returns {string|null} Newline-joined block, or null when there is no state
 *   or the session is not active.
 */
export function buildSystemInjection(state, config) {
  const isActive = Boolean(state) && Boolean(state.active);
  if (!isActive) return null;

  const r = statusReport(state, config);
  const yesNo = (flag) => (flag ? "yes" : "no");

  const criteria = r.contract && r.contract.acceptanceCriteria;
  let contractLine;
  if (criteria && criteria.length) {
    contractLine = `- Goal Contract: ${criteria.length} acceptance criteria recorded.`;
  } else {
    contractLine = "- Goal Contract: not yet recorded. Call `goal_contract` to establish acceptance criteria.";
  }

  // The guidance sentence depends on whether completion is currently allowed.
  let verdictWord;
  let guidance;
  if (r.completionAllowed) {
    verdictWord = "ALLOWED";
    guidance = `You may answer with "${config.completionMarker}" and an accurate "Review cycles: ${r.reviewCycles}" line.`;
  } else {
    verdictWord = "BLOCKED";
    guidance = `Do NOT claim "${config.completionMarker}" yet; resolve the missing gates and re-run review first.`;
  }

  return [
    "## Goal Guard — live enforcement state",
    "This block is injected by the goal-guard plugin and reflects authoritative, " +
      "tracked state. Treat it as ground truth over your own recollection.",
    contractLine,
    `- Review cycles recorded: ${r.reviewCycles}.`,
    `- Working tree dirty since last clean review: ${yesNo(r.dirty)}.`,
    `- Verification observed: ${yesNo(r.verificationSeen)}.`,
    `- Required review gates: ${bullet(r.requiredGates)}.`,
    `- Gates still missing or stale: ${bullet(r.missingGates)}.`,
    `- Completion is currently ${verdictWord}. ${guidance}`,
  ].join("\n");
}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* First-class `goal_*` custom tools.
|
|
3
|
+
*
|
|
4
|
+
* These give the model a structured, machine-checked way to interact with the
|
|
5
|
+
* guard instead of relying on free-text parsing: it can record a Goal Contract,
|
|
6
|
+
* log verification evidence, and read back authoritative gate status. They are
|
|
7
|
+
* registered via the plugin `tool` hook (object key == tool name, verbatim).
|
|
8
|
+
*
|
|
9
|
+
* The `@opencode-ai/plugin` import is isolated to this module so that, if the
|
|
10
|
+
* host cannot resolve it, the entry can skip tool registration while the core
|
|
11
|
+
* guard hooks still load.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { tool } from "@opencode-ai/plugin";
|
|
15
|
+
import { statusReport } from "./summary.js";
|
|
16
|
+
import { recordEvidence } from "./events.js";
|
|
17
|
+
import { refreshStickyGates } from "./gates.js";
|
|
18
|
+
import { createState } from "./state.js";
|
|
19
|
+
|
|
20
|
+
// Short alias for the `schema` builder on the plugin's `tool` helper; used
// below to declare each tool's argument shapes.
const s = tool.schema;
|
|
21
|
+
|
|
22
|
+
/**
 * Build the `goal_*` tool definitions, keyed by tool name.
 *
 * @param {object} deps
 * @param {ReturnType<import("./state.js").createStore>} deps.store
 * @param {object} deps.config
 * @param {() => void} deps.persist Debounced persistence trigger. Anything
 *   that is not a function is treated as a no-op.
 * @returns {object} `goal_status`, `goal_contract`, `goal_evidence` and
 *   `goal_reset` tool definitions.
 */
export function createGoalTools({ store, config, persist }) {
  const save = typeof persist === "function" ? persist : () => {};

  return {
    goal_status: tool({
      description:
        "Return the authoritative Goal Guard state for this session: recorded review " +
        "cycles, required vs. passing vs. missing review gates, dirty/verification status, " +
        "and whether completion is currently allowed. Read-only.",
      args: {},
      async execute(_args, ctx) {
        const state = store.stateFor(ctx.sessionID);
        const report = statusReport(state, config);
        return {
          title: `Goal status: completion ${report.completionAllowed ? "allowed" : "blocked"}`,
          output: JSON.stringify(report, null, 2),
          metadata: { completionAllowed: report.completionAllowed, reviewCycles: report.reviewCycles },
        };
      },
    }),

    goal_contract: tool({
      description:
        "Record or update the Goal Contract for this session (the explicit requirements, " +
        "inferred requirements, non-goals, and acceptance criteria). Establishing a contract " +
        "activates strict goal enforcement and drives which specialist review gates are required.",
      args: {
        original: s.string().describe("The original user request, verbatim or faithfully summarized."),
        requirements: s.array(s.string()).optional().describe("Explicit requirements stated by the user."),
        inferred: s.array(s.string()).optional().describe("Reasonable inferred requirements."),
        nonGoals: s.array(s.string()).optional().describe("Things explicitly out of scope."),
        acceptanceCriteria: s
          .array(s.string())
          .describe("Concrete, checkable acceptance criteria that define done."),
      },
      async execute(args, ctx) {
        const state = store.stateFor(ctx.sessionID);
        state.active = true;
        state.contract = {
          original: String(args.original || ""),
          requirements: args.requirements || [],
          inferred: args.inferred || [],
          nonGoals: args.nonGoals || [],
          acceptanceCriteria: args.acceptanceCriteria || [],
          at: store.nowIso(),
        };
        // Append the request text to goalText only when it is not already
        // there, so updating the contract repeatedly does not grow goalText
        // with duplicate copies of the same request.
        const priorGoalText = String(state.goalText || "");
        const original = state.contract.original;
        state.goalText = priorGoalText.includes(original)
          ? priorGoalText
          : [priorGoalText, original].filter(Boolean).join(" ");
        refreshStickyGates(state);
        state.updatedAt = store.nowIso();
        save();
        const report = statusReport(state, config);
        return {
          title: `Goal Contract recorded (${state.contract.acceptanceCriteria.length} acceptance criteria)`,
          output:
            `Goal Contract stored. Required review gates for this goal: ` +
            `${report.requiredGates.join(", ")}.`,
          metadata: { requiredGates: report.requiredGates },
        };
      },
    }),

    goal_evidence: tool({
      description:
        "Record a piece of verification evidence (a command that was run and its result, " +
        "optionally the acceptance criteria it covers). Counts as observed verification.",
      args: {
        command: s.string().describe("The verification command that was executed."),
        result: s.string().describe("Pass/fail summary and any salient output."),
        criteria: s.array(s.string()).optional().describe("Acceptance criteria this evidence covers."),
      },
      async execute(args, ctx) {
        const state = store.stateFor(ctx.sessionID);
        state.active = true;
        recordEvidence(store, state, args.command, args.result, args.criteria);
        save();
        return {
          title: "Verification evidence recorded",
          output: `Recorded evidence for: ${args.command}. Total evidence entries: ${state.evidence.length}.`,
          metadata: { evidenceCount: state.evidence.length },
        };
      },
    }),

    goal_reset: tool({
      description:
        "Clear all Goal Guard state for this session (contract, dirty flags, verdicts, review " +
        "cycles). Requires confirm=true. Use only when abandoning or restarting a goal.",
      args: {
        confirm: s.boolean().describe("Must be true to actually reset."),
      },
      async execute(args, ctx) {
        if (!args.confirm) {
          return { title: "Reset not confirmed", output: "Pass confirm=true to reset Goal Guard state." };
        }
        // Use the same key normalisation as store.stateFor (trim, fall back to
        // "default") so the reset hits the record the other tools read, then
        // let the store recreate it so the fresh record is touched and takes
        // part in LRU/TTL bookkeeping like any other session.
        const key = String(ctx.sessionID || "default").trim() || "default";
        store.sessions.delete(key);
        store.stateFor(key);
        save();
        return { title: "Goal Guard state reset", output: "All goal state cleared for this session." };
      },
    }),
  };
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verdict extraction and recording.
|
|
3
|
+
*
|
|
4
|
+
* Fixes versus the original:
|
|
5
|
+
* - Last-verdict-wins. A reviewer transcript that says
|
|
6
|
+
* "previously Verdict: FAIL, after fixes Verdict: PASS" records PASS, not
|
|
7
|
+
* FAIL. The reviewer's FINAL line is its conclusion.
|
|
8
|
+
 * - Textual last match wins across line-anchored and loose `Verdict: …`
 *   forms; a line-anchored match is used only to break a tie when both end at
 *   the same position (see parseVerdict).
|
|
11
|
+
* - The `<task><task_result>…</task_result></task>` wrapper that the task tool
|
|
12
|
+
* puts around a subagent's output is unwrapped before scanning.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { CYCLE_CLOSING_AGENT } from "./agents.js";
|
|
16
|
+
|
|
17
|
+
// Captures the body of the first <task_result>…</task_result> wrapper
// (case-insensitive, non-greedy, spans newlines).
const TASK_RESULT_RE = /<task_result>([\s\S]*?)<\/task_result>/i;
|
|
18
|
+
// `Verdict: PASS|FAIL` at the start of a line, allowing leading spaces, tabs,
// `>`, `*`, `_`, `-` and optional bold markers (global, case-insensitive, multiline).
const ANCHORED_RE = /^[ \t>*_-]*\*{0,2}Verdict:?\*{0,2}[\s*_]*(PASS|FAIL)\b/gim;
|
|
19
|
+
// `Verdict: PASS|FAIL` anywhere in the text (global, case-insensitive).
const LOOSE_RE = /Verdict:?\*{0,2}[\s*_]*(PASS|FAIL)\b/gi;
|
|
20
|
+
|
|
21
|
+
/**
 * Coerce an arbitrary value to a string without throwing.
 * Objects are JSON-encoded when possible; anything else goes through String().
 */
function coerceText(value) {
  if (typeof value === "string") return value;
  if (value && typeof value === "object") {
    try {
      const json = JSON.stringify(value);
      if (typeof json === "string") return json;
    } catch {
      // JSON.stringify throws on circular structures and BigInt values; fall
      // through to String() so callers still receive text.
    }
  }
  return String(value ?? "");
}

/**
 * Pull the human-readable text out of a tool/subagent output object.
 *
 * Looks at `output`, then `text`, `message` and `title` (first one that is not
 * null/undefined); when that value is itself an object, its `output`/`text` is
 * used, else the object is JSON-encoded. If the text contains a
 * `<task_result>…</task_result>` wrapper, only the wrapped body is returned.
 *
 * @param {unknown} output String, output object, or null/undefined.
 * @returns {string} Always a string ("" for null/undefined input).
 */
export function textOf(output) {
  if (output == null) return "";
  if (typeof output === "string") return output;
  const raw = output.output ?? output.text ?? output.message ?? output.title ?? "";
  let text;
  if (typeof raw === "string") text = raw;
  // A nested `output`/`text` may itself be a number or object; coerce it so the
  // documented contract (text in, text out) holds for every payload shape.
  else if (raw && typeof raw === "object") text = coerceText(raw.output ?? raw.text ?? raw);
  else text = String(raw ?? "");
  const m = TASK_RESULT_RE.exec(text);
  return m ? m[1] : text;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Extract the final verdict from a body of text.
 *
 * The textually last `Verdict: PASS|FAIL` decides. All loose matches are
 * collected; the last line-anchored match is consulted only as a tiebreak and
 * is chosen when it ends at or after the last loose match. Preferring anchored
 * matches outright would let an early anchored PASS override a later,
 * mid-sentence FAIL.
 *
 * @param {unknown} text Reviewer output.
 * @returns {"PASS"|"FAIL"|null} Upper-cased verdict, or null when the input
 *   is not a non-empty string or contains no verdict.
 */
export function parseVerdict(text) {
  if (typeof text !== "string" || text === "") return null;

  const endOf = (match) => match.index + match[0].length;

  const looseHits = Array.from(text.matchAll(LOOSE_RE));
  if (looseHits.length === 0) return null;
  const finalLoose = looseHits[looseHits.length - 1];

  const anchoredHits = Array.from(text.matchAll(ANCHORED_RE));
  if (anchoredHits.length === 0) return finalLoose[1].toUpperCase();
  const finalAnchored = anchoredHits[anchoredHits.length - 1];

  // Whichever genuinely occurs last wins; the anchored match takes a tie.
  const winner = endOf(finalAnchored) >= endOf(finalLoose) ? finalAnchored : finalLoose;
  return winner[1].toUpperCase();
}
|
|
62
|
+
|
|
63
|
+
/** @returns {boolean} true when `parseVerdict` finds a verdict in `text`. */
export function hasVerdict(text) {
  const verdict = parseVerdict(text);
  return verdict !== null;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Look up the most recent verdict recorded for `agent`.
 * @returns {object|null} The stored entry, or null when none (or a falsy one) exists.
 */
export function latestVerdictFor(state, agent) {
  const found = state.latestVerdict[agent];
  return found ? found : null;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Record a review verdict for `agent`, stamped with the store's current time
 * and its next sequence number.
 *
 * Appends to `state.verdicts` (keeping only the newest 200 entries), updates
 * `latestVerdict[agent]` and the last-review markers, and bumps
 * `reviewCycles` when `agent` is the cycle-closing agent.
 *
 * @returns {{agent: string, verdict: string, at: string, seq: number}} The appended entry.
 */
export function recordVerdict(store, state, agent, verdict) {
  const HISTORY_LIMIT = 200;
  const at = store.nowIso();
  const seq = store.nextSeq();

  const entry = { agent, verdict, at, seq };
  const history = state.verdicts;
  history.push(entry);
  const overflow = history.length - HISTORY_LIMIT;
  if (overflow > 0) history.splice(0, overflow);

  state.latestVerdict[agent] = { verdict, at, seq };
  state.lastReviewAt = at;
  state.lastReviewSeq = seq;
  state.updatedAt = at;

  if (agent === CYCLE_CLOSING_AGENT) {
    state.reviewCycles += 1;
  }
  return entry;
}
|