@really-knows-ai/foundry 3.8.4 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.opencode/plugins/foundry-tools/stage-output-tool.js +113 -0
- package/dist/.opencode/plugins/foundry-tools/stage-tools.js +98 -21
- package/dist/.opencode/plugins/foundry.js +2 -0
- package/dist/CHANGELOG.md +33 -0
- package/dist/docs/architecture.md +1 -1
- package/dist/scripts/appraise-module.js +76 -74
- package/dist/scripts/lib/forge-contract.js +13 -13
- package/dist/scripts/lib/stage-output-schemas.js +174 -0
- package/dist/scripts/orchestrate-cycle.js +6 -6
- package/dist/scripts/orchestrate-dispatch.js +240 -0
- package/dist/scripts/orchestrate-finalise.js +21 -17
- package/dist/scripts/orchestrate-phases.js +3 -3
- package/dist/scripts/orchestrate.js +10 -92
- package/dist/skills/appraise/SKILL.md +9 -27
- package/dist/skills/assay/SKILL.md +6 -13
- package/dist/skills/forge/SKILL.md +10 -11
- package/dist/skills/human-appraise/SKILL.md +144 -57
- package/dist/skills/orchestrate/SKILL.md +2 -2
- package/dist/skills/quench/SKILL.md +5 -5
- package/package.json +1 -1
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
// src/plugin/tools/stage-output-tool.js
|
|
2
|
+
// Stage output tool for foundry stages — validates and accumulates structured
|
|
3
|
+
// output before stage end. Registered as `foundry_stage_output`.
|
|
4
|
+
|
|
5
|
+
import { stageBaseOf, requireActiveStage } from '../../../scripts/lib/stage-guard.js';
|
|
6
|
+
import { guarded, notFailedGuard } from '../../../scripts/lib/guards.js';
|
|
7
|
+
import { makeIO, flowBranchGuard, branchIoFactory, asyncIoFactory } from './helpers.js';
|
|
8
|
+
import {
|
|
9
|
+
validateForgeOutput,
|
|
10
|
+
validateAppraiseOutput,
|
|
11
|
+
validateHumanAppraiseOutput,
|
|
12
|
+
} from '../../../scripts/lib/stage-output-schemas.js';
|
|
13
|
+
|
|
14
|
+
/** @type {Map<string, object[]>} In-memory buffer keyed by stageId::tokenHash. */
|
|
15
|
+
const stageOutputsBuffer = new Map();
|
|
16
|
+
|
|
17
|
+
/** Gate that rejects when the subagent's flow is in a failed state. */
|
|
18
|
+
const gateNotFailed = notFailedGuard(makeIO);
|
|
19
|
+
|
|
20
|
+
/** Validator dispatch table keyed by stage base name. */
|
|
21
|
+
const VALIDATORS = Object.freeze({
|
|
22
|
+
forge: validateForgeOutput,
|
|
23
|
+
appraise: validateAppraiseOutput,
|
|
24
|
+
'human-appraise': validateHumanAppraiseOutput,
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
/**
 * Handle a `foundry_stage_output` call: validate `args.data` against the
 * validator registered for the active stage's base name and, when valid,
 * append it to the in-memory buffer for the active stage/token pair.
 *
 * Every outcome is reported as a JSON string; failures use `{ error }`,
 * success uses `{ ok: true, count }`.
 *
 * @param {{ data: object }} args - Tool arguments; `data` is the payload to validate.
 * @param {{ worktree: string }} context - Tool context; `worktree` is handed to `makeIO`.
 * @returns {Promise<string>} JSON result
 */
async function handleStageOutput(args, context) {
  const io = makeIO(context.worktree);
  const activeResult = requireActiveStage(io);
  if (!activeResult.ok) {
    return JSON.stringify({ error: `foundry_stage_output: ${activeResult.error}` });
  }

  const { stage: stageId, tokenHash } = activeResult.active;
  const base = stageBaseOf(stageId);

  // Own-property lookup only: a base such as "constructor" must not resolve
  // to a member inherited from Object.prototype and be called as a validator.
  const validator = Object.hasOwn(VALIDATORS, base) ? VALIDATORS[base] : undefined;
  if (!validator) {
    return JSON.stringify({ error: `unknown stage base: ${base}` });
  }

  const validationResult = validator(args.data);
  if (!validationResult.ok) {
    const msg = `${base} stage_output: ${validationResult.errors.join('; ')}`;
    return JSON.stringify({ error: msg });
  }

  const key = `${stageId}::${tokenHash}`;
  const buf = stageOutputsBuffer.get(key) ?? [];
  buf.push(args.data);
  stageOutputsBuffer.set(key, buf);

  // Report the size of this stage/token buffer only. Buffers left behind
  // under a different token hash for the same stage are not part of the
  // current attempt and must not inflate the count.
  return JSON.stringify({ ok: true, count: buf.length });
}
|
|
63
|
+
|
|
64
|
+
/**
 * Build the tool map that registers `foundry_stage_output`.
 *
 * The handler is wrapped by `guarded` with the flow-branch guard and the
 * not-failed gate, so those checks run before `handleStageOutput`.
 *
 * @param {{ tool: Function }} deps - Tool factory; also exposes `tool.schema`.
 * @returns {{ foundry_stage_output: object }} Map with the single tool definition.
 */
export function createStageOutputTool({ tool }) {
  const description = 'Validate and store structured output for the active stage. Call before foundry_stage_end(). Forge and human-appraise stages require exactly one call; appraise stages accept zero or more.';
  const data = tool.schema.object().describe('The JSON data to validate against the active stage schema');
  const execute = guarded(
    'foundry_stage_output',
    [flowBranchGuard, gateNotFailed],
    handleStageOutput,
    { branchIo: branchIoFactory, io: asyncIoFactory },
  );

  return {
    foundry_stage_output: tool({ description, args: { data }, execute }),
  };
}
|
|
77
|
+
|
|
78
|
+
/**
 * Collect the buffered outputs whose buffer key matches `stageId`.
 *
 * A key matches when it equals `stageId` exactly or begins with
 * `stageId + '::'`, so both a bare stage alias and a full
 * `stage::tokenHash` key can be passed. The result is a new array; the
 * buffered arrays themselves are not returned.
 *
 * @param {string} stageId - Stage alias (e.g. "forge:cycle-1") or full buffer key.
 * @returns {object[]} Matching output objects in buffer insertion order.
 */
export function getStageOutputs(stageId) {
  const prefix = `${stageId}::`;
  return [...stageOutputsBuffer.entries()]
    .filter(([key]) => key === stageId || key.startsWith(prefix))
    .flatMap(([, outputs]) => outputs);
}
|
|
93
|
+
|
|
94
|
+
/**
 * Drop every buffer entry whose key matches `stageId`.
 *
 * Matching follows the same rule as the lookup: the key equals `stageId`
 * or begins with `stageId + '::'`.
 *
 * @param {string} stageId - Stage alias (e.g. "forge:cycle-1") or full buffer key.
 */
export function clearStageOutputs(stageId) {
  const prefix = `${stageId}::`;
  const matching = [...stageOutputsBuffer.keys()]
    .filter((key) => key === stageId || key.startsWith(prefix));
  matching.forEach((key) => stageOutputsBuffer.delete(key));
}
|
|
106
|
+
|
|
107
|
+
/**
 * Empty the whole output buffer, regardless of stage or token.
 * Exported for test isolation; the leading underscore marks it as internal.
 *
 * @returns {void}
 */
export function _clearAllOutputs() {
  stageOutputsBuffer.clear();
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { execSync } from 'child_process';
|
|
2
2
|
import { createHash } from 'node:crypto';
|
|
3
|
+
import { join } from 'node:path';
|
|
3
4
|
import { readActiveStage, writeActiveStage, clearActiveStage, writeLastStage, clearLastStage } from '../../../scripts/lib/state.js';
|
|
4
5
|
import { verifyToken } from '../../../scripts/lib/token.js';
|
|
5
6
|
import { readOrCreateSecret } from '../../../scripts/lib/secret.js';
|
|
@@ -10,6 +11,17 @@ import { markWorkfileFailed, readFailedStatus, clearWorkfileFailed } from '../..
|
|
|
10
11
|
import { guarded, notFailedGuard } from '../../../scripts/lib/guards.js';
|
|
11
12
|
import { initForgeCallLog, readForgeCallSet } from '../../../scripts/lib/stage-calls.js';
|
|
12
13
|
import { openFeedbackStore } from '../../../scripts/lib/feedback-store.js';
|
|
14
|
+
import { stageBaseOf } from '../../../scripts/lib/stage-guard.js';
|
|
15
|
+
import { ulid } from '../../../scripts/lib/ulid.js';
|
|
16
|
+
import { getStageOutputs, clearStageOutputs } from './stage-output-tool.js';
|
|
17
|
+
|
|
18
|
+
/**
 * Ask the IO adapter to create `outDir`.
 *
 * @param {{ mkdir: Function }} io - IO adapter.
 * @param {string} outDir - Directory path to create.
 */
function ensureDir(io, outDir) {
  const directory = outDir;
  io.mkdir(directory);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Format the message reported when a stage ends with the wrong number of
 * `stage_output` calls.
 *
 * @param {string} stage - Stage base name used as the message prefix.
 * @param {number} expected - Required number of calls; 1 selects the singular noun.
 * @param {number} got - Number of calls actually recorded.
 * @returns {string} The formatted error message.
 */
function contractError(stage, expected, got) {
  const plural = expected === 1 ? '' : 's';
  const noun = `stage_output call${plural}`;
  return `${stage} stage_end: expected exactly ${expected} ${noun}, got ${got}`;
}
|
|
13
25
|
|
|
14
26
|
const FORGE_REQUIRED_TOOLS = [
|
|
15
27
|
'foundry_config_cycle',
|
|
@@ -106,6 +118,9 @@ async function executeStageBegin(args, context, pending) {
|
|
|
106
118
|
};
|
|
107
119
|
writeActiveStage(io, active);
|
|
108
120
|
initForgeIfApplicable(io, active.stage);
|
|
121
|
+
|
|
122
|
+
cleanStageOutputDir(io);
|
|
123
|
+
|
|
109
124
|
return JSON.stringify({ ok: true, active });
|
|
110
125
|
}
|
|
111
126
|
|
|
@@ -113,6 +128,53 @@ function initForgeIfApplicable(io, stage) {
|
|
|
113
128
|
if (stageBase(stage) === 'forge') initForgeCallLog(io);
|
|
114
129
|
}
|
|
115
130
|
|
|
131
|
+
// -- Stage output directory helpers --
|
|
132
|
+
|
|
133
|
+
/**
 * Reset `.foundry/stage-outputs/`: unlink every entry the IO adapter lists
 * there (when the directory exists), then ask the adapter to create it.
 *
 * NOTE(review): every listed entry is removed, not only those of one stage;
 * confirm that is intended when several stages write into this directory.
 *
 * @param {{ exists: Function, readDir: Function, unlink: Function, mkdir: Function }} io
 */
function cleanStageOutputDir(io) {
  const outDir = '.foundry/stage-outputs/';
  const leftovers = io.exists(outDir) ? io.readDir(outDir) : [];
  for (const name of leftovers) {
    io.unlink(join(outDir, name));
  }
  io.mkdir(outDir);
}
|
|
142
|
+
|
|
143
|
+
/**
 * Check the output-count contract for a stage base.
 *
 * `forge` and `human-appraise` must have exactly one buffered output; any
 * other base is accepted with any number of outputs.
 *
 * @param {object[]} outputs - Outputs buffered for the stage.
 * @param {string} base - Stage base name.
 * @returns {string|null} Error message on violation, otherwise `null`.
 */
function checkContractViolation(outputs, base) {
  const requiresExactlyOne = base === 'forge' || base === 'human-appraise';
  if (!requiresExactlyOne || outputs.length === 1) return null;
  return contractError(base, 1, outputs.length);
}
|
|
151
|
+
|
|
152
|
+
/**
 * Write the buffered outputs as `<id>.jsonl` in `.foundry/stage-outputs/`.
 *
 * The content goes to a `.tmp-<id>` file first and is then renamed to its
 * final name. With no outputs the file is empty; otherwise it holds one
 * JSON document per line, each terminated by a newline.
 *
 * @param {{ mkdir: Function, writeFile: Function, rename: Function }} io
 * @param {object[]} outputs - Outputs to serialise.
 * @param {string} id - Identifier used for both the temp and the final file name.
 */
function writeAtomicOutputFile(io, outputs, id) {
  const outDir = '.foundry/stage-outputs/';
  const tmpPath = `${outDir}.tmp-${id}`;
  const finalPath = `${outDir}${id}.jsonl`;

  ensureDir(io, outDir);

  const payload = outputs.length === 0
    ? ''
    : `${outputs.map((entry) => JSON.stringify(entry)).join('\n')}\n`;
  io.writeFile(tmpPath, payload);
  io.rename(tmpPath, finalPath);
}
|
|
163
|
+
|
|
164
|
+
/**
 * Run the stage-end memory sync and convert any failure into an error
 * object instead of a rejection.
 *
 * `syncMemoryAtStageEnd` is async, so its failures arrive as promise
 * rejections. The call is awaited inside the `try` so that the `catch`
 * actually observes them; returning the bare promise would bypass it.
 *
 * @param {string} worktree - Worktree path forwarded to the sync.
 * @returns {Promise<*>} The sync result, or `{ error }` when the sync failed.
 */
async function trySyncMemory(worktree) {
  try {
    return await syncMemoryAtStageEnd(worktree);
  } catch {
    return { error: 'memory sync at stage end failed' };
  }
}
|
|
171
|
+
|
|
172
|
+
/**
 * Read the active stage record through the IO adapter.
 *
 * @param {object} io - IO adapter passed to `readActiveStage`.
 * @returns {object|null} The active stage record, or `null` when the read
 *   yields a falsy value.
 */
function activeStageOrError(io) {
  return readActiveStage(io) || null;
}
|
|
177
|
+
|
|
116
178
|
// -- Helpers for foundry_stage_end --
|
|
117
179
|
|
|
118
180
|
function markWorkfileFailedSilently(io, msg) {
|
|
@@ -127,34 +189,51 @@ async function syncMemoryAtStageEnd(worktree) {
|
|
|
127
189
|
}
|
|
128
190
|
}
|
|
129
191
|
|
|
192
|
+
/**
 * Close out the active stage and run the stage-end memory sync.
 *
 * Records the last-stage entry (with an empty summary), clears the active
 * stage, then awaits the memory sync. A sync failure marks the workfile as
 * failed and is reported to the caller rather than thrown.
 *
 * @param {object} io - IO adapter.
 * @param {{ cycle: *, stage: string, baseSha: string }} active - Active stage record.
 * @param {{ worktree: string }} context - Tool context.
 * @returns {Promise<{}|{error: string, flow_failed: true}>} Empty object on
 *   success, otherwise the failure description.
 */
async function finishStageAndSync(io, active, context) {
  const { cycle, stage, baseSha } = active;
  writeLastStage(io, { cycle, stage, baseSha, summary: '' });
  clearActiveStage(io);

  try {
    await syncMemoryAtStageEnd(context.worktree);
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    const msg = `memory sync at stage end failed: ${reason}`;
    markWorkfileFailedSilently(io, msg);
    return { error: msg, flow_failed: true };
  }
  return {};
}
|
|
206
|
+
|
|
130
207
|
async function executeStageEnd(args, context) {
|
|
131
208
|
const io = makeIO(context.worktree);
|
|
209
|
+
|
|
132
210
|
const active = readActiveStage(io);
|
|
133
211
|
if (!active) {
|
|
134
212
|
return JSON.stringify({ error: 'foundry_stage_end requires active stage; current: none' });
|
|
135
213
|
}
|
|
136
214
|
|
|
137
|
-
|
|
138
|
-
|
|
215
|
+
verifyForgeToolsIfApplicable(io, active);
|
|
216
|
+
|
|
217
|
+
const outputs = getStageOutputs(active.stage + '::' + active.tokenHash);
|
|
218
|
+
const base = stageBaseOf(active.stage);
|
|
219
|
+
const violation = checkContractViolation(outputs, base);
|
|
220
|
+
if (violation) {
|
|
221
|
+
return JSON.stringify({ error: violation });
|
|
139
222
|
}
|
|
140
223
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
baseSha: active.baseSha,
|
|
145
|
-
summary: args.summary,
|
|
146
|
-
});
|
|
147
|
-
clearActiveStage(io);
|
|
224
|
+
const id = ulid();
|
|
225
|
+
writeAtomicOutputFile(io, outputs, id);
|
|
226
|
+
clearStageOutputs(active.stage + '::' + active.tokenHash);
|
|
148
227
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
228
|
+
const result = await finishStageAndSync(io, active, context);
|
|
229
|
+
if (result.error) return JSON.stringify(result);
|
|
230
|
+
return JSON.stringify({ ok: true });
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
function verifyForgeToolsIfApplicable(io, active) {
|
|
234
|
+
if (stageBase(active.stage) === 'forge') {
|
|
235
|
+
verifyAndManageForgeTools(io, active);
|
|
156
236
|
}
|
|
157
|
-
return JSON.stringify({ ok: true, summary: args.summary });
|
|
158
237
|
}
|
|
159
238
|
|
|
160
239
|
function postForbiddenToolsFeedback(io, active, forbidden) {
|
|
@@ -255,10 +334,8 @@ export function createStageTools({ tool, pending }) {
|
|
|
255
334
|
}),
|
|
256
335
|
|
|
257
336
|
foundry_stage_end: tool({
|
|
258
|
-
description: 'Close the active subagent work stage
|
|
259
|
-
args: {
|
|
260
|
-
summary: tool.schema.string().describe('Short summary of the work done'),
|
|
261
|
-
},
|
|
337
|
+
description: 'Close the active subagent work stage. Output must be provided via foundry_stage_output before calling this tool. Validates the output contract for the active stage, writes accumulated outputs to a JSONL file, and clears the stage.',
|
|
338
|
+
args: {},
|
|
262
339
|
execute: guarded('foundry_stage_end', [flowBranchGuard],
|
|
263
340
|
executeStageEnd,
|
|
264
341
|
{ branchIo: branchIoFactory, io: asyncIoFactory }),
|
|
@@ -36,6 +36,7 @@ import { createMemoryAdminTools } from './foundry-tools/memory-admin-tools.js';
|
|
|
36
36
|
import { createSnapshotTools } from './foundry-tools/snapshot-tools.js';
|
|
37
37
|
import { createAttestationTools } from './foundry-tools/attestation-tools.js';
|
|
38
38
|
import { createRefreshAgentsTool } from './foundry-tools/refresh-agents-tool.js';
|
|
39
|
+
import { createStageOutputTool } from './foundry-tools/stage-output-tool.js';
|
|
39
40
|
import { resolveGit, resolvePnpm } from '../../scripts/lib/tool-paths.js';
|
|
40
41
|
|
|
41
42
|
function findPackageRoot(startDir) {
|
|
@@ -201,6 +202,7 @@ function buildTools(createTool, pending) {
|
|
|
201
202
|
...createSnapshotTools({ tool: createTool }),
|
|
202
203
|
...createAttestationTools({ tool: createTool }),
|
|
203
204
|
...createRefreshAgentsTool({ tool: createTool }),
|
|
205
|
+
...createStageOutputTool({ tool: createTool }),
|
|
204
206
|
};
|
|
205
207
|
}
|
|
206
208
|
|
package/dist/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,38 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [3.9.0] - 2026-05-30
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- Structured stage output schemas with JSON Schema-style validation for forge, appraise, and human-appraise outputs. Each schema defines required and optional fields with type and enum constraints.
|
|
8
|
+
- `foundry_stage_output` tool: stages can now return structured JSON output instead of free-text summaries.
|
|
9
|
+
- `foundry_stage_end` contract enforcement: validates stage output against the declared schema and rejects malformed responses with actionable error messages.
|
|
10
|
+
- Orchestrator integration: stage output validation wired into the orchestration post-dispatch path.
|
|
11
|
+
- E2E integration tests and skill guidance for the stage output tooling.
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
|
|
15
|
+
- Appraise context now prefers the cycle frontmatter `models.appraise` value over the caller's `defaultModel` when resolving the appraise subagent model.
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
|
|
19
|
+
- Reviewer agent and systematic-fix-and-review skill updated.
|
|
20
|
+
|
|
21
|
+
## [3.8.5] - 2026-05-27
|
|
22
|
+
|
|
23
|
+
### Changed
|
|
24
|
+
|
|
25
|
+
- Human-appraise skill redesigned with two distinct review modes:
|
|
26
|
+
- **Clean review** (no unresolved feedback): Shows `git diff --stat` filtered to artefact type file-patterns, not full file content. Uses the question tool to offer Approve or Provide feedback.
|
|
27
|
+
- **Feedback review** (unresolved feedback exists): Presents each item individually via the question tool. User picks Agree (keep open for forge), Disagree (resolve with approved override), or Comment (add human feedback).
|
|
28
|
+
- All user interaction now uses the question tool (structured options) instead of free-text prompts.
|
|
29
|
+
|
|
30
|
+
- Appraise consolidation now propagates finalize violations instead of swallowing them. If `finaliseStage` returns a violation (e.g. commit rejected), `consolidateAppraise` surfaces it rather than returning `{ ok: true }`.
|
|
31
|
+
|
|
32
|
+
### Fixed
|
|
33
|
+
|
|
34
|
+
- Sort violation details are now passed through to the terminal handler. Previously, `handleSortResult` destructured `sortResult` but didn't forward the `details` field to `resolveRouteResult`, causing all sort violations to show the default "sort returned violation" message.
|
|
35
|
+
|
|
3
36
|
## [3.8.4] - 2026-05-27
|
|
4
37
|
|
|
5
38
|
### Added
|
|
@@ -122,7 +122,7 @@ Foundry uses HMAC-SHA256 tokens to gate stage execution. Tokens are single-use,
|
|
|
122
122
|
- The token has not expired.
|
|
123
123
|
- The claimed `stage` and `cycle` match the token's signed payload.
|
|
124
124
|
4. **Activate.** On success, the stage is recorded in `.foundry/active-stage.json`. Mutation tools (`foundry_feedback_*`, `foundry_artefacts_*`, etc.) now check that their role matches the active stage.
|
|
125
|
-
5. **End.** The sub-agent's **last** call is `foundry_stage_end(
|
|
125
|
+
5. **End.** The sub-agent's **last** call is `foundry_stage_end()`. This removes `.foundry/active-stage.json` and writes `.foundry/last-stage.json` for the orchestrator's finalize step.
|
|
126
126
|
6. **Finalize.** The orchestrator's internal finalize step runs after `stage_end`, scanning the git diff and committing the stage.
|
|
127
127
|
|
|
128
128
|
### Secret key
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
* action.
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
|
+
import path from 'node:path';
|
|
19
20
|
import { getArtefactFiles, computeArtefactVersion } from './lib/artefacts.js';
|
|
20
21
|
import { selectAppraisers, getCycleDefinition } from './lib/config.js';
|
|
21
22
|
import { openFeedbackStore } from './lib/feedback-store.js';
|
|
@@ -170,9 +171,29 @@ async function resolveStaleAppraiseFeedback(ctx) {
|
|
|
170
171
|
* appraise feedback, and advances the cycle to the next stage via finalize.
|
|
171
172
|
*
|
|
172
173
|
* @param {object} ctx
|
|
173
|
-
* @param {Array<{ok: boolean,
|
|
174
|
+
* @param {Array<{ok: boolean, error?: string}>} lastResults
|
|
174
175
|
* @returns {Promise<{ok: boolean, summary?: string}|violation>}
|
|
175
176
|
*/
|
|
177
|
+
/**
 * List the `.jsonl` files in `.foundry/stage-outputs`.
 *
 * Any failure while listing or filtering (including a non-array listing)
 * results in an empty list rather than an error.
 *
 * @param {{ readDir: Function }} io - IO adapter; `readDir` is awaited.
 * @returns {Promise<string[]>} Paths of the `.jsonl` files, joined onto the directory.
 */
async function readAppraiseStageOutputs(io) {
  const dir = '.foundry/stage-outputs';
  try {
    const names = await io.readDir(dir);
    if (!Array.isArray(names)) return [];
    return names.flatMap((name) => (name.endsWith('.jsonl') ? [path.join(dir, name)] : []));
  } catch {
    return [];
  }
}
|
|
188
|
+
|
|
189
|
+
/**
 * Delete the given stage output files on a best-effort basis.
 *
 * A failure on one file never stops the loop: a missing file (`ENOENT`)
 * is ignored, any other error is logged as a warning.
 *
 * @param {string[]} filePaths - Files to remove.
 * @param {{ unlink: Function }} io - IO adapter.
 */
function cleanupStageOutputFiles(filePaths, io) {
  for (const target of filePaths) {
    try {
      io.unlink(target);
    } catch (err) {
      const alreadyGone = err.code === 'ENOENT';
      if (!alreadyGone) {
        console.warn('appraise: failed to delete output file', target, err.message);
      }
    }
  }
}
|
|
196
|
+
|
|
176
197
|
export async function consolidateAppraise(ctx, lastResults) {
|
|
177
198
|
const baseSha = ctx.activeStage?.baseSha;
|
|
178
199
|
if (!baseSha) {
|
|
@@ -188,89 +209,76 @@ export async function consolidateAppraise(ctx, lastResults) {
|
|
|
188
209
|
|
|
189
210
|
await resolveStaleAppraiseFeedback(ctx);
|
|
190
211
|
|
|
191
|
-
const
|
|
212
|
+
const filePaths = await readAppraiseStageOutputs(ctx.io);
|
|
213
|
+
const consolidated = parseConsolidated(filePaths, ctx.io);
|
|
192
214
|
const stageId = `appraise:${ctx.cycleId}`;
|
|
193
215
|
|
|
194
216
|
const artefactVersion = await computeAppraiseArtefactVersion(ctx);
|
|
195
217
|
postConsolidatedFeedback(ctx, consolidated, artefactVersion);
|
|
196
218
|
resolvePriorAppraise(ctx, consolidated, stageId);
|
|
197
219
|
|
|
220
|
+
cleanupStageOutputFiles(filePaths, ctx.io);
|
|
221
|
+
|
|
198
222
|
const summary = buildConsolidateSummary(consolidated.length);
|
|
223
|
+
return finalizeAndReturn(ctx, stageId, summary, baseSha);
|
|
224
|
+
}
|
|
199
225
|
|
|
200
|
-
|
|
226
|
+
/**
 * Run the context's finalize step for the stage and translate its outcome.
 *
 * A finalize result whose `action` is `'violation'` is returned as-is;
 * anything else becomes a success result carrying the summary.
 *
 * @param {{ finalize: Function, activeStage: object }} ctx - Orchestration context.
 * @param {string} stageId - Stage identifier recorded as the last stage.
 * @param {string} summary - Summary recorded with the last stage.
 * @param {string} baseSha - Base commit recorded with the last stage.
 * @returns {Promise<object>} The violation, or `{ ok: true, summary }`.
 */
async function finalizeAndReturn(ctx, stageId, summary, baseSha) {
  const lastStage = { stage: stageId, summary, baseSha };
  const outcome = await ctx.finalize({ lastStage, activeStage: ctx.activeStage });
  return outcome?.action === 'violation' ? outcome : { ok: true, summary };
}
|
|
207
235
|
|
|
208
236
|
/**
|
|
209
|
-
* Parse
|
|
210
|
-
* combined issue list by (file, law-id, issue text).
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
for (const result of successful) {
|
|
216
|
-
const issues = parseAppraiserJsonl(result.output || '');
|
|
217
|
-
all.push(...issues);
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
return deduplicateIssues(all);
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
/**
|
|
224
|
-
* Parse appraiser JSONL output.
|
|
237
|
+
* Parse consolidated findings from stage output files and de-duplicate
|
|
238
|
+
* the combined issue list by (file, law-id, issue text).
|
|
239
|
+
*
|
|
240
|
+
* Reads each file as JSONL (one JSON object per line), parses every line,
|
|
241
|
+
 * and collects appraiser findings. Invalid lines are skipped silently;
 * unreadable files are skipped with a warning, not a crash.
|
|
225
243
|
*
|
|
226
|
-
*
|
|
227
|
-
*
|
|
228
|
-
*
|
|
244
|
+
* @param {string[]} filePaths - Array of paths to .jsonl files
|
|
245
|
+
* @param {object} io - IO adapter with readFile
|
|
246
|
+
* @returns {Array<{file: string, law: string, issue: string, evidence: string}>}
|
|
229
247
|
*/
|
|
230
|
-
function
|
|
231
|
-
|
|
232
|
-
const lines = output.trim().split('\n');
|
|
233
|
-
|
|
234
|
-
for (const line of lines) {
|
|
235
|
-
const issue = parseAppraiserLine(line);
|
|
236
|
-
if (issue) issues.push(issue);
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
return issues;
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
function parseAppraiserLine(line) {
|
|
243
|
-
const trimmed = line.trim();
|
|
244
|
-
if (!trimmed) return null;
|
|
245
|
-
|
|
246
|
-
const obj = tryJsonParseLine(trimmed);
|
|
247
|
-
if (!obj) return null;
|
|
248
|
-
|
|
249
|
-
return validateJsonlIssue(obj);
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
function tryJsonParseLine(line) {
|
|
253
|
-
try { return JSON.parse(line); } catch { return null; }
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
function validateJsonlIssue(obj) {
|
|
257
|
-
if (!hasStringField(obj, 'file')) return null;
|
|
258
|
-
if (!hasStringField(obj, 'text')) return null;
|
|
259
|
-
|
|
260
|
-
return {
|
|
261
|
-
file: obj.file,
|
|
262
|
-
law: strOrEmpty(obj.law),
|
|
263
|
-
issue: obj.text,
|
|
264
|
-
evidence: strOrEmpty(obj.evidence),
|
|
265
|
-
};
|
|
248
|
+
/**
 * Tell whether a parsed value is a usable issue record: a truthy value
 * whose `file` and `text` are both non-empty strings.
 *
 * @param {*} obj - Parsed JSON value.
 * @returns {boolean} `true` when both required fields are present.
 */
function isValidIssue(obj) {
  if (!obj) return false;
  const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0;
  return isNonEmptyString(obj.file) && isNonEmptyString(obj.text);
}
|
|
267
251
|
|
|
268
|
-
function
|
|
269
|
-
|
|
252
|
+
/**
 * Turn one JSONL line into a normalised issue record.
 *
 * The record's `issue` comes from the line's `text` field; `law` and
 * `evidence` fall back to the empty string when they are not strings.
 *
 * @param {string} line - One line of a stage output file.
 * @returns {{file: string, law: string, issue: string, evidence: string}|null}
 *   The record, or `null` when the line is not valid JSON or fails
 *   `isValidIssue`.
 */
function parseConsolidatedLine(line) {
  let parsed;
  try {
    parsed = JSON.parse(line);
  } catch {
    return null;
  }
  if (!isValidIssue(parsed)) return null;

  const { file, law, text, evidence } = parsed;
  const stringOrEmpty = (value) => (typeof value === 'string' ? value : '');
  return {
    file,
    law: stringOrEmpty(law),
    issue: text,
    evidence: stringOrEmpty(evidence),
  };
}
|
|
271
266
|
|
|
272
|
-
function
|
|
273
|
-
|
|
267
|
+
/**
 * Read every stage output file, parse it as JSONL and return the
 * de-duplicated list of issue records.
 *
 * A file that cannot be read is reported with a warning and skipped.
 * Empty lines are ignored, and lines that `parseConsolidatedLine` rejects
 * are dropped.
 *
 * @param {string[]} filePaths - Paths of the `.jsonl` files to read.
 * @param {{ readFile: Function }} io - IO adapter; `readFile` is called synchronously.
 * @returns {Array<{file: string, law: string, issue: string, evidence: string}>}
 *   Result of `deduplicateIssues` over all collected records.
 */
function parseConsolidated(filePaths, io) {
  const collected = [];
  for (const filePath of filePaths) {
    let raw;
    try {
      raw = io.readFile(filePath);
    } catch (err) {
      console.warn(`appraise: failed to read output file ${filePath}:`, err.message);
      continue;
    }
    raw.trim().split('\n').forEach((line) => {
      if (!line) return;
      const issue = parseConsolidatedLine(line);
      if (issue) collected.push(issue);
    });
  }
  return deduplicateIssues(collected);
}
|
|
275
283
|
|
|
276
284
|
/**
|
|
@@ -343,9 +351,7 @@ function resolvePriorAppraise(ctx, consolidated, stageId) {
|
|
|
343
351
|
* Build the summary string for consolidation.
|
|
344
352
|
*/
|
|
345
353
|
function buildConsolidateSummary(count) {
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
return `${count} issue(s) found by appraisers`;
|
|
354
|
+
return count === 0 ? 'No issues found by appraisers' : `actioned:${count}`;
|
|
349
355
|
}
|
|
350
356
|
|
|
351
357
|
// ---------------------------------------------------------------------------
|
|
@@ -357,7 +363,7 @@ function buildConsolidateSummary(count) {
|
|
|
357
363
|
*
|
|
358
364
|
* The prompt contains only the appraiser's personality and the artefact type
|
|
359
365
|
* ID. The subagent discovers artefact files, laws, and file-patterns via tool
|
|
360
|
-
* calls and
|
|
366
|
+
* calls and uses foundry_stage_output to report each violation.
|
|
361
367
|
*/
|
|
362
368
|
function buildAppraiserPrompt({ appraiser, typeId }) {
|
|
363
369
|
const lines = [
|
|
@@ -373,16 +379,12 @@ function buildAppraiserPrompt({ appraiser, typeId }) {
|
|
|
373
379
|
'- foundry_artefacts_list for changed files',
|
|
374
380
|
'- Read matching files from the worktree',
|
|
375
381
|
'',
|
|
376
|
-
'For each
|
|
377
|
-
'
|
|
378
|
-
'',
|
|
379
|
-
'{"file": "<path>", "law": "<law-slug>", "text": "<issue description>", "evidence": "<quote>"}',
|
|
380
|
-
'',
|
|
381
|
-
'`file` and `text` are required. `law` and `evidence` are recommended.',
|
|
382
|
+
'For each violation, call `foundry_stage_output({ file, law, text, evidence })`.',
|
|
383
|
+
'`file`, `law`, and `text` are required. `evidence` is recommended.',
|
|
382
384
|
'Optional fields `severity` and `location` are passed through unchanged.',
|
|
383
385
|
'',
|
|
384
|
-
'
|
|
385
|
-
'
|
|
386
|
+
'If no issues, call `foundry_stage_end()` directly — no `stage_output` calls needed.',
|
|
387
|
+
'Do NOT write JSONL as text. Call the tool.',
|
|
386
388
|
];
|
|
387
389
|
|
|
388
390
|
return lines.join('\n');
|
|
@@ -4,12 +4,12 @@
|
|
|
4
4
|
*
|
|
5
5
|
* Rules (per spec R4):
|
|
6
6
|
* - Version changed → transition item to `actioned`.
|
|
7
|
-
* - Version unchanged +
|
|
8
|
-
* `appraise` → transition item to `wont-fix` with
|
|
9
|
-
* as the
|
|
10
|
-
* - Version unchanged +
|
|
7
|
+
* - Version unchanged + output.status is `wont-fix` + source base is
|
|
8
|
+
* `appraise` → transition item to `wont-fix` with output.reason
|
|
9
|
+
* as the justification.
|
|
10
|
+
* - Version unchanged + output.status is `wont-fix` + source base is
|
|
11
11
|
* NOT `appraise` → contract violation.
|
|
12
|
-
* - Version unchanged +
|
|
12
|
+
* - Version unchanged + output.status is `done` → contract violation.
|
|
13
13
|
* - No item (null/undefined) → no-op, contract passes.
|
|
14
14
|
*/
|
|
15
15
|
|
|
@@ -45,12 +45,13 @@ function handleVersionChanged(item, feedbackStore, cycleId, postVersion) {
|
|
|
45
45
|
}
|
|
46
46
|
|
|
47
47
|
function handleWontFixWithReason(item, feedbackStore, cycleId, postVersion, reason) {
|
|
48
|
+
const reasonStr = reason || '';
|
|
48
49
|
const result = feedbackStore.transition({
|
|
49
50
|
id: item.id,
|
|
50
51
|
target: 'wont-fix',
|
|
51
52
|
stage: 'forge:' + cycleId,
|
|
52
53
|
cycle: cycleId,
|
|
53
|
-
reason,
|
|
54
|
+
reason: reasonStr,
|
|
54
55
|
});
|
|
55
56
|
if (!result.ok) {
|
|
56
57
|
postSystemFeedback(feedbackStore, cycleId, postVersion, result.error || 'store transition failed');
|
|
@@ -67,21 +68,20 @@ function handleWontFixWithReason(item, feedbackStore, cycleId, postVersion, reas
|
|
|
67
68
|
* exists yet and subsequent runs where all items were already resolved.
|
|
68
69
|
*
|
|
69
70
|
* @param {{ item: object|null, preVersion: string, postVersion: string,
|
|
70
|
-
*
|
|
71
|
+
* output: { status: string, reason?: string }, feedbackStore: object,
|
|
72
|
+
* cycleId: string }} params
|
|
71
73
|
* @returns {{ contractPassed: boolean }}
|
|
72
74
|
*/
|
|
73
|
-
export function enforceForgeContract({ item, preVersion, postVersion,
|
|
75
|
+
export function enforceForgeContract({ item, preVersion, postVersion, output, feedbackStore, cycleId }) {
|
|
74
76
|
if (!item) return { contractPassed: true };
|
|
75
77
|
|
|
76
|
-
const wontFixMatch = summary.match(/WONT-FIX:\s*(.+)/);
|
|
77
78
|
const versionChanged = preVersion !== postVersion;
|
|
78
|
-
const actioned = summary.trim() === 'ACTIONED';
|
|
79
79
|
|
|
80
|
-
if (
|
|
81
|
-
return handleWontFixWithReason(item, feedbackStore, cycleId, postVersion,
|
|
80
|
+
if (output.status === 'wont-fix') {
|
|
81
|
+
return handleWontFixWithReason(item, feedbackStore, cycleId, postVersion, output.reason);
|
|
82
82
|
}
|
|
83
83
|
|
|
84
|
-
if (versionChanged || actioned) {
|
|
84
|
+
if (versionChanged || output.status === 'actioned') {
|
|
85
85
|
return handleVersionChanged(item, feedbackStore, cycleId, postVersion);
|
|
86
86
|
}
|
|
87
87
|
|