@really-knows-ai/foundry 3.8.1 → 3.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.opencode/plugins/foundry-tools/stage-tools.js +30 -4
- package/dist/CHANGELOG.md +28 -0
- package/dist/scripts/appraise-module.js +126 -202
- package/dist/scripts/lib/feedback-transitions.js +1 -3
- package/dist/scripts/lib/forge-contract.js +19 -30
- package/dist/scripts/lib/sort-reason.js +1 -1
- package/dist/scripts/lib/stage-calls.js +4 -0
- package/dist/scripts/orchestrate-cycle.js +10 -10
- package/dist/skills/appraise/SKILL.md +36 -128
- package/dist/skills/forge/SKILL.md +18 -23
- package/dist/skills/orchestrate/SKILL.md +3 -5
- package/package.json +1 -1
|
@@ -8,7 +8,7 @@ import { syncStore } from '../../../scripts/lib/memory/store.js';
|
|
|
8
8
|
import { makeIO, makeMemoryIO, branchIoFactory, asyncIoFactory, flowBranchGuard } from './helpers.js';
|
|
9
9
|
import { markWorkfileFailed, readFailedStatus, clearWorkfileFailed } from '../../../scripts/lib/failed-flow.js';
|
|
10
10
|
import { guarded, notFailedGuard } from '../../../scripts/lib/guards.js';
|
|
11
|
-
import { initForgeCallLog,
|
|
11
|
+
import { initForgeCallLog, readForgeCallSet } from '../../../scripts/lib/stage-calls.js';
|
|
12
12
|
import { openFeedbackStore } from '../../../scripts/lib/feedback-store.js';
|
|
13
13
|
|
|
14
14
|
const FORGE_REQUIRED_TOOLS = [
|
|
@@ -18,6 +18,12 @@ const FORGE_REQUIRED_TOOLS = [
|
|
|
18
18
|
'foundry_config_laws',
|
|
19
19
|
];
|
|
20
20
|
|
|
21
|
+
const FORGE_FORBIDDEN_TOOLS = [
|
|
22
|
+
'foundry_feedback_action',
|
|
23
|
+
'foundry_feedback_wontfix',
|
|
24
|
+
'foundry_feedback_resolve',
|
|
25
|
+
];
|
|
26
|
+
|
|
21
27
|
function stageBase(stage) { return stage.split(':')[0]; }
|
|
22
28
|
|
|
23
29
|
const gateNotFailed = notFailedGuard(makeIO);
|
|
@@ -25,9 +31,16 @@ const gateNotFailed = notFailedGuard(makeIO);
|
|
|
25
31
|
// -- Helpers for forge tool call verification --
|
|
26
32
|
|
|
27
33
|
function verifyAndManageForgeTools(io, active) {
|
|
28
|
-
const
|
|
29
|
-
|
|
30
|
-
|
|
34
|
+
const callSet = readForgeCallSet(io);
|
|
35
|
+
const forbidden = FORGE_FORBIDDEN_TOOLS.filter(t => callSet.has(t));
|
|
36
|
+
const missing = FORGE_REQUIRED_TOOLS.filter(t => !callSet.has(t));
|
|
37
|
+
io.unlink('.foundry/.forge-tool-calls.jsonl');
|
|
38
|
+
if (forbidden.length) {
|
|
39
|
+
postForbiddenToolsFeedback(io, active, forbidden);
|
|
40
|
+
return;
|
|
41
|
+
}
|
|
42
|
+
if (missing.length) {
|
|
43
|
+
postMissingToolsFeedback(io, active, missing);
|
|
31
44
|
return;
|
|
32
45
|
}
|
|
33
46
|
resolveSystemFeedback(io, active);
|
|
@@ -144,6 +157,19 @@ async function executeStageEnd(args, context) {
|
|
|
144
157
|
return JSON.stringify({ ok: true, summary: args.summary });
|
|
145
158
|
}
|
|
146
159
|
|
|
160
|
+
function postForbiddenToolsFeedback(io, active, forbidden) {
|
|
161
|
+
try {
|
|
162
|
+
const store = openFeedbackStore('WORK.feedback.yaml', io);
|
|
163
|
+
store.add({
|
|
164
|
+
file: '(forge)',
|
|
165
|
+
tag: 'system:forbidden-tool-calls',
|
|
166
|
+
text: `Forbidden forge tool calls: ${forbidden.join(', ')}. Forge subagents do not manage feedback — the orchestrator handles transitions.`,
|
|
167
|
+
source: active.stage,
|
|
168
|
+
cycle: active.cycle,
|
|
169
|
+
});
|
|
170
|
+
} catch { /* feedback file not initialised yet; non-critical */ }
|
|
171
|
+
}
|
|
172
|
+
|
|
147
173
|
function postMissingToolsFeedback(io, active, missing) {
|
|
148
174
|
try {
|
|
149
175
|
const store = openFeedbackStore('WORK.feedback.yaml', io);
|
package/dist/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [3.8.3] - 2026-05-27
|
|
4
|
+
|
|
5
|
+
### Changed
|
|
6
|
+
|
|
7
|
+
- Appraise subagents no longer receive artefact content or laws in their prompt. The dispatch prompt contains only the appraiser's personality and the artefact type ID. The subagent discovers artefact files, laws, and file-patterns via tool calls (`foundry_config_artefact_type`, `foundry_config_laws`, `foundry_artefacts_list`) and reads files from the worktree.
|
|
8
|
+
|
|
9
|
+
- Appraise subagent output format changed from YAML to JSONL (one JSON object per line), matching the quench validator protocol. Required fields: `file`, `text`. Recommended: `law`, `evidence`. Optional: `severity`, `location`. The consolidate phase parses JSONL and posts feedback with tag `law:<slug>`.
|
|
10
|
+
|
|
11
|
+
- Gather phase now creates one task per appraiser (not per artefact × appraiser). Each appraiser covers all artefacts of the given type via tool-based discovery.
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
|
|
15
|
+
- Removed `js-yaml` dependency from appraise-module.js. All YAML parsing and fallback line-parsing code replaced with JSONL parsing.
|
|
16
|
+
|
|
17
|
+
## [3.8.2] - 2026-05-27
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
|
|
21
|
+
- Forge subagent protocol simplified to three keywords: `DONE` (first generation), `ACTIONED` (file changed), `WONT-FIX: <justification>` (no changes needed). The `foundry_stage_end` summary must be exactly one of these — no descriptions, no explanations. The forge contract recognises `ACTIONED` even without a version change.
|
|
22
|
+
|
|
23
|
+
- WONT-FIX is now allowed for all feedback source types (quench, appraise, human-appraise), not just appraise. The `WONT-FIX:` keyword in the summary transitions the item regardless of source.
|
|
24
|
+
|
|
25
|
+
### Fixed
|
|
26
|
+
|
|
27
|
+
- Forge subagents are blocked from calling `foundry_feedback_action`, `foundry_feedback_wontfix`, or `foundry_feedback_resolve` — `foundry_stage_end` checks the call log and posts `system:forbidden-tool-calls` feedback if any were called.
|
|
28
|
+
|
|
29
|
+
- Single-item dispatch prompts clarified: the orchestrator dispatches one feedback item per forge call. The orchestrate skill instructs the LLM to pass the prompt verbatim without injecting extra items from quench output.
|
|
30
|
+
|
|
3
31
|
## [3.8.1] - 2026-05-27
|
|
4
32
|
|
|
5
33
|
### Fixed
|
|
@@ -2,28 +2,33 @@
|
|
|
2
2
|
* Appraise module — gathers context for parallel appraiser dispatch and
|
|
3
3
|
* consolidates results after all appraisers have run.
|
|
4
4
|
*
|
|
5
|
-
* Gather phase: reads artefacts,
|
|
6
|
-
*
|
|
7
|
-
*
|
|
5
|
+
* Gather phase: reads artefacts, selects appraisers, builds subagent prompts
|
|
6
|
+
* with only personality + type ID (no artefact content or laws inlined), and
|
|
7
|
+
* returns a dispatch_multi action so the orchestrator's LLM dispatches
|
|
8
|
+
* appraisers in parallel.
|
|
8
9
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
10
|
+
* Each appraiser subagent discovers artefacts, laws, and file-patterns via
|
|
11
|
+
* tool calls and returns JSONL — one JSON object per line.
|
|
12
|
+
*
|
|
13
|
+
* Consolidate phase: receives lastResults from the orchestrator, parses JSONL
|
|
14
|
+
* from each appraiser, unions and de-duplicates issues, posts feedback, and
|
|
15
|
+
* finalises the stage so the orchestrator can re-sort and determine the next
|
|
16
|
+
* action.
|
|
12
17
|
*/
|
|
13
18
|
|
|
14
19
|
import { getArtefactFiles, computeArtefactVersion } from './lib/artefacts.js';
|
|
15
|
-
import { selectAppraisers,
|
|
20
|
+
import { selectAppraisers, getCycleDefinition } from './lib/config.js';
|
|
16
21
|
import { openFeedbackStore } from './lib/feedback-store.js';
|
|
17
|
-
import yaml from 'js-yaml';
|
|
18
22
|
|
|
19
23
|
// ---------------------------------------------------------------------------
|
|
20
24
|
// Public API — gather
|
|
21
25
|
// ---------------------------------------------------------------------------
|
|
22
26
|
|
|
23
27
|
/**
|
|
24
|
-
* Gather appraise context: read draft artefacts, select appraisers,
|
|
25
|
-
*
|
|
26
|
-
*
|
|
28
|
+
* Gather appraise context: read draft artefacts, select appraisers, build a
|
|
29
|
+
* dispatch_multi action with one task per appraiser. The subagent prompt
|
|
30
|
+
* contains only the appraiser personality and artefact type ID — the subagent
|
|
31
|
+
* discovers artefact files, laws, and file-patterns via tool calls.
|
|
27
32
|
*
|
|
28
33
|
* @param {object} ctx
|
|
29
34
|
* @param {string} ctx.cycleId
|
|
@@ -36,91 +41,54 @@ import yaml from 'js-yaml';
|
|
|
36
41
|
* @returns {Promise<{action: string, tasks: Array, stage: string, cycle: string}>}
|
|
37
42
|
*/
|
|
38
43
|
export async function gatherAppraiseContext(ctx) {
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
}
|
|
44
|
+
const guarded = guardAppraiseGather(ctx);
|
|
45
|
+
if (guarded) return guarded;
|
|
42
46
|
|
|
43
47
|
await resolveStaleAppraiseFeedback(ctx);
|
|
44
48
|
|
|
45
49
|
const cd = await getCycleDefinition(ctx.foundryDir, ctx.cycleId, ctx.io);
|
|
46
|
-
const outputType = cd.
|
|
47
|
-
if (
|
|
48
|
-
return violation(`cycle ${ctx.cycleId} missing output-type field`, []);
|
|
49
|
-
}
|
|
50
|
-
const baseBranch = ctx.baseBranch || 'main';
|
|
51
|
-
const artefacts = await getArtefactFiles(ctx.foundryDir, outputType, ctx.io, { baseBranch });
|
|
52
|
-
if (artefacts.length === 0) {
|
|
53
|
-
return emptyDispatch(ctx.cycleId);
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
const typedArtefacts = artefacts.map(artefact => ({ ...artefact, type: outputType }));
|
|
57
|
-
const tasks = await collectTasks(typedArtefacts, ctx);
|
|
58
|
-
|
|
59
|
-
return {
|
|
60
|
-
action: 'dispatch_multi',
|
|
61
|
-
tasks,
|
|
62
|
-
stage: `appraise:${ctx.cycleId}`,
|
|
63
|
-
cycle: ctx.cycleId,
|
|
64
|
-
};
|
|
65
|
-
}
|
|
50
|
+
const outputType = validateOutputType(cd, ctx.cycleId);
|
|
51
|
+
if (typeof outputType !== 'string') return outputType;
|
|
66
52
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
*/
|
|
70
|
-
async function collectTasks(artefacts, ctx) {
|
|
71
|
-
const tasks = [];
|
|
72
|
-
const typeCache = new Map();
|
|
73
|
-
|
|
74
|
-
for (const artefact of artefacts) {
|
|
75
|
-
const entry = await resolveTypeEntry(artefact.type, typeCache, ctx);
|
|
76
|
-
if (!entry) continue;
|
|
53
|
+
const artefacts = await fetchAppraiseArtefacts(ctx, outputType);
|
|
54
|
+
if (!Array.isArray(artefacts)) return artefacts;
|
|
77
55
|
|
|
78
|
-
|
|
56
|
+
const appraisers = await selectAppraisers(ctx.foundryDir, outputType, { io: ctx.io });
|
|
57
|
+
if (appraisers.length === 0) {
|
|
58
|
+
return emptyDispatch(ctx.cycleId);
|
|
79
59
|
}
|
|
80
60
|
|
|
81
|
-
return
|
|
61
|
+
return buildGatherResponse(appraisers, outputType, ctx);
|
|
82
62
|
}
|
|
83
63
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
*/
|
|
88
|
-
async function resolveTypeEntry(typeId, cache, ctx) {
|
|
89
|
-
if (cache.has(typeId)) {
|
|
90
|
-
return cache.get(typeId);
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
const [appraisers, laws] = await Promise.all([
|
|
94
|
-
selectAppraisers(ctx.foundryDir, typeId, { io: ctx.io }),
|
|
95
|
-
getLaws(ctx.foundryDir, ctx.io, { typeId }),
|
|
96
|
-
]);
|
|
64
|
+
function guardAppraiseGather(ctx) {
|
|
65
|
+
return ctx.cycleId ? null : violation('cycleId is required', []);
|
|
66
|
+
}
|
|
97
67
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
return
|
|
68
|
+
function validateOutputType(cd, cycleId) {
|
|
69
|
+
const outputType = cd.frontmatter['output-type'];
|
|
70
|
+
return outputType ?? violation(`cycle ${cycleId} missing output-type field`, []);
|
|
101
71
|
}
|
|
102
72
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
content = ctx.io.readFile(artefact.file);
|
|
110
|
-
}
|
|
73
|
+
async function fetchAppraiseArtefacts(ctx, outputType) {
|
|
74
|
+
const baseBranch = ctx.baseBranch || 'main';
|
|
75
|
+
const artefacts = await getArtefactFiles(ctx.foundryDir, outputType, ctx.io, { baseBranch });
|
|
76
|
+
if (artefacts.length === 0) return emptyDispatch(ctx.cycleId);
|
|
77
|
+
return artefacts;
|
|
78
|
+
}
|
|
111
79
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
});
|
|
80
|
+
function buildGatherResponse(appraisers, outputType, ctx) {
|
|
81
|
+
const tasks = appraisers.map(appraiser => ({
|
|
82
|
+
subagent_type: resolveSubagentType(appraiser, ctx),
|
|
83
|
+
prompt: buildAppraiserPrompt({ appraiser, typeId: outputType }),
|
|
84
|
+
}));
|
|
118
85
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
}
|
|
123
|
-
|
|
86
|
+
return {
|
|
87
|
+
action: 'dispatch_multi',
|
|
88
|
+
tasks,
|
|
89
|
+
stage: `appraise:${ctx.cycleId}`,
|
|
90
|
+
cycle: ctx.cycleId,
|
|
91
|
+
};
|
|
124
92
|
}
|
|
125
93
|
|
|
126
94
|
/**
|
|
@@ -197,9 +165,9 @@ async function resolveStaleAppraiseFeedback(ctx) {
|
|
|
197
165
|
/**
|
|
198
166
|
* Consolidate appraiser results and finalise the appraise stage.
|
|
199
167
|
*
|
|
200
|
-
* Called by orchestrator after all appraisers have completed. Parses
|
|
201
|
-
* posts combined feedback, resolves prior
|
|
202
|
-
* the cycle to the next stage via finalize.
|
|
168
|
+
* Called by orchestrator after all appraisers have completed. Parses JSONL
|
|
169
|
+
* from each appraiser's output, posts combined feedback, resolves prior
|
|
170
|
+
* appraise feedback, and advances the cycle to the next stage via finalize.
|
|
203
171
|
*
|
|
204
172
|
* @param {object} ctx
|
|
205
173
|
* @param {Array<{ok: boolean, output?: string, error?: string}>} lastResults
|
|
@@ -238,20 +206,73 @@ export async function consolidateAppraise(ctx, lastResults) {
|
|
|
238
206
|
}
|
|
239
207
|
|
|
240
208
|
/**
|
|
241
|
-
* Parse all successful appraiser outputs and de-duplicate the
|
|
242
|
-
* list by (file, law-id, issue text).
|
|
209
|
+
* Parse JSONL from all successful appraiser outputs and de-duplicate the
|
|
210
|
+
* combined issue list by (file, law-id, issue text).
|
|
243
211
|
*/
|
|
244
212
|
function parseConsolidated(successful) {
|
|
245
213
|
const all = [];
|
|
246
214
|
|
|
247
215
|
for (const result of successful) {
|
|
248
|
-
const issues =
|
|
216
|
+
const issues = parseAppraiserJsonl(result.output || '');
|
|
249
217
|
all.push(...issues);
|
|
250
218
|
}
|
|
251
219
|
|
|
252
220
|
return deduplicateIssues(all);
|
|
253
221
|
}
|
|
254
222
|
|
|
223
|
+
/**
|
|
224
|
+
* Parse appraiser JSONL output.
|
|
225
|
+
*
|
|
226
|
+
* Each line must be a JSON object with at least `file` and `text` fields.
|
|
227
|
+
* Extra fields (`law`, `evidence`, `severity`, `location`) are preserved.
|
|
228
|
+
* The `text` field maps to the issue description used for feedback text.
|
|
229
|
+
*/
|
|
230
|
+
function parseAppraiserJsonl(output) {
|
|
231
|
+
const issues = [];
|
|
232
|
+
const lines = output.trim().split('\n');
|
|
233
|
+
|
|
234
|
+
for (const line of lines) {
|
|
235
|
+
const issue = parseAppraiserLine(line);
|
|
236
|
+
if (issue) issues.push(issue);
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
return issues;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
function parseAppraiserLine(line) {
|
|
243
|
+
const trimmed = line.trim();
|
|
244
|
+
if (!trimmed) return null;
|
|
245
|
+
|
|
246
|
+
const obj = tryJsonParseLine(trimmed);
|
|
247
|
+
if (!obj) return null;
|
|
248
|
+
|
|
249
|
+
return validateJsonlIssue(obj);
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
function tryJsonParseLine(line) {
|
|
253
|
+
try { return JSON.parse(line); } catch { return null; }
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
function validateJsonlIssue(obj) {
|
|
257
|
+
if (!hasStringField(obj, 'file')) return null;
|
|
258
|
+
if (!hasStringField(obj, 'text')) return null;
|
|
259
|
+
|
|
260
|
+
return {
|
|
261
|
+
file: obj.file,
|
|
262
|
+
law: strOrEmpty(obj.law),
|
|
263
|
+
issue: obj.text,
|
|
264
|
+
evidence: strOrEmpty(obj.evidence),
|
|
265
|
+
};
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
function hasStringField(obj, key) {
|
|
269
|
+
return typeof obj[key] === 'string' && obj[key].length > 0;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
function strOrEmpty(value) {
|
|
273
|
+
return typeof value === 'string' ? value : '';
|
|
274
|
+
}
|
|
275
|
+
|
|
255
276
|
/**
|
|
256
277
|
* De-duplicate an issue array by (file, law, issue text).
|
|
257
278
|
*/
|
|
@@ -332,138 +353,41 @@ function buildConsolidateSummary(count) {
|
|
|
332
353
|
// ---------------------------------------------------------------------------
|
|
333
354
|
|
|
334
355
|
/**
|
|
335
|
-
* Build a subagent prompt for
|
|
356
|
+
* Build a subagent prompt for an appraiser.
|
|
336
357
|
*
|
|
337
|
-
*
|
|
338
|
-
*
|
|
358
|
+
* The prompt contains only the appraiser's personality and the artefact type
|
|
359
|
+
* ID. The subagent discovers artefact files, laws, and file-patterns via tool
|
|
360
|
+
* calls and returns JSONL — one JSON object per line.
|
|
339
361
|
*/
|
|
340
|
-
function buildAppraiserPrompt({ appraiser,
|
|
341
|
-
const lawSections = laws
|
|
342
|
-
.map(law => `## ${law.id}\n\n${law.text}`)
|
|
343
|
-
.join('\n\n');
|
|
344
|
-
|
|
362
|
+
function buildAppraiserPrompt({ appraiser, typeId }) {
|
|
345
363
|
const lines = [
|
|
346
364
|
'You are an appraiser. Your personality:',
|
|
347
365
|
'',
|
|
348
366
|
appraiser.personality,
|
|
349
367
|
'',
|
|
350
|
-
|
|
351
|
-
'either:',
|
|
352
|
-
'- Note no issues (pass)',
|
|
353
|
-
'- Describe the issue, quoting evidence from the artefact',
|
|
368
|
+
`Evaluate artefacts of type "${typeId}" against applicable laws.`,
|
|
354
369
|
'',
|
|
355
|
-
'
|
|
370
|
+
'Use tools to discover context:',
|
|
371
|
+
`- foundry_config_artefact_type with typeId "${typeId}" for file-patterns`,
|
|
372
|
+
`- foundry_config_laws with typeId "${typeId}" for applicable laws (prose only)`,
|
|
373
|
+
'- foundry_artefacts_list for changed files',
|
|
374
|
+
'- Read matching files from the worktree',
|
|
356
375
|
'',
|
|
357
|
-
|
|
376
|
+
'For each law, evaluate each relevant file. If a violation is found,',
|
|
377
|
+
'output a JSONL line:',
|
|
358
378
|
'',
|
|
359
|
-
'
|
|
379
|
+
'{"file": "<path>", "law": "<law-slug>", "text": "<issue description>", "evidence": "<quote>"}',
|
|
360
380
|
'',
|
|
361
|
-
|
|
381
|
+
'`file` and `text` are required. `law` and `evidence` are recommended.',
|
|
382
|
+
'Optional fields `severity` and `location` are passed through unchanged.',
|
|
362
383
|
'',
|
|
363
|
-
'
|
|
364
|
-
'',
|
|
365
|
-
'Return a list of issues. For each issue:',
|
|
366
|
-
`- file: ${artefact.file}`,
|
|
367
|
-
' law: <law-id>',
|
|
368
|
-
' issue: <description>',
|
|
369
|
-
' evidence: <quote from artefact>',
|
|
370
|
-
'',
|
|
371
|
-
'If there are no issues, return an empty list.',
|
|
384
|
+
'Output ONLY JSONL — one JSON object per line. No markdown, no commentary.',
|
|
385
|
+
'If no issues are found, output nothing.',
|
|
372
386
|
];
|
|
373
387
|
|
|
374
388
|
return lines.join('\n');
|
|
375
389
|
}
|
|
376
390
|
|
|
377
|
-
// ---------------------------------------------------------------------------
|
|
378
|
-
// Output parsing
|
|
379
|
-
// ---------------------------------------------------------------------------
|
|
380
|
-
|
|
381
|
-
/**
|
|
382
|
-
* Parse a structured issue list from an appraiser subagent output.
|
|
383
|
-
*
|
|
384
|
-
* LLM output is free-form text that may contain a YAML list of issues.
|
|
385
|
-
* Tries js-yaml first; falls back to line-scanning when the output is
|
|
386
|
-
* not clean YAML (LLMs may include surrounding text, quotes in bare
|
|
387
|
-
* strings, or other quirks that trip up a strict YAML parser).
|
|
388
|
-
*
|
|
389
|
-
* Returns an array of { file, law, issue, evidence } objects.
|
|
390
|
-
*/
|
|
391
|
-
function parseAppraiserOutput(output) {
|
|
392
|
-
const text = output || '';
|
|
393
|
-
const yamlBlock = extractYamlBlock(text);
|
|
394
|
-
const issues = tryYamlParse(yamlBlock);
|
|
395
|
-
if (issues) return issues;
|
|
396
|
-
|
|
397
|
-
return parseFallback(text);
|
|
398
|
-
}
|
|
399
|
-
|
|
400
|
-
function extractYamlBlock(text) {
|
|
401
|
-
if (text.startsWith('- file:')) return text;
|
|
402
|
-
const afterNewline = text.indexOf('\n- file:');
|
|
403
|
-
if (afterNewline >= 0) return text.slice(afterNewline + 1);
|
|
404
|
-
return text;
|
|
405
|
-
}
|
|
406
|
-
|
|
407
|
-
function tryYamlParse(yamlBlock) {
|
|
408
|
-
try {
|
|
409
|
-
const parsed = yaml.load(yamlBlock);
|
|
410
|
-
if (Array.isArray(parsed)) {
|
|
411
|
-
return parsed
|
|
412
|
-
.filter(e => e && typeof e === 'object' && e.file && e.law && e.issue)
|
|
413
|
-
.map(e => ({ file: e.file, law: e.law, issue: e.issue, evidence: e.evidence || '' }));
|
|
414
|
-
}
|
|
415
|
-
} catch { /* fall through to fallback */ }
|
|
416
|
-
return null;
|
|
417
|
-
}
|
|
418
|
-
|
|
419
|
-
const FALLBACK_FIELDS = new Set(['law', 'issue', 'evidence']);
|
|
420
|
-
|
|
421
|
-
function isCompleteIssue(obj) {
|
|
422
|
-
return obj && obj.file && obj.law && obj.issue;
|
|
423
|
-
}
|
|
424
|
-
|
|
425
|
-
function applyFallbackField(kv, entry, issues) {
|
|
426
|
-
if (kv.key === 'file') {
|
|
427
|
-
const e = { file: kv.value, law: '', issue: '', evidence: '' };
|
|
428
|
-
issues.push(e);
|
|
429
|
-
return e;
|
|
430
|
-
}
|
|
431
|
-
if (entry && FALLBACK_FIELDS.has(kv.key)) {
|
|
432
|
-
entry[kv.key] = kv.value;
|
|
433
|
-
}
|
|
434
|
-
return entry;
|
|
435
|
-
}
|
|
436
|
-
|
|
437
|
-
function parseFallback(text) {
|
|
438
|
-
const issues = [];
|
|
439
|
-
let entry = null;
|
|
440
|
-
|
|
441
|
-
for (const line of text.split('\n')) {
|
|
442
|
-
const kv = parseFallbackLine(line);
|
|
443
|
-
if (kv) entry = applyFallbackField(kv, entry, issues);
|
|
444
|
-
}
|
|
445
|
-
|
|
446
|
-
return issues.filter(isCompleteIssue);
|
|
447
|
-
}
|
|
448
|
-
|
|
449
|
-
function parseFallbackLine(line) {
|
|
450
|
-
const trimmed = line.trim();
|
|
451
|
-
if (!trimmed) return null;
|
|
452
|
-
|
|
453
|
-
const colon = trimmed.indexOf(':');
|
|
454
|
-
if (colon < 1) return null;
|
|
455
|
-
|
|
456
|
-
const key = stripDash(trimmed.slice(0, colon));
|
|
457
|
-
return {
|
|
458
|
-
key: key.trim(),
|
|
459
|
-
value: trimmed.slice(colon + 1).trim(),
|
|
460
|
-
};
|
|
461
|
-
}
|
|
462
|
-
|
|
463
|
-
function stripDash(s) {
|
|
464
|
-
return s.startsWith('- ') ? s.slice(2) : s;
|
|
465
|
-
}
|
|
466
|
-
|
|
467
391
|
// ---------------------------------------------------------------------------
|
|
468
392
|
// Shared helpers
|
|
469
393
|
// ---------------------------------------------------------------------------
|
|
@@ -99,7 +99,5 @@ export function hashText(text) {
|
|
|
99
99
|
*/
|
|
100
100
|
export function canForgeWontFix(item, callerStageBase) {
|
|
101
101
|
if (callerStageBase !== 'forge') return false;
|
|
102
|
-
|
|
103
|
-
const sourceBase = item.source.split(':')[0];
|
|
104
|
-
return sourceBase === 'appraise';
|
|
102
|
+
return !!(item && typeof item.source === 'string' && item.source);
|
|
105
103
|
}
|
|
@@ -45,28 +45,18 @@ function handleVersionChanged(item, feedbackStore, cycleId, postVersion) {
|
|
|
45
45
|
}
|
|
46
46
|
|
|
47
47
|
function handleWontFixWithReason(item, feedbackStore, cycleId, postVersion, reason) {
|
|
48
|
-
const
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
postSystemFeedback(feedbackStore, cycleId, postVersion, result.error || 'store transition failed');
|
|
59
|
-
feedbackStore.forceState(item.id, 'open', cycleId, `forge:${cycleId}`);
|
|
60
|
-
}
|
|
61
|
-
return { contractPassed: result.ok };
|
|
48
|
+
const result = feedbackStore.transition({
|
|
49
|
+
id: item.id,
|
|
50
|
+
target: 'wont-fix',
|
|
51
|
+
stage: 'forge:' + cycleId,
|
|
52
|
+
cycle: cycleId,
|
|
53
|
+
reason,
|
|
54
|
+
});
|
|
55
|
+
if (!result.ok) {
|
|
56
|
+
postSystemFeedback(feedbackStore, cycleId, postVersion, result.error || 'store transition failed');
|
|
57
|
+
feedbackStore.forceState(item.id, 'open', cycleId, `forge:${cycleId}`);
|
|
62
58
|
}
|
|
63
|
-
|
|
64
|
-
postSystemFeedback(
|
|
65
|
-
feedbackStore, cycleId, postVersion,
|
|
66
|
-
`wont-fix not allowed on ${sourceBase}-sourced item; wont-fix is only allowed for appraise-sourced items`,
|
|
67
|
-
);
|
|
68
|
-
feedbackStore.forceState(item.id, 'open', cycleId, `forge:${cycleId}`);
|
|
69
|
-
return { contractPassed: false };
|
|
59
|
+
return { contractPassed: result.ok };
|
|
70
60
|
}
|
|
71
61
|
|
|
72
62
|
/**
|
|
@@ -81,24 +71,23 @@ function handleWontFixWithReason(item, feedbackStore, cycleId, postVersion, reas
|
|
|
81
71
|
* @returns {{ contractPassed: boolean }}
|
|
82
72
|
*/
|
|
83
73
|
export function enforceForgeContract({ item, preVersion, postVersion, summary, feedbackStore, cycleId }) {
|
|
84
|
-
// No item means forge had no prior feedback to respond to.
|
|
85
74
|
if (!item) return { contractPassed: true };
|
|
86
75
|
|
|
87
|
-
// Version changed → forge fixed the issue
|
|
88
|
-
if (preVersion !== postVersion) {
|
|
89
|
-
return handleVersionChanged(item, feedbackStore, cycleId, postVersion);
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
// Version unchanged — check for WONT-FIX justification
|
|
93
76
|
const wontFixMatch = summary.match(/WONT-FIX:\s*(.+)/);
|
|
77
|
+
const versionChanged = preVersion !== postVersion;
|
|
78
|
+
const actioned = summary.trim() === 'ACTIONED';
|
|
79
|
+
|
|
94
80
|
if (wontFixMatch) {
|
|
95
81
|
return handleWontFixWithReason(item, feedbackStore, cycleId, postVersion, wontFixMatch[1]);
|
|
96
82
|
}
|
|
97
83
|
|
|
98
|
-
|
|
84
|
+
if (versionChanged || actioned) {
|
|
85
|
+
return handleVersionChanged(item, feedbackStore, cycleId, postVersion);
|
|
86
|
+
}
|
|
87
|
+
|
|
99
88
|
postSystemFeedback(
|
|
100
89
|
feedbackStore, cycleId, postVersion,
|
|
101
|
-
'forge did not change artefacts and did not provide WONT-FIX justification',
|
|
90
|
+
'forge did not change artefacts and did not provide ACTIONED or WONT-FIX justification',
|
|
102
91
|
);
|
|
103
92
|
feedbackStore.forceState(item.id, 'open', cycleId, `forge:${cycleId}`);
|
|
104
93
|
return { contractPassed: false };
|
|
@@ -35,7 +35,7 @@ function forgeReason(d) {
|
|
|
35
35
|
if (d.forgeCount === 0 && d.needingForge === 0) {
|
|
36
36
|
return `starting cycle — routing to forge (iteration 1 of ${d.maxIt})`;
|
|
37
37
|
}
|
|
38
|
-
return `found ${d.needingForge} unresolved feedback item(s) —
|
|
38
|
+
return `found ${d.needingForge} unresolved feedback item(s) — dispatching one item at a time to forge (revision ${d.forgeCount + 1} of ${d.maxIt})`;
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
function appraiseReason(d) {
|
|
@@ -29,6 +29,10 @@ function readCallSet(io) {
|
|
|
29
29
|
return called;
|
|
30
30
|
}
|
|
31
31
|
|
|
32
|
+
export function readForgeCallSet(io) {
|
|
33
|
+
return readCallSet(io);
|
|
34
|
+
}
|
|
35
|
+
|
|
32
36
|
export function verifyAndClearForgeCallLog(io, expected) {
|
|
33
37
|
const called = readCallSet(io);
|
|
34
38
|
const missing = expected.filter(t => !called.has(t));
|
|
@@ -266,15 +266,17 @@ function buildForgePromptLines({ cycle, outputType, forgeItem }) {
|
|
|
266
266
|
`File: ${forgeItem.file}`,
|
|
267
267
|
`Issue: ${forgeItem.text}`,
|
|
268
268
|
``,
|
|
269
|
-
`
|
|
270
|
-
`
|
|
271
|
-
`
|
|
272
|
-
` b) If this is an appraise-sourced item (subjective quality`,
|
|
273
|
-
` feedback), you may respond with:`,
|
|
274
|
-
` WONT-FIX: <justification for why you disagree>`,
|
|
269
|
+
`Respond with EXACTLY one of:`,
|
|
270
|
+
` - ACTIONED — fix the issue by changing the artefact file`,
|
|
271
|
+
` - WONT-FIX: <justification> — the issue is already resolved or does not apply`,
|
|
275
272
|
``,
|
|
276
|
-
`
|
|
277
|
-
|
|
273
|
+
`Write NOTHING else in the stage_end summary — no descriptions, no explanations.`,
|
|
274
|
+
);
|
|
275
|
+
} else {
|
|
276
|
+
lines.push(
|
|
277
|
+
``,
|
|
278
|
+
`First generation — no feedback to address yet.`,
|
|
279
|
+
`Produce the artefact and call foundry_stage_end({summary: "DONE"}).`,
|
|
278
280
|
);
|
|
279
281
|
}
|
|
280
282
|
return lines;
|
|
@@ -300,8 +302,6 @@ export function renderDispatchPrompt({ stage, cycle, token, cwd, filePatterns, o
|
|
|
300
302
|
``,
|
|
301
303
|
`Your FIRST tool call MUST be foundry_stage_begin({stage, cycle, token}) using the values above.`,
|
|
302
304
|
`Your LAST tool call MUST be foundry_stage_end({summary}).`,
|
|
303
|
-
``,
|
|
304
|
-
`When done, report back a brief summary. Do NOT call foundry_history_append, foundry_git_commit, or foundry_artefacts_add — the orchestrator handles all of those.`
|
|
305
305
|
);
|
|
306
306
|
return lines.join('\n');
|
|
307
307
|
}
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: appraise
|
|
3
3
|
type: atomic
|
|
4
|
-
description: Subjective evaluation of an artefact against laws via
|
|
4
|
+
description: Subjective evaluation of an artefact against laws via independent appraiser subagents.
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
# Appraise
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
**This skill is subagent-only.** It describes the protocol an appraiser subagent follows when dispatched via `task()` from the orchestrate loop. Do NOT load this skill and run appraise inline — the orchestrate skill returns a `dispatch_multi` action with pre-built prompts; call `task()` with each.
|
|
10
|
+
|
|
11
|
+
You evaluate artefacts against laws. Your dispatch prompt contains your personality and the artefact type ID. You discover artefact files, laws, and file-patterns via tool calls.
|
|
10
12
|
|
|
11
13
|
## Prerequisites
|
|
12
14
|
|
|
@@ -18,152 +20,58 @@ Before running this skill, verify that the `foundry/` directory exists in the pr
|
|
|
18
20
|
|
|
19
21
|
Appraise runs inside an enforced stage. Your **first** and **last** tool calls are fixed:
|
|
20
22
|
|
|
21
|
-
1. **First:** `foundry_stage_begin({stage, cycle, token})` — copy the token verbatim from the dispatch prompt.
|
|
23
|
+
1. **First:** `foundry_stage_begin({stage, cycle, token})` — copy the token verbatim from the dispatch prompt. No other tool call is permitted before this one.
|
|
22
24
|
2. **Last:** `foundry_stage_end({summary})`.
|
|
23
25
|
|
|
24
|
-
Appraise makes **no disk writes**. Feedback output flows through `foundry_feedback_add`.
|
|
26
|
+
Appraise makes **no disk writes**. Feedback output flows through JSONL returned in your response text. The orchestrator's internal consolidate step parses the JSONL, posts feedback, and resolves prior items.
|
|
25
27
|
|
|
26
28
|
## Protocol
|
|
27
29
|
|
|
28
|
-
1. `foundry_stage_begin(...)`.
|
|
29
|
-
2.
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
>
|
|
38
|
-
> 1. `foundry_workfile_delete({confirm: true})` to abandon the cycle.
|
|
39
|
-
> 2. Back out to main (`git checkout main`) and delete the work branch.
|
|
40
|
-
> 3. Investigate and fix the root cause of the failure before restarting.
|
|
41
|
-
|
|
42
|
-
Then return control to the user and stop.
|
|
43
|
-
- `foundry_artefacts_list({})` — enumerate the current cycle's branch artefact changes as `[{ file, state }]` entries.
|
|
44
|
-
- For each artefact change, gather its type-specific context:
|
|
45
|
-
- `foundry_config_laws` with the cycle's output type — applicable laws (global + type-specific)
|
|
46
|
-
- `foundry_config_artefact_type` with the type ID — the artefact type definition
|
|
47
|
-
- `foundry_appraisers_select` with the type ID — selected appraiser personalities with their raw model IDs
|
|
48
|
-
|
|
49
|
-
3. Dispatch each appraiser as an independent sub-agent (see Dispatch below). If this cycle produced multiple artefacts, appraisers evaluate each.
|
|
50
|
-
|
|
51
|
-
4. Collect results from all appraisers
|
|
52
|
-
|
|
53
|
-
5. Consolidate (this is judgment):
|
|
54
|
-
- Union of all issues — if any one appraiser flags it, it's feedback
|
|
55
|
-
- De-duplicate: merge overlapping observations into a single feedback item
|
|
56
|
-
- Preserve which appraiser(s) raised each issue (for traceability)
|
|
57
|
-
|
|
58
|
-
6. For each consolidated issue: `foundry_feedback_add` with `{ file, text, tag: 'law:<slug>' }`. Tags must match `law:<slug>`, and dedup uses the non-resolved `(file, tag, hash(text))` semantics described in Feedback handling.
|
|
59
|
-
|
|
60
|
-
7. If no appraiser found any issues, the artefact clears appraisal.
|
|
61
|
-
|
|
62
|
-
8. `foundry_stage_end({summary})`.
|
|
63
|
-
|
|
64
|
-
## Feedback handling
|
|
65
|
-
|
|
66
|
-
As an appraise stage, you have two feedback responsibilities:
|
|
67
|
-
|
|
68
|
-
1. **Adding new law-violation feedback.** For each unmet law, call
|
|
69
|
-
`foundry_feedback_add` with `{ file, text, tag: 'law:<slug>' }`.
|
|
70
|
-
The `source` is automatically your stage id (e.g. `appraise:write-check`).
|
|
71
|
-
The tool rejects any tag not matching `law:<slug>` during an appraise
|
|
72
|
-
stage; do not attempt bare `'appraise'` or `'review'` tags.
|
|
73
|
-
|
|
74
|
-
The tool returns `{ ok: true, id, deduped }` on success. `deduped: true`
|
|
75
|
-
means an existing non-resolved item with the same `(file, tag,
|
|
76
|
-
hash(text))` was found (no new snapshot written); `deduped: false`
|
|
77
|
-
means a new item was created. Resolved items are NOT considered for
|
|
78
|
-
dedup — a re-added item after a resolution is a legitimate new item
|
|
79
|
-
(regression feedback).
|
|
30
|
+
1. `foundry_stage_begin(...)` with the token from the dispatch prompt.
|
|
31
|
+
2. `foundry_config_artefact_type` with the type ID — get the artefact type definition and `file-patterns`.
|
|
32
|
+
3. `foundry_config_laws` with the type ID — get all applicable laws (prose only).
|
|
33
|
+
4. `foundry_artefacts_list` — enumerate the current cycle's branch artefact changes.
|
|
34
|
+
5. For each artefact file that matches the type's `file-patterns`, read the file from the worktree.
|
|
35
|
+
6. Evaluate each file against each law. For each law, either:
|
|
36
|
+
- Note no issues (pass)
|
|
37
|
+
- Describe the violation, quoting evidence from the artefact
|
|
38
|
+
7. Output JSONL. Each line is one JSON object:
|
|
80
39
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
- Approve: `foundry_feedback_resolve` with `{ id, resolution: 'approved' }`.
|
|
85
|
-
`reason` is optional.
|
|
86
|
-
- Reject: `foundry_feedback_resolve` with `{ id, resolution: 'rejected', reason: '...' }`.
|
|
87
|
-
`reason` is required. A rejection sends the item back to forge for
|
|
88
|
-
another attempt (the `rejected` state is a legal forge input per
|
|
89
|
-
§5.1 rule 2).
|
|
40
|
+
```json
|
|
41
|
+
{"file": "<path>", "law": "<law-slug>", "text": "<issue description>", "evidence": "<quote from artefact>"}
|
|
42
|
+
```
|
|
90
43
|
|
|
91
|
-
|
|
92
|
-
any deadlock-override transition. On `resolution: 'approved'` for a
|
|
93
|
-
non-deadlocked item, `reason` is optional.
|
|
44
|
+
`file` and `text` are required. `law` and `evidence` are recommended — `law` tells the orchestrator which law tag to use, `evidence` quotes the offending passage. Optional extra fields (`severity`, `location`) are passed through unchanged.
|
|
94
45
|
|
|
95
|
-
|
|
96
|
-
matches your own stage id — not every appraise stage in the cycle, just yours.
|
|
97
|
-
This prevents a second appraise stage from rubber-stamping work it didn't
|
|
98
|
-
request. For deadlocked items, only human-appraise has the override authority.
|
|
46
|
+
If there are no issues, output nothing (empty response).
|
|
99
47
|
|
|
100
|
-
|
|
101
|
-
human-appraise see non-deadlocked unresolved feedback before the orchestrator routes.
|
|
102
|
-
Not available in v2.6.0; appraise stages today are the sole resolver of
|
|
103
|
-
their own non-deadlocked items.
|
|
48
|
+
Your response text is ONLY JSONL — one JSON object per line. No markdown headings, no code blocks, no commentary, no YAML.
|
|
104
49
|
|
|
105
|
-
|
|
50
|
+
8. `foundry_stage_end({summary})`. The summary describes how many issues were found (e.g. "3 issues found" or "No issues found").
|
|
106
51
|
|
|
107
|
-
|
|
108
|
-
- The appraiser's personality (from their definition)
|
|
109
|
-
- The artefact content
|
|
110
|
-
- All applicable laws (global + type-specific)
|
|
111
|
-
- Instructions to evaluate the artefact against each law and return issues as a structured list
|
|
52
|
+
## Output examples
|
|
112
53
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
`foundry_appraisers_select` returns raw model IDs for each appraiser. Convert each to an agent name: `foundry-<model.replace(/[/.]/g, '-')>` — both `/` and `.` are replaced with `-`. Examples:
|
|
116
|
-
- `openai/gpt-4o` → `foundry-openai-gpt-4o`
|
|
117
|
-
- `github-copilot/claude-sonnet-4.6` → `foundry-github-copilot-claude-sonnet-4-6`
|
|
118
|
-
|
|
119
|
-
- If a model is specified: dispatch with `subagent_type: "foundry-<converted-name>"`. If no agent with that name exists, **hard fail**.
|
|
120
|
-
- If no model is specified: dispatch with `subagent_type: "general"` (inherits session model).
|
|
121
|
-
|
|
122
|
-
Note: per-appraiser `model` overrides are applied here at dispatch time. The cycle-level `models.appraise` value (if set) is used for routing-time agent-file validation only; this skill does not consult it when iterating appraisers.
|
|
123
|
-
|
|
124
|
-
Dispatch all appraisers in parallel (multiple Task calls in a single response).
|
|
125
|
-
|
|
126
|
-
### Sub-agent prompt template
|
|
54
|
+
Good (issues found):
|
|
127
55
|
|
|
128
56
|
```
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
<contents of appraiser personality>
|
|
132
|
-
|
|
133
|
-
Evaluate the following artefact against each law below. For each law, either:
|
|
134
|
-
- Note no issues (pass)
|
|
135
|
-
- Describe the issue, quoting evidence from the artefact
|
|
136
|
-
|
|
137
|
-
## Artefact
|
|
138
|
-
|
|
139
|
-
<artefact content>
|
|
140
|
-
|
|
141
|
-
## Laws
|
|
142
|
-
|
|
143
|
-
<all applicable laws>
|
|
144
|
-
|
|
145
|
-
## Output
|
|
146
|
-
|
|
147
|
-
Return a list of issues. For each issue:
|
|
148
|
-
- law: <law-id>
|
|
149
|
-
- issue: <description>
|
|
150
|
-
- evidence: <quote from artefact>
|
|
151
|
-
|
|
152
|
-
If there are no issues, return an empty list.
|
|
57
|
+
{"file": "haikus/mountain.md", "law": "syllable-count", "text": "Line 2 has 8 syllables, expected 7", "evidence": "A green frog jumps into the pond", "location": "2:1"}
|
|
58
|
+
{"file": "haikus/mountain.md", "law": "nature-imagery", "text": "Contains industrial imagery violating nature-only requirement", "evidence": "The rusty old machine"}
|
|
153
59
|
```
|
|
154
60
|
|
|
155
|
-
|
|
61
|
+
Good (no issues found — empty response, then stage_end):
|
|
156
62
|
|
|
157
|
-
|
|
63
|
+
(no output text)
|
|
158
64
|
|
|
159
|
-
|
|
65
|
+
## Feedback handling
|
|
160
66
|
|
|
161
|
-
|
|
67
|
+
You do NOT call `foundry_feedback_add` or `foundry_feedback_resolve`. The orchestrator's consolidate step reads your JSONL output, de-duplicates across all appraisers, posts feedback items with tag `law:<slug>`, and resolves prior appraise-sourced feedback.
|
|
162
68
|
|
|
163
69
|
## What you do NOT do
|
|
164
70
|
|
|
165
|
-
- You do not write files — feedback output goes through `foundry_feedback_add`.
|
|
166
|
-
- You do not revise the artefact.
|
|
71
|
+
- You do not write files — feedback output goes through JSONL, not `foundry_feedback_add`.
|
|
72
|
+
- You do not revise the artefact — that is the forge skill's job.
|
|
167
73
|
- You do not run deterministic validators — that is the quench skill's job.
|
|
168
|
-
- You do not
|
|
169
|
-
- You do not
|
|
74
|
+
- You do not call `foundry_feedback_add`, `foundry_feedback_action`, `foundry_feedback_wontfix`, or `foundry_feedback_resolve`.
|
|
75
|
+
- You do not call `foundry_history_append` or `foundry_git_commit` — `foundry_orchestrate` handles those.
|
|
76
|
+
- You do not register artefacts — that happens automatically.
|
|
77
|
+
- You do not output YAML, markdown, or prose — only JSONL.
|
|
@@ -51,43 +51,38 @@ Forge runs inside an enforced stage. Your **first** and **last** tool calls are
|
|
|
51
51
|
- Read the selected files for context.
|
|
52
52
|
7. Produce the artefact, respecting all applicable laws from the start.
|
|
53
53
|
8. Write the artefact file to a location that matches the artefact type's `file-patterns`.
|
|
54
|
-
9. `foundry_stage_end({summary})`.
|
|
54
|
+
9. `foundry_stage_end({summary: "DONE"})`.
|
|
55
55
|
|
|
56
56
|
### Revision (feedback exists)
|
|
57
57
|
|
|
58
58
|
1. `foundry_stage_begin(...)`.
|
|
59
59
|
2. Read the artefact file.
|
|
60
60
|
3. If the cycle declares `inputs`, discover them via filesystem scan against each input type's `file-patterns` (same protocol as first-generation step 6). Re-read the relevant files — they may have changed on disk since the previous iteration (nothing in this cycle wrote to them, but the user may have modified them between iterations).
|
|
61
|
-
4. Address the single feedback item from the dispatch prompt following the feedback handling rules below
|
|
62
|
-
5. Update the artefact file.
|
|
63
|
-
6. `foundry_stage_end({summary})`.
|
|
61
|
+
4. Address the single feedback item from the dispatch prompt following the feedback handling rules below.
|
|
62
|
+
5. Update the artefact file (if fixing), or skip (if WONT-FIX).
|
|
63
|
+
6. `foundry_stage_end({summary})`. The summary must be EXACTLY one of:
|
|
64
|
+
- `"ACTIONED"` — file was changed to address the feedback
|
|
65
|
+
- `"WONT-FIX: <justification>"` — item already resolved or does not apply
|
|
66
|
+
Write NOTHING else in the summary.
|
|
64
67
|
|
|
65
68
|
## Feedback handling
|
|
66
69
|
|
|
67
|
-
The dispatch prompt
|
|
68
|
-
iteration. Each item has the shape `{ id, file, tag, text, source, state,
|
|
69
|
-
depth, reason? }`.
|
|
70
|
+
The dispatch prompt contains one feedback item to address.
|
|
70
71
|
|
|
71
|
-
|
|
72
|
-
|
|
72
|
+
**To fix the issue** — change the artefact file and call
|
|
73
|
+
`foundry_stage_end({summary: "ACTIONED"})`.
|
|
73
74
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
75
|
+
**If the issue is already resolved** — call
|
|
76
|
+
`foundry_stage_end({summary: "WONT-FIX: <justification>"})`.
|
|
77
|
+
Do NOT change the file.
|
|
77
78
|
|
|
78
|
-
|
|
79
|
-
`
|
|
80
|
-
**must** be fixed. There is no wont-fix option for these.
|
|
79
|
+
**If the issue does not apply** (appraise judgement you disagree with) — same
|
|
80
|
+
`WONT-FIX:` flow.
|
|
81
81
|
|
|
82
|
-
|
|
83
|
-
returns `{ ok, id, deduped }`. `deduped: true` means an existing
|
|
84
|
-
non-resolved item with the same `(file, tag, hash(text))` was found and no
|
|
85
|
-
new item was written; the returned `id` is the existing item's id.
|
|
86
|
-
`deduped: false` means a new item was created.
|
|
82
|
+
The summary is ONLY one of these keywords. No descriptions, no explanations.
|
|
87
83
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
human-appraise can override any non-resolved item.
|
|
84
|
+
Do NOT call `foundry_feedback_action`, `foundry_feedback_wontfix`, or
|
|
85
|
+
`foundry_feedback_resolve`. The orchestrator handles transitions automatically.
|
|
91
86
|
|
|
92
87
|
## Write invariant
|
|
93
88
|
|
|
@@ -45,11 +45,8 @@ task tool:
|
|
|
45
45
|
description: "Run <stage> for <cycle>"
|
|
46
46
|
prompt: <prompt-from-payload — pass verbatim>
|
|
47
47
|
```
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
description: "Run <stage> for <cycle>"
|
|
51
|
-
prompt: <prompt-from-payload — pass verbatim>
|
|
52
|
-
```
|
|
48
|
+
|
|
49
|
+
**Critical for forge dispatch:** The orchestrator dispatches one feedback item per forge subagent call. The `prompt` already contains exactly one `FEEDBACK ITEM TO ADDRESS`. Pass the prompt verbatim — do NOT read quench output, do NOT add additional feedback items, do NOT inject validator results. The orchestrator will dispatch a separate `task()` call for each unresolved item.
|
|
53
50
|
|
|
54
51
|
When the task returns, call `foundry_orchestrate({lastResult: {ok: true}})`. If the task tool itself errored or reported a subagent crash, pass `{ok: false, error: '<message>'}`.
|
|
55
52
|
|
|
@@ -114,6 +111,7 @@ Report to the user: "Cycle halted (violation): `<details>`. Affected files: `<af
|
|
|
114
111
|
- You do NOT mint, modify, or cache tokens. The `prompt` from orchestrate already contains the token verbatim.
|
|
115
112
|
- `foundry_history_append`, `foundry_git_commit`, `foundry_stage_finalize`, and `foundry_sort` are not registered tools; orchestrate handles them internally via the loop.
|
|
116
113
|
- You do NOT reorder the protocol. `foundry_orchestrate` returns, you act, you call back. Nothing else between.
|
|
114
|
+
- You do NOT add extra feedback items to the forge dispatch prompt. The orchestrator dispatches one item at a time. Each prompt already contains exactly one `FEEDBACK ITEM TO ADDRESS`. Do not read quench output and inject additional items.
|
|
117
115
|
|
|
118
116
|
## Feedback visibility
|
|
119
117
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@really-knows-ai/foundry",
|
|
3
|
-
"version": "3.8.1",
|
|
3
|
+
"version": "3.8.3",
|
|
4
4
|
"description": "A skill-driven framework for governed artefact generation with AI coding tools. Define your own artefact types, laws, and flows — Foundry handles the forge → quench → appraise pipeline with deterministic routing, quality gates, and iterative refinement.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/.opencode/plugins/foundry.js",
|