@really-knows-ai/foundry 3.8.3 → 3.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -133,6 +133,7 @@ function artefactTypeArgs(s) { return {
133
133
  name: s.string().describe('Human-readable display name (accepted at boundary, not persisted — id becomes frontmatter.name)'),
134
134
  filePatterns: s.array(s.string()).describe('Glob patterns defining forge write scope (written to frontmatter.file-patterns)'),
135
135
  description: s.string().describe('Prose description placed under ## Definition'),
136
+ example: s.string().optional().describe('Example artefact structure (markdown with code blocks). Written to example.md alongside definition.md. Guides forge agents on the expected output format.'),
136
137
  appraisers: s.object({
137
138
  count: s.number().optional().describe('Number of appraisers per cycle'),
138
139
  allowed: s.array(s.string()).optional().describe('Restrict to specific appraiser IDs'),
package/dist/CHANGELOG.md CHANGED
@@ -1,5 +1,32 @@
1
1
  # Changelog
2
2
 
3
+ ## [3.8.5] - 2026-05-27
4
+
5
+ ### Changed
6
+
7
+ - Human-appraise skill redesigned with two distinct review modes:
8
+ - **Clean review** (no unresolved feedback): Shows `git diff --stat` filtered to artefact type file-patterns, not full file content. Uses the question tool to offer Approve or Provide feedback.
9
+ - **Feedback review** (unresolved feedback exists): Presents each item individually via the question tool. User picks Agree (keep open for forge), Disagree (resolve with approved override), or Comment (add human feedback).
10
+ - All user interaction now uses the question tool (structured options) instead of free-text prompts.
11
+
12
+ - Appraise consolidation now propagates finalize violations instead of swallowing them. If `finaliseStage` returns a violation (e.g. commit rejected), `consolidateAppraise` surfaces it rather than returning `{ ok: true }`.
13
+
14
+ ### Fixed
15
+
16
+ - Sort violation details are now passed through to the terminal handler. Previously, `handleSortResult` destructured `sortResult` but didn't forward the `details` field to `resolveRouteResult`, causing all sort violations to show the default "sort returned violation" message.
17
+
18
+ ## [3.8.4] - 2026-05-27
19
+
20
+ ### Added
21
+
22
+ - `foundry_config_create_artefact_type` now accepts an optional `example` arg. When provided, it writes `foundry/artefacts/<id>/example.md` alongside `definition.md`. The example file is a structure document — markdown with code blocks showing the expected output format, plus documentation for the forge agent.
23
+
24
+ - The `add-artefact-type` skill now prompts users to provide an example artefact during the Understand phase, includes it in the Plan, and passes it to the create tool in Build.
25
+
26
+ ### Fixed
27
+
28
+ - Human-appraise stage tokens no longer carry a subagent model scope. Previously, `sort.js` resolved a model for human-appraise routes (falling through to `defaultModel`), which got embedded in the token. `foundry_stage_begin` then rejected the token because the main Foundry agent is not the scoped subagent, causing the human-appraise stage to fail to start and user feedback to be lost. Human-appraise always runs inline by the Foundry agent.
29
+
3
30
  ## [3.8.3] - 2026-05-27
4
31
 
5
32
  ### Changed
@@ -197,11 +197,16 @@ export async function consolidateAppraise(ctx, lastResults) {
197
197
 
198
198
  const summary = buildConsolidateSummary(consolidated.length);
199
199
 
200
- await ctx.finalize({
200
+ return finalizeAndReturn(ctx, stageId, summary, baseSha);
201
+ }
202
+
203
+ async function finalizeAndReturn(ctx, stageId, summary, baseSha) {
204
+ const result = await ctx.finalize({
201
205
  lastStage: { stage: stageId, summary, baseSha },
202
206
  activeStage: ctx.activeStage,
203
207
  });
204
208
 
209
+ if (result && result.action === 'violation') return result;
205
210
  return { ok: true, summary };
206
211
  }
207
212
 
@@ -50,7 +50,30 @@ const _create = makeCreator({
50
50
  validator: validate,
51
51
  });
52
52
 
53
+ /**
54
+ * Assemble the markdown body for example.md from structured arguments.
55
+ *
56
+ * The example file is a structure document: markdown with code blocks showing
57
+ * the expected output format, plus documentation for the forge agent.
58
+ *
59
+ * @param {string} exampleContent - Raw markdown for example.md
60
+ * @returns {string} Trimmed content with trailing newline.
61
+ */
62
+ export function assembleExampleMarkdown(exampleContent) {
63
+ return `${exampleContent.trim()}\n`;
64
+ }
65
+
53
66
  export async function create(args) {
54
67
  const body = assembleArtefactTypeMarkdown(args);
68
+
69
+ if (args.example) {
70
+ const exampleDir = join('foundry', 'artefacts', args.id);
71
+ await args.io.mkdirp(exampleDir);
72
+ await args.io.writeFile(
73
+ join(exampleDir, 'example.md'),
74
+ assembleExampleMarkdown(args.example),
75
+ );
76
+ }
77
+
55
78
  return _create({ ...args, name: args.id, body });
56
79
  }
@@ -75,13 +75,13 @@ function isTerminalRoute(route) {
75
75
  export async function handleSortResult(sortResult, ctx) {
76
76
  const { route, model, token, reason } = sortResult;
77
77
  const routeBase = routeDispatch(route);
78
- const result = await resolveRouteResult({ route, routeBase, model, token, ctx });
78
+ const result = await resolveRouteResult({ route, routeBase, model, token, ctx, sortResult });
79
79
  if (reason !== undefined) result.reason = reason;
80
80
  return result;
81
81
  }
82
82
 
83
- async function resolveRouteResult({ route, routeBase, model, token, ctx }) {
84
- if (isTerminalRoute(route)) return handleTerminalRoute(route, { route }, ctx);
83
+ async function resolveRouteResult({ route, routeBase, model, token, ctx, sortResult }) {
84
+ if (isTerminalRoute(route)) return handleTerminalRoute(route, sortResult, ctx);
85
85
  if (routeBase === 'quench' || routeBase === 'appraise') return violation(routeBase + ' route reached handleSortResult');
86
86
  if (routeBase === 'human-appraise') return humanAppraiseAction(route, token, ctx);
87
87
  return buildDispatchAction(route, model, token, ctx);
@@ -166,6 +166,7 @@ function resolveModelId(routeBase, models, defaultModel) {
166
166
 
167
167
  function pickModelId(route, frontmatter, defaultModel) {
168
168
  const routeBase = baseStage(route);
169
+ if (routeBase === 'human-appraise') return null;
169
170
  const resolved = frontmatter.models ? resolveModelId(routeBase, frontmatter.models, defaultModel) : null;
170
171
  return resolved || defaultModel || defaultForStage(routeBase);
171
172
  }
@@ -38,7 +38,7 @@ Do not tell the user to call branch tools directly.
38
38
 
39
39
  When invoked with pre-filled fields matching the `foundry_config_create_artefact_type` tool args, skip questions for provided fields. Missing fields trigger clarifying questions.
40
40
 
41
- Context fields: `{id, name, filePatterns, description, appraisers?}`
41
+ Context fields: `{id, name, filePatterns, description, example?, appraisers?}`
42
42
 
43
43
  When invoked with a context:
44
44
  - If all required fields are present, skip the Understand phase and proceed to Plan → Confirm → Build.
@@ -48,6 +48,12 @@ When invoked with a context:
48
48
 
49
49
  Ask for each field one question at a time. Prefer multiple choice for `filePatterns`, deriving options from the artefact type name and common conventions (e.g. `haikus/*.md`, `haiku.md`, `output/haiku/*.md`). Ask about `appraisers` (optional) — either provide an existing appraiser ID or skip.
50
50
 
51
+ After the core fields, ask about the example:
52
+
53
+ > Would you like to provide an example artefact? An example shows forge agents the expected output structure — markdown with code blocks, plus any conventions, constraints, or required sections. Give a short example file that demonstrates what a valid output looks like.
54
+
55
+ If the user provides an example, capture it verbatim. If the artefact type has no structured output (e.g. free-form prose with no required format), the user may skip this step.
56
+
51
57
  **Naming conflict check**: Read all existing artefact type definitions in `foundry/artefacts/*/definition.md`. Exact id match means a hard conflict — choose a different id. A semantically similar name or description triggers a warning:
52
58
 
53
59
  > An artefact type `<existing-id>` already exists that seems similar:
@@ -74,6 +80,7 @@ Present the definition to the user with these structured fields:
74
80
  - `name` (string) — human-readable label.
75
81
  - `filePatterns` (string[]) — glob patterns for files this type produces.
76
82
  - `description` (string) — prose description of what this artefact type is.
83
+ - `example` (string, optional) — example artefact to guide forge agents on the expected output structure.
77
84
  - `appraisers` ({ count?: number, allowed?: string[] }, optional) — appraiser configuration.
78
85
 
79
86
  Ask: does this capture the artefact type correctly? Iterate until the user is satisfied.
@@ -86,7 +93,7 @@ Ask: "Proceed with this plan?" — wait for user answer before building. If the
86
93
 
87
94
  1. **Validate**: Call `foundry_config_validate_artefact_type({ name: "<id>", body: "<assembled markdown>" })`. Assemble the body from the fields using the frontmatter format the tool produces internally. If the result is `{ ok: false, errors: [...] }`, address each error and re-run until `{ ok: true }`. Common issues: missing required frontmatter keys, references to artefact types or flows that do not exist yet.
88
95
 
89
- 2. **Create**: Call `foundry_config_create_artefact_type({ id: "<id>", name: "<name>", filePatterns: ["<pattern>"], description: "<description>" })`. The tool re-validates the body (TOCTOU), writes `foundry/artefacts/<id>/definition.md`, and produces one git commit on the current `config/*` branch. Show the user the resulting commit hash.
96
+ 2. **Create**: Call `foundry_config_create_artefact_type({ id: "<id>", name: "<name>", filePatterns: ["<pattern>"], description: "<description>", example: "<example>" })`. Include `example` only when the user provided one. The tool re-validates the body (TOCTOU), writes `foundry/artefacts/<id>/definition.md` (and `example.md` if provided), and produces one git commit on the current `config/*` branch. Show the user the resulting commit hash.
90
97
 
91
98
  If the tool returns `{ ok: false, errors }` because the target file already exists, read the existing file, incorporate the user's requested changes into the current body, propose the merged result for review, then write and commit the updated file.
92
99
 
@@ -39,79 +39,166 @@ Your LAST tool call must be `foundry_stage_end({summary: '<one-sentence descript
39
39
 
40
40
  ## Protocol
41
41
 
42
+ ### Step 1: Gather context
43
+
42
44
  1. `foundry_stage_begin(...)`.
43
- 2. Gather context by calling:
44
- - `foundry_workfile_get` — current state, goal, cycle
45
+ 2. `foundry_workfile_get` — current state, goal, cycle.
46
+
47
+ **Check for failed flow state.** If `foundry_workfile_get` returns `{status: "failed", reason: ...}`, STOP. Do not do any substantive work. Tell the user:
48
+
49
+ > The flow is in a failed state. Reason: `<reason>`.
50
+ >
51
+ > No further work is permitted. To recover:
52
+ >
53
+ > 1. `foundry_workfile_delete({confirm: true})` to abandon the cycle.
54
+ > 2. Back out to main (`git checkout main`) and delete the work branch.
55
+ > 3. Investigate and fix the root cause of the failure before restarting.
56
+
57
+ Then call `foundry_stage_end({summary: 'Flow is failed; no human appraisal performed'})`, return control to the user, and stop.
58
+
59
+ 3. `foundry_artefacts_list({})` — this cycle's branch artefact changes as `[{ file, state }]` entries.
60
+ 4. `foundry_feedback_list` — all existing feedback items.
61
+ 5. `foundry_history_list({cycle: <current-cycle>})` — what has happened so far.
62
+
63
+ ### Step 2: Classify feedback
64
+
65
+ Split the feedback list into two categories by `state`:
66
+ - **Resolved**: `state === 'resolved'` — no action needed, informational only.
67
+ - **Unresolved**: all other states (`open`, `rejected`, `actioned`, `wont-fix`).
68
+
69
+ ### Step 3: Route to the correct review mode
70
+
71
+ - **If there are NO unresolved feedback items** → go to **Mode A: Clean review**.
72
+ - **If there ARE unresolved feedback items** → go to **Mode B: Feedback review**.
73
+
74
+ ---
75
+
76
+ ## Mode A: Clean review (no unresolved feedback)
77
+
78
+ The cycle is in good shape — all feedback from appraisers and quench has been addressed. You present the artefact summary only, not the full content.
79
+
80
+ ### A.1 Show the artefact summary
81
+
82
+ Get the artefact type's file-patterns: call `foundry_config_artefact_type` with the cycle's output type (from `foundry_workfile_get`). The response includes `file-patterns` — glob patterns for this artefact type (e.g. `["haikus/*.md"]`).
83
+
84
+ Run `git diff --stat main..HEAD` restricted to only those files by passing each glob as a pathspec:
85
+
86
+ ```
87
+ git diff --stat main..HEAD -- haikus/*.md
88
+ ```
89
+
90
+ Example output:
91
+
92
+ ```
93
+ haikus/sunburn.md | 4 +++-
94
+ 1 file changed, 4 insertions(+), 1 deletion(-)
95
+ ```
96
+
97
+ Also show the goal from `foundry_workfile_get` so the user knows what was being produced.
98
+
99
+ ### A.2 Ask the user
45
100
 
46
- **Check for failed flow state.** If `foundry_workfile_get` returns `{status: "failed", reason: ...}`, STOP. Do not do any substantive work. Tell the user:
101
+ Use the **question tool** (NOT a plain text prompt — the question tool pauses and waits for the user):
47
102
 
48
- > The flow is in a failed state. Reason: `<reason>`.
49
- >
50
- > No further work is permitted. To recover:
51
- >
52
- > 1. `foundry_workfile_delete({confirm: true})` to abandon the cycle.
53
- > 2. Back out to main (`git checkout main`) and delete the work branch.
54
- > 3. Investigate and fix the root cause of the failure before restarting.
103
+ ```
104
+ header: "Review changes"
105
+ question: "The cycle finished with no unresolved feedback. Here are the changes:
55
106
 
56
- Then call `foundry_stage_end({summary: 'Flow is failed; no human appraisal performed'})`, return control to the user, and stop.
57
- - `foundry_artefacts_list({})` — this cycle's branch artefact changes as `[{ file, state }]` entries
58
- - `foundry_feedback_list` — all existing feedback
59
- - `foundry_history_list({cycle: <current-cycle>})` — what has happened so far
107
+ <diff stat output>
60
108
 
61
- 3. Read the artefact file(s) for this cycle.
109
+ Goal: <goal from workfile>
62
110
 
63
- 4. Present to the human:
64
- - The current artefact content (full file content or multi-file diff)
65
- - A summary of this iteration's feedback (resolved and open)
66
- - Ask the human to review, provide feedback, or approve
111
+ What would you like to do?"
112
+ options:
113
+ - label: "Approve"
114
+ description: "Looks good — close the cycle as done"
115
+ - label: "Provide feedback"
116
+ description: "Send feedback to forge for another iteration"
117
+ ```
67
118
 
68
- 5. Wait for the human's response.
119
+ ### A.3 Act on response
69
120
 
70
- 6. Act on the response (tag MUST be `human` on any added feedback — the tool rejects other tags during human-appraise):
71
- - **Approve** — "looks good" / "continue" — no feedback added, sort will advance.
72
- - **Provide feedback** — `foundry_feedback_add({ file, text, tag: 'human' })`. Sort will route back to forge.
73
- - **Resolve feedback** — `foundry_feedback_resolve({ id, resolution, reason? })` for items in `{actioned, wont-fix}`. See "Feedback handling" below for the legal transitions and authority rules.
74
- - **Abort** — human-appraise cannot directly mark the artefact `blocked` (the repository no longer has a per-artefact status tool or table). To abort: end the stage with a summary explaining the abort, then either (a) instruct the user to call `foundry_workfile_delete({ confirm: true })` to discard the cycle, or (b) reject outstanding feedback so routing exhausts iterations and sort blocks the cycle on its own.
121
+ - **Approve**: No feedback added. Call `foundry_stage_end({summary: 'Human approved — no issues'})`. Sort will route to `done`.
122
+ - **Provide feedback**: Ask the user what needs changing (the user types their feedback). Then call `foundry_feedback_add({ file: '<artefact-file>', text: '<user feedback>', tag: 'human' })`. Call `foundry_stage_end({summary: 'Human requested changes'})`. Sort will route to forge.
75
123
 
76
- 7. `foundry_stage_end({summary})` — describe what the human decided so sort can log it.
124
+ ---
125
+
126
+ ## Mode B: Feedback review (unresolved feedback exists)
127
+
128
+ The cycle has outstanding feedback from appraisers, quench, or prior human reviews. You present each item to the user for a verdict.
129
+
130
+ ### B.1 Summarise the state
131
+
132
+ Briefly tell the user how many unresolved items exist and what the goal is.
133
+
134
+ ### B.2 Review each unresolved item
135
+
136
+ Present each unresolved item **one at a time** using the **question tool**. For each item:
137
+
138
+ ```
139
+ header: "Feedback N of M"
140
+ question: "Feedback item:
141
+
142
+ File: <file>
143
+ Source: <source stage>
144
+ Tag: <tag>
145
+ Issue: <text>
146
+
147
+ <if the item has a reason, show: "Reason: <reason>">
148
+
149
+ Do you agree with this feedback?"
150
+ options:
151
+ - label: "Agree"
152
+ description: "This needs fixing — let forge handle it"
153
+ - label: "Disagree"
154
+ description: "Override this item — resolve it"
155
+ - label: "Comment"
156
+ description: "Add my own note or context about this item"
157
+ ```
158
+
159
+ After the user responds:
160
+
161
+ - **Agree**: Do nothing — the item stays in its current state. Sort will route to forge to address it.
162
+ - **Disagree**: Call `foundry_feedback_resolve({ id: '<item-id>', resolution: 'approved' })`. Optionally pass a `reason`.
163
+ - **Comment**: Ask the user what they want to say. Then call `foundry_feedback_add({ file: '<file>', text: '<user comment>', tag: 'human' })`. The original item stays open so forge still addresses it alongside the human comment.
164
+
165
+ Repeat for every unresolved item.
166
+
167
+ ### B.3 Final question
168
+
169
+ After all unresolved items have been reviewed, ask one final question using the **question tool**:
170
+
171
+ ```
172
+ header: "Any other feedback?"
173
+ question: "All unresolved feedback items have been reviewed. Any other changes you want before the cycle continues?"
174
+ options:
175
+ - label: "None — continue"
176
+ description: "Proceed with the current state"
177
+ - label: "Add more feedback"
178
+ description: "Provide additional notes for forge"
179
+ ```
180
+
181
+ - **None — continue**: Call `foundry_stage_end({summary: 'Human reviewed — <count> item(s) agreed, <count> overridden'})`.
182
+ - **Add more feedback**: Ask the user what they want to add, then call `foundry_feedback_add({ file: '<file>', text: '<text>', tag: 'human' })`. Then call `foundry_stage_end({summary: 'Human reviewed with additional feedback'})`.
183
+
184
+ ---
77
185
 
78
186
  ## Feedback handling
79
187
 
80
- As a human-appraise stage, you can add human feedback and resolve
81
- feedback items. **Human-appraise can resolve any non-resolved
82
- source-stage item regardless of source** — this is the universal
83
- override authority recorded in spec §5.1 rule 5.
188
+ As a human-appraise stage, you can add human feedback and resolve feedback items. **Human-appraise can resolve any non-resolved source-stage item regardless of source** — this is the universal override authority recorded in spec §5.1 rule 5.
84
189
 
85
190
  What human-appraise can NOT do:
86
191
 
87
- - **No forward transitions.** `foundry_feedback_action` and
88
- `foundry_feedback_wontfix` move items from `{open, rejected}` to
89
- `{actioned, wont-fix}` — that is forge's lane (spec §5.1 rule 1) and
90
- the tools reject calls from any non-forge stage. If an open or rejected
91
- item needs work, sort will route to forge after this stage ends.
92
- - **No artefact status writes.** The repository no longer has a per-artefact
93
- status tool or table. Status is owned by the cycle state machine through
94
- sort and orchestrate routing.
192
+ - **No forward transitions.** `foundry_feedback_action` and `foundry_feedback_wontfix` move items from `{open, rejected}` to `{actioned, wont-fix}` — that is forge's lane (spec §5.1 rule 1) and the tools reject calls from any non-forge stage. If an open or rejected item needs work, sort will route to forge after this stage ends.
193
+ - **No artefact status writes.** The repository no longer has a per-artefact status tool or table. Status is owned by the cycle state machine through sort and orchestrate routing.
95
194
 
96
195
  What human-appraise CAN do:
97
196
 
98
- 1. **Add new human feedback.** Call `foundry_feedback_add` with
99
- `{ file, text, tag: 'human' }`. The `source` is your stage id. The tool
100
- returns `{ ok: true, id, deduped }`; `deduped: true` indicates an
101
- existing non-resolved item with the same `(file, tag, hash(text))` was
102
- found and no new snapshot was written, `deduped: false` indicates a new
103
- item was created.
197
+ 1. **Add new human feedback.** Call `foundry_feedback_add` with `{ file, text, tag: 'human' }`. The `source` is your stage id. The tool returns `{ ok: true, id, deduped }`; `deduped: true` indicates an existing non-resolved item with the same `(file, tag, hash(text))` was found and no new snapshot was written, `deduped: false` indicates a new item was created.
104
198
 
105
- 2. **Resolve any non-resolved item.** For items in
106
- `{actioned, wont-fix}`, call `foundry_feedback_resolve` with
107
- `{ id, resolution: 'approved' | 'rejected', reason? }`. Human-appraise
108
- may resolve any such item regardless of source, including items from
109
- other stage ids.
199
+ 2. **Resolve any non-resolved item.** For items in `{actioned, wont-fix}`, call `foundry_feedback_resolve` with `{ id, resolution: 'approved' | 'rejected', reason? }`. Human-appraise may resolve any such item regardless of source, including items from other stage ids.
110
200
 
111
- **Reason rules.** `reason` is required when rejecting feedback
112
- (`resolution: 'rejected'`). Approved resolution via
113
- `foundry_feedback_resolve({ id, resolution: 'approved', reason? })` may
114
- omit `reason`.
201
+ **Reason rules.** `reason` is required when rejecting feedback (`resolution: 'rejected'`). Approved resolution via `foundry_feedback_resolve({ id, resolution: 'approved', reason? })` may omit `reason`.
115
202
 
116
203
  ## What you do NOT do
117
204
 
@@ -119,6 +206,6 @@ omit `reason`.
119
206
  - You do not make decisions for the human — present the state and wait.
120
207
  - You do not modify the artefact.
121
208
  - You do not skip the pause — the human must respond before continuing.
122
- - You do not filter or summarise away important details — show the full picture.
123
209
  - You do not call `foundry_history_append` or `foundry_git_commit` — `foundry_orchestrate` owns those (the tools are not registered publicly).
124
210
  - You do not register artefacts — handled by `foundry_stage_end({summary})`.
211
+ - You do not present the full artefact file content — the human can inspect files themselves if curious. Show summaries only.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@really-knows-ai/foundry",
3
- "version": "3.8.3",
3
+ "version": "3.8.5",
4
4
  "description": "A skill-driven framework for governed artefact generation with AI coding tools. Define your own artefact types, laws, and flows — Foundry handles the forge → quench → appraise pipeline with deterministic routing, quality gates, and iterative refinement.",
5
5
  "type": "module",
6
6
  "main": "dist/.opencode/plugins/foundry.js",