@really-knows-ai/foundry 3.5.7 → 3.5.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -10
- package/dist/.opencode/plugins/foundry-tools/config-create-tools.js +2 -3
- package/dist/.opencode/plugins/foundry.js +11 -1
- package/dist/CHANGELOG.md +23 -0
- package/dist/README.md +16 -10
- package/dist/docs/README.md +6 -6
- package/dist/docs/architecture.md +59 -19
- package/dist/docs/concepts.md +55 -19
- package/dist/docs/getting-started.md +37 -15
- package/dist/docs/memory-maintenance.md +3 -3
- package/dist/docs/tools.md +131 -70
- package/dist/docs/work-spec.md +38 -52
- package/dist/scripts/lib/config-creators/cycle.js +6 -10
- package/dist/scripts/lib/config-validators/cycle.js +1 -9
- package/dist/scripts/lib/feedback-store.js +1 -52
- package/dist/scripts/lib/sort-reason.js +8 -7
- package/dist/scripts/lib/sort-routing.js +106 -28
- package/dist/scripts/lib/tool-paths.js +5 -1
- package/dist/scripts/orchestrate-cycle.js +3 -13
- package/dist/scripts/orchestrate-phases.js +3 -7
- package/dist/scripts/sort.js +16 -53
- package/dist/skills/add-cycle/SKILL.md +4 -4
- package/dist/skills/add-flow/SKILL.md +1 -1
- package/dist/skills/add-law/SKILL.md +1 -1
- package/dist/skills/human-appraise/SKILL.md +12 -40
- package/package.json +1 -1
package/dist/scripts/sort.js
CHANGED
|
@@ -29,38 +29,6 @@ import {
|
|
|
29
29
|
getDirtyToolManagedFiles,
|
|
30
30
|
} from './lib/sort-fs-check.js';
|
|
31
31
|
|
|
32
|
-
// ---------------------------------------------------------------------------
|
|
33
|
-
// Top-level deadlock pass (spec §6.1)
|
|
34
|
-
// ---------------------------------------------------------------------------
|
|
35
|
-
|
|
36
|
-
/**
|
|
37
|
-
* Walk the feedback store and write a `state=deadlocked` snapshot for every
|
|
38
|
-
* non-resolved item whose history depth has reached the configured threshold.
|
|
39
|
-
* One atomic batch write via `store.writeDeadlockedSnapshots(ids, ...)`.
|
|
40
|
-
*
|
|
41
|
-
* Sort is the only writer of `state=deadlocked` per spec §6.1.
|
|
42
|
-
*
|
|
43
|
-
* @returns {boolean} true iff at least one snapshot was written.
|
|
44
|
-
*/
|
|
45
|
-
function runDeadlockPass(store, { threshold, enabled, cycle }) {
|
|
46
|
-
if (!enabled) return false;
|
|
47
|
-
const qualifying = store.list().filter(item => {
|
|
48
|
-
// history[0] is the most recent state per the feedback-store invariant
|
|
49
|
-
// (entries are prepended to keep newest at head).
|
|
50
|
-
const head = item.history[0];
|
|
51
|
-
if (head.state === 'resolved' || head.state === 'deadlocked') return false;
|
|
52
|
-
return item.history.length >= threshold;
|
|
53
|
-
});
|
|
54
|
-
if (qualifying.length === 0) return false;
|
|
55
|
-
store.writeDeadlockedSnapshots(
|
|
56
|
-
qualifying.map(it => it.id),
|
|
57
|
-
`depth >= threshold=${threshold}`,
|
|
58
|
-
'sort',
|
|
59
|
-
cycle,
|
|
60
|
-
);
|
|
61
|
-
return true;
|
|
62
|
-
}
|
|
63
|
-
|
|
64
32
|
// ---------------------------------------------------------------------------
|
|
65
33
|
// runSort — structured result for programmatic use
|
|
66
34
|
// ---------------------------------------------------------------------------
|
|
@@ -89,9 +57,8 @@ function extractFrontmatterDefaults(frontmatter) {
|
|
|
89
57
|
const maxIt = frontmatter['max-iterations'] ?? 3;
|
|
90
58
|
return {
|
|
91
59
|
maxIterations: maxIt,
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
deadlockIterations: frontmatter['deadlock-iterations'] ?? maxIt,
|
|
60
|
+
alwaysHumanAppraise: frontmatter['always-human-appraise'] === true,
|
|
61
|
+
deadlockHumanAppraise: frontmatter['deadlock-human-appraise'] !== false,
|
|
95
62
|
};
|
|
96
63
|
}
|
|
97
64
|
|
|
@@ -105,17 +72,14 @@ function checkDirtyFiles(history, io) {
|
|
|
105
72
|
+ `Re-run foundry_orchestrate or commit the listed files manually before retrying.`;
|
|
106
73
|
}
|
|
107
74
|
|
|
108
|
-
function
|
|
75
|
+
function loadFeedback(io, cycle) {
|
|
109
76
|
const store = openFeedbackStore('WORK.feedback.yaml', io);
|
|
110
|
-
|
|
111
|
-
const feedback = store.list().map(item => ({
|
|
77
|
+
return store.list().map(item => ({
|
|
112
78
|
id: item.id,
|
|
113
79
|
file: item.file,
|
|
114
80
|
state: item.history[0].state,
|
|
115
81
|
depth: item.history.length,
|
|
116
82
|
}));
|
|
117
|
-
const anyDeadlocked = feedback.some(f => f.state === 'deadlocked');
|
|
118
|
-
return { feedback, anyDeadlocked };
|
|
119
83
|
}
|
|
120
84
|
|
|
121
85
|
function resolveCycleDef(cycleDef, frontmatter, foundryDir, cycle) {
|
|
@@ -138,15 +102,15 @@ function getCurrentNonSortStage(nonSortHistory) {
|
|
|
138
102
|
return nonSortHistory.length > 0 ? nonSortHistory[nonSortHistory.length - 1].stage : null;
|
|
139
103
|
}
|
|
140
104
|
|
|
141
|
-
function resolveDeadlockRoute(stages, nonSortHistory, cycle) {
|
|
142
|
-
const currentNonSort = getCurrentNonSortStage(nonSortHistory);
|
|
143
|
-
if (currentNonSort && baseStage(currentNonSort) === 'human-appraise') return 'blocked';
|
|
144
|
-
return findFirst(stages, 'human-appraise') || `human-appraise:${cycle}`;
|
|
145
|
-
}
|
|
146
|
-
|
|
147
105
|
function resolveRoute(ctx) {
|
|
148
|
-
|
|
149
|
-
|
|
106
|
+
return determineRoute(
|
|
107
|
+
ctx.stages, ctx.history, ctx.feedback, ctx.maxIterations,
|
|
108
|
+
{
|
|
109
|
+
alwaysHumanAppraise: ctx.alwaysHumanAppraise,
|
|
110
|
+
deadlockHumanAppraise: ctx.deadlockHumanAppraise,
|
|
111
|
+
cycle: ctx.cycle,
|
|
112
|
+
},
|
|
113
|
+
);
|
|
150
114
|
}
|
|
151
115
|
|
|
152
116
|
function firstModelValue(models) {
|
|
@@ -215,9 +179,7 @@ function preparePhases({ workPath, historyPath, foundryDir, cycleDef, io }) {
|
|
|
215
179
|
const history = loadHistory(historyPath, cycle, io);
|
|
216
180
|
const dirtyError = checkDirtyFiles(history, io);
|
|
217
181
|
if (dirtyError) return { kind: 'violation', details: dirtyError };
|
|
218
|
-
const
|
|
219
|
-
cycle, defaults.deadlockIterations, defaults.deadlockAppraise, io,
|
|
220
|
-
);
|
|
182
|
+
const feedback = loadFeedback(io, cycle);
|
|
221
183
|
const fileCheck = checkModifiedFilesAfterLastStage({
|
|
222
184
|
history, foundryDir, cycleDef, cycle, frontmatter, io,
|
|
223
185
|
});
|
|
@@ -225,7 +187,7 @@ function preparePhases({ workPath, historyPath, foundryDir, cycleDef, io }) {
|
|
|
225
187
|
if (violation) return { kind: 'violation', details: violation };
|
|
226
188
|
return {
|
|
227
189
|
kind: 'ok',
|
|
228
|
-
frontmatter, cycle, stages, defaults, history, feedback, anyDeadlocked,
|
|
190
|
+
frontmatter, cycle, stages, defaults, history, feedback,
|
|
229
191
|
nonSortHistory: fileCheck.nonSortHistory,
|
|
230
192
|
};
|
|
231
193
|
}
|
|
@@ -253,8 +215,9 @@ function buildRouteCtx(prep) {
|
|
|
253
215
|
history: prep.history,
|
|
254
216
|
feedback: prep.feedback,
|
|
255
217
|
maxIterations: prep.defaults.maxIterations,
|
|
218
|
+
alwaysHumanAppraise: prep.defaults.alwaysHumanAppraise,
|
|
219
|
+
deadlockHumanAppraise: prep.defaults.deadlockHumanAppraise,
|
|
256
220
|
cycle: prep.cycle,
|
|
257
|
-
anyDeadlocked: prep.anyDeadlocked,
|
|
258
221
|
nonSortHistory: prep.nonSortHistory,
|
|
259
222
|
};
|
|
260
223
|
}
|
|
@@ -38,7 +38,7 @@ Do not tell the user to call branch tools directly.
|
|
|
38
38
|
|
|
39
39
|
When invoked with pre-filled fields matching the `foundry_config_create_cycle` tool args, skip questions for provided fields. Missing fields trigger clarifying questions.
|
|
40
40
|
|
|
41
|
-
Context fields: `{id, name, outputType, description, inputs?, targets?,
|
|
41
|
+
Context fields: `{id, name, outputType, description, inputs?, targets?, alwaysHumanAppraise?, deadlockHumanAppraise?, maxIterations?, assay?, memory?, models?}`
|
|
42
42
|
|
|
43
43
|
`inputs` is optional. A source cycle that starts from the user's run goal and has no upstream artefact dependency omits `inputs` entirely. Empty input contracts are invalid: do not pass `inputs: {type: "any-of", artefacts: []}`.
|
|
44
44
|
|
|
@@ -85,12 +85,12 @@ If the parent flow or required artefact type is missing and the user's goal clea
|
|
|
85
85
|
**Optional clusters** — After each cluster, ask whether the user wants to configure it; if not, skip:
|
|
86
86
|
|
|
87
87
|
- **Routing**: `inputs` (input contract: `{type: "any-of"|"all-of", artefacts: string[]}`; omit for source cycles with no upstream artefact dependency), `targets` (cycle IDs to route to after completion), `maxIterations` (maximum iterations before forced progression)
|
|
88
|
-
- **Human-appraise**: `
|
|
88
|
+
- **Human-appraise**: `alwaysHumanAppraise` (boolean, default false) — human reviews every iteration; when true, `max-iterations` is not enforced. `deadlockHumanAppraise` (boolean, default true) — route to human for review when the iteration cap is reached, instead of blocking the cycle. Only applies when `alwaysHumanAppraise` is false.
|
|
89
89
|
- **Memory and models**: `assay` (assay configuration), `memory` (memory configuration), `models` (stage-specific model overrides, e.g. `{forge: "openai/gpt-4o", appraise: "openai/gpt-4o"}`). For models, offer each stage (forge, quench, appraise) individually. If the user has no preference, omit the `models` map and use the session defaults.
|
|
90
90
|
|
|
91
91
|
### 2. Plan
|
|
92
92
|
|
|
93
|
-
Present a structured summary of the cycle definition: id, name, outputType, description, and any configured optional fields (inputs, targets,
|
|
93
|
+
Present a structured summary of the cycle definition: id, name, outputType, description, and any configured optional fields (inputs, targets, alwaysHumanAppraise, deadlockHumanAppraise, maxIterations, assay, memory, models). Include only fields that have values.
|
|
94
94
|
|
|
95
95
|
Ask: "Does this capture the cycle correctly?" Iterate until the user is satisfied.
|
|
96
96
|
|
|
@@ -102,7 +102,7 @@ Ask: "Proceed with this plan?" — wait for user answer before building. If the
|
|
|
102
102
|
|
|
103
103
|
1. **Validate**: Call `foundry_config_validate_cycle({ name: "<id>", body: "<assembled markdown>" })`. Assemble the body from the fields using the frontmatter format the tool produces internally. If the result is `{ ok: false, errors: [...] }`, address each error and re-run until `{ ok: true }`. Common issues: missing required frontmatter keys, references to artefact types or flows that do not exist yet.
|
|
104
104
|
|
|
105
|
-
2. **Create**: Call `foundry_config_create_cycle({ id: "<id>", name: "<name>", outputType: "<type>", description: "<description>", targets: ...,
|
|
105
|
+
2. **Create**: Call `foundry_config_create_cycle({ id: "<id>", name: "<name>", outputType: "<type>", description: "<description>", targets: ..., alwaysHumanAppraise: ..., deadlockHumanAppraise: ..., maxIterations: ..., assay: ..., memory: ..., models: ... })`. Include `inputs` only when the cycle reads upstream artefacts, and include `models` whenever the user selected stage-specific model overrides. The tool:
|
|
106
106
|
- re-validates the body (TOCTOU);
|
|
107
107
|
- writes `foundry/cycles/<id>.md`;
|
|
108
108
|
- produces one git commit on the current `config/*` branch.
|
|
@@ -69,7 +69,7 @@ Create missing dependencies in validation order:
|
|
|
69
69
|
|
|
70
70
|
3. **Appraisers** (may reference models): For each new appraiser, gather `id`, `name`, `description`, and optional `model` preference. Context object: `{id, name, description, model?}`.
|
|
71
71
|
|
|
72
|
-
4. **Cycles** (reference artefact types, laws, appraisers): For each new cycle, gather `id`, `name`, `outputType`, `description`, and any optional settings (inputs, targets, appraise, assay, memory, models). Context object: `{id, name, outputType, description, inputs?, targets?,
|
|
72
|
+
4. **Cycles** (reference artefact types, laws, appraisers): For each new cycle, gather `id`, `name`, `outputType`, `description`, and any optional settings (inputs, targets, appraise, assay, memory, models). Context object: `{id, name, outputType, description, inputs?, targets?, alwaysHumanAppraise?, deadlockHumanAppraise?, maxIterations?, assay?, memory?, models?}`. For a source cycle that starts from the user's run goal and has no upstream artefact dependency, omit `inputs` entirely; never pass `inputs` with an empty `artefacts` array.
|
|
73
73
|
|
|
74
74
|
For the haiku example, default to a `haiku` artefact type, `haikus/*.md` file pattern, laws for form, imagery, and mood, a deterministic syllable validator where project dependencies allow it, two or three distinct appraisers, one cycle, and one flow.
|
|
75
75
|
|
|
@@ -66,7 +66,7 @@ Walk the user through which elements of the law can be validated deterministical
|
|
|
66
66
|
>
|
|
67
67
|
> Shall I add validators for the script-checkable elements?
|
|
68
68
|
|
|
69
|
-
For each script-checkable element, write a standalone `.mjs` script next to the artefacts it validates (e.g. `foundry/artefacts/<type>/check-line-count.mjs`) and reference it in the command (e.g. `node foundry/artefacts/<type>/check-line-count.mjs {files}`). Place validators alongside the artefacts so they colocate with what they validate.
|
|
69
|
+
For each script-checkable element, write a standalone `.mjs` script next to the artefacts it validates (e.g. `foundry/artefacts/<type>/check-line-count.mjs`) and reference it in the command (e.g. `node foundry/artefacts/<type>/check-line-count.mjs {files}`). Place validators alongside the artefacts so they colocate with what they validate. Use existing project dependencies and Node.js built‑ins. Hand‑rolled heuristics (custom syllable counters, regex parsers, manual character walks) are a last resort — they produce false positives, waste tokens on debugging, and break on edge cases. Install a library instead. Only write validation logic from scratch when no npm package exists for the task and the heuristic is trivially correct.
|
|
70
70
|
|
|
71
71
|
**Validators**: Ask about `validators` (optional) — offer to create one or skip.
|
|
72
72
|
|
|
@@ -6,7 +6,7 @@ description: Human quality gate. Presents the artefact to the human for review a
|
|
|
6
6
|
|
|
7
7
|
# Human Appraise
|
|
8
8
|
|
|
9
|
-
You are a human quality gate. Sort has routed to you
|
|
9
|
+
You are a human quality gate. Sort has routed to you for the human to review the current artefact and provide feedback or approve.
|
|
10
10
|
|
|
11
11
|
## Prerequisites
|
|
12
12
|
|
|
@@ -31,7 +31,7 @@ When invoked from orchestrate, you receive `{cycle, token, context}`:
|
|
|
31
31
|
- `cycle` — the current cycle id
|
|
32
32
|
- `token` — single-use token for `foundry_stage_begin`
|
|
33
33
|
- `context.artefact_file` — the target artefact
|
|
34
|
-
- `context.recent_feedback` — recent
|
|
34
|
+
- `context.recent_feedback` — recent unresolved feedback items to present to the user
|
|
35
35
|
|
|
36
36
|
Your FIRST tool call must be `foundry_stage_begin({stage: 'human-appraise:<cycle>', cycle, token})`.
|
|
37
37
|
|
|
@@ -63,31 +63,24 @@ Your LAST tool call must be `foundry_stage_end({summary: '<one-sentence descript
|
|
|
63
63
|
4. Present to the human:
|
|
64
64
|
- The current artefact content (full file content or multi-file diff)
|
|
65
65
|
- A summary of this iteration's feedback (resolved and open)
|
|
66
|
-
-
|
|
67
|
-
- Which feedback item(s) are stuck
|
|
68
|
-
- The appraiser's reasoning
|
|
69
|
-
- Forge's wont-fix or revision justification
|
|
70
|
-
- Ask the human to resolve the disagreement
|
|
66
|
+
- Ask the human to review, provide feedback, or approve
|
|
71
67
|
|
|
72
68
|
5. Wait for the human's response.
|
|
73
69
|
|
|
74
70
|
6. Act on the response (tag MUST be `human` on any added feedback — the tool rejects other tags during human-appraise):
|
|
75
71
|
- **Approve** — "looks good" / "continue" — no feedback added, sort will advance.
|
|
76
72
|
- **Provide feedback** — `foundry_feedback_add({ file, text, tag: 'human' })`. Sort will route back to forge.
|
|
77
|
-
- **Resolve feedback** — `foundry_feedback_resolve({ id, resolution, reason? })` for items in `{actioned, wont-fix
|
|
73
|
+
- **Resolve feedback** — `foundry_feedback_resolve({ id, resolution, reason? })` for items in `{actioned, wont-fix}`. See "Feedback handling" below for the legal transitions and authority rules.
|
|
78
74
|
- **Abort** — human-appraise cannot directly mark the artefact `blocked` (the repository no longer has a per-artefact status tool or table). To abort: end the stage with a summary explaining the abort, then either (a) instruct the user to call `foundry_workfile_delete({ confirm: true })` to discard the cycle, or (b) reject outstanding feedback so routing exhausts iterations and sort blocks the cycle on its own.
|
|
79
75
|
|
|
80
76
|
7. `foundry_stage_end({summary})` — describe what the human decided so sort can log it.
|
|
81
77
|
|
|
82
78
|
## Feedback handling
|
|
83
79
|
|
|
84
|
-
As a human-appraise stage, you can add human feedback and resolve
|
|
85
|
-
items
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
limited to deadlocked items, though in practice most overrides today are
|
|
89
|
-
on deadlocked items because default sort routing only surfaces deadlocked
|
|
90
|
-
items to human-appraise (see §17 future-work note below).
|
|
80
|
+
As a human-appraise stage, you can add human feedback and resolve
|
|
81
|
+
feedback items. **Human-appraise can resolve any non-resolved
|
|
82
|
+
source-stage item regardless of source** — this is the universal
|
|
83
|
+
override authority recorded in spec §5.1 rule 5.
|
|
91
84
|
|
|
92
85
|
What human-appraise can NOT do:
|
|
93
86
|
|
|
@@ -109,37 +102,16 @@ What human-appraise CAN do:
|
|
|
109
102
|
found and no new snapshot was written, `deduped: false` indicates a new
|
|
110
103
|
item was created.
|
|
111
104
|
|
|
112
|
-
2. **Resolve any non-resolved
|
|
113
|
-
`{actioned, wont-fix}
|
|
114
|
-
human-appraise), call `foundry_feedback_resolve` with
|
|
105
|
+
2. **Resolve any non-resolved item.** For items in
|
|
106
|
+
`{actioned, wont-fix}`, call `foundry_feedback_resolve` with
|
|
115
107
|
`{ id, resolution: 'approved' | 'rejected', reason? }`. Human-appraise
|
|
116
108
|
may resolve any such item regardless of source, including items from
|
|
117
109
|
other stage ids.
|
|
118
110
|
|
|
119
|
-
3. **Resolve deadlocked items.** When items reach `state: deadlocked`
|
|
120
|
-
(written by sort when an item's history depth hits
|
|
121
|
-
`deadlock-iterations`), human-appraise is the ONLY stage authorised
|
|
122
|
-
to resolve them. Call `foundry_feedback_resolve` with
|
|
123
|
-
`{ id, resolution: 'approved' | 'rejected', reason: '...' }`.
|
|
124
|
-
`reason` is always required on deadlock override — it documents why
|
|
125
|
-
the deadlock is being broken. After human-appraise resolves every
|
|
126
|
-
deadlocked item, the cycle resumes normal forge/appraise routing. If
|
|
127
|
-
deadlocks remain after human-appraise, the cycle blocks (per spec §5.2).
|
|
128
|
-
|
|
129
111
|
**Reason rules.** `reason` is required when rejecting feedback
|
|
130
|
-
(`resolution: 'rejected'`)
|
|
131
|
-
Non-deadlocked approved resolution via
|
|
112
|
+
(`resolution: 'rejected'`). Approved resolution via
|
|
132
113
|
`foundry_feedback_resolve({ id, resolution: 'approved', reason? })` may
|
|
133
|
-
omit `reason
|
|
134
|
-
the deadlock is being broken.
|
|
135
|
-
|
|
136
|
-
**Future work.** Spec §17 notes that a cycle-level mode flag letting
|
|
137
|
-
human-appraise see all unresolved feedback (not just deadlocked items)
|
|
138
|
-
before sort routes is planned for a future release. In v2.6.0 the
|
|
139
|
-
authority is universal but reachability is limited — you typically only
|
|
140
|
-
see deadlocked items on the route from sort. If you do see non-deadlocked
|
|
141
|
-
items (e.g. you were invoked directly by the user), the same authority
|
|
142
|
-
applies.
|
|
114
|
+
omit `reason`.
|
|
143
115
|
|
|
144
116
|
## What you do NOT do
|
|
145
117
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@really-knows-ai/foundry",
|
|
3
|
-
"version": "3.5.7",
|
|
3
|
+
"version": "3.5.9",
|
|
4
4
|
"description": "A skill-driven framework for governed artefact generation with AI coding tools. Define your own artefact types, laws, and flows — Foundry handles the forge → quench → appraise pipeline with deterministic routing, quality gates, and iterative refinement.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/.opencode/plugins/foundry.js",
|