npm - @really-knows-ai/foundry - Versions diffs - 3.7.1 → 3.8.0 - Mend

@really-knows-ai/foundry 3.7.1 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/.opencode/plugins/foundry-tools/agent-refresh.js +39 -13
package/dist/.opencode/plugins/foundry-tools/config-law-tools.js +3 -3
package/dist/.opencode/plugins/foundry-tools/git-helpers.js +24 -36
package/dist/.opencode/plugins/foundry-tools/git-tools.js +31 -1
package/dist/.opencode/plugins/foundry-tools/orchestrate-tool.js +2 -2
package/dist/.opencode/plugins/foundry-tools/stage-tools.js +8 -3
package/dist/CHANGELOG.md +30 -0
package/dist/scripts/appraise-module.js +2 -3
package/dist/scripts/lib/git-policy.js +18 -0
package/dist/scripts/orchestrate.js +13 -4
package/dist/scripts/sort.js +11 -4
package/dist/skills/add-law/SKILL.md +54 -5
package/dist/skills/forge/SKILL.md +2 -0
package/dist/skills/orchestrate/SKILL.md +8 -2
package/package.json +1 -1

package/dist/.opencode/plugins/foundry-tools/agent-refresh.js CHANGED Viewed

@@ -46,12 +46,15 @@ function deleteStaleAgents(agentsDir) {
     existing = [];
   }
   for (const entry of existing) {
-    if (entry.startsWith('foundry-') && entry.endsWith('.md')) {
-      unlinkSync(path.join(agentsDir, entry));
-    }
+    if (isModelledAgent(entry)) unlinkSync(path.join(agentsDir, entry));
   }
 }
+function isModelledAgent(entry) {
+  return entry.startsWith('foundry-') && entry.endsWith('.md')
+    && entry !== 'foundry-forge.md' && entry !== 'foundry-appraise.md';
+}
 function writeAgentFiles(agentsDir, models) {
   for (const modelId of models) {
     const slug = makeSlug(modelId);
@@ -60,6 +63,23 @@ function writeAgentFiles(agentsDir, models) {
   }
 }
+const DEFAULT_AGENT = `---
+description: "Default Foundry STAGE stage agent"
+mode: subagent
+hidden: true
+---
+You are a Foundry stage agent. Follow the skill instructions provided in your task prompt exactly.
+`;
+function writeDefaultAgents(agentsDir) {
+  for (const stage of ['forge', 'appraise']) {
+    const filePath = path.join(agentsDir, `foundry-${stage}.md`);
+    if (!existsSync(filePath)) {
+      writeFileSync(filePath, DEFAULT_AGENT.replace('STAGE', stage), 'utf8');
+    }
+  }
+}
 /**
  * Snapshot the current foundry-*.md agent files in the agents directory.
  * Returns a plain object mapping filename → sha256 hex digest.
@@ -110,6 +130,7 @@ export function refreshAgents(worktree) {
     mkdirSync(agentsDir, { recursive: true });
     deleteStaleAgents(agentsDir);
     writeAgentFiles(agentsDir, models);
+    writeDefaultAgents(agentsDir);
     return { ok: true, count: models.length };
   } catch (err) {
@@ -155,18 +176,23 @@ function resolveGuideSource(packageRoot) {
 export function writeFoundryGuideAgent(worktree, packageRoot) {
   const targetDir = path.join(worktree, '.opencode', 'agents');
   const targetPath = path.join(targetDir, 'foundry.md');
+  let written = false;
+  if (!existsSync(targetPath)) {
+    const sourcePath = resolveGuideSource(packageRoot);
+    try {
+      const content = readFileSync(sourcePath, 'utf8');
+      mkdirSync(targetDir, { recursive: true });
+      writeFileSync(targetPath, content, 'utf8');
+      written = true;
+    } catch (err) {
+      return { ok: false, error: `Failed to write guide agent: ${err.message ?? String(err)}` };
+    }
+  }
-  if (existsSync(targetPath)) return { ok: true, written: false };
+  writeDefaultAgents(targetDir);
-  const sourcePath = resolveGuideSource(packageRoot);
-  try {
-    const content = readFileSync(sourcePath, 'utf8');
-    mkdirSync(targetDir, { recursive: true });
-    writeFileSync(targetPath, content, 'utf8');
-    return { ok: true, written: true };
-  } catch (err) {
-    return { ok: false, error: `Failed to write guide agent: ${err.message ?? String(err)}` };
-  }
+  return { ok: true, written };
 }
 function resolveSkillsSource(packageRoot) {

package/dist/.opencode/plugins/foundry-tools/config-law-tools.js CHANGED Viewed

@@ -318,7 +318,7 @@ function makeReadLawTool(tool) {
 function makeAddLawTool(tool) {
   return tool({
     description: 'Add a new law (config-tier; requires a config/* branch). ' +
-      'Fields: id, name, description, passing, failing, target ({kind, file|typeId}), validators ([{id, command, failureMeans?}]).',
+      'Args: id, name, description, passing, failing, target ({kind, file|typeId}), validators ([{id, command, failureMeans?}]). Every validator needs a companion test (TDD) before the law is created.',
     args: {
       id: tool.schema.string().describe('Law identifier. Becomes the ## <id> heading.'),
       name: tool.schema.string().describe('Human-readable name stored as prose after heading.'),
@@ -332,9 +332,9 @@ function makeAddLawTool(tool) {
       }).describe('Where to write the law'),
       validators: tool.schema.array(tool.schema.object({
         id: tool.schema.string().describe('Validator identifier'),
-        command: tool.schema.string().describe('CLI command with optional {pattern} / {files} placeholders. Prefer JavaScript (.mjs) scripts as separate files (e.g. "node foundry/artefacts/<type>/check.mjs {files}"). Stdout must be NDJSON: one JSON object per line with required fields "file" (relative path) and "text" (message). Optional: "location" (line:col), "severity" (error|warning). Exit code is ignored.'),
+        command: tool.schema.string().describe('CLI command with optional {pattern} / {files} placeholders. Prefer .mjs scripts (e.g. "node foundry/artefacts/<type>/check.mjs {files}") with a companion .test.js file (TDD). Stdout must be NDJSON: one JSON object per line with required fields "file" (relative path) and "text" (message). Optional: "location" (line:col), "severity" (error|warning). Exit code is ignored.'),
         failureMeans: tool.schema.string().optional().describe('Description of what failure means'),
-      })).optional().describe('Optional deterministic validators'),
+      })).optional().describe('Optional deterministic validators. Each requires a companion test file.'),
     },
     execute: guarded('foundry_config_add_law', CREATE_GUARDS, executeAddLaw, { branchIo: branchIoFactory, io: asyncIoFactory }),
   });

package/dist/.opencode/plugins/foundry-tools/git-helpers.js CHANGED Viewed

@@ -4,8 +4,7 @@ import { existsSync, unlinkSync, writeFileSync, readFileSync } from 'fs';
 import { slugify } from '../../../scripts/lib/slug.js';
 import { CONFIG_RE, DRY_RUN_RE } from '../../../scripts/lib/branch-guard.js';
 import { finishWorkBranchWithArchive } from '../../../scripts/lib/git-finish/work-finish.js';
-import { finishDryRun } from '../../../scripts/lib/snapshot/finish.js';
-import { asyncIoFactory } from './helpers.js';
+import { checkConfigBranchFiles } from '../../../scripts/lib/git-policy.js';
 const WORK_FILES = ['WORK.md', 'WORK.history.yaml', 'WORK.feedback.yaml'];
@@ -233,15 +232,35 @@ export function finishBranchCommon({ branchName, branchType, base, cwd, args })
   const opts = { cwd, encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] };
   const planned = computeFinishPlan({ branchName, branchType, base, args, cwd });
   if (args.confirm !== true) return makeConfirmRefusal(planned);
-  const dirty = dirtyTrackedFiles(cwd);
-  if (dirty.length) return makeDirtyRefusal(dirty);
-  if (branchType === 'work') deleteWorkFilesAndCommit(planned.filesToDelete, cwd, branchName);
+  const guardErr = runPreMergeGuards({ branchName, branchType, base, cwd, opts, planned });
+  if (guardErr) return guardErr;
   const mergeErr = squashMergeIntoBase(base, branchName, branchType, opts);
   if (mergeErr) return mergeErr;
   const { hash } = commitAndDeleteBranch(args.message, branchName, opts);
   return JSON.stringify({ ok: true, hash, branch: base });
 }
+function runPreMergeGuards({ branchName, branchType, base, cwd, opts, planned }) {
+  const dirty = dirtyTrackedFiles(cwd);
+  if (dirty.length) return makeDirtyRefusal(dirty);
+  if (branchType === 'work') {
+    deleteWorkFilesAndCommit(planned.filesToDelete, cwd, branchName);
+    return null;
+  }
+  if (branchType !== 'config') return null;
+  const diff = execFileSync('git', ['diff', '--name-only', `${base}..${branchName}`],
+    { cwd, encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
+  const result = checkConfigBranchFiles(diff);
+  if (result) {
+    return JSON.stringify({
+      ok: false,
+      error: 'Config branches may only change files inside foundry/. Outside files detected:',
+      outside: result.files,
+    });
+  }
+  return null;
+}
 // -- finishWorkBranch helpers --
 function makeExecGit(cwd, opts) {
@@ -321,34 +340,3 @@ export function finishConfigBranch({ configBranch, base, cwd, args }) {
   });
 }
-// -- finishDryRunBranch --
-export async function finishDryRunBranch({ branch, args, cwd }) {
-  const io = asyncIoFactory({ worktree: cwd });
-  const exec = (argv) => execFileSync('git', argv,
-    { cwd, encoding: 'utf8', stdio: 'pipe' });
-  if (args.confirm !== true) {
-    return JSON.stringify({
-      ok: false,
-      error: 'foundry_git_finish requires {confirm: true} to perform destructive operations. Re-invoke with confirm:true to apply the plan.',
-      planned: {
-        branch,
-        action: 'snapshot + discard (dry-run finish)',
-        snapshotPath: '.snapshots/<runId> (computed at apply time)',
-      },
-    });
-  }
-  try {
-    const out = await finishDryRun({
-      message: args.message, branch, io, execFile: exec,
-    });
-    return JSON.stringify(out);
-  } catch (err) {
-    return JSON.stringify({
-      ok: false,
-      error: `foundry_git_finish: dry-run finish failed: ${err.message ?? String(err)}`,
-    });
-  }
-}

package/dist/.opencode/plugins/foundry-tools/git-tools.js CHANGED Viewed

@@ -6,7 +6,6 @@ import {
   classifyBranch,
   finishWorkBranch,
   finishConfigBranch,
-  finishDryRunBranch,
   KIND_DRY_RUN,
   KINDS,
 } from './git-helpers.js';
@@ -14,6 +13,7 @@ import { makeIO, makeExec, asyncIoFactory } from './helpers.js';
 import { requireNoActiveStage } from '../../../scripts/lib/stage-guard.js';
 import { currentBranch } from '../../../scripts/lib/branch-guard.js';
 import { truncateTrace } from '../../../scripts/lib/tracing.js';
+import { finishDryRun } from '../../../scripts/lib/snapshot/finish.js';
 function refuse(error) { return JSON.stringify({ error }); }
@@ -107,6 +107,36 @@ function refuseUnknownFinishBranch(branch) {
     `(expected work/<x>, config/<x>, or dry-run/<x>/<y>).`);
 }
+async function finishDryRunBranch({ branch, args, cwd }) {
+  const io = asyncIoFactory({ worktree: cwd });
+  const exec = (argv) => execFileSync('git', argv,
+    { cwd, encoding: 'utf8', stdio: 'pipe' });
+  if (args.confirm !== true) {
+    return JSON.stringify({
+      ok: false,
+      error: 'foundry_git_finish requires {confirm: true} to perform destructive operations. Re-invoke with confirm:true to apply the plan.',
+      planned: {
+        branch,
+        action: 'snapshot + discard (dry-run finish)',
+        snapshotPath: '.snapshots/<runId> (computed at apply time)',
+      },
+    });
+  }
+  try {
+    const out = await finishDryRun({
+      message: args.message, branch, io, execFile: exec,
+    });
+    return JSON.stringify(out);
+  } catch (err) {
+    return JSON.stringify({
+      ok: false,
+      error: `foundry_git_finish: dry-run finish failed: ${err.message ?? String(err)}`,
+    });
+  }
+}
 function routeDryRunFinish(branch, args, cwd) {
   if (args.baseBranch !== undefined)
     return refuseBaseBranchForDryRun();

package/dist/.opencode/plugins/foundry-tools/orchestrate-tool.js CHANGED Viewed

@@ -11,9 +11,9 @@ import { requireNotFailed } from '../../../scripts/lib/failed-flow.js';
 import { requireOnFlowBranch } from '../../../scripts/lib/branch-guard.js';
 function createMint(secret, pending) {
-  return ({ route, cycle, exp }) => {
+  return ({ route, cycle, exp, model }) => {
     const nonce = randomUUID();
-    const payload = { route, cycle, nonce, exp };
+    const payload = model ? { route, cycle, nonce, exp, model } : { route, cycle, nonce, exp };
     pending.add(nonce, payload);
     return signToken(payload, secret);
   };

package/dist/.opencode/plugins/foundry-tools/stage-tools.js CHANGED Viewed

@@ -41,13 +41,18 @@ function resolveBaseSha(worktree) {
   }
 }
-function verifyStageToken(token, secret, stage, cycle) {
+function verifyStageToken(token, secret, stage, cycle, agent) {
   const v = verifyToken(token, secret);
   if (!v.ok) return { error: `foundry_stage_begin: token ${v.reason}` };
   if (v.payload.route !== stage || v.payload.cycle !== cycle) {
     return { error: `foundry_stage_begin: token payload mismatch (route=${v.payload.route}, cycle=${v.payload.cycle})` };
   }
-  return { payload: v.payload };
+  return checkTokenAgentBinding(v.payload, agent);
+}
+function checkTokenAgentBinding(payload, agent) {
+  if (!payload.model || !agent || payload.model === agent) return { payload };
+  return { error: `foundry_stage_begin: token is scoped to subagent '${payload.model}', not '${agent}'. Dispatch forge via task(), not inline.` };
 }
 async function executeStageBegin(args, context, pending) {
@@ -59,7 +64,7 @@ async function executeStageBegin(args, context, pending) {
     return JSON.stringify({ error: `foundry_stage_begin requires no active stage; current: ${current.stage}` });
   }
-  const tokenResult = verifyStageToken(args.token, secret, args.stage, args.cycle);
+  const tokenResult = verifyStageToken(args.token, secret, args.stage, args.cycle, context.agent);
   if (tokenResult.error) return JSON.stringify({ error: tokenResult.error });
   const baseSha = resolveBaseSha(context.worktree);

package/dist/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,35 @@
 # Changelog
+## [3.8.0] - 2026-05-27
+### Added
+- Default `foundry-forge` and `foundry-appraise` subagents: hidden, model-less stage agents created during bootstrap and preserved across model refreshes. When a cycle has no model overrides, these agents handle forge and appraise dispatch, ensuring tokens are always scoped to a known subagent type.
+- Stage token agent binding: `foundry_stage_begin` now verifies the calling agent matches the token's scoped subagent. The main Foundry agent cannot use tokens issued for `foundry-forge` or `foundry-opencode-*` — it must dispatch via `task()`. This enforces the subagent dispatch model at the protocol level.
+### Fixed
+- Cycle-level `models.appraise` now correctly flows into appraise dispatch as the default model when individual appraisers lack an explicit `model` field. Previously the value was read for agent-file validation but discarded before dispatch, causing all appraisers to run on the session default.
+- Forge and orchestrate skill guidance clarify that stage skills are subagent-only and must not be run inline. The `forge` SKILL.md opens with an explicit "This skill is subagent-only" warning.
+## [3.7.3] - 2026-05-27
+### Added
+- Config branch file enforcement: `foundry_git_finish` on a `config/*` branch now validates that every changed file lives inside `foundry/` or is tool-managed. Files outside `foundry/` are rejected with a clear list of offending paths. This prevents test fixtures, artefact output, or other non-config files from accidentally landing on config branches.
+### Changed
+- The `add-law` skill clarifies that all flow artefacts — validator scripts, tests, and test fixtures — must live inside `foundry/`. Test fixtures colocate under `foundry/artefacts/<type>/test/fixtures/`. The worked example and "what you do NOT do" list updated accordingly.
+## [3.7.2] - 2026-05-27
+### Changed
+- Every validator now requires a companion test file written with TDD. The `add-law` skill walks through TDD (test first, confirm failure, implement, verify pass), produces a `.test.js` file alongside each validator, and refuses to create validators without passing tests. The `foundry_config_add_law` tool description surfaces the requirement.
 ## [3.7.1] - 2026-05-27
 ### Fixed

package/dist/scripts/appraise-module.js CHANGED Viewed

@@ -127,9 +127,8 @@ function addTasksForArtefact(tasks, artefact, entry, ctx) {
  * Map an appraiser's model to a subagent type string.
  */
 function resolveSubagentType(appraiser, ctx) {
-  const name = appraiser.model || ctx.defaultModel || 'general';
-  if (name === 'general') return 'general';
+  const name = appraiser.model || ctx.defaultModel || 'appraise';
+  if (name === 'appraise') return 'foundry-appraise';
   return `foundry-${name.replace(/[/.]/g, '-')}`;
 }

package/dist/scripts/lib/git-policy.js CHANGED Viewed

@@ -99,3 +99,21 @@ export function allowedPatternsForStage({ stageBase, forgeFilePatterns = [] } =
   if (stageBase === 'assay') return ['foundry-memory/**'];
   return [];
 }
+/**
+ * Check that every file changed on a config branch lives inside foundry/ or
+ * is tool-managed. Returns null when clean, or { files: [...] } when outside
+ * files are detected.
+ *
+ * @param {string} diffOut - Raw `git diff --name-only base..branch` output
+ * @returns {null|{files: string[]}}
+ */
+export function checkConfigBranchFiles(diffOut) {
+  if (!diffOut) return null;
+  const toolManaged = new Set(TOOL_MANAGED);
+  const outside = diffOut.split('\n')
+    .map(f => f.trim())
+    .filter(f => f.length > 0 && !toolManaged.has(f) && !f.startsWith('foundry/'))
+    .filter(f => !TOOL_MANAGED_PREFIX.some(p => f.startsWith(p)));
+  return outside.length ? { files: outside } : null;
+}

package/dist/scripts/orchestrate.js CHANGED Viewed

@@ -10,6 +10,7 @@ import { ulid as defaultUlid } from './lib/ulid.js';
 import { computeArtefactVersion } from './lib/artefacts.js';
 import { enforceForgeContract } from './lib/forge-contract.js';
 import { loadHistory } from './lib/history.js';
+import { getCycleDefinition } from './lib/config.js';
 import {
   readCycleTargets,
   readForgeFilePatterns,
@@ -109,15 +110,23 @@ function buildQuenchContext(cycleId, args, io) {
     feedback: buildFeedback(cycleId, stageId, io) };
 }
-function buildAppraiseCtx(cycleId, args, io) {
+async function buildAppraiseCtx(cycleId, args, io) {
   const stageId = `appraise:${cycleId}`;
+  const defaultModel = args.defaultModel ?? await readAppraiseModel(cycleId, io);
   return { cycleId, io, git: args.git, finalize: buildFinalizeWrapper(cycleId, args, io),
-    foundryDir: 'foundry', defaultModel: args.defaultModel,
+    foundryDir: 'foundry', defaultModel,
     baseBranch: args.baseBranch ?? 'main', cwd: args.cwd ?? process.cwd(),
     activeStage: readActiveStage(io), lastStage: readLastStage(io),
     feedback: buildFeedback(cycleId, stageId, io) };
 }
+async function readAppraiseModel(cycleId, io) {
+  try {
+    const cd = await getCycleDefinition('foundry', cycleId, io);
+    return cd.frontmatter?.models?.appraise;
+  } catch { return undefined; }
+}
 function resolveBaseSha(io) {
   try {
     const sha = io.exec(['git', 'rev-parse', 'HEAD']);
@@ -222,13 +231,13 @@ async function dispatchAppraiseOrConsolidate(sortResult, preCheck, args, io, res
 async function handleAppraiseGatherRoute(sortResult, preCheck, args, io) {
   writeStageRecord(io, preCheck.cycleId, sortResult.route);
-  const result = await gatherAppraiseContext(buildAppraiseCtx(preCheck.cycleId, args, io));
+  const result = await gatherAppraiseContext(await buildAppraiseCtx(preCheck.cycleId, args, io));
   if (result.action === 'violation') { clearActiveStage(io); return result; }
   return dispatchAppraiseOrConsolidate(sortResult, preCheck, args, io, result);
 }
 async function handleAppraiseConsolidateRoute(sortResult, preCheck, args, io) {
-  const ctx = buildAppraiseCtx(preCheck.cycleId, args, io);
+  const ctx = await buildAppraiseCtx(preCheck.cycleId, args, io);
   const result = await consolidateAppraise(ctx, args.lastResults);
   if (result.action === 'violation') {
     clearActiveStage(io);

package/dist/scripts/sort.js CHANGED Viewed

@@ -165,9 +165,15 @@ function resolveModelId(routeBase, models, defaultModel) {
 }
 function pickModelId(route, frontmatter, defaultModel) {
-  const models = frontmatter.models;
-  if (!models) return defaultModel || null;
-  return resolveModelId(baseStage(route), models, defaultModel) || null;
+  const routeBase = baseStage(route);
+  const resolved = frontmatter.models ? resolveModelId(routeBase, frontmatter.models, defaultModel) : null;
+  return resolved || defaultModel || defaultForStage(routeBase);
+}
+function defaultForStage(routeBase) {
+  if (routeBase === 'forge') return 'forge';
+  if (routeBase === 'appraise') return 'appraise';
+  return null;
 }
 function resolveModel(route, frontmatter, agentsDir, io, defaultModel) {
@@ -177,6 +183,7 @@ function resolveModel(route, frontmatter, agentsDir, io, defaultModel) {
   const model = `foundry-${modelId.replace(/[/.]/g, '-')}`;
   const agentPath = `${agentsDir}/${model}.md`;
   if (!io.exists(agentPath)) {
+    if (modelId === 'forge' || modelId === 'appraise') return model;
     return {
       error: `Missing required subagent: ${model}.md is not present in ${agentsDir}/. `
         + `Call foundry_refresh_agents() to regenerate agent files, then restart.`,
@@ -194,7 +201,7 @@ function checkModel(route, frontmatter, agentsDir, io, defaultModel) {
 function mintToken({ route, model, mint, cycle, now, ulid, reason }) {
   const result = { route, ...(model ? { model } : {}), reason };
   if (mint && isDispatchableRoute(route)) {
-    const token = mint({ route, cycle, exp: now + 10 * 60 * 1000, nonce: ulid(now) });
+    const token = mint({ route, cycle, exp: now + 10 * 60 * 1000, nonce: ulid(now), model });
     if (token) result.token = token;
   }
   return result;

package/dist/skills/add-law/SKILL.md CHANGED Viewed

@@ -68,6 +68,10 @@ Walk the user through which elements of the law can be validated deterministical
 For each script-checkable element, write a standalone `.mjs` script next to the artefacts it validates (e.g. `foundry/artefacts/<type>/check-line-count.mjs`) and reference it in the command (e.g. `node foundry/artefacts/<type>/check-line-count.mjs {files}`). Place validators alongside the artefacts so they colocate with what they validate. Use existing project dependencies and Node.js built‑ins. Hand‑rolled heuristics (custom syllable counters, regex parsers, manual character walks) are a last resort — they produce false positives, waste tokens on debugging, and break on edge cases. Install a library instead. Only write validation logic from scratch when no npm package exists for the task and the heuristic is trivially correct.
+All flow artefacts — validator scripts, tests, test fixtures — live inside `foundry/`. Never place artefacts outside `foundry/`. Test fixtures colocate with the validator's test file under `foundry/artefacts/<type>/test/fixtures/`. Fixtures placed outside `foundry/` can match an artefact type's `file-patterns:` and trigger false-positive quench feedback during flow runs; keeping them inside `foundry/` prevents this.
+Every validator carries a companion test file alongside it (e.g. `check-line-count.test.js`). The test uses Node's built‑in test runner — `node --test check-line-count.test.js`. Follow TDD: write the test, confirm it fails against a current artefact, implement the validator, verify the test passes. The test feeds sample inputs to the validator script and asserts the correct JSONL output on stdout — it validates the JSONL contract, not just that the script runs.
 **Validators**: Ask about `validators` (optional) — offer to create one or skip.
 **Conflict check**: Read all existing laws that would apply to the same artefact types. Check for contradiction, duplication, or overlap. If any conflict is found, present it to the user:
@@ -85,7 +89,7 @@ For each script-checkable element, write a standalone `.mjs` script next to the
 ### 2. Plan
-Present a structured summary: law id, name, description, passing/failing criteria, target (global or type-specific with typeId), and validators (which elements are checked deterministically). Ask: "Does this capture what you want, or should we adjust the wording?" Iterate until the user is satisfied.
+Present a structured summary: law id, name, description, passing/failing criteria, target (global or type-specific with typeId), validators (which elements are checked deterministically), and the companion test file for each validator. Ask: "Does this capture what you want, or should we adjust the wording?" Iterate until the user is satisfied.
 ### 3. Confirm
@@ -93,9 +97,15 @@ Ask: "Proceed with this plan?" — wait for user answer before building. If the
 ### 4. Build
-1. **Validate**: Call `foundry_config_validate_law({ name: "<id>", body: "<assembled markdown>" })`. Assemble the body from the fields using the `## <id>` heading format the tool produces internally. If the result is `{ ok: false, errors: [...] }`, address each error and re-run until `{ ok: true }`. Common issues: missing required frontmatter keys, references to artefact types that do not exist yet.
+1. **Write validators with TDD**: For each validator declared in the plan:
+   a. **Write the test first** — create a companion test file alongside the validator (e.g. `foundry/artefacts/<type>/check-line-count.test.js`). The test imports or spawns the validator script with sample inputs and asserts the correct JSONL output on stdout. Run `node --test` to confirm it fails.
-2. **Create**: Translate the scope into the `target` argument:
+   b. **Implement the validator** — write the `.mjs` script. Run the test again to confirm it passes. Do not commit the validator without its passing test.
+2. **Validate**: Call `foundry_config_validate_law({ name: "<id>", body: "<assembled markdown>" })`. Assemble the body from the fields using the `## <id>` heading format the tool produces internally. If the result is `{ ok: false, errors: [...] }`, address each error and re-run until `{ ok: true }`. Common issues: missing required frontmatter keys, references to artefact types that do not exist yet.
+3. **Create**: Translate the scope into the `target` argument:
    - Global → `target: { kind: "global", file: "<file-name>.md" }`
    - Type-specific → `target: { kind: "type-specific", typeId: "<artefact-type>" }`
@@ -117,7 +127,7 @@ Ask: "Proceed with this plan?" — wait for user answer before building. If the
    The tool appends to an existing `laws.md` automatically when the new law id is not already present. It only errors when a law with the same id is already in the file — in that case use `foundry_config_edit_law({ id: "<law-id>", description: "<updated>", passing: "<updated>", failing: "<updated>" })` to modify the existing law in place.
-3. **Verify uniqueness**: After the file is created, confirm the law id is unique across all law files. If a collision exists, read the colliding law, present the conflict to the user, propose a rename or merge, ask one focused question about the user's preference, then write and commit the resolution.
+4. **Verify uniqueness**: After the file is created, confirm the law id is unique across all law files. If a collision exists, read the colliding law, present the conflict to the user, propose a rename or merge, ask one focused question about the user's preference, then write and commit the resolution.
 ### 5. Editing existing laws (prose or validators)
@@ -145,7 +155,7 @@ Then proceed with the update.
 > 🔍 **Drift check:** Verify that the changed validator still aligns with the law's prose. If the validator has narrowed or broadened, the prose may need a corresponding update.
-Then proceed with the update.
+After the validator implementation changes, update the companion test file. Run the tests to confirm they pass against the updated validator before committing.
 #### 5e. Apply the update
@@ -258,8 +268,47 @@ validators:
     failure-means: The artefact file does not contain exactly three non-empty lines.
 ~~~
+#### Companion test
+`foundry/artefacts/haiku/check-line-count.test.js`:
+~~~js
+import { describe, it } from 'node:test';
+import { execSync } from 'node:child_process';
+import assert from 'node:assert/strict';
+describe('check-line-count', () => {
+  it('passes for exactly three non-empty lines', () => {
+    const result = execSync(
+      `node foundry/artefacts/haiku/check-line-count.mjs foundry/artefacts/haiku/test/fixtures/haiku-valid.md`,
+      { encoding: 'utf8' },
+    );
+    assert.strictEqual(result.trim(), '');
+  });
+  it('reports an error for fewer than three lines', () => {
+    const result = execSync(
+      `node foundry/artefacts/haiku/check-line-count.mjs foundry/artefacts/haiku/test/fixtures/haiku-short.md`,
+      { encoding: 'utf8' },
+    );
+    assert.match(result, /Expected 3 non-empty lines/);
+  });
+  it('reports an error for more than three lines', () => {
+    const result = execSync(
+      `node foundry/artefacts/haiku/check-line-count.mjs foundry/artefacts/haiku/test/fixtures/haiku-long.md`,
+      { encoding: 'utf8' },
+    );
+    assert.match(result, /Expected 3 non-empty lines/);
+  });
+});
+~~~
 ## What you do NOT do
 - You do not skip the conflict check
 - You do not silently overwrite existing laws
 - You do not create artefact types unless the user's stated goal clearly requires it; ask one focused question when multiple designs are plausible
+- You do not write validators without companion tests
+- You do not place flow artefacts or test fixtures outside `foundry/`
+- You do not accept test failures — fix the validator and retry until every test passes

package/dist/skills/forge/SKILL.md CHANGED Viewed

@@ -6,6 +6,8 @@ description: Produces or revises an artefact, guided by WORK.md and the foundry
 # Forge
+**This skill is subagent-only.** It describes the protocol a forge subagent follows when dispatched via `task()` from the orchestrate loop. Do NOT load this skill and run forge inline — the orchestrate skill returns a `dispatch` action with a pre-built prompt; call `task()` with it.
 You produce or revise artefacts. You read the work file to understand the goal and follow the feedback item in the dispatch prompt, and read the foundry cycle definition to understand what you're producing and what inputs you can read.
 ## Prerequisites

package/dist/skills/orchestrate/SKILL.md CHANGED Viewed

@@ -37,7 +37,13 @@ Loop until `foundry_orchestrate` returns a terminal action (`done`, `blocked`, o
 Payload: `{stage, subagent_type, prompt}`.
-Call the `task` tool:
+Call the `task` tool. Do NOT load the forge, quench, or appraise skills yourself — the subagent will use them internally:
+```
+task tool:
+  subagent_type: <subagent_type-from-payload>
+  description: "Run <stage> for <cycle>"
+  prompt: <prompt-from-payload — pass verbatim>
 ```
 task tool:
   subagent_type: <subagent_type-from-payload>
@@ -51,7 +57,7 @@ When the task returns, call `foundry_orchestrate({lastResult: {ok: true}})`. If
 Payload: `{stage, cycle, tasks}`.
-Fire all tasks in parallel by making multiple `task` tool calls in a single response:
+Fire all tasks in parallel by making multiple `task` tool calls in a single response. Do NOT load stage skills yourself:
 ```
 task tool:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@really-knows-ai/foundry",
-  "version": "3.7.1",
+  "version": "3.8.0",
   "description": "A skill-driven framework for governed artefact generation with AI coding tools. Define your own artefact types, laws, and flows — Foundry handles the forge → quench → appraise pipeline with deterministic routing, quality gates, and iterative refinement.",
   "type": "module",
   "main": "dist/.opencode/plugins/foundry.js",