npm - @forwardimpact/libeval - Versions diffs - 0.1.52 → 0.1.53 - Mend

@forwardimpact/libeval 0.1.52 → 0.1.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/bin/fit-benchmark.js +8 -14
package/bin/fit-eval.js +7 -14
package/bin/fit-selfedit.js +6 -4
package/bin/fit-trace.js +7 -14
package/package.json +1 -1
package/src/benchmark/result.js +2 -2
package/src/benchmark/task-family.js +1 -1
package/src/commands/benchmark-invariants.js +1 -1
package/src/discusser.js +3 -5
package/src/events/github.js +7 -1
package/src/facilitator.js +2 -5
package/src/inbox-poller.js +5 -8
package/src/judge.js +10 -14
package/src/profile-prompt.js +109 -24
package/src/redaction.js +3 -16
package/src/supervisor.js +3 -0

package/bin/fit-benchmark.js CHANGED Viewed

@@ -2,7 +2,7 @@
 import "@forwardimpact/libpreflight/node22";
-import { readFileSync, realpathSync } from "node:fs";
+import { realpathSync } from "node:fs";
 import { createCli } from "@forwardimpact/libcli";
 import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
 import { createLogger } from "@forwardimpact/libtelemetry";
@@ -11,17 +11,8 @@ import { runBenchmarkRunCommand } from "../src/commands/benchmark-run.js";
 import { runBenchmarkInvariantsCommand } from "../src/commands/benchmark-invariants.js";
 import { runBenchmarkReportCommand } from "../src/commands/benchmark-report.js";
-// `bun build --compile` injects FIT_BENCHMARK_VERSION via --define, eliminating
-// the readFileSync branch in the compiled binary (which would ENOENT against
-// the bunfs virtual mount). Source execution falls through to package.json.
-const VERSION =
-  process.env.FIT_BENCHMARK_VERSION ||
-  JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
-    .version;
 export const definition = {
   name: "fit-benchmark",
-  version: VERSION,
   description:
     "Run coding-agent task families, grade hidden tests, and aggregate pass@k across runs.",
   commands: [
@@ -156,11 +147,14 @@ export const definition = {
   ],
 };
-const logger = createLogger("benchmark");
+const runtime = createDefaultRuntime();
+const logger = createLogger("benchmark", runtime);
 async function main() {
-  const runtime = createDefaultRuntime();
-  const cli = createCli(definition, { runtime });
+  const cli = createCli(definition, {
+    runtime,
+    packageJsonUrl: new URL("../package.json", import.meta.url),
+  });
   const parsed = cli.parse(runtime.proc.argv.slice(2));
   if (!parsed) return runtime.proc.exit(0);
@@ -187,7 +181,7 @@ async function main() {
 if (import.meta.url === `file://${realpathSync(process.argv[1])}`) {
   main().catch((error) => {
     logger.exception("main", error);
-    createCli(definition).error(error.message);
+    createCli(definition, { runtime }).error(error.message);
     process.exit(1);
   });
 }

package/bin/fit-eval.js CHANGED Viewed

@@ -2,7 +2,6 @@
 import "@forwardimpact/libpreflight/node22";
-import { readFileSync } from "node:fs";
 import { createCli } from "@forwardimpact/libcli";
 import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
 import { createLogger } from "@forwardimpact/libtelemetry";
@@ -15,14 +14,6 @@ import { runFacilitateCommand } from "../src/commands/facilitate.js";
 import { runDiscussCommand } from "../src/commands/discuss.js";
 import { runCallbackCommand } from "../src/commands/callback.js";
-// `bun build --compile` injects FIT_EVAL_VERSION via --define, eliminating
-// the readFileSync branch in the compiled binary (which would ENOENT against
-// the bunfs virtual mount). Source execution falls through to package.json.
-const VERSION =
-  process.env.FIT_EVAL_VERSION ||
-  JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
-    .version;
 const LEAD_OPTIONS = {
   "lead-profile": {
     type: "string",
@@ -60,7 +51,6 @@ const TASK_INPUT_OPTIONS = {
 const definition = {
   name: "fit-eval",
-  version: VERSION,
   description:
     "Run agents and capture NDJSON traces — for agent evaluations or multi-agent collaboration",
   commands: [
@@ -313,11 +303,14 @@ const definition = {
   ],
 };
-const logger = createLogger("eval");
+const runtime = createDefaultRuntime();
+const logger = createLogger("eval", runtime);
 async function main() {
-  const runtime = createDefaultRuntime();
-  const cli = createCli(definition, { runtime });
+  const cli = createCli(definition, {
+    runtime,
+    packageJsonUrl: new URL("../package.json", import.meta.url),
+  });
   const parsed = cli.parse(runtime.proc.argv.slice(2));
   if (!parsed) return runtime.proc.exit(0);
@@ -341,6 +334,6 @@ async function main() {
 main().catch((error) => {
   logger.exception("main", error);
-  createCli(definition).error(error.message);
+  createCli(definition, { runtime }).error(error.message);
   process.exit(1);
 });

package/bin/fit-selfedit.js CHANGED Viewed

@@ -7,12 +7,11 @@
 import "@forwardimpact/libpreflight/node22";
 import { existsSync, readFileSync, writeFileSync } from "node:fs";
-import fsPromises from "node:fs/promises";
 import { parseArgs } from "node:util";
 import { resolve, relative, dirname } from "node:path";
 import { execFileSync } from "node:child_process";
-import { Finder } from "@forwardimpact/libutil";
+import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
 import { minimatch } from "minimatch";
 const HELP = `fit-selfedit — write stdin to a settings.json-allowed path on a non-main branch.
@@ -71,8 +70,11 @@ if (extra.length > 0) fail(`unexpected extra arguments: ${extra.join(" ")}`);
 const absoluteTarget = resolve(process.cwd(), targetArg);
-// Safeguard 1: settings.json must grant Edit() on this path.
-const settingsPath = new Finder(fsPromises, { debug() {} }).findUpward(
+// Safeguard 1: settings.json must grant Edit() on this path. The bin is the
+// sole construction site for the runtime; resolve the finder off the bag
+// rather than constructing a Finder here (Success Criterion 9).
+const runtime = createDefaultRuntime();
+const settingsPath = runtime.finder.findUpward(
   dirname(absoluteTarget),
   ".claude/settings.json",
   20,

package/bin/fit-trace.js CHANGED Viewed

@@ -2,7 +2,6 @@
 import "@forwardimpact/libpreflight/node22";
-import { readFileSync } from "node:fs";
 import { createCli } from "@forwardimpact/libcli";
 import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
 import { createScriptConfig } from "@forwardimpact/libconfig";
@@ -31,17 +30,8 @@ import {
 import { runAssertCommand } from "../src/commands/assert.js";
 import { runByDiscussionCommand } from "../src/commands/by-discussion.js";
-// `bun build --compile` injects FIT_TRACE_VERSION via --define, eliminating
-// the readFileSync branch in the compiled binary (which would ENOENT against
-// the bunfs virtual mount). Source execution falls through to package.json.
-const VERSION =
-  process.env.FIT_TRACE_VERSION ||
-  JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
-    .version;
 const definition = {
   name: "fit-trace",
-  version: VERSION,
   description:
     "Download, query, and analyze agent execution traces — read NDJSON output from fit-eval as qualitative research",
   commands: [
@@ -340,15 +330,18 @@ const definition = {
   ],
 };
-const logger = createLogger("trace");
+const runtime = createDefaultRuntime();
+const logger = createLogger("trace", runtime);
 // Commands that talk to the GitHub API need a config-backed token resolver;
 // the rest only read local trace files through the runtime.
 const NEEDS_CONFIG = new Set(["runs", "download"]);
 async function main() {
-  const runtime = createDefaultRuntime();
-  const cli = createCli(definition, { runtime });
+  const cli = createCli(definition, {
+    runtime,
+    packageJsonUrl: new URL("../package.json", import.meta.url),
+  });
   const parsed = cli.parse(runtime.proc.argv.slice(2));
   if (!parsed) return runtime.proc.exit(0);
@@ -376,6 +369,6 @@ async function main() {
 main().catch((error) => {
   logger.exception("main", error);
-  createCli(definition).error(error.message);
+  createCli(definition, { runtime }).error(error.message);
   process.exit(1);
 });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@forwardimpact/libeval",
-  "version": "0.1.52",
+  "version": "0.1.53",
   "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
   "keywords": [
     "eval",

package/src/benchmark/result.js CHANGED Viewed

@@ -5,8 +5,8 @@
  *   - RESULT_RECORD_SCHEMA — one record per (task, runIndex) from a full
  *     benchmark run. Has a happy branch (invariants + judge present) and a
  *     pre-flight-failure branch (invariants/judgeVerdict/submission absent).
- *   - INVARIANTS_RECORD_SCHEMA — narrower output of `benchmark-invariants`
- *     (P7): ad-hoc grading without a full lifecycle.
+ *   - INVARIANTS_RECORD_SCHEMA — narrower output of `benchmark-invariants`:
+ *     ad-hoc grading without a full lifecycle.
  *
  * Validation is throw-on-mismatch so the runner can wrap every JSONL append
  * in a guard and reject schema drift at write time.

package/src/benchmark/task-family.js CHANGED Viewed

@@ -2,7 +2,7 @@
  * Task-family loader. A task family is a directory under
  *   <root>/
  *     apm.lock.yaml
- *     .claude/                # pre-staged skills + agents (P1)
+ *     .claude/                # pre-staged skills + agents
  *     tasks/<task_name>/
  *       agent.task.md
  *       supervisor.task.md    # optional; appended to the task as supervisor context

package/src/commands/benchmark-invariants.js CHANGED Viewed

@@ -1,6 +1,6 @@
 /**
  * `fit-benchmark invariants` — check a single task's invariants against a
- * post-run workdir directory without invoking an agent (P6/P7). Useful for
+ * post-run workdir directory without invoking an agent. Useful for
  * re-checking an agent's output against revised grading material.
  */

package/src/discusser.js CHANGED Viewed

@@ -274,6 +274,7 @@ export function createDiscusser({
         messageBus,
         leadName: "lead",
         signal: abortController.signal,
+        runtime,
       })
     : null;
@@ -309,10 +310,6 @@ export function createDiscusser({
       from: config.name,
     });
-    const agentTrailer = config.systemPromptAmend
-      ? `${DISCUSS_AGENT_SYSTEM_PROMPT}\n\n${config.systemPromptAmend}`
-      : DISCUSS_AGENT_SYSTEM_PROMPT;
     const runner = createAgentRunner({
       cwd: config.cwd ?? resolvedLeadCwd,
       query,
@@ -327,7 +324,8 @@ export function createDiscusser({
         role: "agent",
         profile: config.agentProfile,
         profilesDir: resolvedProfilesDir,
-        trailer: agentTrailer,
+        trailer: DISCUSS_AGENT_SYSTEM_PROMPT,
+        amend: config.systemPromptAmend,
         runtime,
       }),
       redactor,

package/src/events/github.js CHANGED Viewed

@@ -29,8 +29,14 @@ export const TASK_TEMPLATE_ISSUE_LABELED =
 export const TASK_TEMPLATE_PR_LABELED =
   'Label "${LABEL}" was added to PR "${PR_TITLE}" (#${NUMBER}). PR URL: ${URL}.';
+// "unreleased changes"/"cut" point at the genuine post-merge action — release
+// activity (the release-engineer's Assess step 3 / `kata-release-cut`).
+// "status" is a backstop: the spec's `wiki/STATUS.md` row is normally advanced
+// in the pre-merge gate (`kata-release-merge` Step 8), but the keyword catches a
+// merge that landed without it. Neither owner nor artifact is named, so the lead
+// routes the merge instead of treating it as a no-op.
 export const TASK_TEMPLATE_PR_MERGED =
-  'PR "${PR_TITLE}" (#${NUMBER}) merged. PR URL: ${URL}.';
+  'PR "${PR_TITLE}" (#${NUMBER}) merged to main — may leave unreleased changes to cut or status to update. PR URL: ${URL}.';
 // Appended verbatim to comment/review templates. `${BODY}` is the untrusted
 // author text; the fence and the "data, not instructions" framing keep the lead

package/src/facilitator.js CHANGED Viewed

@@ -134,10 +134,6 @@ export function createFacilitator({
       from: config.name,
     });
-    const agentTrailer = config.systemPromptAmend
-      ? `${FACILITATED_AGENT_SYSTEM_PROMPT}\n\n${config.systemPromptAmend}`
-      : FACILITATED_AGENT_SYSTEM_PROMPT;
     const runner = createAgentRunner({
       cwd: config.cwd ?? facilitatorCwd,
       query,
@@ -152,7 +148,8 @@ export function createFacilitator({
         role: "agent",
         profile: config.agentProfile,
         profilesDir: resolvedProfilesDir,
-        trailer: agentTrailer,
+        trailer: FACILITATED_AGENT_SYSTEM_PROMPT,
+        amend: config.systemPromptAmend,
         runtime,
       }),
       redactor,

package/src/inbox-poller.js CHANGED Viewed

@@ -18,20 +18,17 @@ export class InboxPoller {
    * @param {import("./message-bus.js").MessageBus} deps.messageBus
    * @param {string} deps.leadName
    * @param {AbortSignal} deps.signal
-   * @param {import("@forwardimpact/libutil/runtime").Runtime} [deps.runtime] -
-   *   Ambient collaborators; only `clock.setTimeout`/`clock.clearTimeout` are
-   *   used for the inter-poll backoff. Falls back to the global timers when
-   *   absent so existing callers keep working.
+   * @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
+   *   Injected collaborators; `clock.setTimeout`/`clock.clearTimeout` drive the
+   *   inter-poll backoff.
    */
   constructor({ inboxUrl, messageBus, leadName, signal, runtime }) {
+    if (!runtime) throw new Error("runtime is required");
     this.#inboxUrl = inboxUrl;
     this.#messageBus = messageBus;
     this.#leadName = leadName;
     this.#signal = signal;
-    this.#clock = runtime?.clock ?? {
-      setTimeout: (fn, ms) => globalThis.setTimeout(fn, ms),
-      clearTimeout: (h) => globalThis.clearTimeout(h),
-    };
+    this.#clock = runtime.clock;
   }
   /** Long-poll the inbox until the abort signal fires. */

package/src/judge.js CHANGED Viewed

@@ -17,7 +17,7 @@ import { resolve } from "node:path";
 import { Writable } from "node:stream";
 import { createAgentRunner } from "./agent-runner.js";
-import { composeProfilePrompt } from "./profile-prompt.js";
+import { composeSystemPrompt } from "./profile-prompt.js";
 import { SequenceCounter } from "./sequence-counter.js";
 import {
   createJudgeToolServer,
@@ -140,7 +140,7 @@ export class Judge {
 /**
  * Factory function — wires the AgentRunner with the judge orchestration server
  * and the JUDGE_SYSTEM_PROMPT trailer. A `judgeProfile` (when supplied) layers
- * on top of the trailer via `composeProfilePrompt`, matching the
+ * on top of the trailer via `composeSystemPrompt`, matching the
  * supervisor/facilitator pattern.
  *
  * @param {object} deps
@@ -151,7 +151,7 @@ export class Judge {
  * @param {string} [deps.model]
  * @param {number} [deps.maxTurns] - Default 5 (the judge is expected to act in turn 1; 5 leaves headroom for tool inspection).
  * @param {string[]} [deps.allowedTools] - Default `["Read","Glob","Grep","Bash"]` — read-only inspection.
- * @param {string} [deps.judgeProfile] - Profile name; resolved into the system prompt via `composeProfilePrompt`.
+ * @param {string} [deps.judgeProfile] - Profile name; resolved into the system prompt via `composeSystemPrompt`.
  * @param {string} [deps.profilesDir] - Defaults to `<cwd>/.claude/agents`.
  * @param {string} [deps.taskAmend]
  * @returns {Judge}
@@ -176,17 +176,13 @@ export function createJudge({
   if (!runtime) throw new Error("runtime is required");
   const resolvedProfilesDir = profilesDir ?? resolve(cwd, ".claude/agents");
-  const systemPrompt = judgeProfile
-    ? composeProfilePrompt(judgeProfile, {
-        profilesDir: resolvedProfilesDir,
-        trailer: JUDGE_SYSTEM_PROMPT,
-        runtime,
-      })
-    : {
-        type: "preset",
-        preset: "claude_code",
-        append: JUDGE_SYSTEM_PROMPT,
-      };
+  const systemPrompt = composeSystemPrompt({
+    role: "agent",
+    profile: judgeProfile,
+    profilesDir: resolvedProfilesDir,
+    trailer: JUDGE_SYSTEM_PROMPT,
+    runtime,
+  });
   const ctx = createOrchestrationContext();
   ctx.participants = [{ name: "judge", role: "judge" }];

package/src/profile-prompt.js CHANGED Viewed

@@ -1,7 +1,25 @@
 /**
  * System prompt composition for agent runners.
  *
- * Two helpers:
+ * libeval assembles every agent system prompt from up to two parallel,
+ * sibling-tagged sections (see COALIGNED.md § L0):
+ *
+ *     <agent_profile>
+ *     …persona body…
+ *     </agent_profile>
+ *
+ *     <session_protocol>
+ *     …orchestration mechanics, then any amendment…
+ *     </session_protocol>
+ *
+ * The two tags are siblings joined by a blank line — neither nests inside
+ * the other. A section appears only when its content is present. A
+ * system-prompt amendment is folded into the protocol trailer before
+ * wrapping, so it lands transparently inside `<session_protocol>`. The tag
+ * convention lives entirely here: profile `.md` files and trailer constants
+ * carry no tags.
+ *
+ * Helpers:
  *
  * - `composeProfilePrompt(name, opts)` — profile + `claude_code` preset.
  *   Used by agent participants that need the full Claude Code tool surface.
@@ -10,61 +28,113 @@
  *   roles (supervisor, facilitator, discuss lead) that should only see
  *   the orchestration instructions and optionally a profile body.
  *
- * - `composeSystemPrompt(opts)` — unified entry point. Delegates to one
- *   of the above based on `opts.role`.
+ * - `composeSystemPrompt(opts)` — unified entry point. Folds `amend` into
+ *   the protocol section, then delegates to one of the above based on
+ *   `opts.role`.
  */
 import { join } from "node:path";
+/** Sibling section tags. Neither nests inside the other. */
+const AGENT_PROFILE_TAG = "agent_profile";
+const SESSION_PROTOCOL_TAG = "session_protocol";
+/** Wrap content in a semantic section tag, each on its own line. */
+function wrapSection(tag, content) {
+  return `<${tag}>\n${content}\n</${tag}>`;
+}
 /**
- * Compose a `claude_code`-preset system prompt from a profile file. The
- * profile is read synchronously off the injected `runtime.fsSync` surface —
- * this composer runs inside the synchronous SDK-option builders of the
+ * Assemble the parallel `<agent_profile>` / `<session_protocol>` sections.
+ * Each section is emitted only when its content is non-empty; the two tags
+ * are siblings joined by a blank line and never nest.
+ *
+ * @param {object} parts
+ * @param {string} [parts.body] - Profile body, already frontmatter-stripped.
+ * @param {string} [parts.protocol] - Session protocol trailer, with any
+ *   amendment already folded in.
+ * @returns {string}
+ */
+function assembleSections({ body, protocol }) {
+  const sections = [];
+  if (body) sections.push(wrapSection(AGENT_PROFILE_TAG, body));
+  if (protocol) sections.push(wrapSection(SESSION_PROTOCOL_TAG, protocol));
+  return sections.join("\n\n");
+}
+/**
+ * Read a profile `.md`, strip its frontmatter, and return the trimmed body.
+ * Reads synchronously off the injected `runtime.fsSync` surface — this
+ * composer runs inside the synchronous SDK-option builders of the
  * supervisor / facilitator / discusser / judge factories, so it cannot go
  * async without an unbounded cascade.
  *
  * @param {string} name - Profile basename (no `.md` suffix)
+ * @param {string} profilesDir - Directory containing `<name>.md`
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
+ * @returns {string}
+ */
+function readProfileBody(name, profilesDir, runtime) {
+  const path = join(profilesDir, `${name}.md`);
+  const raw = runtime.fsSync.readFileSync(path, "utf8");
+  return stripFrontmatter(raw).trim();
+}
+/**
+ * Compose a `claude_code`-preset system prompt from a profile file. The
+ * profile body is wrapped in `<agent_profile>`; an optional protocol trailer
+ * is wrapped in a sibling `<session_protocol>`.
+ *
+ * @param {string} name - Profile basename (no `.md` suffix)
  * @param {object} opts
  * @param {string} opts.profilesDir - Directory containing `<name>.md`
- * @param {string} [opts.trailer] - Mode-specific trailer appended after a blank line
+ * @param {string} [opts.trailer] - Session protocol, wrapped as a sibling
+ *   `<session_protocol>` section after a blank line
  * @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
  * @returns {{type: "preset", preset: "claude_code", append: string}}
  */
 export function composeProfilePrompt(name, { profilesDir, trailer, runtime }) {
-  const path = join(profilesDir, `${name}.md`);
-  const raw = runtime.fsSync.readFileSync(path, "utf8");
-  const body = stripFrontmatter(raw).trim();
-  const append = trailer && trailer.length > 0 ? `${body}\n\n${trailer}` : body;
-  return { type: "preset", preset: "claude_code", append };
+  const body = readProfileBody(name, profilesDir, runtime);
+  return {
+    type: "preset",
+    preset: "claude_code",
+    append: assembleSections({ body, protocol: trailer }),
+  };
 }
 /**
- * Compose a plain-string system prompt for a lead role (no Claude Code preset).
+ * Compose a plain-string system prompt for a lead role (no Claude Code
+ * preset). The protocol trailer is wrapped in `<session_protocol>`; an
+ * optional profile body is wrapped in a sibling `<agent_profile>` before it.
+ *
  * @param {object} opts
  * @param {string} [opts.profile] - Profile basename (no `.md` suffix)
  * @param {string} [opts.profilesDir] - Directory containing profile files
- * @param {string} opts.trailer - Mode-specific orchestration instructions
+ * @param {string} opts.trailer - Session protocol (orchestration instructions)
  * @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
  * @returns {string}
  */
 export function composeLeadPrompt({ profile, profilesDir, trailer, runtime }) {
   if (!trailer) throw new Error("trailer is required");
-  if (!profile) return trailer;
-  const path = join(profilesDir, `${profile}.md`);
-  const raw = runtime.fsSync.readFileSync(path, "utf8");
-  const body = stripFrontmatter(raw).trim();
-  return `${body}\n\n${trailer}`;
+  const body = profile
+    ? readProfileBody(profile, profilesDir, runtime)
+    : undefined;
+  return assembleSections({ body, protocol: trailer });
 }
 /**
- * Unified entry point for composing system prompts.
+ * Unified entry point for composing system prompts. Folds an optional
+ * amendment into the protocol trailer — so it lands inside
+ * `<session_protocol>` — then delegates by role.
  *
  * @param {object} opts
  * @param {"lead"|"agent"} opts.role - `"lead"` produces a plain string;
  *   `"agent"` produces a `claude_code` preset object.
  * @param {string} [opts.profile] - Profile basename
  * @param {string} [opts.profilesDir]
- * @param {string} opts.trailer - Mode-specific instructions
+ * @param {string} opts.trailer - Session protocol (orchestration instructions)
+ * @param {string} [opts.amend] - Caller-supplied amendment, appended inside
+ *   `<session_protocol>` after the trailer with a blank-line separator.
  * @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
  * @returns {string | {type: "preset", preset: "claude_code", append: string}}
  */
@@ -73,16 +143,31 @@ export function composeSystemPrompt({
   profile,
   profilesDir,
   trailer,
+  amend,
   runtime,
 }) {
   if (!trailer) throw new Error("trailer is required");
+  const protocol = amend ? `${trailer}\n\n${amend}` : trailer;
   if (role === "lead") {
-    return composeLeadPrompt({ profile, profilesDir, trailer, runtime });
+    return composeLeadPrompt({
+      profile,
+      profilesDir,
+      trailer: protocol,
+      runtime,
+    });
   }
   if (profile) {
-    return composeProfilePrompt(profile, { profilesDir, trailer, runtime });
+    return composeProfilePrompt(profile, {
+      profilesDir,
+      trailer: protocol,
+      runtime,
+    });
   }
-  return { type: "preset", preset: "claude_code", append: trailer };
+  return {
+    type: "preset",
+    preset: "claude_code",
+    append: assembleSections({ protocol }),
+  };
 }
 /**

package/src/redaction.js CHANGED Viewed

@@ -15,6 +15,7 @@ export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
   "DATABASE_PASSWORD",
   "GH_TOKEN",
   "GITHUB_TOKEN",
+  "JWT_SECRET",
   "MCP_TOKEN",
   "MICROSOFT_APP_ID",
   "MICROSOFT_APP_PASSWORD",
@@ -22,7 +23,6 @@ export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
   "PRODUCT_LANDMARK_TOKEN",
   "SERVICE_SECRET",
   "SUPABASE_ANON_KEY",
-  "SUPABASE_JWT_SECRET",
   "SUPABASE_SERVICE_ROLE_KEY",
 ]);
@@ -135,7 +135,8 @@ export function createRedactor({
   patterns = DEFAULT_PATTERNS,
   enabled,
 } = {}) {
-  const proc = runtime?.proc ?? defaultProc();
+  if (!runtime) throw new Error("runtime is required");
+  const proc = runtime.proc;
   const resolvedEnv = env ?? proc.env;
   const envDisabled = resolvedEnv.LIBEVAL_REDACTION_DISABLED === "1";
   const resolvedEnabled = enabled ?? !envDisabled;
@@ -151,20 +152,6 @@ export function createRedactor({
   return new Redactor({ envSnapshot, patterns, enabled: resolvedEnabled });
 }
-/**
- * Lazily build the production proc surface so callers that don't inject a
- * runtime keep working. Imported indirectly to avoid pulling the whole
- * runtime bag (and its `node:fs`/`node:child_process` imports) into modules
- * that only ever receive an injected runtime.
- * @returns {{env: Record<string, string|undefined>, stderr: {write: (s: string) => void}}}
- */
-function defaultProc() {
-  return {
-    env: globalThis.process?.env ?? {},
-    stderr: { write: (s) => globalThis.process?.stderr?.write(s) },
-  };
-}
 /**
  * Parse `LIBEVAL_REDACTION_ENV_VARS` into a trimmed, non-empty name list.
  * Falls back to `DEFAULT_ENV_ALLOWLIST` when unset or empty.

package/src/supervisor.js CHANGED Viewed

@@ -122,6 +122,7 @@ const devNull = new Writable({
  * @param {string[]} [deps.supervisorDisallowedTools]
  * @param {string} [deps.supervisorProfile]
  * @param {string} [deps.agentProfile]
+ * @param {string} [deps.agentSystemPromptAmend] - Amendment folded into the agent's `<session_protocol>` section, after the protocol trailer.
  * @param {string} [deps.profilesDir]
  * @param {string} [deps.taskAmend]
  * @param {Record<string, object>} [deps.agentMcpServers]
@@ -141,6 +142,7 @@ export function createSupervisor({
   supervisorDisallowedTools,
   supervisorProfile,
   agentProfile,
+  agentSystemPromptAmend,
   profilesDir,
   taskAmend,
   agentMcpServers,
@@ -182,6 +184,7 @@ export function createSupervisor({
       profile: agentProfile,
       profilesDir: resolvedProfilesDir,
       trailer: AGENT_SYSTEM_PROMPT,
+      amend: agentSystemPromptAmend,
       runtime,
     }),
     mcpServers: { orchestration: agentServer, ...agentMcpServers },