@forwardimpact/libeval 0.1.52 → 0.1.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
 
3
3
  import "@forwardimpact/libpreflight/node22";
4
4
 
5
- import { readFileSync, realpathSync } from "node:fs";
5
+ import { realpathSync } from "node:fs";
6
6
  import { createCli } from "@forwardimpact/libcli";
7
7
  import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
8
8
  import { createLogger } from "@forwardimpact/libtelemetry";
@@ -11,17 +11,8 @@ import { runBenchmarkRunCommand } from "../src/commands/benchmark-run.js";
11
11
  import { runBenchmarkInvariantsCommand } from "../src/commands/benchmark-invariants.js";
12
12
  import { runBenchmarkReportCommand } from "../src/commands/benchmark-report.js";
13
13
 
14
- // `bun build --compile` injects FIT_BENCHMARK_VERSION via --define, eliminating
15
- // the readFileSync branch in the compiled binary (which would ENOENT against
16
- // the bunfs virtual mount). Source execution falls through to package.json.
17
- const VERSION =
18
- process.env.FIT_BENCHMARK_VERSION ||
19
- JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
20
- .version;
21
-
22
14
  export const definition = {
23
15
  name: "fit-benchmark",
24
- version: VERSION,
25
16
  description:
26
17
  "Run coding-agent task families, grade hidden tests, and aggregate pass@k across runs.",
27
18
  commands: [
@@ -156,11 +147,14 @@ export const definition = {
156
147
  ],
157
148
  };
158
149
 
159
- const logger = createLogger("benchmark");
150
+ const runtime = createDefaultRuntime();
151
+ const logger = createLogger("benchmark", runtime);
160
152
 
161
153
  async function main() {
162
- const runtime = createDefaultRuntime();
163
- const cli = createCli(definition, { runtime });
154
+ const cli = createCli(definition, {
155
+ runtime,
156
+ packageJsonUrl: new URL("../package.json", import.meta.url),
157
+ });
164
158
  const parsed = cli.parse(runtime.proc.argv.slice(2));
165
159
  if (!parsed) return runtime.proc.exit(0);
166
160
 
@@ -187,7 +181,7 @@ async function main() {
187
181
  if (import.meta.url === `file://${realpathSync(process.argv[1])}`) {
188
182
  main().catch((error) => {
189
183
  logger.exception("main", error);
190
- createCli(definition).error(error.message);
184
+ createCli(definition, { runtime }).error(error.message);
191
185
  process.exit(1);
192
186
  });
193
187
  }
package/bin/fit-eval.js CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  import "@forwardimpact/libpreflight/node22";
4
4
 
5
- import { readFileSync } from "node:fs";
6
5
  import { createCli } from "@forwardimpact/libcli";
7
6
  import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
8
7
  import { createLogger } from "@forwardimpact/libtelemetry";
@@ -15,14 +14,6 @@ import { runFacilitateCommand } from "../src/commands/facilitate.js";
15
14
  import { runDiscussCommand } from "../src/commands/discuss.js";
16
15
  import { runCallbackCommand } from "../src/commands/callback.js";
17
16
 
18
- // `bun build --compile` injects FIT_EVAL_VERSION via --define, eliminating
19
- // the readFileSync branch in the compiled binary (which would ENOENT against
20
- // the bunfs virtual mount). Source execution falls through to package.json.
21
- const VERSION =
22
- process.env.FIT_EVAL_VERSION ||
23
- JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
24
- .version;
25
-
26
17
  const LEAD_OPTIONS = {
27
18
  "lead-profile": {
28
19
  type: "string",
@@ -60,7 +51,6 @@ const TASK_INPUT_OPTIONS = {
60
51
 
61
52
  const definition = {
62
53
  name: "fit-eval",
63
- version: VERSION,
64
54
  description:
65
55
  "Run agents and capture NDJSON traces — for agent evaluations or multi-agent collaboration",
66
56
  commands: [
@@ -313,11 +303,14 @@ const definition = {
313
303
  ],
314
304
  };
315
305
 
316
- const logger = createLogger("eval");
306
+ const runtime = createDefaultRuntime();
307
+ const logger = createLogger("eval", runtime);
317
308
 
318
309
  async function main() {
319
- const runtime = createDefaultRuntime();
320
- const cli = createCli(definition, { runtime });
310
+ const cli = createCli(definition, {
311
+ runtime,
312
+ packageJsonUrl: new URL("../package.json", import.meta.url),
313
+ });
321
314
  const parsed = cli.parse(runtime.proc.argv.slice(2));
322
315
  if (!parsed) return runtime.proc.exit(0);
323
316
 
@@ -341,6 +334,6 @@ async function main() {
341
334
 
342
335
  main().catch((error) => {
343
336
  logger.exception("main", error);
344
- createCli(definition).error(error.message);
337
+ createCli(definition, { runtime }).error(error.message);
345
338
  process.exit(1);
346
339
  });
@@ -7,12 +7,11 @@
7
7
 
8
8
  import "@forwardimpact/libpreflight/node22";
9
9
  import { existsSync, readFileSync, writeFileSync } from "node:fs";
10
- import fsPromises from "node:fs/promises";
11
10
  import { parseArgs } from "node:util";
12
11
  import { resolve, relative, dirname } from "node:path";
13
12
  import { execFileSync } from "node:child_process";
14
13
 
15
- import { Finder } from "@forwardimpact/libutil";
14
+ import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
16
15
  import { minimatch } from "minimatch";
17
16
 
18
17
  const HELP = `fit-selfedit — write stdin to a settings.json-allowed path on a non-main branch.
@@ -71,8 +70,11 @@ if (extra.length > 0) fail(`unexpected extra arguments: ${extra.join(" ")}`);
71
70
 
72
71
  const absoluteTarget = resolve(process.cwd(), targetArg);
73
72
 
74
- // Safeguard 1: settings.json must grant Edit() on this path.
75
- const settingsPath = new Finder(fsPromises, { debug() {} }).findUpward(
73
+ // Safeguard 1: settings.json must grant Edit() on this path. The bin is the
74
+ // sole construction site for the runtime; resolve the finder off the bag
75
+ // rather than constructing a Finder here (Success Criterion 9).
76
+ const runtime = createDefaultRuntime();
77
+ const settingsPath = runtime.finder.findUpward(
76
78
  dirname(absoluteTarget),
77
79
  ".claude/settings.json",
78
80
  20,
package/bin/fit-trace.js CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  import "@forwardimpact/libpreflight/node22";
4
4
 
5
- import { readFileSync } from "node:fs";
6
5
  import { createCli } from "@forwardimpact/libcli";
7
6
  import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
8
7
  import { createScriptConfig } from "@forwardimpact/libconfig";
@@ -31,17 +30,8 @@ import {
31
30
  import { runAssertCommand } from "../src/commands/assert.js";
32
31
  import { runByDiscussionCommand } from "../src/commands/by-discussion.js";
33
32
 
34
- // `bun build --compile` injects FIT_TRACE_VERSION via --define, eliminating
35
- // the readFileSync branch in the compiled binary (which would ENOENT against
36
- // the bunfs virtual mount). Source execution falls through to package.json.
37
- const VERSION =
38
- process.env.FIT_TRACE_VERSION ||
39
- JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
40
- .version;
41
-
42
33
  const definition = {
43
34
  name: "fit-trace",
44
- version: VERSION,
45
35
  description:
46
36
  "Download, query, and analyze agent execution traces — read NDJSON output from fit-eval as qualitative research",
47
37
  commands: [
@@ -340,15 +330,18 @@ const definition = {
340
330
  ],
341
331
  };
342
332
 
343
- const logger = createLogger("trace");
333
+ const runtime = createDefaultRuntime();
334
+ const logger = createLogger("trace", runtime);
344
335
 
345
336
  // Commands that talk to the GitHub API need a config-backed token resolver;
346
337
  // the rest only read local trace files through the runtime.
347
338
  const NEEDS_CONFIG = new Set(["runs", "download"]);
348
339
 
349
340
  async function main() {
350
- const runtime = createDefaultRuntime();
351
- const cli = createCli(definition, { runtime });
341
+ const cli = createCli(definition, {
342
+ runtime,
343
+ packageJsonUrl: new URL("../package.json", import.meta.url),
344
+ });
352
345
  const parsed = cli.parse(runtime.proc.argv.slice(2));
353
346
  if (!parsed) return runtime.proc.exit(0);
354
347
 
@@ -376,6 +369,6 @@ async function main() {
376
369
 
377
370
  main().catch((error) => {
378
371
  logger.exception("main", error);
379
- createCli(definition).error(error.message);
372
+ createCli(definition, { runtime }).error(error.message);
380
373
  process.exit(1);
381
374
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.52",
3
+ "version": "0.1.54",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
@@ -5,8 +5,8 @@
5
5
  * - RESULT_RECORD_SCHEMA — one record per (task, runIndex) from a full
6
6
  * benchmark run. Has a happy branch (invariants + judge present) and a
7
7
  * pre-flight-failure branch (invariants/judgeVerdict/submission absent).
8
- * - INVARIANTS_RECORD_SCHEMA — narrower output of `benchmark-invariants`
9
- * (P7): ad-hoc grading without a full lifecycle.
8
+ * - INVARIANTS_RECORD_SCHEMA — narrower output of `benchmark-invariants`:
9
+ * ad-hoc grading without a full lifecycle.
10
10
  *
11
11
  * Validation is throw-on-mismatch so the runner can wrap every JSONL append
12
12
  * in a guard and reject schema drift at write time.
@@ -2,7 +2,7 @@
2
2
  * Task-family loader. A task family is a directory under
3
3
  * <root>/
4
4
  * apm.lock.yaml
5
- * .claude/ # pre-staged skills + agents (P1)
5
+ * .claude/ # pre-staged skills + agents
6
6
  * tasks/<task_name>/
7
7
  * agent.task.md
8
8
  * supervisor.task.md # optional; appended to the task as supervisor context
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * `fit-benchmark invariants` — check a single task's invariants against a
3
- * post-run workdir directory without invoking an agent (P6/P7). Useful for
3
+ * post-run workdir directory without invoking an agent. Useful for
4
4
  * re-checking an agent's output against revised grading material.
5
5
  */
6
6
 
package/src/discusser.js CHANGED
@@ -274,6 +274,7 @@ export function createDiscusser({
274
274
  messageBus,
275
275
  leadName: "lead",
276
276
  signal: abortController.signal,
277
+ runtime,
277
278
  })
278
279
  : null;
279
280
 
@@ -309,10 +310,6 @@ export function createDiscusser({
309
310
  from: config.name,
310
311
  });
311
312
 
312
- const agentTrailer = config.systemPromptAmend
313
- ? `${DISCUSS_AGENT_SYSTEM_PROMPT}\n\n${config.systemPromptAmend}`
314
- : DISCUSS_AGENT_SYSTEM_PROMPT;
315
-
316
313
  const runner = createAgentRunner({
317
314
  cwd: config.cwd ?? resolvedLeadCwd,
318
315
  query,
@@ -327,7 +324,8 @@ export function createDiscusser({
327
324
  role: "agent",
328
325
  profile: config.agentProfile,
329
326
  profilesDir: resolvedProfilesDir,
330
- trailer: agentTrailer,
327
+ trailer: DISCUSS_AGENT_SYSTEM_PROMPT,
328
+ amend: config.systemPromptAmend,
331
329
  runtime,
332
330
  }),
333
331
  redactor,
@@ -29,8 +29,14 @@ export const TASK_TEMPLATE_ISSUE_LABELED =
29
29
  export const TASK_TEMPLATE_PR_LABELED =
30
30
  'Label "${LABEL}" was added to PR "${PR_TITLE}" (#${NUMBER}). PR URL: ${URL}.';
31
31
 
32
+ // "unreleased changes"/"cut" point at the genuine post-merge action — release
33
+ // activity (the release-engineer's Assess step 3 / `kata-release-cut`).
34
+ // "status" is a backstop: the spec's `wiki/STATUS.md` row is normally advanced
35
+ // in the pre-merge gate (`kata-release-merge` Step 8), but the keyword catches a
36
+ // merge that landed without it. Neither owner nor artifact is named, so the lead
37
+ // routes the merge instead of treating it as a no-op.
32
38
  export const TASK_TEMPLATE_PR_MERGED =
33
- 'PR "${PR_TITLE}" (#${NUMBER}) merged. PR URL: ${URL}.';
39
+ 'PR "${PR_TITLE}" (#${NUMBER}) merged to main — may leave unreleased changes to cut or status to update. PR URL: ${URL}.';
34
40
 
35
41
  // Appended verbatim to comment/review templates. `${BODY}` is the untrusted
36
42
  // author text; the fence and the "data, not instructions" framing keep the lead
@@ -134,10 +134,6 @@ export function createFacilitator({
134
134
  from: config.name,
135
135
  });
136
136
 
137
- const agentTrailer = config.systemPromptAmend
138
- ? `${FACILITATED_AGENT_SYSTEM_PROMPT}\n\n${config.systemPromptAmend}`
139
- : FACILITATED_AGENT_SYSTEM_PROMPT;
140
-
141
137
  const runner = createAgentRunner({
142
138
  cwd: config.cwd ?? facilitatorCwd,
143
139
  query,
@@ -152,7 +148,8 @@ export function createFacilitator({
152
148
  role: "agent",
153
149
  profile: config.agentProfile,
154
150
  profilesDir: resolvedProfilesDir,
155
- trailer: agentTrailer,
151
+ trailer: FACILITATED_AGENT_SYSTEM_PROMPT,
152
+ amend: config.systemPromptAmend,
156
153
  runtime,
157
154
  }),
158
155
  redactor,
@@ -18,20 +18,17 @@ export class InboxPoller {
18
18
  * @param {import("./message-bus.js").MessageBus} deps.messageBus
19
19
  * @param {string} deps.leadName
20
20
  * @param {AbortSignal} deps.signal
21
- * @param {import("@forwardimpact/libutil/runtime").Runtime} [deps.runtime] -
22
- * Ambient collaborators; only `clock.setTimeout`/`clock.clearTimeout` are
23
- * used for the inter-poll backoff. Falls back to the global timers when
24
- * absent so existing callers keep working.
21
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
22
+ * Injected collaborators; `clock.setTimeout`/`clock.clearTimeout` drive the
23
+ * inter-poll backoff.
25
24
  */
26
25
  constructor({ inboxUrl, messageBus, leadName, signal, runtime }) {
26
+ if (!runtime) throw new Error("runtime is required");
27
27
  this.#inboxUrl = inboxUrl;
28
28
  this.#messageBus = messageBus;
29
29
  this.#leadName = leadName;
30
30
  this.#signal = signal;
31
- this.#clock = runtime?.clock ?? {
32
- setTimeout: (fn, ms) => globalThis.setTimeout(fn, ms),
33
- clearTimeout: (h) => globalThis.clearTimeout(h),
34
- };
31
+ this.#clock = runtime.clock;
35
32
  }
36
33
 
37
34
  /** Long-poll the inbox until the abort signal fires. */
package/src/judge.js CHANGED
@@ -17,7 +17,7 @@ import { resolve } from "node:path";
17
17
  import { Writable } from "node:stream";
18
18
 
19
19
  import { createAgentRunner } from "./agent-runner.js";
20
- import { composeProfilePrompt } from "./profile-prompt.js";
20
+ import { composeSystemPrompt } from "./profile-prompt.js";
21
21
  import { SequenceCounter } from "./sequence-counter.js";
22
22
  import {
23
23
  createJudgeToolServer,
@@ -140,7 +140,7 @@ export class Judge {
140
140
  /**
141
141
  * Factory function — wires the AgentRunner with the judge orchestration server
142
142
  * and the JUDGE_SYSTEM_PROMPT trailer. A `judgeProfile` (when supplied) layers
143
- * on top of the trailer via `composeProfilePrompt`, matching the
143
+ * on top of the trailer via `composeSystemPrompt`, matching the
144
144
  * supervisor/facilitator pattern.
145
145
  *
146
146
  * @param {object} deps
@@ -151,7 +151,7 @@ export class Judge {
151
151
  * @param {string} [deps.model]
152
152
  * @param {number} [deps.maxTurns] - Default 5 (the judge is expected to act in turn 1; 5 leaves headroom for tool inspection).
153
153
  * @param {string[]} [deps.allowedTools] - Default `["Read","Glob","Grep","Bash"]` — read-only inspection.
154
- * @param {string} [deps.judgeProfile] - Profile name; resolved into the system prompt via `composeProfilePrompt`.
154
+ * @param {string} [deps.judgeProfile] - Profile name; resolved into the system prompt via `composeSystemPrompt`.
155
155
  * @param {string} [deps.profilesDir] - Defaults to `<cwd>/.claude/agents`.
156
156
  * @param {string} [deps.taskAmend]
157
157
  * @returns {Judge}
@@ -176,17 +176,13 @@ export function createJudge({
176
176
  if (!runtime) throw new Error("runtime is required");
177
177
 
178
178
  const resolvedProfilesDir = profilesDir ?? resolve(cwd, ".claude/agents");
179
- const systemPrompt = judgeProfile
180
- ? composeProfilePrompt(judgeProfile, {
181
- profilesDir: resolvedProfilesDir,
182
- trailer: JUDGE_SYSTEM_PROMPT,
183
- runtime,
184
- })
185
- : {
186
- type: "preset",
187
- preset: "claude_code",
188
- append: JUDGE_SYSTEM_PROMPT,
189
- };
179
+ const systemPrompt = composeSystemPrompt({
180
+ role: "agent",
181
+ profile: judgeProfile,
182
+ profilesDir: resolvedProfilesDir,
183
+ trailer: JUDGE_SYSTEM_PROMPT,
184
+ runtime,
185
+ });
190
186
 
191
187
  const ctx = createOrchestrationContext();
192
188
  ctx.participants = [{ name: "judge", role: "judge" }];
@@ -1,7 +1,39 @@
1
1
  /**
2
2
  * System prompt composition for agent runners.
3
3
  *
4
- * Two helpers:
4
+ * libeval assembles every agent system prompt from up to two parallel,
5
+ * sibling-tagged sections (see COALIGNED.md § L0):
6
+ *
7
+ * <agent_profile>
8
+ * …persona body…
9
+ * </agent_profile>
10
+ *
11
+ * <session_protocol>
12
+ * …orchestration mechanics, then any amendment…
13
+ * </session_protocol>
14
+ *
15
+ * The two tags are siblings joined by a blank line — neither nests inside
16
+ * the other. A section appears only when its content is present. The tag
17
+ * convention lives entirely here: profile `.md` files and trailer constants
18
+ * carry no tags.
19
+ *
20
+ * The `<session_protocol>` body is assembled from up to three fragments, in
21
+ * order of decreasing generality:
22
+ *
23
+ * 1. the role-invariant orchestration trailer (libeval-owned);
24
+ * 2. the profile's own hoisted `## Session Protocol` section, if present;
25
+ * 3. a run-specific amendment, if supplied.
26
+ *
27
+ * Fragment 2 is the convention-based hoist: a profile may carry a level-2
28
+ * `## Session Protocol` markdown heading whose body is the role's work
29
+ * routine. When present, that section is lifted out of `<agent_profile>` and
30
+ * folded into `<session_protocol>` next to the orchestration mechanics, so
31
+ * the harness comms protocol and the role's work routine read as one
32
+ * coherent block. The heading line itself is dropped — the tag already names
33
+ * the section. Profiles with no such heading are unaffected (the entire body
34
+ * stays in `<agent_profile>`).
35
+ *
36
+ * Helpers:
5
37
  *
6
38
  * - `composeProfilePrompt(name, opts)` — profile + `claude_code` preset.
7
39
  * Used by agent participants that need the full Claude Code tool surface.
@@ -10,61 +42,186 @@
10
42
  * roles (supervisor, facilitator, discuss lead) that should only see
11
43
  * the orchestration instructions and optionally a profile body.
12
44
  *
13
- * - `composeSystemPrompt(opts)` — unified entry point. Delegates to one
45
+ * - `composeSystemPrompt(opts)` — unified entry point. Threads `amend` into
46
+ * the protocol section as the run-specific fragment, then delegates to one
14
47
  * of the above based on `opts.role`.
15
48
  */
16
49
 
17
50
  import { join } from "node:path";
18
51
 
52
+ /** Sibling section tags. Neither nests inside the other. */
53
+ const AGENT_PROFILE_TAG = "agent_profile";
54
+ const SESSION_PROTOCOL_TAG = "session_protocol";
55
+
56
+ /**
57
+ * A level-2 heading that names the profile's hoisted session-protocol
58
+ * section. Case-insensitive, tolerant of trailing whitespace, but the level
59
+ * is fixed at two `#` so a `### Session Protocol` subsection does not trip
60
+ * the hoist.
61
+ */
62
+ const SESSION_PROTOCOL_HEADING = /^##[ \t]+session protocol[ \t]*$/i;
63
+
64
+ /** A level-1 or level-2 heading — the boundary that ends a hoisted section. */
65
+ const SECTION_BOUNDARY = /^#{1,2}[ \t]+\S/;
66
+
67
+ /** Wrap content in a semantic section tag, each on its own line. */
68
+ function wrapSection(tag, content) {
69
+ return `<${tag}>\n${content}\n</${tag}>`;
70
+ }
71
+
72
+ /**
73
+ * Assemble the parallel `<agent_profile>` / `<session_protocol>` sections.
74
+ * The profile section is emitted only when `body` is non-empty. The protocol
75
+ * section is built by joining its fragments (in the order given) with a
76
+ * blank-line separator, dropping any that are empty, and is emitted only
77
+ * when at least one fragment survives. The two tags are siblings joined by a
78
+ * blank line and never nest.
79
+ *
80
+ * @param {object} parts
81
+ * @param {string} [parts.body] - Profile body, frontmatter-stripped and with
82
+ * any `## Session Protocol` section already hoisted out.
83
+ * @param {Array<string | undefined>} [parts.protocolParts] - Ordered session
84
+ * protocol fragments: trailer, hoisted profile section, run amendment.
85
+ * @returns {string}
86
+ */
87
+ function assembleSections({ body, protocolParts = [] }) {
88
+ const sections = [];
89
+ if (body) sections.push(wrapSection(AGENT_PROFILE_TAG, body));
90
+ const protocol = protocolParts.filter(Boolean).join("\n\n");
91
+ if (protocol) sections.push(wrapSection(SESSION_PROTOCOL_TAG, protocol));
92
+ return sections.join("\n\n");
93
+ }
94
+
95
+ /**
96
+ * Split a frontmatter-stripped profile body into its persona and an optional
97
+ * hoisted `## Session Protocol` section. The section runs from its heading to
98
+ * the next level-1/level-2 heading (or end of body); the heading line is
99
+ * dropped. Anything before and after the section is rejoined into `persona`.
100
+ * When the body carries no `## Session Protocol` heading, the whole body is
101
+ * returned as `persona` and `protocol` is `undefined`.
102
+ *
103
+ * @param {string} body - Frontmatter-stripped, trimmed profile body.
104
+ * @returns {{ persona: string, protocol: string | undefined }}
105
+ */
106
+ function splitSessionProtocol(body) {
107
+ const lines = body.split("\n");
108
+ const start = lines.findIndex((line) => SESSION_PROTOCOL_HEADING.test(line));
109
+ if (start === -1) return { persona: body, protocol: undefined };
110
+
111
+ let end = lines.length;
112
+ for (let i = start + 1; i < lines.length; i++) {
113
+ if (SECTION_BOUNDARY.test(lines[i])) {
114
+ end = i;
115
+ break;
116
+ }
117
+ }
118
+
119
+ const protocol = lines
120
+ .slice(start + 1, end)
121
+ .join("\n")
122
+ .trim();
123
+ const before = lines.slice(0, start).join("\n").trim();
124
+ const after = lines.slice(end).join("\n").trim();
125
+ const persona = [before, after].filter(Boolean).join("\n\n");
126
+ return { persona, protocol: protocol || undefined };
127
+ }
128
+
129
+ /**
130
+ * Read a profile `.md`, strip its frontmatter, and split off any hoisted
131
+ * `## Session Protocol` section. Reads synchronously off the injected
132
+ * `runtime.fsSync` surface — this composer runs inside the synchronous
133
+ * SDK-option builders of the supervisor / facilitator / discusser / judge
134
+ * factories, so it cannot go async without an unbounded cascade.
135
+ *
136
+ * @param {string} name - Profile basename (no `.md` suffix)
137
+ * @param {string} profilesDir - Directory containing `<name>.md`
138
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
139
+ * @returns {{ persona: string, protocol: string | undefined }}
140
+ */
141
+ function readProfileSections(name, profilesDir, runtime) {
142
+ const path = join(profilesDir, `${name}.md`);
143
+ const raw = runtime.fsSync.readFileSync(path, "utf8");
144
+ return splitSessionProtocol(stripFrontmatter(raw).trim());
145
+ }
146
+
19
147
  /**
20
148
  * Compose a `claude_code`-preset system prompt from a profile file. The
21
- * profile is read synchronously off the injected `runtime.fsSync` surface —
22
- * this composer runs inside the synchronous SDK-option builders of the
23
- * supervisor / facilitator / discusser / judge factories, so it cannot go
24
- * async without an unbounded cascade.
149
+ * persona is wrapped in `<agent_profile>`; the protocol trailer, the
150
+ * profile's hoisted `## Session Protocol` section, and any amendment are
151
+ * joined (in that order) into a sibling `<session_protocol>`.
25
152
  *
26
153
  * @param {string} name - Profile basename (no `.md` suffix)
27
154
  * @param {object} opts
28
155
  * @param {string} opts.profilesDir - Directory containing `<name>.md`
29
- * @param {string} [opts.trailer] - Mode-specific trailer appended after a blank line
156
+ * @param {string} [opts.trailer] - Session protocol orchestration mechanics,
157
+ * the first fragment of the `<session_protocol>` section.
158
+ * @param {string} [opts.amend] - Run-specific amendment, the last fragment of
159
+ * the `<session_protocol>` section.
30
160
  * @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
31
161
  * @returns {{type: "preset", preset: "claude_code", append: string}}
32
162
  */
33
- export function composeProfilePrompt(name, { profilesDir, trailer, runtime }) {
34
- const path = join(profilesDir, `${name}.md`);
35
- const raw = runtime.fsSync.readFileSync(path, "utf8");
36
- const body = stripFrontmatter(raw).trim();
37
- const append = trailer && trailer.length > 0 ? `${body}\n\n${trailer}` : body;
38
- return { type: "preset", preset: "claude_code", append };
163
+ export function composeProfilePrompt(
164
+ name,
165
+ { profilesDir, trailer, amend, runtime },
166
+ ) {
167
+ const { persona, protocol } = readProfileSections(name, profilesDir, runtime);
168
+ return {
169
+ type: "preset",
170
+ preset: "claude_code",
171
+ append: assembleSections({
172
+ body: persona,
173
+ protocolParts: [trailer, protocol, amend],
174
+ }),
175
+ };
39
176
  }
40
177
 
41
178
  /**
42
- * Compose a plain-string system prompt for a lead role (no Claude Code preset).
179
+ * Compose a plain-string system prompt for a lead role (no Claude Code
180
+ * preset). The protocol trailer, an optional profile's hoisted
181
+ * `## Session Protocol` section, and any amendment are joined into
182
+ * `<session_protocol>`; an optional persona is wrapped in a sibling
183
+ * `<agent_profile>` before it.
184
+ *
43
185
  * @param {object} opts
44
186
  * @param {string} [opts.profile] - Profile basename (no `.md` suffix)
45
187
  * @param {string} [opts.profilesDir] - Directory containing profile files
46
- * @param {string} opts.trailer - Mode-specific orchestration instructions
188
+ * @param {string} opts.trailer - Session protocol (orchestration instructions)
189
+ * @param {string} [opts.amend] - Run-specific amendment, the last fragment of
190
+ * the `<session_protocol>` section.
47
191
  * @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
48
192
  * @returns {string}
49
193
  */
50
- export function composeLeadPrompt({ profile, profilesDir, trailer, runtime }) {
194
+ export function composeLeadPrompt({
195
+ profile,
196
+ profilesDir,
197
+ trailer,
198
+ amend,
199
+ runtime,
200
+ }) {
51
201
  if (!trailer) throw new Error("trailer is required");
52
- if (!profile) return trailer;
53
- const path = join(profilesDir, `${profile}.md`);
54
- const raw = runtime.fsSync.readFileSync(path, "utf8");
55
- const body = stripFrontmatter(raw).trim();
56
- return `${body}\n\n${trailer}`;
202
+ const { persona, protocol } = profile
203
+ ? readProfileSections(profile, profilesDir, runtime)
204
+ : { persona: undefined, protocol: undefined };
205
+ return assembleSections({
206
+ body: persona,
207
+ protocolParts: [trailer, protocol, amend],
208
+ });
57
209
  }
58
210
 
59
211
  /**
60
- * Unified entry point for composing system prompts.
212
+ * Unified entry point for composing system prompts. Threads an optional
213
+ * amendment through as the run-specific fragment of `<session_protocol>`
214
+ * (after the trailer and any hoisted profile section), then delegates by
215
+ * role.
61
216
  *
62
217
  * @param {object} opts
63
218
  * @param {"lead"|"agent"} opts.role - `"lead"` produces a plain string;
64
219
  * `"agent"` produces a `claude_code` preset object.
65
220
  * @param {string} [opts.profile] - Profile basename
66
221
  * @param {string} [opts.profilesDir]
67
- * @param {string} opts.trailer - Mode-specific instructions
222
+ * @param {string} opts.trailer - Session protocol (orchestration instructions)
223
+ * @param {string} [opts.amend] - Caller-supplied amendment, the last fragment
224
+ * inside `<session_protocol>`, joined with a blank-line separator.
68
225
  * @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
69
226
  * @returns {string | {type: "preset", preset: "claude_code", append: string}}
70
227
  */
@@ -73,16 +230,26 @@ export function composeSystemPrompt({
73
230
  profile,
74
231
  profilesDir,
75
232
  trailer,
233
+ amend,
76
234
  runtime,
77
235
  }) {
78
236
  if (!trailer) throw new Error("trailer is required");
79
237
  if (role === "lead") {
80
- return composeLeadPrompt({ profile, profilesDir, trailer, runtime });
238
+ return composeLeadPrompt({ profile, profilesDir, trailer, amend, runtime });
81
239
  }
82
240
  if (profile) {
83
- return composeProfilePrompt(profile, { profilesDir, trailer, runtime });
241
+ return composeProfilePrompt(profile, {
242
+ profilesDir,
243
+ trailer,
244
+ amend,
245
+ runtime,
246
+ });
84
247
  }
85
- return { type: "preset", preset: "claude_code", append: trailer };
248
+ return {
249
+ type: "preset",
250
+ preset: "claude_code",
251
+ append: assembleSections({ protocolParts: [trailer, amend] }),
252
+ };
86
253
  }
87
254
 
88
255
  /**
package/src/redaction.js CHANGED
@@ -15,6 +15,7 @@ export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
15
15
  "DATABASE_PASSWORD",
16
16
  "GH_TOKEN",
17
17
  "GITHUB_TOKEN",
18
+ "JWT_SECRET",
18
19
  "MCP_TOKEN",
19
20
  "MICROSOFT_APP_ID",
20
21
  "MICROSOFT_APP_PASSWORD",
@@ -22,7 +23,6 @@ export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
22
23
  "PRODUCT_LANDMARK_TOKEN",
23
24
  "SERVICE_SECRET",
24
25
  "SUPABASE_ANON_KEY",
25
- "SUPABASE_JWT_SECRET",
26
26
  "SUPABASE_SERVICE_ROLE_KEY",
27
27
  ]);
28
28
 
@@ -135,7 +135,8 @@ export function createRedactor({
135
135
  patterns = DEFAULT_PATTERNS,
136
136
  enabled,
137
137
  } = {}) {
138
- const proc = runtime?.proc ?? defaultProc();
138
+ if (!runtime) throw new Error("runtime is required");
139
+ const proc = runtime.proc;
139
140
  const resolvedEnv = env ?? proc.env;
140
141
  const envDisabled = resolvedEnv.LIBEVAL_REDACTION_DISABLED === "1";
141
142
  const resolvedEnabled = enabled ?? !envDisabled;
@@ -151,20 +152,6 @@ export function createRedactor({
151
152
  return new Redactor({ envSnapshot, patterns, enabled: resolvedEnabled });
152
153
  }
153
154
 
154
- /**
155
- * Lazily build the production proc surface so callers that don't inject a
156
- * runtime keep working. Imported indirectly to avoid pulling the whole
157
- * runtime bag (and its `node:fs`/`node:child_process` imports) into modules
158
- * that only ever receive an injected runtime.
159
- * @returns {{env: Record<string, string|undefined>, stderr: {write: (s: string) => void}}}
160
- */
161
- function defaultProc() {
162
- return {
163
- env: globalThis.process?.env ?? {},
164
- stderr: { write: (s) => globalThis.process?.stderr?.write(s) },
165
- };
166
- }
167
-
168
155
  /**
169
156
  * Parse `LIBEVAL_REDACTION_ENV_VARS` into a trimmed, non-empty name list.
170
157
  * Falls back to `DEFAULT_ENV_ALLOWLIST` when unset or empty.
package/src/supervisor.js CHANGED
@@ -122,6 +122,7 @@ const devNull = new Writable({
122
122
  * @param {string[]} [deps.supervisorDisallowedTools]
123
123
  * @param {string} [deps.supervisorProfile]
124
124
  * @param {string} [deps.agentProfile]
125
+ * @param {string} [deps.agentSystemPromptAmend] - Amendment folded into the agent's `<session_protocol>` section, after the protocol trailer.
125
126
  * @param {string} [deps.profilesDir]
126
127
  * @param {string} [deps.taskAmend]
127
128
  * @param {Record<string, object>} [deps.agentMcpServers]
@@ -141,6 +142,7 @@ export function createSupervisor({
141
142
  supervisorDisallowedTools,
142
143
  supervisorProfile,
143
144
  agentProfile,
145
+ agentSystemPromptAmend,
144
146
  profilesDir,
145
147
  taskAmend,
146
148
  agentMcpServers,
@@ -182,6 +184,7 @@ export function createSupervisor({
182
184
  profile: agentProfile,
183
185
  profilesDir: resolvedProfilesDir,
184
186
  trailer: AGENT_SYSTEM_PROMPT,
187
+ amend: agentSystemPromptAmend,
185
188
  runtime,
186
189
  }),
187
190
  mcpServers: { orchestration: agentServer, ...agentMcpServers },