@forwardimpact/libeval 0.1.52 → 0.1.53

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
 
3
3
  import "@forwardimpact/libpreflight/node22";
4
4
 
5
- import { readFileSync, realpathSync } from "node:fs";
5
+ import { realpathSync } from "node:fs";
6
6
  import { createCli } from "@forwardimpact/libcli";
7
7
  import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
8
8
  import { createLogger } from "@forwardimpact/libtelemetry";
@@ -11,17 +11,8 @@ import { runBenchmarkRunCommand } from "../src/commands/benchmark-run.js";
11
11
  import { runBenchmarkInvariantsCommand } from "../src/commands/benchmark-invariants.js";
12
12
  import { runBenchmarkReportCommand } from "../src/commands/benchmark-report.js";
13
13
 
14
- // `bun build --compile` injects FIT_BENCHMARK_VERSION via --define, eliminating
15
- // the readFileSync branch in the compiled binary (which would ENOENT against
16
- // the bunfs virtual mount). Source execution falls through to package.json.
17
- const VERSION =
18
- process.env.FIT_BENCHMARK_VERSION ||
19
- JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
20
- .version;
21
-
22
14
  export const definition = {
23
15
  name: "fit-benchmark",
24
- version: VERSION,
25
16
  description:
26
17
  "Run coding-agent task families, grade hidden tests, and aggregate pass@k across runs.",
27
18
  commands: [
@@ -156,11 +147,14 @@ export const definition = {
156
147
  ],
157
148
  };
158
149
 
159
- const logger = createLogger("benchmark");
150
+ const runtime = createDefaultRuntime();
151
+ const logger = createLogger("benchmark", runtime);
160
152
 
161
153
  async function main() {
162
- const runtime = createDefaultRuntime();
163
- const cli = createCli(definition, { runtime });
154
+ const cli = createCli(definition, {
155
+ runtime,
156
+ packageJsonUrl: new URL("../package.json", import.meta.url),
157
+ });
164
158
  const parsed = cli.parse(runtime.proc.argv.slice(2));
165
159
  if (!parsed) return runtime.proc.exit(0);
166
160
 
@@ -187,7 +181,7 @@ async function main() {
187
181
  if (import.meta.url === `file://${realpathSync(process.argv[1])}`) {
188
182
  main().catch((error) => {
189
183
  logger.exception("main", error);
190
- createCli(definition).error(error.message);
184
+ createCli(definition, { runtime }).error(error.message);
191
185
  process.exit(1);
192
186
  });
193
187
  }
package/bin/fit-eval.js CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  import "@forwardimpact/libpreflight/node22";
4
4
 
5
- import { readFileSync } from "node:fs";
6
5
  import { createCli } from "@forwardimpact/libcli";
7
6
  import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
8
7
  import { createLogger } from "@forwardimpact/libtelemetry";
@@ -15,14 +14,6 @@ import { runFacilitateCommand } from "../src/commands/facilitate.js";
15
14
  import { runDiscussCommand } from "../src/commands/discuss.js";
16
15
  import { runCallbackCommand } from "../src/commands/callback.js";
17
16
 
18
- // `bun build --compile` injects FIT_EVAL_VERSION via --define, eliminating
19
- // the readFileSync branch in the compiled binary (which would ENOENT against
20
- // the bunfs virtual mount). Source execution falls through to package.json.
21
- const VERSION =
22
- process.env.FIT_EVAL_VERSION ||
23
- JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
24
- .version;
25
-
26
17
  const LEAD_OPTIONS = {
27
18
  "lead-profile": {
28
19
  type: "string",
@@ -60,7 +51,6 @@ const TASK_INPUT_OPTIONS = {
60
51
 
61
52
  const definition = {
62
53
  name: "fit-eval",
63
- version: VERSION,
64
54
  description:
65
55
  "Run agents and capture NDJSON traces — for agent evaluations or multi-agent collaboration",
66
56
  commands: [
@@ -313,11 +303,14 @@ const definition = {
313
303
  ],
314
304
  };
315
305
 
316
- const logger = createLogger("eval");
306
+ const runtime = createDefaultRuntime();
307
+ const logger = createLogger("eval", runtime);
317
308
 
318
309
  async function main() {
319
- const runtime = createDefaultRuntime();
320
- const cli = createCli(definition, { runtime });
310
+ const cli = createCli(definition, {
311
+ runtime,
312
+ packageJsonUrl: new URL("../package.json", import.meta.url),
313
+ });
321
314
  const parsed = cli.parse(runtime.proc.argv.slice(2));
322
315
  if (!parsed) return runtime.proc.exit(0);
323
316
 
@@ -341,6 +334,6 @@ async function main() {
341
334
 
342
335
  main().catch((error) => {
343
336
  logger.exception("main", error);
344
- createCli(definition).error(error.message);
337
+ createCli(definition, { runtime }).error(error.message);
345
338
  process.exit(1);
346
339
  });
@@ -7,12 +7,11 @@
7
7
 
8
8
  import "@forwardimpact/libpreflight/node22";
9
9
  import { existsSync, readFileSync, writeFileSync } from "node:fs";
10
- import fsPromises from "node:fs/promises";
11
10
  import { parseArgs } from "node:util";
12
11
  import { resolve, relative, dirname } from "node:path";
13
12
  import { execFileSync } from "node:child_process";
14
13
 
15
- import { Finder } from "@forwardimpact/libutil";
14
+ import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
16
15
  import { minimatch } from "minimatch";
17
16
 
18
17
  const HELP = `fit-selfedit — write stdin to a settings.json-allowed path on a non-main branch.
@@ -71,8 +70,11 @@ if (extra.length > 0) fail(`unexpected extra arguments: ${extra.join(" ")}`);
71
70
 
72
71
  const absoluteTarget = resolve(process.cwd(), targetArg);
73
72
 
74
- // Safeguard 1: settings.json must grant Edit() on this path.
75
- const settingsPath = new Finder(fsPromises, { debug() {} }).findUpward(
73
+ // Safeguard 1: settings.json must grant Edit() on this path. The bin is the
74
+ // sole construction site for the runtime, so use the runtime's own finder
75
+ // rather than constructing a separate Finder here (Success Criterion 9).
76
+ const runtime = createDefaultRuntime();
77
+ const settingsPath = runtime.finder.findUpward(
76
78
  dirname(absoluteTarget),
77
79
  ".claude/settings.json",
78
80
  20,
package/bin/fit-trace.js CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  import "@forwardimpact/libpreflight/node22";
4
4
 
5
- import { readFileSync } from "node:fs";
6
5
  import { createCli } from "@forwardimpact/libcli";
7
6
  import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
8
7
  import { createScriptConfig } from "@forwardimpact/libconfig";
@@ -31,17 +30,8 @@ import {
31
30
  import { runAssertCommand } from "../src/commands/assert.js";
32
31
  import { runByDiscussionCommand } from "../src/commands/by-discussion.js";
33
32
 
34
- // `bun build --compile` injects FIT_TRACE_VERSION via --define, eliminating
35
- // the readFileSync branch in the compiled binary (which would ENOENT against
36
- // the bunfs virtual mount). Source execution falls through to package.json.
37
- const VERSION =
38
- process.env.FIT_TRACE_VERSION ||
39
- JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
40
- .version;
41
-
42
33
  const definition = {
43
34
  name: "fit-trace",
44
- version: VERSION,
45
35
  description:
46
36
  "Download, query, and analyze agent execution traces — read NDJSON output from fit-eval as qualitative research",
47
37
  commands: [
@@ -340,15 +330,18 @@ const definition = {
340
330
  ],
341
331
  };
342
332
 
343
- const logger = createLogger("trace");
333
+ const runtime = createDefaultRuntime();
334
+ const logger = createLogger("trace", runtime);
344
335
 
345
336
  // Commands that talk to the GitHub API need a config-backed token resolver;
346
337
  // the rest only read local trace files through the runtime.
347
338
  const NEEDS_CONFIG = new Set(["runs", "download"]);
348
339
 
349
340
  async function main() {
350
- const runtime = createDefaultRuntime();
351
- const cli = createCli(definition, { runtime });
341
+ const cli = createCli(definition, {
342
+ runtime,
343
+ packageJsonUrl: new URL("../package.json", import.meta.url),
344
+ });
352
345
  const parsed = cli.parse(runtime.proc.argv.slice(2));
353
346
  if (!parsed) return runtime.proc.exit(0);
354
347
 
@@ -376,6 +369,6 @@ async function main() {
376
369
 
377
370
  main().catch((error) => {
378
371
  logger.exception("main", error);
379
- createCli(definition).error(error.message);
372
+ createCli(definition, { runtime }).error(error.message);
380
373
  process.exit(1);
381
374
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.52",
3
+ "version": "0.1.53",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
@@ -5,8 +5,8 @@
5
5
  * - RESULT_RECORD_SCHEMA — one record per (task, runIndex) from a full
6
6
  * benchmark run. Has a happy branch (invariants + judge present) and a
7
7
  * pre-flight-failure branch (invariants/judgeVerdict/submission absent).
8
- * - INVARIANTS_RECORD_SCHEMA — narrower output of `benchmark-invariants`
9
- * (P7): ad-hoc grading without a full lifecycle.
8
+ * - INVARIANTS_RECORD_SCHEMA — narrower output of `benchmark-invariants`:
9
+ * ad-hoc grading without a full lifecycle.
10
10
  *
11
11
  * Validation is throw-on-mismatch so the runner can wrap every JSONL append
12
12
  * in a guard and reject schema drift at write time.
@@ -2,7 +2,7 @@
2
2
  * Task-family loader. A task family is a directory under
3
3
  * <root>/
4
4
  * apm.lock.yaml
5
- * .claude/ # pre-staged skills + agents (P1)
5
+ * .claude/ # pre-staged skills + agents
6
6
  * tasks/<task_name>/
7
7
  * agent.task.md
8
8
  * supervisor.task.md # optional; appended to the task as supervisor context
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * `fit-benchmark invariants` — check a single task's invariants against a
3
- * post-run workdir directory without invoking an agent (P6/P7). Useful for
3
+ * post-run workdir directory without invoking an agent. Useful for
4
4
  * re-checking an agent's output against revised grading material.
5
5
  */
6
6
 
package/src/discusser.js CHANGED
@@ -274,6 +274,7 @@ export function createDiscusser({
274
274
  messageBus,
275
275
  leadName: "lead",
276
276
  signal: abortController.signal,
277
+ runtime,
277
278
  })
278
279
  : null;
279
280
 
@@ -309,10 +310,6 @@ export function createDiscusser({
309
310
  from: config.name,
310
311
  });
311
312
 
312
- const agentTrailer = config.systemPromptAmend
313
- ? `${DISCUSS_AGENT_SYSTEM_PROMPT}\n\n${config.systemPromptAmend}`
314
- : DISCUSS_AGENT_SYSTEM_PROMPT;
315
-
316
313
  const runner = createAgentRunner({
317
314
  cwd: config.cwd ?? resolvedLeadCwd,
318
315
  query,
@@ -327,7 +324,8 @@ export function createDiscusser({
327
324
  role: "agent",
328
325
  profile: config.agentProfile,
329
326
  profilesDir: resolvedProfilesDir,
330
- trailer: agentTrailer,
327
+ trailer: DISCUSS_AGENT_SYSTEM_PROMPT,
328
+ amend: config.systemPromptAmend,
331
329
  runtime,
332
330
  }),
333
331
  redactor,
@@ -29,8 +29,14 @@ export const TASK_TEMPLATE_ISSUE_LABELED =
29
29
  export const TASK_TEMPLATE_PR_LABELED =
30
30
  'Label "${LABEL}" was added to PR "${PR_TITLE}" (#${NUMBER}). PR URL: ${URL}.';
31
31
 
32
+ // "unreleased changes"/"cut" point at the genuine post-merge action — release
33
+ // activity (the release-engineer's Assess step 3 / `kata-release-cut`).
34
+ // "status" is a backstop: the spec's `wiki/STATUS.md` row is normally advanced
35
+ // in the pre-merge gate (`kata-release-merge` Step 8), but the keyword catches a
36
+ // merge that landed without it. Neither owner nor artifact is named, so the lead
37
+ // routes the merge instead of treating it as a no-op.
32
38
  export const TASK_TEMPLATE_PR_MERGED =
33
- 'PR "${PR_TITLE}" (#${NUMBER}) merged. PR URL: ${URL}.';
39
+ 'PR "${PR_TITLE}" (#${NUMBER}) merged to main — may leave unreleased changes to cut or status to update. PR URL: ${URL}.';
34
40
 
35
41
  // Appended verbatim to comment/review templates. `${BODY}` is the untrusted
36
42
  // author text; the fence and the "data, not instructions" framing keep the lead
@@ -134,10 +134,6 @@ export function createFacilitator({
134
134
  from: config.name,
135
135
  });
136
136
 
137
- const agentTrailer = config.systemPromptAmend
138
- ? `${FACILITATED_AGENT_SYSTEM_PROMPT}\n\n${config.systemPromptAmend}`
139
- : FACILITATED_AGENT_SYSTEM_PROMPT;
140
-
141
137
  const runner = createAgentRunner({
142
138
  cwd: config.cwd ?? facilitatorCwd,
143
139
  query,
@@ -152,7 +148,8 @@ export function createFacilitator({
152
148
  role: "agent",
153
149
  profile: config.agentProfile,
154
150
  profilesDir: resolvedProfilesDir,
155
- trailer: agentTrailer,
151
+ trailer: FACILITATED_AGENT_SYSTEM_PROMPT,
152
+ amend: config.systemPromptAmend,
156
153
  runtime,
157
154
  }),
158
155
  redactor,
@@ -18,20 +18,17 @@ export class InboxPoller {
18
18
  * @param {import("./message-bus.js").MessageBus} deps.messageBus
19
19
  * @param {string} deps.leadName
20
20
  * @param {AbortSignal} deps.signal
21
- * @param {import("@forwardimpact/libutil/runtime").Runtime} [deps.runtime] -
22
- * Ambient collaborators; only `clock.setTimeout`/`clock.clearTimeout` are
23
- * used for the inter-poll backoff. Falls back to the global timers when
24
- * absent so existing callers keep working.
21
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
22
+ * Injected collaborators; `clock.setTimeout`/`clock.clearTimeout` drive the
23
+ * inter-poll backoff.
25
24
  */
26
25
  constructor({ inboxUrl, messageBus, leadName, signal, runtime }) {
26
+ if (!runtime) throw new Error("runtime is required");
27
27
  this.#inboxUrl = inboxUrl;
28
28
  this.#messageBus = messageBus;
29
29
  this.#leadName = leadName;
30
30
  this.#signal = signal;
31
- this.#clock = runtime?.clock ?? {
32
- setTimeout: (fn, ms) => globalThis.setTimeout(fn, ms),
33
- clearTimeout: (h) => globalThis.clearTimeout(h),
34
- };
31
+ this.#clock = runtime.clock;
35
32
  }
36
33
 
37
34
  /** Long-poll the inbox until the abort signal fires. */
package/src/judge.js CHANGED
@@ -17,7 +17,7 @@ import { resolve } from "node:path";
17
17
  import { Writable } from "node:stream";
18
18
 
19
19
  import { createAgentRunner } from "./agent-runner.js";
20
- import { composeProfilePrompt } from "./profile-prompt.js";
20
+ import { composeSystemPrompt } from "./profile-prompt.js";
21
21
  import { SequenceCounter } from "./sequence-counter.js";
22
22
  import {
23
23
  createJudgeToolServer,
@@ -140,7 +140,7 @@ export class Judge {
140
140
  /**
141
141
  * Factory function — wires the AgentRunner with the judge orchestration server
142
142
  * and the JUDGE_SYSTEM_PROMPT trailer. A `judgeProfile` (when supplied) layers
143
- * on top of the trailer via `composeProfilePrompt`, matching the
143
+ * on top of the trailer via `composeSystemPrompt`, matching the
144
144
  * supervisor/facilitator pattern.
145
145
  *
146
146
  * @param {object} deps
@@ -151,7 +151,7 @@ export class Judge {
151
151
  * @param {string} [deps.model]
152
152
  * @param {number} [deps.maxTurns] - Default 5 (the judge is expected to act in turn 1; 5 leaves headroom for tool inspection).
153
153
  * @param {string[]} [deps.allowedTools] - Default `["Read","Glob","Grep","Bash"]` — read-only inspection.
154
- * @param {string} [deps.judgeProfile] - Profile name; resolved into the system prompt via `composeProfilePrompt`.
154
+ * @param {string} [deps.judgeProfile] - Profile name; resolved into the system prompt via `composeSystemPrompt`.
155
155
  * @param {string} [deps.profilesDir] - Defaults to `<cwd>/.claude/agents`.
156
156
  * @param {string} [deps.taskAmend]
157
157
  * @returns {Judge}
@@ -176,17 +176,13 @@ export function createJudge({
176
176
  if (!runtime) throw new Error("runtime is required");
177
177
 
178
178
  const resolvedProfilesDir = profilesDir ?? resolve(cwd, ".claude/agents");
179
- const systemPrompt = judgeProfile
180
- ? composeProfilePrompt(judgeProfile, {
181
- profilesDir: resolvedProfilesDir,
182
- trailer: JUDGE_SYSTEM_PROMPT,
183
- runtime,
184
- })
185
- : {
186
- type: "preset",
187
- preset: "claude_code",
188
- append: JUDGE_SYSTEM_PROMPT,
189
- };
179
+ const systemPrompt = composeSystemPrompt({
180
+ role: "agent",
181
+ profile: judgeProfile,
182
+ profilesDir: resolvedProfilesDir,
183
+ trailer: JUDGE_SYSTEM_PROMPT,
184
+ runtime,
185
+ });
190
186
 
191
187
  const ctx = createOrchestrationContext();
192
188
  ctx.participants = [{ name: "judge", role: "judge" }];
@@ -1,7 +1,25 @@
1
1
  /**
2
2
  * System prompt composition for agent runners.
3
3
  *
4
- * Two helpers:
4
+ * libeval assembles every agent system prompt from up to two parallel,
5
+ * sibling-tagged sections (see COALIGNED.md § L0):
6
+ *
7
+ * <agent_profile>
8
+ * …persona body…
9
+ * </agent_profile>
10
+ *
11
+ * <session_protocol>
12
+ * …orchestration mechanics, then any amendment…
13
+ * </session_protocol>
14
+ *
15
+ * The two tags are siblings joined by a blank line — neither nests inside
16
+ * the other. A section appears only when its content is present. A
17
+ * system-prompt amendment is folded into the protocol trailer before
18
+ * wrapping, so it lands transparently inside `<session_protocol>`. The tag
19
+ * convention lives entirely here: profile `.md` files and trailer constants
20
+ * carry no tags.
21
+ *
22
+ * Helpers:
5
23
  *
6
24
  * - `composeProfilePrompt(name, opts)` — profile + `claude_code` preset.
7
25
  * Used by agent participants that need the full Claude Code tool surface.
@@ -10,61 +28,113 @@
10
28
  * roles (supervisor, facilitator, discuss lead) that should only see
11
29
  * the orchestration instructions and optionally a profile body.
12
30
  *
13
- * - `composeSystemPrompt(opts)` — unified entry point. Delegates to one
14
- * of the above based on `opts.role`.
31
+ * - `composeSystemPrompt(opts)` — unified entry point. Folds `amend` into
32
+ * the protocol section, then delegates to one of the above based on
33
+ * `opts.role`.
15
34
  */
16
35
 
17
36
  import { join } from "node:path";
18
37
 
38
+ /** Sibling section tags. Neither nests inside the other. */
39
+ const AGENT_PROFILE_TAG = "agent_profile";
40
+ const SESSION_PROTOCOL_TAG = "session_protocol";
41
+
42
+ /** Wrap content in a semantic section tag, each on its own line. */
43
+ function wrapSection(tag, content) {
44
+ return `<${tag}>\n${content}\n</${tag}>`;
45
+ }
46
+
19
47
  /**
20
- * Compose a `claude_code`-preset system prompt from a profile file. The
21
- * profile is read synchronously off the injected `runtime.fsSync` surface
22
- * this composer runs inside the synchronous SDK-option builders of the
48
+ * Assemble the parallel `<agent_profile>` / `<session_protocol>` sections.
49
+ * Each section is emitted only when its content is non-empty; the two tags
50
+ * are siblings joined by a blank line and never nest.
51
+ *
52
+ * @param {object} parts
53
+ * @param {string} [parts.body] - Profile body, already frontmatter-stripped.
54
+ * @param {string} [parts.protocol] - Session protocol trailer, with any
55
+ * amendment already folded in.
56
+ * @returns {string}
57
+ */
58
+ function assembleSections({ body, protocol }) {
59
+ const sections = [];
60
+ if (body) sections.push(wrapSection(AGENT_PROFILE_TAG, body));
61
+ if (protocol) sections.push(wrapSection(SESSION_PROTOCOL_TAG, protocol));
62
+ return sections.join("\n\n");
63
+ }
64
+
65
+ /**
66
+ * Read a profile `.md`, strip its frontmatter, and return the trimmed body.
67
+ * Reads synchronously off the injected `runtime.fsSync` surface — this
68
+ * composer runs inside the synchronous SDK-option builders of the
23
69
  * supervisor / facilitator / discusser / judge factories, so it cannot go
24
70
  * async without an unbounded cascade.
25
71
  *
26
72
  * @param {string} name - Profile basename (no `.md` suffix)
73
+ * @param {string} profilesDir - Directory containing `<name>.md`
74
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
75
+ * @returns {string}
76
+ */
77
+ function readProfileBody(name, profilesDir, runtime) {
78
+ const path = join(profilesDir, `${name}.md`);
79
+ const raw = runtime.fsSync.readFileSync(path, "utf8");
80
+ return stripFrontmatter(raw).trim();
81
+ }
82
+
83
+ /**
84
+ * Compose a `claude_code`-preset system prompt from a profile file. The
85
+ * profile body is wrapped in `<agent_profile>`; an optional protocol trailer
86
+ * is wrapped in a sibling `<session_protocol>`.
87
+ *
88
+ * @param {string} name - Profile basename (no `.md` suffix)
27
89
  * @param {object} opts
28
90
  * @param {string} opts.profilesDir - Directory containing `<name>.md`
29
- * @param {string} [opts.trailer] - Mode-specific trailer appended after a blank line
91
+ * @param {string} [opts.trailer] - Session protocol, wrapped as a sibling
92
+ * `<session_protocol>` section after a blank line
30
93
  * @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
31
94
  * @returns {{type: "preset", preset: "claude_code", append: string}}
32
95
  */
33
96
  export function composeProfilePrompt(name, { profilesDir, trailer, runtime }) {
34
- const path = join(profilesDir, `${name}.md`);
35
- const raw = runtime.fsSync.readFileSync(path, "utf8");
36
- const body = stripFrontmatter(raw).trim();
37
- const append = trailer && trailer.length > 0 ? `${body}\n\n${trailer}` : body;
38
- return { type: "preset", preset: "claude_code", append };
97
+ const body = readProfileBody(name, profilesDir, runtime);
98
+ return {
99
+ type: "preset",
100
+ preset: "claude_code",
101
+ append: assembleSections({ body, protocol: trailer }),
102
+ };
39
103
  }
40
104
 
41
105
  /**
42
- * Compose a plain-string system prompt for a lead role (no Claude Code preset).
106
+ * Compose a plain-string system prompt for a lead role (no Claude Code
107
+ * preset). The protocol trailer is wrapped in `<session_protocol>`; an
108
+ * optional profile body is wrapped in a sibling `<agent_profile>` before it.
109
+ *
43
110
  * @param {object} opts
44
111
  * @param {string} [opts.profile] - Profile basename (no `.md` suffix)
45
112
  * @param {string} [opts.profilesDir] - Directory containing profile files
46
- * @param {string} opts.trailer - Mode-specific orchestration instructions
113
+ * @param {string} opts.trailer - Session protocol (orchestration instructions)
47
114
  * @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
48
115
  * @returns {string}
49
116
  */
50
117
  export function composeLeadPrompt({ profile, profilesDir, trailer, runtime }) {
51
118
  if (!trailer) throw new Error("trailer is required");
52
- if (!profile) return trailer;
53
- const path = join(profilesDir, `${profile}.md`);
54
- const raw = runtime.fsSync.readFileSync(path, "utf8");
55
- const body = stripFrontmatter(raw).trim();
56
- return `${body}\n\n${trailer}`;
119
+ const body = profile
120
+ ? readProfileBody(profile, profilesDir, runtime)
121
+ : undefined;
122
+ return assembleSections({ body, protocol: trailer });
57
123
  }
58
124
 
59
125
  /**
60
- * Unified entry point for composing system prompts.
126
+ * Unified entry point for composing system prompts. Folds an optional
127
+ * amendment into the protocol trailer — so it lands inside
128
+ * `<session_protocol>` — then delegates by role.
61
129
  *
62
130
  * @param {object} opts
63
131
  * @param {"lead"|"agent"} opts.role - `"lead"` produces a plain string;
64
132
  * `"agent"` produces a `claude_code` preset object.
65
133
  * @param {string} [opts.profile] - Profile basename
66
134
  * @param {string} [opts.profilesDir]
67
- * @param {string} opts.trailer - Mode-specific instructions
135
+ * @param {string} opts.trailer - Session protocol (orchestration instructions)
136
+ * @param {string} [opts.amend] - Caller-supplied amendment, appended inside
137
+ * `<session_protocol>` after the trailer with a blank-line separator.
68
138
  * @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
69
139
  * @returns {string | {type: "preset", preset: "claude_code", append: string}}
70
140
  */
@@ -73,16 +143,31 @@ export function composeSystemPrompt({
73
143
  profile,
74
144
  profilesDir,
75
145
  trailer,
146
+ amend,
76
147
  runtime,
77
148
  }) {
78
149
  if (!trailer) throw new Error("trailer is required");
150
+ const protocol = amend ? `${trailer}\n\n${amend}` : trailer;
79
151
  if (role === "lead") {
80
- return composeLeadPrompt({ profile, profilesDir, trailer, runtime });
152
+ return composeLeadPrompt({
153
+ profile,
154
+ profilesDir,
155
+ trailer: protocol,
156
+ runtime,
157
+ });
81
158
  }
82
159
  if (profile) {
83
- return composeProfilePrompt(profile, { profilesDir, trailer, runtime });
160
+ return composeProfilePrompt(profile, {
161
+ profilesDir,
162
+ trailer: protocol,
163
+ runtime,
164
+ });
84
165
  }
85
- return { type: "preset", preset: "claude_code", append: trailer };
166
+ return {
167
+ type: "preset",
168
+ preset: "claude_code",
169
+ append: assembleSections({ protocol }),
170
+ };
86
171
  }
87
172
 
88
173
  /**
package/src/redaction.js CHANGED
@@ -15,6 +15,7 @@ export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
15
15
  "DATABASE_PASSWORD",
16
16
  "GH_TOKEN",
17
17
  "GITHUB_TOKEN",
18
+ "JWT_SECRET",
18
19
  "MCP_TOKEN",
19
20
  "MICROSOFT_APP_ID",
20
21
  "MICROSOFT_APP_PASSWORD",
@@ -22,7 +23,6 @@ export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
22
23
  "PRODUCT_LANDMARK_TOKEN",
23
24
  "SERVICE_SECRET",
24
25
  "SUPABASE_ANON_KEY",
25
- "SUPABASE_JWT_SECRET",
26
26
  "SUPABASE_SERVICE_ROLE_KEY",
27
27
  ]);
28
28
 
@@ -135,7 +135,8 @@ export function createRedactor({
135
135
  patterns = DEFAULT_PATTERNS,
136
136
  enabled,
137
137
  } = {}) {
138
- const proc = runtime?.proc ?? defaultProc();
138
+ if (!runtime) throw new Error("runtime is required");
139
+ const proc = runtime.proc;
139
140
  const resolvedEnv = env ?? proc.env;
140
141
  const envDisabled = resolvedEnv.LIBEVAL_REDACTION_DISABLED === "1";
141
142
  const resolvedEnabled = enabled ?? !envDisabled;
@@ -151,20 +152,6 @@ export function createRedactor({
151
152
  return new Redactor({ envSnapshot, patterns, enabled: resolvedEnabled });
152
153
  }
153
154
 
154
- /**
155
- * Lazily build the production proc surface so callers that don't inject a
156
- * runtime keep working. Imported indirectly to avoid pulling the whole
157
- * runtime bag (and its `node:fs`/`node:child_process` imports) into modules
158
- * that only ever receive an injected runtime.
159
- * @returns {{env: Record<string, string|undefined>, stderr: {write: (s: string) => void}}}
160
- */
161
- function defaultProc() {
162
- return {
163
- env: globalThis.process?.env ?? {},
164
- stderr: { write: (s) => globalThis.process?.stderr?.write(s) },
165
- };
166
- }
167
-
168
155
  /**
169
156
  * Parse `LIBEVAL_REDACTION_ENV_VARS` into a trimmed, non-empty name list.
170
157
  * Falls back to `DEFAULT_ENV_ALLOWLIST` when unset or empty.
package/src/supervisor.js CHANGED
@@ -122,6 +122,7 @@ const devNull = new Writable({
122
122
  * @param {string[]} [deps.supervisorDisallowedTools]
123
123
  * @param {string} [deps.supervisorProfile]
124
124
  * @param {string} [deps.agentProfile]
125
+ * @param {string} [deps.agentSystemPromptAmend] - Amendment folded into the agent's `<session_protocol>` section, after the protocol trailer.
125
126
  * @param {string} [deps.profilesDir]
126
127
  * @param {string} [deps.taskAmend]
127
128
  * @param {Record<string, object>} [deps.agentMcpServers]
@@ -141,6 +142,7 @@ export function createSupervisor({
141
142
  supervisorDisallowedTools,
142
143
  supervisorProfile,
143
144
  agentProfile,
145
+ agentSystemPromptAmend,
144
146
  profilesDir,
145
147
  taskAmend,
146
148
  agentMcpServers,
@@ -182,6 +184,7 @@ export function createSupervisor({
182
184
  profile: agentProfile,
183
185
  profilesDir: resolvedProfilesDir,
184
186
  trailer: AGENT_SYSTEM_PROMPT,
187
+ amend: agentSystemPromptAmend,
185
188
  runtime,
186
189
  }),
187
190
  mcpServers: { orchestration: agentServer, ...agentMcpServers },