@forwardimpact/libeval 0.1.52 → 0.1.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-benchmark.js +8 -14
- package/bin/fit-eval.js +7 -14
- package/bin/fit-selfedit.js +6 -4
- package/bin/fit-trace.js +7 -14
- package/package.json +1 -1
- package/src/benchmark/result.js +2 -2
- package/src/benchmark/task-family.js +1 -1
- package/src/commands/benchmark-invariants.js +1 -1
- package/src/discusser.js +3 -5
- package/src/events/github.js +7 -1
- package/src/facilitator.js +2 -5
- package/src/inbox-poller.js +5 -8
- package/src/judge.js +10 -14
- package/src/profile-prompt.js +109 -24
- package/src/redaction.js +3 -16
- package/src/supervisor.js +3 -0
package/bin/fit-benchmark.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import "@forwardimpact/libpreflight/node22";
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import { realpathSync } from "node:fs";
|
|
6
6
|
import { createCli } from "@forwardimpact/libcli";
|
|
7
7
|
import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
|
|
8
8
|
import { createLogger } from "@forwardimpact/libtelemetry";
|
|
@@ -11,17 +11,8 @@ import { runBenchmarkRunCommand } from "../src/commands/benchmark-run.js";
|
|
|
11
11
|
import { runBenchmarkInvariantsCommand } from "../src/commands/benchmark-invariants.js";
|
|
12
12
|
import { runBenchmarkReportCommand } from "../src/commands/benchmark-report.js";
|
|
13
13
|
|
|
14
|
-
// `bun build --compile` injects FIT_BENCHMARK_VERSION via --define, eliminating
|
|
15
|
-
// the readFileSync branch in the compiled binary (which would ENOENT against
|
|
16
|
-
// the bunfs virtual mount). Source execution falls through to package.json.
|
|
17
|
-
const VERSION =
|
|
18
|
-
process.env.FIT_BENCHMARK_VERSION ||
|
|
19
|
-
JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
|
|
20
|
-
.version;
|
|
21
|
-
|
|
22
14
|
export const definition = {
|
|
23
15
|
name: "fit-benchmark",
|
|
24
|
-
version: VERSION,
|
|
25
16
|
description:
|
|
26
17
|
"Run coding-agent task families, grade hidden tests, and aggregate pass@k across runs.",
|
|
27
18
|
commands: [
|
|
@@ -156,11 +147,14 @@ export const definition = {
|
|
|
156
147
|
],
|
|
157
148
|
};
|
|
158
149
|
|
|
159
|
-
const
|
|
150
|
+
const runtime = createDefaultRuntime();
|
|
151
|
+
const logger = createLogger("benchmark", runtime);
|
|
160
152
|
|
|
161
153
|
async function main() {
|
|
162
|
-
const
|
|
163
|
-
|
|
154
|
+
const cli = createCli(definition, {
|
|
155
|
+
runtime,
|
|
156
|
+
packageJsonUrl: new URL("../package.json", import.meta.url),
|
|
157
|
+
});
|
|
164
158
|
const parsed = cli.parse(runtime.proc.argv.slice(2));
|
|
165
159
|
if (!parsed) return runtime.proc.exit(0);
|
|
166
160
|
|
|
@@ -187,7 +181,7 @@ async function main() {
|
|
|
187
181
|
if (import.meta.url === `file://${realpathSync(process.argv[1])}`) {
|
|
188
182
|
main().catch((error) => {
|
|
189
183
|
logger.exception("main", error);
|
|
190
|
-
createCli(definition).error(error.message);
|
|
184
|
+
createCli(definition, { runtime }).error(error.message);
|
|
191
185
|
process.exit(1);
|
|
192
186
|
});
|
|
193
187
|
}
|
package/bin/fit-eval.js
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import "@forwardimpact/libpreflight/node22";
|
|
4
4
|
|
|
5
|
-
import { readFileSync } from "node:fs";
|
|
6
5
|
import { createCli } from "@forwardimpact/libcli";
|
|
7
6
|
import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
|
|
8
7
|
import { createLogger } from "@forwardimpact/libtelemetry";
|
|
@@ -15,14 +14,6 @@ import { runFacilitateCommand } from "../src/commands/facilitate.js";
|
|
|
15
14
|
import { runDiscussCommand } from "../src/commands/discuss.js";
|
|
16
15
|
import { runCallbackCommand } from "../src/commands/callback.js";
|
|
17
16
|
|
|
18
|
-
// `bun build --compile` injects FIT_EVAL_VERSION via --define, eliminating
|
|
19
|
-
// the readFileSync branch in the compiled binary (which would ENOENT against
|
|
20
|
-
// the bunfs virtual mount). Source execution falls through to package.json.
|
|
21
|
-
const VERSION =
|
|
22
|
-
process.env.FIT_EVAL_VERSION ||
|
|
23
|
-
JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
|
|
24
|
-
.version;
|
|
25
|
-
|
|
26
17
|
const LEAD_OPTIONS = {
|
|
27
18
|
"lead-profile": {
|
|
28
19
|
type: "string",
|
|
@@ -60,7 +51,6 @@ const TASK_INPUT_OPTIONS = {
|
|
|
60
51
|
|
|
61
52
|
const definition = {
|
|
62
53
|
name: "fit-eval",
|
|
63
|
-
version: VERSION,
|
|
64
54
|
description:
|
|
65
55
|
"Run agents and capture NDJSON traces — for agent evaluations or multi-agent collaboration",
|
|
66
56
|
commands: [
|
|
@@ -313,11 +303,14 @@ const definition = {
|
|
|
313
303
|
],
|
|
314
304
|
};
|
|
315
305
|
|
|
316
|
-
const
|
|
306
|
+
const runtime = createDefaultRuntime();
|
|
307
|
+
const logger = createLogger("eval", runtime);
|
|
317
308
|
|
|
318
309
|
async function main() {
|
|
319
|
-
const
|
|
320
|
-
|
|
310
|
+
const cli = createCli(definition, {
|
|
311
|
+
runtime,
|
|
312
|
+
packageJsonUrl: new URL("../package.json", import.meta.url),
|
|
313
|
+
});
|
|
321
314
|
const parsed = cli.parse(runtime.proc.argv.slice(2));
|
|
322
315
|
if (!parsed) return runtime.proc.exit(0);
|
|
323
316
|
|
|
@@ -341,6 +334,6 @@ async function main() {
|
|
|
341
334
|
|
|
342
335
|
main().catch((error) => {
|
|
343
336
|
logger.exception("main", error);
|
|
344
|
-
createCli(definition).error(error.message);
|
|
337
|
+
createCli(definition, { runtime }).error(error.message);
|
|
345
338
|
process.exit(1);
|
|
346
339
|
});
|
package/bin/fit-selfedit.js
CHANGED
|
@@ -7,12 +7,11 @@
|
|
|
7
7
|
|
|
8
8
|
import "@forwardimpact/libpreflight/node22";
|
|
9
9
|
import { existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
10
|
-
import fsPromises from "node:fs/promises";
|
|
11
10
|
import { parseArgs } from "node:util";
|
|
12
11
|
import { resolve, relative, dirname } from "node:path";
|
|
13
12
|
import { execFileSync } from "node:child_process";
|
|
14
13
|
|
|
15
|
-
import {
|
|
14
|
+
import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
|
|
16
15
|
import { minimatch } from "minimatch";
|
|
17
16
|
|
|
18
17
|
const HELP = `fit-selfedit — write stdin to a settings.json-allowed path on a non-main branch.
|
|
@@ -71,8 +70,11 @@ if (extra.length > 0) fail(`unexpected extra arguments: ${extra.join(" ")}`);
|
|
|
71
70
|
|
|
72
71
|
const absoluteTarget = resolve(process.cwd(), targetArg);
|
|
73
72
|
|
|
74
|
-
// Safeguard 1: settings.json must grant Edit() on this path.
|
|
75
|
-
|
|
73
|
+
// Safeguard 1: settings.json must grant Edit() on this path. The bin is the
|
|
74
|
+
// sole construction site for the runtime; resolve the finder off the bag
|
|
75
|
+
// rather than constructing a Finder here (Success Criterion 9).
|
|
76
|
+
const runtime = createDefaultRuntime();
|
|
77
|
+
const settingsPath = runtime.finder.findUpward(
|
|
76
78
|
dirname(absoluteTarget),
|
|
77
79
|
".claude/settings.json",
|
|
78
80
|
20,
|
package/bin/fit-trace.js
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import "@forwardimpact/libpreflight/node22";
|
|
4
4
|
|
|
5
|
-
import { readFileSync } from "node:fs";
|
|
6
5
|
import { createCli } from "@forwardimpact/libcli";
|
|
7
6
|
import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
|
|
8
7
|
import { createScriptConfig } from "@forwardimpact/libconfig";
|
|
@@ -31,17 +30,8 @@ import {
|
|
|
31
30
|
import { runAssertCommand } from "../src/commands/assert.js";
|
|
32
31
|
import { runByDiscussionCommand } from "../src/commands/by-discussion.js";
|
|
33
32
|
|
|
34
|
-
// `bun build --compile` injects FIT_TRACE_VERSION via --define, eliminating
|
|
35
|
-
// the readFileSync branch in the compiled binary (which would ENOENT against
|
|
36
|
-
// the bunfs virtual mount). Source execution falls through to package.json.
|
|
37
|
-
const VERSION =
|
|
38
|
-
process.env.FIT_TRACE_VERSION ||
|
|
39
|
-
JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
|
|
40
|
-
.version;
|
|
41
|
-
|
|
42
33
|
const definition = {
|
|
43
34
|
name: "fit-trace",
|
|
44
|
-
version: VERSION,
|
|
45
35
|
description:
|
|
46
36
|
"Download, query, and analyze agent execution traces — read NDJSON output from fit-eval as qualitative research",
|
|
47
37
|
commands: [
|
|
@@ -340,15 +330,18 @@ const definition = {
|
|
|
340
330
|
],
|
|
341
331
|
};
|
|
342
332
|
|
|
343
|
-
const
|
|
333
|
+
const runtime = createDefaultRuntime();
|
|
334
|
+
const logger = createLogger("trace", runtime);
|
|
344
335
|
|
|
345
336
|
// Commands that talk to the GitHub API need a config-backed token resolver;
|
|
346
337
|
// the rest only read local trace files through the runtime.
|
|
347
338
|
const NEEDS_CONFIG = new Set(["runs", "download"]);
|
|
348
339
|
|
|
349
340
|
async function main() {
|
|
350
|
-
const
|
|
351
|
-
|
|
341
|
+
const cli = createCli(definition, {
|
|
342
|
+
runtime,
|
|
343
|
+
packageJsonUrl: new URL("../package.json", import.meta.url),
|
|
344
|
+
});
|
|
352
345
|
const parsed = cli.parse(runtime.proc.argv.slice(2));
|
|
353
346
|
if (!parsed) return runtime.proc.exit(0);
|
|
354
347
|
|
|
@@ -376,6 +369,6 @@ async function main() {
|
|
|
376
369
|
|
|
377
370
|
main().catch((error) => {
|
|
378
371
|
logger.exception("main", error);
|
|
379
|
-
createCli(definition).error(error.message);
|
|
372
|
+
createCli(definition, { runtime }).error(error.message);
|
|
380
373
|
process.exit(1);
|
|
381
374
|
});
|
package/package.json
CHANGED
package/src/benchmark/result.js
CHANGED
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
* - RESULT_RECORD_SCHEMA — one record per (task, runIndex) from a full
|
|
6
6
|
* benchmark run. Has a happy branch (invariants + judge present) and a
|
|
7
7
|
* pre-flight-failure branch (invariants/judgeVerdict/submission absent).
|
|
8
|
-
* - INVARIANTS_RECORD_SCHEMA — narrower output of `benchmark-invariants
|
|
9
|
-
*
|
|
8
|
+
* - INVARIANTS_RECORD_SCHEMA — narrower output of `benchmark-invariants`:
|
|
9
|
+
* ad-hoc grading without a full lifecycle.
|
|
10
10
|
*
|
|
11
11
|
* Validation is throw-on-mismatch so the runner can wrap every JSONL append
|
|
12
12
|
* in a guard and reject schema drift at write time.
|
package/src/benchmark/task-family.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Task-family loader. A task family is a directory under
|
|
3
3
|
* <root>/
|
|
4
4
|
* apm.lock.yaml
|
|
5
|
-
* .claude/ # pre-staged skills + agents
|
|
5
|
+
* .claude/ # pre-staged skills + agents
|
|
6
6
|
* tasks/<task_name>/
|
|
7
7
|
* agent.task.md
|
|
8
8
|
* supervisor.task.md # optional; appended to the task as supervisor context
|
package/src/commands/benchmark-invariants.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* `fit-benchmark invariants` — check a single task's invariants against a
|
|
3
|
-
* post-run workdir directory without invoking an agent
|
|
3
|
+
* post-run workdir directory without invoking an agent. Useful for
|
|
4
4
|
* re-checking an agent's output against revised grading material.
|
|
5
5
|
*/
|
|
6
6
|
|
package/src/discusser.js
CHANGED
|
@@ -274,6 +274,7 @@ export function createDiscusser({
|
|
|
274
274
|
messageBus,
|
|
275
275
|
leadName: "lead",
|
|
276
276
|
signal: abortController.signal,
|
|
277
|
+
runtime,
|
|
277
278
|
})
|
|
278
279
|
: null;
|
|
279
280
|
|
|
@@ -309,10 +310,6 @@ export function createDiscusser({
|
|
|
309
310
|
from: config.name,
|
|
310
311
|
});
|
|
311
312
|
|
|
312
|
-
const agentTrailer = config.systemPromptAmend
|
|
313
|
-
? `${DISCUSS_AGENT_SYSTEM_PROMPT}\n\n${config.systemPromptAmend}`
|
|
314
|
-
: DISCUSS_AGENT_SYSTEM_PROMPT;
|
|
315
|
-
|
|
316
313
|
const runner = createAgentRunner({
|
|
317
314
|
cwd: config.cwd ?? resolvedLeadCwd,
|
|
318
315
|
query,
|
|
@@ -327,7 +324,8 @@ export function createDiscusser({
|
|
|
327
324
|
role: "agent",
|
|
328
325
|
profile: config.agentProfile,
|
|
329
326
|
profilesDir: resolvedProfilesDir,
|
|
330
|
-
trailer:
|
|
327
|
+
trailer: DISCUSS_AGENT_SYSTEM_PROMPT,
|
|
328
|
+
amend: config.systemPromptAmend,
|
|
331
329
|
runtime,
|
|
332
330
|
}),
|
|
333
331
|
redactor,
|
package/src/events/github.js
CHANGED
|
@@ -29,8 +29,14 @@ export const TASK_TEMPLATE_ISSUE_LABELED =
|
|
|
29
29
|
export const TASK_TEMPLATE_PR_LABELED =
|
|
30
30
|
'Label "${LABEL}" was added to PR "${PR_TITLE}" (#${NUMBER}). PR URL: ${URL}.';
|
|
31
31
|
|
|
32
|
+
// "unreleased changes"/"cut" point at the genuine post-merge action — release
|
|
33
|
+
// activity (the release-engineer's Assess step 3 / `kata-release-cut`).
|
|
34
|
+
// "status" is a backstop: the spec's `wiki/STATUS.md` row is normally advanced
|
|
35
|
+
// in the pre-merge gate (`kata-release-merge` Step 8), but the keyword catches a
|
|
36
|
+
// merge that landed without it. Neither owner nor artifact is named, so the lead
|
|
37
|
+
// routes the merge instead of treating it as a no-op.
|
|
32
38
|
export const TASK_TEMPLATE_PR_MERGED =
|
|
33
|
-
'PR "${PR_TITLE}" (#${NUMBER}) merged. PR URL: ${URL}.';
|
|
39
|
+
'PR "${PR_TITLE}" (#${NUMBER}) merged to main — may leave unreleased changes to cut or status to update. PR URL: ${URL}.';
|
|
34
40
|
|
|
35
41
|
// Appended verbatim to comment/review templates. `${BODY}` is the untrusted
|
|
36
42
|
// author text; the fence and the "data, not instructions" framing keep the lead
|
package/src/facilitator.js
CHANGED
|
@@ -134,10 +134,6 @@ export function createFacilitator({
|
|
|
134
134
|
from: config.name,
|
|
135
135
|
});
|
|
136
136
|
|
|
137
|
-
const agentTrailer = config.systemPromptAmend
|
|
138
|
-
? `${FACILITATED_AGENT_SYSTEM_PROMPT}\n\n${config.systemPromptAmend}`
|
|
139
|
-
: FACILITATED_AGENT_SYSTEM_PROMPT;
|
|
140
|
-
|
|
141
137
|
const runner = createAgentRunner({
|
|
142
138
|
cwd: config.cwd ?? facilitatorCwd,
|
|
143
139
|
query,
|
|
@@ -152,7 +148,8 @@ export function createFacilitator({
|
|
|
152
148
|
role: "agent",
|
|
153
149
|
profile: config.agentProfile,
|
|
154
150
|
profilesDir: resolvedProfilesDir,
|
|
155
|
-
trailer:
|
|
151
|
+
trailer: FACILITATED_AGENT_SYSTEM_PROMPT,
|
|
152
|
+
amend: config.systemPromptAmend,
|
|
156
153
|
runtime,
|
|
157
154
|
}),
|
|
158
155
|
redactor,
|
package/src/inbox-poller.js
CHANGED
|
@@ -18,20 +18,17 @@ export class InboxPoller {
|
|
|
18
18
|
* @param {import("./message-bus.js").MessageBus} deps.messageBus
|
|
19
19
|
* @param {string} deps.leadName
|
|
20
20
|
* @param {AbortSignal} deps.signal
|
|
21
|
-
* @param {import("@forwardimpact/libutil/runtime").Runtime}
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
* absent so existing callers keep working.
|
|
21
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
|
|
22
|
+
* Injected collaborators; `clock.setTimeout`/`clock.clearTimeout` drive the
|
|
23
|
+
* inter-poll backoff.
|
|
25
24
|
*/
|
|
26
25
|
constructor({ inboxUrl, messageBus, leadName, signal, runtime }) {
|
|
26
|
+
if (!runtime) throw new Error("runtime is required");
|
|
27
27
|
this.#inboxUrl = inboxUrl;
|
|
28
28
|
this.#messageBus = messageBus;
|
|
29
29
|
this.#leadName = leadName;
|
|
30
30
|
this.#signal = signal;
|
|
31
|
-
this.#clock = runtime
|
|
32
|
-
setTimeout: (fn, ms) => globalThis.setTimeout(fn, ms),
|
|
33
|
-
clearTimeout: (h) => globalThis.clearTimeout(h),
|
|
34
|
-
};
|
|
31
|
+
this.#clock = runtime.clock;
|
|
35
32
|
}
|
|
36
33
|
|
|
37
34
|
/** Long-poll the inbox until the abort signal fires. */
|
package/src/judge.js
CHANGED
|
@@ -17,7 +17,7 @@ import { resolve } from "node:path";
|
|
|
17
17
|
import { Writable } from "node:stream";
|
|
18
18
|
|
|
19
19
|
import { createAgentRunner } from "./agent-runner.js";
|
|
20
|
-
import {
|
|
20
|
+
import { composeSystemPrompt } from "./profile-prompt.js";
|
|
21
21
|
import { SequenceCounter } from "./sequence-counter.js";
|
|
22
22
|
import {
|
|
23
23
|
createJudgeToolServer,
|
|
@@ -140,7 +140,7 @@ export class Judge {
|
|
|
140
140
|
/**
|
|
141
141
|
* Factory function — wires the AgentRunner with the judge orchestration server
|
|
142
142
|
* and the JUDGE_SYSTEM_PROMPT trailer. A `judgeProfile` (when supplied) layers
|
|
143
|
-
* on top of the trailer via `
|
|
143
|
+
* on top of the trailer via `composeSystemPrompt`, matching the
|
|
144
144
|
* supervisor/facilitator pattern.
|
|
145
145
|
*
|
|
146
146
|
* @param {object} deps
|
|
@@ -151,7 +151,7 @@ export class Judge {
|
|
|
151
151
|
* @param {string} [deps.model]
|
|
152
152
|
* @param {number} [deps.maxTurns] - Default 5 (the judge is expected to act in turn 1; 5 leaves headroom for tool inspection).
|
|
153
153
|
* @param {string[]} [deps.allowedTools] - Default `["Read","Glob","Grep","Bash"]` — read-only inspection.
|
|
154
|
-
* @param {string} [deps.judgeProfile] - Profile name; resolved into the system prompt via `
|
|
154
|
+
* @param {string} [deps.judgeProfile] - Profile name; resolved into the system prompt via `composeSystemPrompt`.
|
|
155
155
|
* @param {string} [deps.profilesDir] - Defaults to `<cwd>/.claude/agents`.
|
|
156
156
|
* @param {string} [deps.taskAmend]
|
|
157
157
|
* @returns {Judge}
|
|
@@ -176,17 +176,13 @@ export function createJudge({
|
|
|
176
176
|
if (!runtime) throw new Error("runtime is required");
|
|
177
177
|
|
|
178
178
|
const resolvedProfilesDir = profilesDir ?? resolve(cwd, ".claude/agents");
|
|
179
|
-
const systemPrompt =
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
type: "preset",
|
|
187
|
-
preset: "claude_code",
|
|
188
|
-
append: JUDGE_SYSTEM_PROMPT,
|
|
189
|
-
};
|
|
179
|
+
const systemPrompt = composeSystemPrompt({
|
|
180
|
+
role: "agent",
|
|
181
|
+
profile: judgeProfile,
|
|
182
|
+
profilesDir: resolvedProfilesDir,
|
|
183
|
+
trailer: JUDGE_SYSTEM_PROMPT,
|
|
184
|
+
runtime,
|
|
185
|
+
});
|
|
190
186
|
|
|
191
187
|
const ctx = createOrchestrationContext();
|
|
192
188
|
ctx.participants = [{ name: "judge", role: "judge" }];
|
package/src/profile-prompt.js
CHANGED
|
@@ -1,7 +1,25 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* System prompt composition for agent runners.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* libeval assembles every agent system prompt from up to two parallel,
|
|
5
|
+
* sibling-tagged sections (see COALIGNED.md § L0):
|
|
6
|
+
*
|
|
7
|
+
* <agent_profile>
|
|
8
|
+
* …persona body…
|
|
9
|
+
* </agent_profile>
|
|
10
|
+
*
|
|
11
|
+
* <session_protocol>
|
|
12
|
+
* …orchestration mechanics, then any amendment…
|
|
13
|
+
* </session_protocol>
|
|
14
|
+
*
|
|
15
|
+
* The two tags are siblings joined by a blank line — neither nests inside
|
|
16
|
+
* the other. A section appears only when its content is present. A
|
|
17
|
+
* system-prompt amendment is folded into the protocol trailer before
|
|
18
|
+
* wrapping, so it lands transparently inside `<session_protocol>`. The tag
|
|
19
|
+
* convention lives entirely here: profile `.md` files and trailer constants
|
|
20
|
+
* carry no tags.
|
|
21
|
+
*
|
|
22
|
+
* Helpers:
|
|
5
23
|
*
|
|
6
24
|
* - `composeProfilePrompt(name, opts)` — profile + `claude_code` preset.
|
|
7
25
|
* Used by agent participants that need the full Claude Code tool surface.
|
|
@@ -10,61 +28,113 @@
|
|
|
10
28
|
* roles (supervisor, facilitator, discuss lead) that should only see
|
|
11
29
|
* the orchestration instructions and optionally a profile body.
|
|
12
30
|
*
|
|
13
|
-
* - `composeSystemPrompt(opts)` — unified entry point.
|
|
14
|
-
* of the above based on
|
|
31
|
+
* - `composeSystemPrompt(opts)` — unified entry point. Folds `amend` into
|
|
32
|
+
* the protocol section, then delegates to one of the above based on
|
|
33
|
+
* `opts.role`.
|
|
15
34
|
*/
|
|
16
35
|
|
|
17
36
|
import { join } from "node:path";
|
|
18
37
|
|
|
38
|
+
/** Sibling section tags. Neither nests inside the other. */
|
|
39
|
+
const AGENT_PROFILE_TAG = "agent_profile";
|
|
40
|
+
const SESSION_PROTOCOL_TAG = "session_protocol";
|
|
41
|
+
|
|
42
|
+
/** Wrap content in a semantic section tag, each on its own line. */
|
|
43
|
+
function wrapSection(tag, content) {
|
|
44
|
+
return `<${tag}>\n${content}\n</${tag}>`;
|
|
45
|
+
}
|
|
46
|
+
|
|
19
47
|
/**
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
48
|
+
* Assemble the parallel `<agent_profile>` / `<session_protocol>` sections.
|
|
49
|
+
* Each section is emitted only when its content is non-empty; the two tags
|
|
50
|
+
* are siblings joined by a blank line and never nest.
|
|
51
|
+
*
|
|
52
|
+
* @param {object} parts
|
|
53
|
+
* @param {string} [parts.body] - Profile body, already frontmatter-stripped.
|
|
54
|
+
* @param {string} [parts.protocol] - Session protocol trailer, with any
|
|
55
|
+
* amendment already folded in.
|
|
56
|
+
* @returns {string}
|
|
57
|
+
*/
|
|
58
|
+
function assembleSections({ body, protocol }) {
|
|
59
|
+
const sections = [];
|
|
60
|
+
if (body) sections.push(wrapSection(AGENT_PROFILE_TAG, body));
|
|
61
|
+
if (protocol) sections.push(wrapSection(SESSION_PROTOCOL_TAG, protocol));
|
|
62
|
+
return sections.join("\n\n");
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Read a profile `.md`, strip its frontmatter, and return the trimmed body.
|
|
67
|
+
* Reads synchronously off the injected `runtime.fsSync` surface — this
|
|
68
|
+
* composer runs inside the synchronous SDK-option builders of the
|
|
23
69
|
* supervisor / facilitator / discusser / judge factories, so it cannot go
|
|
24
70
|
* async without an unbounded cascade.
|
|
25
71
|
*
|
|
26
72
|
* @param {string} name - Profile basename (no `.md` suffix)
|
|
73
|
+
* @param {string} profilesDir - Directory containing `<name>.md`
|
|
74
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
75
|
+
* @returns {string}
|
|
76
|
+
*/
|
|
77
|
+
function readProfileBody(name, profilesDir, runtime) {
|
|
78
|
+
const path = join(profilesDir, `${name}.md`);
|
|
79
|
+
const raw = runtime.fsSync.readFileSync(path, "utf8");
|
|
80
|
+
return stripFrontmatter(raw).trim();
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Compose a `claude_code`-preset system prompt from a profile file. The
|
|
85
|
+
* profile body is wrapped in `<agent_profile>`; an optional protocol trailer
|
|
86
|
+
* is wrapped in a sibling `<session_protocol>`.
|
|
87
|
+
*
|
|
88
|
+
* @param {string} name - Profile basename (no `.md` suffix)
|
|
27
89
|
* @param {object} opts
|
|
28
90
|
* @param {string} opts.profilesDir - Directory containing `<name>.md`
|
|
29
|
-
* @param {string} [opts.trailer] -
|
|
91
|
+
* @param {string} [opts.trailer] - Session protocol, wrapped as a sibling
|
|
92
|
+
* `<session_protocol>` section after a blank line
|
|
30
93
|
* @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
|
|
31
94
|
* @returns {{type: "preset", preset: "claude_code", append: string}}
|
|
32
95
|
*/
|
|
33
96
|
export function composeProfilePrompt(name, { profilesDir, trailer, runtime }) {
|
|
34
|
-
const
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
97
|
+
const body = readProfileBody(name, profilesDir, runtime);
|
|
98
|
+
return {
|
|
99
|
+
type: "preset",
|
|
100
|
+
preset: "claude_code",
|
|
101
|
+
append: assembleSections({ body, protocol: trailer }),
|
|
102
|
+
};
|
|
39
103
|
}
|
|
40
104
|
|
|
41
105
|
/**
|
|
42
|
-
* Compose a plain-string system prompt for a lead role (no Claude Code
|
|
106
|
+
* Compose a plain-string system prompt for a lead role (no Claude Code
|
|
107
|
+
* preset). The protocol trailer is wrapped in `<session_protocol>`; an
|
|
108
|
+
* optional profile body is wrapped in a sibling `<agent_profile>` before it.
|
|
109
|
+
*
|
|
43
110
|
* @param {object} opts
|
|
44
111
|
* @param {string} [opts.profile] - Profile basename (no `.md` suffix)
|
|
45
112
|
* @param {string} [opts.profilesDir] - Directory containing profile files
|
|
46
|
-
* @param {string} opts.trailer -
|
|
113
|
+
* @param {string} opts.trailer - Session protocol (orchestration instructions)
|
|
47
114
|
* @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
|
|
48
115
|
* @returns {string}
|
|
49
116
|
*/
|
|
50
117
|
export function composeLeadPrompt({ profile, profilesDir, trailer, runtime }) {
|
|
51
118
|
if (!trailer) throw new Error("trailer is required");
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
return `${body}\n\n${trailer}`;
|
|
119
|
+
const body = profile
|
|
120
|
+
? readProfileBody(profile, profilesDir, runtime)
|
|
121
|
+
: undefined;
|
|
122
|
+
return assembleSections({ body, protocol: trailer });
|
|
57
123
|
}
|
|
58
124
|
|
|
59
125
|
/**
|
|
60
|
-
* Unified entry point for composing system prompts.
|
|
126
|
+
* Unified entry point for composing system prompts. Folds an optional
|
|
127
|
+
* amendment into the protocol trailer — so it lands inside
|
|
128
|
+
* `<session_protocol>` — then delegates by role.
|
|
61
129
|
*
|
|
62
130
|
* @param {object} opts
|
|
63
131
|
* @param {"lead"|"agent"} opts.role - `"lead"` produces a plain string;
|
|
64
132
|
* `"agent"` produces a `claude_code` preset object.
|
|
65
133
|
* @param {string} [opts.profile] - Profile basename
|
|
66
134
|
* @param {string} [opts.profilesDir]
|
|
67
|
-
* @param {string} opts.trailer -
|
|
135
|
+
* @param {string} opts.trailer - Session protocol (orchestration instructions)
|
|
136
|
+
* @param {string} [opts.amend] - Caller-supplied amendment, appended inside
|
|
137
|
+
* `<session_protocol>` after the trailer with a blank-line separator.
|
|
68
138
|
* @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
|
|
69
139
|
* @returns {string | {type: "preset", preset: "claude_code", append: string}}
|
|
70
140
|
*/
|
|
@@ -73,16 +143,31 @@ export function composeSystemPrompt({
|
|
|
73
143
|
profile,
|
|
74
144
|
profilesDir,
|
|
75
145
|
trailer,
|
|
146
|
+
amend,
|
|
76
147
|
runtime,
|
|
77
148
|
}) {
|
|
78
149
|
if (!trailer) throw new Error("trailer is required");
|
|
150
|
+
const protocol = amend ? `${trailer}\n\n${amend}` : trailer;
|
|
79
151
|
if (role === "lead") {
|
|
80
|
-
return composeLeadPrompt({
|
|
152
|
+
return composeLeadPrompt({
|
|
153
|
+
profile,
|
|
154
|
+
profilesDir,
|
|
155
|
+
trailer: protocol,
|
|
156
|
+
runtime,
|
|
157
|
+
});
|
|
81
158
|
}
|
|
82
159
|
if (profile) {
|
|
83
|
-
return composeProfilePrompt(profile, {
|
|
160
|
+
return composeProfilePrompt(profile, {
|
|
161
|
+
profilesDir,
|
|
162
|
+
trailer: protocol,
|
|
163
|
+
runtime,
|
|
164
|
+
});
|
|
84
165
|
}
|
|
85
|
-
return {
|
|
166
|
+
return {
|
|
167
|
+
type: "preset",
|
|
168
|
+
preset: "claude_code",
|
|
169
|
+
append: assembleSections({ protocol }),
|
|
170
|
+
};
|
|
86
171
|
}
|
|
87
172
|
|
|
88
173
|
/**
|
package/src/redaction.js
CHANGED
|
@@ -15,6 +15,7 @@ export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
|
|
|
15
15
|
"DATABASE_PASSWORD",
|
|
16
16
|
"GH_TOKEN",
|
|
17
17
|
"GITHUB_TOKEN",
|
|
18
|
+
"JWT_SECRET",
|
|
18
19
|
"MCP_TOKEN",
|
|
19
20
|
"MICROSOFT_APP_ID",
|
|
20
21
|
"MICROSOFT_APP_PASSWORD",
|
|
@@ -22,7 +23,6 @@ export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
|
|
|
22
23
|
"PRODUCT_LANDMARK_TOKEN",
|
|
23
24
|
"SERVICE_SECRET",
|
|
24
25
|
"SUPABASE_ANON_KEY",
|
|
25
|
-
"SUPABASE_JWT_SECRET",
|
|
26
26
|
"SUPABASE_SERVICE_ROLE_KEY",
|
|
27
27
|
]);
|
|
28
28
|
|
|
@@ -135,7 +135,8 @@ export function createRedactor({
|
|
|
135
135
|
patterns = DEFAULT_PATTERNS,
|
|
136
136
|
enabled,
|
|
137
137
|
} = {}) {
|
|
138
|
-
|
|
138
|
+
if (!runtime) throw new Error("runtime is required");
|
|
139
|
+
const proc = runtime.proc;
|
|
139
140
|
const resolvedEnv = env ?? proc.env;
|
|
140
141
|
const envDisabled = resolvedEnv.LIBEVAL_REDACTION_DISABLED === "1";
|
|
141
142
|
const resolvedEnabled = enabled ?? !envDisabled;
|
|
@@ -151,20 +152,6 @@ export function createRedactor({
|
|
|
151
152
|
return new Redactor({ envSnapshot, patterns, enabled: resolvedEnabled });
|
|
152
153
|
}
|
|
153
154
|
|
|
154
|
-
/**
|
|
155
|
-
* Lazily build the production proc surface so callers that don't inject a
|
|
156
|
-
* runtime keep working. Imported indirectly to avoid pulling the whole
|
|
157
|
-
* runtime bag (and its `node:fs`/`node:child_process` imports) into modules
|
|
158
|
-
* that only ever receive an injected runtime.
|
|
159
|
-
* @returns {{env: Record<string, string|undefined>, stderr: {write: (s: string) => void}}}
|
|
160
|
-
*/
|
|
161
|
-
function defaultProc() {
|
|
162
|
-
return {
|
|
163
|
-
env: globalThis.process?.env ?? {},
|
|
164
|
-
stderr: { write: (s) => globalThis.process?.stderr?.write(s) },
|
|
165
|
-
};
|
|
166
|
-
}
|
|
167
|
-
|
|
168
155
|
/**
|
|
169
156
|
* Parse `LIBEVAL_REDACTION_ENV_VARS` into a trimmed, non-empty name list.
|
|
170
157
|
* Falls back to `DEFAULT_ENV_ALLOWLIST` when unset or empty.
|
package/src/supervisor.js
CHANGED
|
@@ -122,6 +122,7 @@ const devNull = new Writable({
|
|
|
122
122
|
* @param {string[]} [deps.supervisorDisallowedTools]
|
|
123
123
|
* @param {string} [deps.supervisorProfile]
|
|
124
124
|
* @param {string} [deps.agentProfile]
|
|
125
|
+
* @param {string} [deps.agentSystemPromptAmend] - Amendment folded into the agent's `<session_protocol>` section, after the protocol trailer.
|
|
125
126
|
* @param {string} [deps.profilesDir]
|
|
126
127
|
* @param {string} [deps.taskAmend]
|
|
127
128
|
* @param {Record<string, object>} [deps.agentMcpServers]
|
|
@@ -141,6 +142,7 @@ export function createSupervisor({
|
|
|
141
142
|
supervisorDisallowedTools,
|
|
142
143
|
supervisorProfile,
|
|
143
144
|
agentProfile,
|
|
145
|
+
agentSystemPromptAmend,
|
|
144
146
|
profilesDir,
|
|
145
147
|
taskAmend,
|
|
146
148
|
agentMcpServers,
|
|
@@ -182,6 +184,7 @@ export function createSupervisor({
|
|
|
182
184
|
profile: agentProfile,
|
|
183
185
|
profilesDir: resolvedProfilesDir,
|
|
184
186
|
trailer: AGENT_SYSTEM_PROMPT,
|
|
187
|
+
amend: agentSystemPromptAmend,
|
|
185
188
|
runtime,
|
|
186
189
|
}),
|
|
187
190
|
mcpServers: { orchestration: agentServer, ...agentMcpServers },
|