@forwardimpact/libeval 0.1.51 → 0.1.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-benchmark.js +8 -14
- package/bin/fit-eval.js +8 -28
- package/bin/fit-selfedit.js +6 -4
- package/bin/fit-trace.js +7 -14
- package/package.json +1 -1
- package/src/benchmark/apm-installer.js +48 -44
- package/src/benchmark/invariants.js +51 -63
- package/src/benchmark/judge.js +13 -11
- package/src/benchmark/npm-installer.js +33 -33
- package/src/benchmark/report.js +25 -11
- package/src/benchmark/result.js +2 -2
- package/src/benchmark/runner.js +82 -38
- package/src/benchmark/task-family.js +74 -63
- package/src/benchmark/workdir.js +91 -99
- package/src/commands/benchmark-invariants.js +3 -3
- package/src/commands/benchmark-report.js +1 -0
- package/src/commands/benchmark-run.js +1 -1
- package/src/commands/by-discussion.js +10 -11
- package/src/commands/discuss.js +3 -2
- package/src/commands/facilitate.js +3 -2
- package/src/commands/output.js +4 -1
- package/src/commands/run.js +6 -2
- package/src/commands/supervise.js +3 -2
- package/src/commands/tee.js +24 -9
- package/src/commands/trace.js +7 -2
- package/src/discusser.js +7 -5
- package/src/events/github.js +7 -1
- package/src/facilitator.js +6 -5
- package/src/inbox-poller.js +5 -8
- package/src/judge.js +12 -13
- package/src/profile-prompt.js +124 -26
- package/src/redaction.js +3 -16
- package/src/supervisor.js +7 -0
- package/src/tee-writer.js +4 -2
- package/src/trace-collector.js +9 -2
- package/src/trace-github.js +47 -27
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { createWriteStream } from "node:fs";
|
|
2
1
|
import { resolve } from "node:path";
|
|
2
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
3
3
|
import { createFacilitator } from "../facilitator.js";
|
|
4
4
|
import { createRedactor } from "../redaction.js";
|
|
5
5
|
import { createTeeWriter } from "../tee-writer.js";
|
|
@@ -76,13 +76,14 @@ export async function runFacilitateCommand(ctx) {
|
|
|
76
76
|
const redactor = createRedactor({ runtime });
|
|
77
77
|
|
|
78
78
|
const fileStream = opts.outputPath
|
|
79
|
-
? createWriteStream(opts.outputPath)
|
|
79
|
+
? runtime.fs.createWriteStream(opts.outputPath)
|
|
80
80
|
: null;
|
|
81
81
|
const output = fileStream
|
|
82
82
|
? createTeeWriter({
|
|
83
83
|
fileStream,
|
|
84
84
|
textStream: runtime.proc.stdout,
|
|
85
85
|
mode: "supervised",
|
|
86
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
86
87
|
})
|
|
87
88
|
: runtime.proc.stdout;
|
|
88
89
|
|
package/src/commands/output.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
1
2
|
import { createTraceCollector } from "@forwardimpact/libeval";
|
|
2
3
|
|
|
3
4
|
/**
|
|
@@ -16,7 +17,9 @@ export async function runOutputCommand(ctx) {
|
|
|
16
17
|
values.format === "text" || values.format === "json"
|
|
17
18
|
? values.format
|
|
18
19
|
: "json";
|
|
19
|
-
const collector = createTraceCollector(
|
|
20
|
+
const collector = createTraceCollector({
|
|
21
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
22
|
+
});
|
|
20
23
|
|
|
21
24
|
// `runtime.proc.stdin` is an AsyncIterable of UTF-8 lines (newline-split by
|
|
22
25
|
// the runtime), so each yielded value is exactly one NDJSON record.
|
package/src/commands/run.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { createWriteStream } from "node:fs";
|
|
2
1
|
import { Writable } from "node:stream";
|
|
3
2
|
import { resolve } from "node:path";
|
|
3
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
4
4
|
import { createAgentRunner } from "../agent-runner.js";
|
|
5
5
|
import { composeProfilePrompt } from "../profile-prompt.js";
|
|
6
6
|
import { createRedactor } from "../redaction.js";
|
|
@@ -67,12 +67,15 @@ export async function runRunCommand(ctx) {
|
|
|
67
67
|
|
|
68
68
|
// When --output is specified, stream text to stdout while writing NDJSON to file.
|
|
69
69
|
// Otherwise, write NDJSON directly to stdout (backwards-compatible).
|
|
70
|
-
const fileStream = outputPath
|
|
70
|
+
const fileStream = outputPath
|
|
71
|
+
? runtime.fs.createWriteStream(outputPath)
|
|
72
|
+
: null;
|
|
71
73
|
const output = fileStream
|
|
72
74
|
? createTeeWriter({
|
|
73
75
|
fileStream,
|
|
74
76
|
textStream: runtime.proc.stdout,
|
|
75
77
|
mode: "raw",
|
|
78
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
76
79
|
})
|
|
77
80
|
: runtime.proc.stdout;
|
|
78
81
|
|
|
@@ -108,6 +111,7 @@ export async function runRunCommand(ctx) {
|
|
|
108
111
|
const systemPrompt = agentProfile
|
|
109
112
|
? composeProfilePrompt(agentProfile, {
|
|
110
113
|
profilesDir: resolve(cwd, ".claude/agents"),
|
|
114
|
+
runtime,
|
|
111
115
|
})
|
|
112
116
|
: undefined;
|
|
113
117
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { createWriteStream } from "node:fs";
|
|
2
1
|
import { resolve, join } from "node:path";
|
|
2
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
3
3
|
import { createSupervisor } from "../supervisor.js";
|
|
4
4
|
import { createRedactor } from "../redaction.js";
|
|
5
5
|
import { createTeeWriter } from "../tee-writer.js";
|
|
@@ -72,13 +72,14 @@ export async function runSuperviseCommand(ctx) {
|
|
|
72
72
|
// When --output is specified, stream text to stdout while writing NDJSON to file.
|
|
73
73
|
// Otherwise, write NDJSON directly to stdout (backwards-compatible).
|
|
74
74
|
const fileStream = opts.outputPath
|
|
75
|
-
? createWriteStream(opts.outputPath)
|
|
75
|
+
? runtime.fs.createWriteStream(opts.outputPath)
|
|
76
76
|
: null;
|
|
77
77
|
const output = fileStream
|
|
78
78
|
? createTeeWriter({
|
|
79
79
|
fileStream,
|
|
80
80
|
textStream: runtime.proc.stdout,
|
|
81
81
|
mode: "supervised",
|
|
82
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
82
83
|
})
|
|
83
84
|
: runtime.proc.stdout;
|
|
84
85
|
|
package/src/commands/tee.js
CHANGED
|
@@ -1,32 +1,47 @@
|
|
|
1
|
-
import { createWriteStream } from "fs";
|
|
2
1
|
import { PassThrough } from "node:stream";
|
|
3
2
|
import { pipeline } from "node:stream/promises";
|
|
3
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
4
4
|
import { createTeeWriter } from "../tee-writer.js";
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* Tee command — stream text output to stdout while optionally saving the raw
|
|
8
|
-
* NDJSON to a file.
|
|
8
|
+
* NDJSON to a file. Reads stdin line-by-line through the injected runtime and
|
|
9
|
+
* re-delimits each record with a newline so the TeeWriter's line splitter sees
|
|
10
|
+
* the same framing the raw byte stream produced.
|
|
9
11
|
*
|
|
10
12
|
* Usage: fit-eval tee [output.ndjson] < trace.ndjson
|
|
11
13
|
*
|
|
12
|
-
* @param {
|
|
13
|
-
* @
|
|
14
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
15
|
+
* @returns {Promise<{ok: boolean, code?: number, error?: string}>}
|
|
14
16
|
*/
|
|
15
|
-
export async function runTeeCommand(
|
|
16
|
-
const
|
|
17
|
-
const
|
|
17
|
+
export async function runTeeCommand(ctx) {
|
|
18
|
+
const runtime = ctx.deps.runtime;
|
|
19
|
+
const outputPath = ctx.args.output ?? null;
|
|
20
|
+
const fileStream = outputPath
|
|
21
|
+
? runtime.fs.createWriteStream(outputPath)
|
|
22
|
+
: null;
|
|
18
23
|
|
|
19
24
|
// TeeWriter requires a fileStream; when no output file is specified,
|
|
20
25
|
// use a PassThrough as a no-op sink (NDJSON is not saved).
|
|
21
26
|
const sink = fileStream ?? new PassThrough();
|
|
22
27
|
const tee = createTeeWriter({
|
|
23
28
|
fileStream: sink,
|
|
24
|
-
textStream:
|
|
29
|
+
textStream: runtime.proc.stdout,
|
|
25
30
|
mode: "raw",
|
|
31
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
26
32
|
});
|
|
27
33
|
|
|
28
34
|
try {
|
|
29
|
-
|
|
35
|
+
// `runtime.proc.stdin` yields newline-stripped lines; re-append `\n` so the
|
|
36
|
+
// TeeWriter's `_write` line splitter frames records exactly as it did when
|
|
37
|
+
// piped the raw byte stream.
|
|
38
|
+
const lines = (async function* () {
|
|
39
|
+
for await (const line of runtime.proc.stdin) yield `${line}\n`;
|
|
40
|
+
})();
|
|
41
|
+
await pipeline(lines, tee);
|
|
42
|
+
return { ok: true };
|
|
43
|
+
} catch (error) {
|
|
44
|
+
return { ok: false, code: 1, error: error.message };
|
|
30
45
|
} finally {
|
|
31
46
|
if (fileStream) {
|
|
32
47
|
await new Promise((resolve, reject) => {
|
package/src/commands/trace.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { join, dirname } from "node:path";
|
|
2
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
2
3
|
import { createTraceCollector } from "@forwardimpact/libeval";
|
|
3
4
|
import { createTraceQuery } from "../trace-query.js";
|
|
4
5
|
import { createTraceGitHub } from "../trace-github.js";
|
|
@@ -50,7 +51,9 @@ export async function runDownloadCommand(ctx) {
|
|
|
50
51
|
const ndjsonFile = result.files.find((f) => f.endsWith(".ndjson"));
|
|
51
52
|
if (ndjsonFile) {
|
|
52
53
|
const ndjsonPath = join(result.dir, ndjsonFile);
|
|
53
|
-
const collector = createTraceCollector(
|
|
54
|
+
const collector = createTraceCollector({
|
|
55
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
56
|
+
});
|
|
54
57
|
for (const line of runtime.fsSync
|
|
55
58
|
.readFileSync(ndjsonPath, "utf8")
|
|
56
59
|
.split("\n")) {
|
|
@@ -325,7 +328,9 @@ function loadTrace(runtime, file) {
|
|
|
325
328
|
// Not valid JSON — fall through to NDJSON.
|
|
326
329
|
}
|
|
327
330
|
|
|
328
|
-
const collector = createTraceCollector(
|
|
331
|
+
const collector = createTraceCollector({
|
|
332
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
333
|
+
});
|
|
329
334
|
for (const line of content.split("\n")) {
|
|
330
335
|
collector.addLine(line);
|
|
331
336
|
}
|
package/src/discusser.js
CHANGED
|
@@ -226,8 +226,10 @@ export function createDiscusser({
|
|
|
226
226
|
callbackUrl,
|
|
227
227
|
inboxUrl,
|
|
228
228
|
correlationId,
|
|
229
|
+
runtime,
|
|
229
230
|
}) {
|
|
230
231
|
if (!redactor) throw new Error("redactor is required");
|
|
232
|
+
if (!runtime) throw new Error("runtime is required");
|
|
231
233
|
const resolvedLeadCwd = resolve(leadCwd ?? ".");
|
|
232
234
|
const resolvedProfilesDir =
|
|
233
235
|
profilesDir ?? resolve(resolvedLeadCwd, ".claude/agents");
|
|
@@ -272,6 +274,7 @@ export function createDiscusser({
|
|
|
272
274
|
messageBus,
|
|
273
275
|
leadName: "lead",
|
|
274
276
|
signal: abortController.signal,
|
|
277
|
+
runtime,
|
|
275
278
|
})
|
|
276
279
|
: null;
|
|
277
280
|
|
|
@@ -307,10 +310,6 @@ export function createDiscusser({
|
|
|
307
310
|
from: config.name,
|
|
308
311
|
});
|
|
309
312
|
|
|
310
|
-
const agentTrailer = config.systemPromptAmend
|
|
311
|
-
? `${DISCUSS_AGENT_SYSTEM_PROMPT}\n\n${config.systemPromptAmend}`
|
|
312
|
-
: DISCUSS_AGENT_SYSTEM_PROMPT;
|
|
313
|
-
|
|
314
313
|
const runner = createAgentRunner({
|
|
315
314
|
cwd: config.cwd ?? resolvedLeadCwd,
|
|
316
315
|
query,
|
|
@@ -325,7 +324,9 @@ export function createDiscusser({
|
|
|
325
324
|
role: "agent",
|
|
326
325
|
profile: config.agentProfile,
|
|
327
326
|
profilesDir: resolvedProfilesDir,
|
|
328
|
-
trailer:
|
|
327
|
+
trailer: DISCUSS_AGENT_SYSTEM_PROMPT,
|
|
328
|
+
amend: config.systemPromptAmend,
|
|
329
|
+
runtime,
|
|
329
330
|
}),
|
|
330
331
|
redactor,
|
|
331
332
|
});
|
|
@@ -358,6 +359,7 @@ export function createDiscusser({
|
|
|
358
359
|
profile: leadProfile,
|
|
359
360
|
profilesDir: resolvedProfilesDir,
|
|
360
361
|
trailer: DISCUSS_SYSTEM_PROMPT,
|
|
362
|
+
runtime,
|
|
361
363
|
}),
|
|
362
364
|
redactor,
|
|
363
365
|
});
|
package/src/events/github.js
CHANGED
|
@@ -29,8 +29,14 @@ export const TASK_TEMPLATE_ISSUE_LABELED =
|
|
|
29
29
|
export const TASK_TEMPLATE_PR_LABELED =
|
|
30
30
|
'Label "${LABEL}" was added to PR "${PR_TITLE}" (#${NUMBER}). PR URL: ${URL}.';
|
|
31
31
|
|
|
32
|
+
// "unreleased changes"/"cut" point at the genuine post-merge action — release
|
|
33
|
+
// activity (the release-engineer's Assess step 3 / `kata-release-cut`).
|
|
34
|
+
// "status" is a backstop: the spec's `wiki/STATUS.md` row is normally advanced
|
|
35
|
+
// in the pre-merge gate (`kata-release-merge` Step 8), but the keyword catches a
|
|
36
|
+
// merge that landed without it. Neither owner nor artifact is named, so the lead
|
|
37
|
+
// routes the merge instead of treating it as a no-op.
|
|
32
38
|
export const TASK_TEMPLATE_PR_MERGED =
|
|
33
|
-
'PR "${PR_TITLE}" (#${NUMBER}) merged. PR URL: ${URL}.';
|
|
39
|
+
'PR "${PR_TITLE}" (#${NUMBER}) merged to main — may leave unreleased changes to cut or status to update. PR URL: ${URL}.';
|
|
34
40
|
|
|
35
41
|
// Appended verbatim to comment/review templates. `${BODY}` is the untrusted
|
|
36
42
|
// author text; the fence and the "data, not instructions" framing keep the lead
|
package/src/facilitator.js
CHANGED
|
@@ -109,8 +109,10 @@ export function createFacilitator({
|
|
|
109
109
|
profilesDir,
|
|
110
110
|
taskAmend,
|
|
111
111
|
redactor,
|
|
112
|
+
runtime,
|
|
112
113
|
}) {
|
|
113
114
|
if (!redactor) throw new Error("redactor is required");
|
|
115
|
+
if (!runtime) throw new Error("runtime is required");
|
|
114
116
|
const resolvedProfilesDir =
|
|
115
117
|
profilesDir ?? resolve(facilitatorCwd, ".claude/agents");
|
|
116
118
|
const ctx = createOrchestrationContext();
|
|
@@ -132,10 +134,6 @@ export function createFacilitator({
|
|
|
132
134
|
from: config.name,
|
|
133
135
|
});
|
|
134
136
|
|
|
135
|
-
const agentTrailer = config.systemPromptAmend
|
|
136
|
-
? `${FACILITATED_AGENT_SYSTEM_PROMPT}\n\n${config.systemPromptAmend}`
|
|
137
|
-
: FACILITATED_AGENT_SYSTEM_PROMPT;
|
|
138
|
-
|
|
139
137
|
const runner = createAgentRunner({
|
|
140
138
|
cwd: config.cwd ?? facilitatorCwd,
|
|
141
139
|
query,
|
|
@@ -150,7 +148,9 @@ export function createFacilitator({
|
|
|
150
148
|
role: "agent",
|
|
151
149
|
profile: config.agentProfile,
|
|
152
150
|
profilesDir: resolvedProfilesDir,
|
|
153
|
-
trailer:
|
|
151
|
+
trailer: FACILITATED_AGENT_SYSTEM_PROMPT,
|
|
152
|
+
amend: config.systemPromptAmend,
|
|
153
|
+
runtime,
|
|
154
154
|
}),
|
|
155
155
|
redactor,
|
|
156
156
|
});
|
|
@@ -187,6 +187,7 @@ export function createFacilitator({
|
|
|
187
187
|
profile: facilitatorProfile,
|
|
188
188
|
profilesDir: resolvedProfilesDir,
|
|
189
189
|
trailer: FACILITATOR_SYSTEM_PROMPT,
|
|
190
|
+
runtime,
|
|
190
191
|
}),
|
|
191
192
|
redactor,
|
|
192
193
|
});
|
package/src/inbox-poller.js
CHANGED
|
@@ -18,20 +18,17 @@ export class InboxPoller {
|
|
|
18
18
|
* @param {import("./message-bus.js").MessageBus} deps.messageBus
|
|
19
19
|
* @param {string} deps.leadName
|
|
20
20
|
* @param {AbortSignal} deps.signal
|
|
21
|
-
* @param {import("@forwardimpact/libutil/runtime").Runtime}
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
* absent so existing callers keep working.
|
|
21
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
|
|
22
|
+
* Injected collaborators; `clock.setTimeout`/`clock.clearTimeout` drive the
|
|
23
|
+
* inter-poll backoff.
|
|
25
24
|
*/
|
|
26
25
|
constructor({ inboxUrl, messageBus, leadName, signal, runtime }) {
|
|
26
|
+
if (!runtime) throw new Error("runtime is required");
|
|
27
27
|
this.#inboxUrl = inboxUrl;
|
|
28
28
|
this.#messageBus = messageBus;
|
|
29
29
|
this.#leadName = leadName;
|
|
30
30
|
this.#signal = signal;
|
|
31
|
-
this.#clock = runtime
|
|
32
|
-
setTimeout: (fn, ms) => globalThis.setTimeout(fn, ms),
|
|
33
|
-
clearTimeout: (h) => globalThis.clearTimeout(h),
|
|
34
|
-
};
|
|
31
|
+
this.#clock = runtime.clock;
|
|
35
32
|
}
|
|
36
33
|
|
|
37
34
|
/** Long-poll the inbox until the abort signal fires. */
|
package/src/judge.js
CHANGED
|
@@ -17,7 +17,7 @@ import { resolve } from "node:path";
|
|
|
17
17
|
import { Writable } from "node:stream";
|
|
18
18
|
|
|
19
19
|
import { createAgentRunner } from "./agent-runner.js";
|
|
20
|
-
import {
|
|
20
|
+
import { composeSystemPrompt } from "./profile-prompt.js";
|
|
21
21
|
import { SequenceCounter } from "./sequence-counter.js";
|
|
22
22
|
import {
|
|
23
23
|
createJudgeToolServer,
|
|
@@ -140,7 +140,7 @@ export class Judge {
|
|
|
140
140
|
/**
|
|
141
141
|
* Factory function — wires the AgentRunner with the judge orchestration server
|
|
142
142
|
* and the JUDGE_SYSTEM_PROMPT trailer. A `judgeProfile` (when supplied) layers
|
|
143
|
-
* on top of the trailer via `
|
|
143
|
+
* on top of the trailer via `composeSystemPrompt`, matching the
|
|
144
144
|
* supervisor/facilitator pattern.
|
|
145
145
|
*
|
|
146
146
|
* @param {object} deps
|
|
@@ -151,7 +151,7 @@ export class Judge {
|
|
|
151
151
|
* @param {string} [deps.model]
|
|
152
152
|
* @param {number} [deps.maxTurns] - Default 5 (the judge is expected to act in turn 1; 5 leaves headroom for tool inspection).
|
|
153
153
|
* @param {string[]} [deps.allowedTools] - Default `["Read","Glob","Grep","Bash"]` — read-only inspection.
|
|
154
|
-
* @param {string} [deps.judgeProfile] - Profile name; resolved into the system prompt via `
|
|
154
|
+
* @param {string} [deps.judgeProfile] - Profile name; resolved into the system prompt via `composeSystemPrompt`.
|
|
155
155
|
* @param {string} [deps.profilesDir] - Defaults to `<cwd>/.claude/agents`.
|
|
156
156
|
* @param {string} [deps.taskAmend]
|
|
157
157
|
* @returns {Judge}
|
|
@@ -167,23 +167,22 @@ export function createJudge({
|
|
|
167
167
|
judgeProfile,
|
|
168
168
|
profilesDir,
|
|
169
169
|
taskAmend,
|
|
170
|
+
runtime,
|
|
170
171
|
}) {
|
|
171
172
|
if (!cwd) throw new Error("cwd is required");
|
|
172
173
|
if (!query) throw new Error("query is required");
|
|
173
174
|
if (!output) throw new Error("output is required");
|
|
174
175
|
if (!redactor) throw new Error("redactor is required");
|
|
176
|
+
if (!runtime) throw new Error("runtime is required");
|
|
175
177
|
|
|
176
178
|
const resolvedProfilesDir = profilesDir ?? resolve(cwd, ".claude/agents");
|
|
177
|
-
const systemPrompt =
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
preset: "claude_code",
|
|
185
|
-
append: JUDGE_SYSTEM_PROMPT,
|
|
186
|
-
};
|
|
179
|
+
const systemPrompt = composeSystemPrompt({
|
|
180
|
+
role: "agent",
|
|
181
|
+
profile: judgeProfile,
|
|
182
|
+
profilesDir: resolvedProfilesDir,
|
|
183
|
+
trailer: JUDGE_SYSTEM_PROMPT,
|
|
184
|
+
runtime,
|
|
185
|
+
});
|
|
187
186
|
|
|
188
187
|
const ctx = createOrchestrationContext();
|
|
189
188
|
ctx.participants = [{ name: "judge", role: "judge" }];
|
package/src/profile-prompt.js
CHANGED
|
@@ -1,7 +1,25 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* System prompt composition for agent runners.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* libeval assembles every agent system prompt from up to two parallel,
|
|
5
|
+
* sibling-tagged sections (see COALIGNED.md § L0):
|
|
6
|
+
*
|
|
7
|
+
* <agent_profile>
|
|
8
|
+
* …persona body…
|
|
9
|
+
* </agent_profile>
|
|
10
|
+
*
|
|
11
|
+
* <session_protocol>
|
|
12
|
+
* …orchestration mechanics, then any amendment…
|
|
13
|
+
* </session_protocol>
|
|
14
|
+
*
|
|
15
|
+
* The two tags are siblings joined by a blank line — neither nests inside
|
|
16
|
+
* the other. A section appears only when its content is present. A
|
|
17
|
+
* system-prompt amendment is folded into the protocol trailer before
|
|
18
|
+
* wrapping, so it lands transparently inside `<session_protocol>`. The tag
|
|
19
|
+
* convention lives entirely here: profile `.md` files and trailer constants
|
|
20
|
+
* carry no tags.
|
|
21
|
+
*
|
|
22
|
+
* Helpers:
|
|
5
23
|
*
|
|
6
24
|
* - `composeProfilePrompt(name, opts)` — profile + `claude_code` preset.
|
|
7
25
|
* Used by agent participants that need the full Claude Code tool surface.
|
|
@@ -10,66 +28,146 @@
|
|
|
10
28
|
* roles (supervisor, facilitator, discuss lead) that should only see
|
|
11
29
|
* the orchestration instructions and optionally a profile body.
|
|
12
30
|
*
|
|
13
|
-
* - `composeSystemPrompt(opts)` — unified entry point.
|
|
14
|
-
* of the above based on
|
|
31
|
+
* - `composeSystemPrompt(opts)` — unified entry point. Folds `amend` into
|
|
32
|
+
* the protocol section, then delegates to one of the above based on
|
|
33
|
+
* `opts.role`.
|
|
15
34
|
*/
|
|
16
35
|
|
|
17
|
-
import { readFileSync } from "node:fs";
|
|
18
36
|
import { join } from "node:path";
|
|
19
37
|
|
|
38
|
+
/** Sibling section tags. Neither nests inside the other. */
|
|
39
|
+
const AGENT_PROFILE_TAG = "agent_profile";
|
|
40
|
+
const SESSION_PROTOCOL_TAG = "session_protocol";
|
|
41
|
+
|
|
42
|
+
/** Wrap content in a semantic section tag, each on its own line. */
|
|
43
|
+
function wrapSection(tag, content) {
|
|
44
|
+
return `<${tag}>\n${content}\n</${tag}>`;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Assemble the parallel `<agent_profile>` / `<session_protocol>` sections.
|
|
49
|
+
* Each section is emitted only when its content is non-empty; the two tags
|
|
50
|
+
* are siblings joined by a blank line and never nest.
|
|
51
|
+
*
|
|
52
|
+
* @param {object} parts
|
|
53
|
+
* @param {string} [parts.body] - Profile body, already frontmatter-stripped.
|
|
54
|
+
* @param {string} [parts.protocol] - Session protocol trailer, with any
|
|
55
|
+
* amendment already folded in.
|
|
56
|
+
* @returns {string}
|
|
57
|
+
*/
|
|
58
|
+
function assembleSections({ body, protocol }) {
|
|
59
|
+
const sections = [];
|
|
60
|
+
if (body) sections.push(wrapSection(AGENT_PROFILE_TAG, body));
|
|
61
|
+
if (protocol) sections.push(wrapSection(SESSION_PROTOCOL_TAG, protocol));
|
|
62
|
+
return sections.join("\n\n");
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Read a profile `.md`, strip its frontmatter, and return the trimmed body.
|
|
67
|
+
* Reads synchronously off the injected `runtime.fsSync` surface — this
|
|
68
|
+
* composer runs inside the synchronous SDK-option builders of the
|
|
69
|
+
* supervisor / facilitator / discusser / judge factories, so it cannot go
|
|
70
|
+
* async without an unbounded cascade.
|
|
71
|
+
*
|
|
72
|
+
* @param {string} name - Profile basename (no `.md` suffix)
|
|
73
|
+
* @param {string} profilesDir - Directory containing `<name>.md`
|
|
74
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
75
|
+
* @returns {string}
|
|
76
|
+
*/
|
|
77
|
+
function readProfileBody(name, profilesDir, runtime) {
|
|
78
|
+
const path = join(profilesDir, `${name}.md`);
|
|
79
|
+
const raw = runtime.fsSync.readFileSync(path, "utf8");
|
|
80
|
+
return stripFrontmatter(raw).trim();
|
|
81
|
+
}
|
|
82
|
+
|
|
20
83
|
/**
|
|
21
|
-
* Compose a `claude_code`-preset system prompt from a profile file.
|
|
84
|
+
* Compose a `claude_code`-preset system prompt from a profile file. The
|
|
85
|
+
* profile body is wrapped in `<agent_profile>`; an optional protocol trailer
|
|
86
|
+
* is wrapped in a sibling `<session_protocol>`.
|
|
87
|
+
*
|
|
22
88
|
* @param {string} name - Profile basename (no `.md` suffix)
|
|
23
89
|
* @param {object} opts
|
|
24
90
|
* @param {string} opts.profilesDir - Directory containing `<name>.md`
|
|
25
|
-
* @param {string} [opts.trailer] -
|
|
91
|
+
* @param {string} [opts.trailer] - Session protocol, wrapped as a sibling
|
|
92
|
+
* `<session_protocol>` section after a blank line
|
|
93
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
|
|
26
94
|
* @returns {{type: "preset", preset: "claude_code", append: string}}
|
|
27
95
|
*/
|
|
28
|
-
export function composeProfilePrompt(name, { profilesDir, trailer }) {
|
|
29
|
-
const
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
96
|
+
export function composeProfilePrompt(name, { profilesDir, trailer, runtime }) {
|
|
97
|
+
const body = readProfileBody(name, profilesDir, runtime);
|
|
98
|
+
return {
|
|
99
|
+
type: "preset",
|
|
100
|
+
preset: "claude_code",
|
|
101
|
+
append: assembleSections({ body, protocol: trailer }),
|
|
102
|
+
};
|
|
34
103
|
}
|
|
35
104
|
|
|
36
105
|
/**
|
|
37
|
-
* Compose a plain-string system prompt for a lead role (no Claude Code
|
|
106
|
+
* Compose a plain-string system prompt for a lead role (no Claude Code
|
|
107
|
+
* preset). The protocol trailer is wrapped in `<session_protocol>`; an
|
|
108
|
+
* optional profile body is wrapped in a sibling `<agent_profile>` before it.
|
|
109
|
+
*
|
|
38
110
|
* @param {object} opts
|
|
39
111
|
* @param {string} [opts.profile] - Profile basename (no `.md` suffix)
|
|
40
112
|
* @param {string} [opts.profilesDir] - Directory containing profile files
|
|
41
|
-
* @param {string} opts.trailer -
|
|
113
|
+
* @param {string} opts.trailer - Session protocol (orchestration instructions)
|
|
114
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
|
|
42
115
|
* @returns {string}
|
|
43
116
|
*/
|
|
44
|
-
export function composeLeadPrompt({ profile, profilesDir, trailer }) {
|
|
117
|
+
export function composeLeadPrompt({ profile, profilesDir, trailer, runtime }) {
|
|
45
118
|
if (!trailer) throw new Error("trailer is required");
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
return `${body}\n\n${trailer}`;
|
|
119
|
+
const body = profile
|
|
120
|
+
? readProfileBody(profile, profilesDir, runtime)
|
|
121
|
+
: undefined;
|
|
122
|
+
return assembleSections({ body, protocol: trailer });
|
|
51
123
|
}
|
|
52
124
|
|
|
53
125
|
/**
|
|
54
|
-
* Unified entry point for composing system prompts.
|
|
126
|
+
* Unified entry point for composing system prompts. Folds an optional
|
|
127
|
+
* amendment into the protocol trailer — so it lands inside
|
|
128
|
+
* `<session_protocol>` — then delegates by role.
|
|
55
129
|
*
|
|
56
130
|
* @param {object} opts
|
|
57
131
|
* @param {"lead"|"agent"} opts.role - `"lead"` produces a plain string;
|
|
58
132
|
* `"agent"` produces a `claude_code` preset object.
|
|
59
133
|
* @param {string} [opts.profile] - Profile basename
|
|
60
134
|
* @param {string} [opts.profilesDir]
|
|
61
|
-
* @param {string} opts.trailer -
|
|
135
|
+
* @param {string} opts.trailer - Session protocol (orchestration instructions)
|
|
136
|
+
* @param {string} [opts.amend] - Caller-supplied amendment, appended inside
|
|
137
|
+
* `<session_protocol>` after the trailer with a blank-line separator.
|
|
138
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators; uses `fsSync.readFileSync`.
|
|
62
139
|
* @returns {string | {type: "preset", preset: "claude_code", append: string}}
|
|
63
140
|
*/
|
|
64
|
-
export function composeSystemPrompt({
|
|
141
|
+
export function composeSystemPrompt({
|
|
142
|
+
role,
|
|
143
|
+
profile,
|
|
144
|
+
profilesDir,
|
|
145
|
+
trailer,
|
|
146
|
+
amend,
|
|
147
|
+
runtime,
|
|
148
|
+
}) {
|
|
65
149
|
if (!trailer) throw new Error("trailer is required");
|
|
150
|
+
const protocol = amend ? `${trailer}\n\n${amend}` : trailer;
|
|
66
151
|
if (role === "lead") {
|
|
67
|
-
return composeLeadPrompt({
|
|
152
|
+
return composeLeadPrompt({
|
|
153
|
+
profile,
|
|
154
|
+
profilesDir,
|
|
155
|
+
trailer: protocol,
|
|
156
|
+
runtime,
|
|
157
|
+
});
|
|
68
158
|
}
|
|
69
159
|
if (profile) {
|
|
70
|
-
return composeProfilePrompt(profile, {
|
|
160
|
+
return composeProfilePrompt(profile, {
|
|
161
|
+
profilesDir,
|
|
162
|
+
trailer: protocol,
|
|
163
|
+
runtime,
|
|
164
|
+
});
|
|
71
165
|
}
|
|
72
|
-
return {
|
|
166
|
+
return {
|
|
167
|
+
type: "preset",
|
|
168
|
+
preset: "claude_code",
|
|
169
|
+
append: assembleSections({ protocol }),
|
|
170
|
+
};
|
|
73
171
|
}
|
|
74
172
|
|
|
75
173
|
/**
|
package/src/redaction.js
CHANGED
|
@@ -15,6 +15,7 @@ export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
|
|
|
15
15
|
"DATABASE_PASSWORD",
|
|
16
16
|
"GH_TOKEN",
|
|
17
17
|
"GITHUB_TOKEN",
|
|
18
|
+
"JWT_SECRET",
|
|
18
19
|
"MCP_TOKEN",
|
|
19
20
|
"MICROSOFT_APP_ID",
|
|
20
21
|
"MICROSOFT_APP_PASSWORD",
|
|
@@ -22,7 +23,6 @@ export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
|
|
|
22
23
|
"PRODUCT_LANDMARK_TOKEN",
|
|
23
24
|
"SERVICE_SECRET",
|
|
24
25
|
"SUPABASE_ANON_KEY",
|
|
25
|
-
"SUPABASE_JWT_SECRET",
|
|
26
26
|
"SUPABASE_SERVICE_ROLE_KEY",
|
|
27
27
|
]);
|
|
28
28
|
|
|
@@ -135,7 +135,8 @@ export function createRedactor({
|
|
|
135
135
|
patterns = DEFAULT_PATTERNS,
|
|
136
136
|
enabled,
|
|
137
137
|
} = {}) {
|
|
138
|
-
|
|
138
|
+
if (!runtime) throw new Error("runtime is required");
|
|
139
|
+
const proc = runtime.proc;
|
|
139
140
|
const resolvedEnv = env ?? proc.env;
|
|
140
141
|
const envDisabled = resolvedEnv.LIBEVAL_REDACTION_DISABLED === "1";
|
|
141
142
|
const resolvedEnabled = enabled ?? !envDisabled;
|
|
@@ -151,20 +152,6 @@ export function createRedactor({
|
|
|
151
152
|
return new Redactor({ envSnapshot, patterns, enabled: resolvedEnabled });
|
|
152
153
|
}
|
|
153
154
|
|
|
154
|
-
/**
|
|
155
|
-
* Lazily build the production proc surface so callers that don't inject a
|
|
156
|
-
* runtime keep working. Imported indirectly to avoid pulling the whole
|
|
157
|
-
* runtime bag (and its `node:fs`/`node:child_process` imports) into modules
|
|
158
|
-
* that only ever receive an injected runtime.
|
|
159
|
-
* @returns {{env: Record<string, string|undefined>, stderr: {write: (s: string) => void}}}
|
|
160
|
-
*/
|
|
161
|
-
function defaultProc() {
|
|
162
|
-
return {
|
|
163
|
-
env: globalThis.process?.env ?? {},
|
|
164
|
-
stderr: { write: (s) => globalThis.process?.stderr?.write(s) },
|
|
165
|
-
};
|
|
166
|
-
}
|
|
167
|
-
|
|
168
155
|
/**
|
|
169
156
|
* Parse `LIBEVAL_REDACTION_ENV_VARS` into a trimmed, non-empty name list.
|
|
170
157
|
* Falls back to `DEFAULT_ENV_ALLOWLIST` when unset or empty.
|
package/src/supervisor.js
CHANGED
|
@@ -122,6 +122,7 @@ const devNull = new Writable({
|
|
|
122
122
|
* @param {string[]} [deps.supervisorDisallowedTools]
|
|
123
123
|
* @param {string} [deps.supervisorProfile]
|
|
124
124
|
* @param {string} [deps.agentProfile]
|
|
125
|
+
* @param {string} [deps.agentSystemPromptAmend] - Amendment folded into the agent's `<session_protocol>` section, after the protocol trailer.
|
|
125
126
|
* @param {string} [deps.profilesDir]
|
|
126
127
|
* @param {string} [deps.taskAmend]
|
|
127
128
|
* @param {Record<string, object>} [deps.agentMcpServers]
|
|
@@ -141,12 +142,15 @@ export function createSupervisor({
|
|
|
141
142
|
supervisorDisallowedTools,
|
|
142
143
|
supervisorProfile,
|
|
143
144
|
agentProfile,
|
|
145
|
+
agentSystemPromptAmend,
|
|
144
146
|
profilesDir,
|
|
145
147
|
taskAmend,
|
|
146
148
|
agentMcpServers,
|
|
147
149
|
redactor,
|
|
150
|
+
runtime,
|
|
148
151
|
}) {
|
|
149
152
|
if (!redactor) throw new Error("redactor is required");
|
|
153
|
+
if (!runtime) throw new Error("runtime is required");
|
|
150
154
|
const resolvedProfilesDir =
|
|
151
155
|
profilesDir ?? resolve(supervisorCwd, ".claude/agents");
|
|
152
156
|
|
|
@@ -180,6 +184,8 @@ export function createSupervisor({
|
|
|
180
184
|
profile: agentProfile,
|
|
181
185
|
profilesDir: resolvedProfilesDir,
|
|
182
186
|
trailer: AGENT_SYSTEM_PROMPT,
|
|
187
|
+
amend: agentSystemPromptAmend,
|
|
188
|
+
runtime,
|
|
183
189
|
}),
|
|
184
190
|
mcpServers: { orchestration: agentServer, ...agentMcpServers },
|
|
185
191
|
redactor,
|
|
@@ -213,6 +219,7 @@ export function createSupervisor({
|
|
|
213
219
|
profile: supervisorProfile,
|
|
214
220
|
profilesDir: resolvedProfilesDir,
|
|
215
221
|
trailer: SUPERVISOR_SYSTEM_PROMPT,
|
|
222
|
+
runtime,
|
|
216
223
|
}),
|
|
217
224
|
mcpServers: { orchestration: supervisorServer },
|
|
218
225
|
redactor,
|