@forwardimpact/libeval 0.1.16 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/fit-trace.js CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  import { readFileSync } from "node:fs";
4
4
  import { createCli } from "@forwardimpact/libcli";
5
+ import { createScriptConfig } from "@forwardimpact/libconfig";
5
6
  import { createLogger } from "@forwardimpact/libtelemetry";
6
7
 
7
8
  import {
@@ -188,7 +189,8 @@ async function main() {
188
189
  process.exit(2);
189
190
  }
190
191
 
191
- await handler(values, args);
192
+ const config = await createScriptConfig("eval");
193
+ await handler(values, args, { config });
192
194
  }
193
195
 
194
196
  main().catch((error) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.16",
3
+ "version": "0.1.18",
4
4
  "description": "Process Claude Code stream-json output into structured traces",
5
5
  "license": "Apache-2.0",
6
6
  "author": "D. Olsson <hi@senzilla.io>",
@@ -32,7 +32,7 @@
32
32
  "@forwardimpact/libcli": "^0.1.0",
33
33
  "@forwardimpact/libconfig": "^0.1.0",
34
34
  "@forwardimpact/libtelemetry": "^0.1.22",
35
- "zod": "^3.23.0"
35
+ "zod": "^4.3.6"
36
36
  },
37
37
  "publishConfig": {
38
38
  "access": "public"
package/src/agent-runner.js CHANGED
@@ -21,7 +21,6 @@ function applyDefaults(deps) {
21
21
  onBatch: deps.onBatch ?? null,
22
22
  batchSize: deps.batchSize ?? 3,
23
23
  settingSources: deps.settingSources ?? [],
24
- agentProfile: deps.agentProfile ?? null,
25
24
  systemPrompt: deps.systemPrompt ?? null,
26
25
  disallowedTools: deps.disallowedTools ?? [],
27
26
  mcpServers: deps.mcpServers ?? null,
@@ -42,7 +41,6 @@ export class AgentRunner {
42
41
  * @param {function} [deps.onBatch] - Async callback invoked with a batch of NDJSON lines at flush boundaries: every `batchSize` assistant text blocks, the terminal `result` message, and — on iterator crash/abort — once more in a final flush carrying any lines that never reached a boundary. Receives `(lines, { abort })` where calling `abort()` stops the in-flight SDK session via the AbortController. Optional; assignable at runtime so the Supervisor can swap it per turn.
43
42
  * @param {number} [deps.batchSize] - Assistant text-block messages to accumulate before firing onBatch. Tool-only assistant messages ride along without counting. Default 3: the supervisor reviews the agent every three text turns instead of every turn. The terminal `result` always flushes regardless of count.
44
43
  * @param {string[]} [deps.settingSources] - SDK setting sources (e.g. ['project'] to load CLAUDE.md)
45
- * @param {string} [deps.agentProfile] - Agent profile name to pass as --agent to the Claude CLI
46
44
  * @param {string|object} [deps.systemPrompt] - SDK system prompt (string replaces default; {type:'preset', preset:'claude_code', append} appends)
47
45
  * @param {string[]} [deps.disallowedTools] - Tools to explicitly remove from the model's context
48
46
  * @param {Record<string, object>} [deps.mcpServers] - MCP server configs to pass to the SDK query
@@ -82,7 +80,6 @@ export class AgentRunner {
82
80
  disallowedTools: this.disallowedTools,
83
81
  }),
84
82
  ...(this.systemPrompt && { systemPrompt: this.systemPrompt }),
85
- ...(this.agentProfile && { extraArgs: { agent: this.agentProfile } }),
86
83
  ...(this.mcpServers && { mcpServers: this.mcpServers }),
87
84
  },
88
85
  });
@@ -2,6 +2,7 @@ import { readFileSync, createWriteStream } from "node:fs";
2
2
  import { Writable } from "node:stream";
3
3
  import { resolve } from "node:path";
4
4
  import { createAgentRunner } from "../agent-runner.js";
5
+ import { composeProfilePrompt } from "../profile-prompt.js";
5
6
  import { createTeeWriter } from "../tee-writer.js";
6
7
  import { SequenceCounter } from "../sequence-counter.js";
7
8
 
@@ -76,6 +77,12 @@ export async function runRunCommand(values, _args) {
76
77
  );
77
78
  };
78
79
 
80
+ const systemPrompt = agentProfile
81
+ ? composeProfilePrompt(agentProfile, {
82
+ profilesDir: resolve(cwd, ".claude/agents"),
83
+ })
84
+ : undefined;
85
+
79
86
  const { query } = await import("@anthropic-ai/claude-agent-sdk");
80
87
  const runner = createAgentRunner({
81
88
  cwd,
@@ -86,7 +93,7 @@ export async function runRunCommand(values, _args) {
86
93
  allowedTools,
87
94
  onLine,
88
95
  settingSources: ["project"],
89
- agentProfile,
96
+ systemPrompt,
90
97
  });
91
98
 
92
99
  const result = await runner.run(taskContent);
@@ -10,9 +10,13 @@ import { createTraceGitHub } from "../trace-github.js";
10
10
  * List recent workflow runs matching a pattern.
11
11
  * @param {object} values - Parsed option values
12
12
  * @param {string[]} args - [pattern?]
13
+ * @param {{config: import("@forwardimpact/libconfig").Config}} ctx
13
14
  */
14
- export async function runRunsCommand(values, args) {
15
- const gh = await createTraceGitHub({ repo: values.repo });
15
+ export async function runRunsCommand(values, args, ctx) {
16
+ const gh = await createTraceGitHub({
17
+ token: ctx.config.ghToken(),
18
+ repo: values.repo,
19
+ });
16
20
  const pattern = args[0] ?? "agent";
17
21
  const lookback = values.lookback ?? "7d";
18
22
  const runs = await gh.listRuns({ pattern, lookback });
@@ -23,9 +27,13 @@ export async function runRunsCommand(values, args) {
23
27
  * Download a trace artifact and auto-convert to structured JSON.
24
28
  * @param {object} values - Parsed option values
25
29
  * @param {string[]} args - [run-id]
30
+ * @param {{config: import("@forwardimpact/libconfig").Config}} ctx
26
31
  */
27
- export async function runDownloadCommand(values, args) {
28
- const gh = await createTraceGitHub({ repo: values.repo });
32
+ export async function runDownloadCommand(values, args, ctx) {
33
+ const gh = await createTraceGitHub({
34
+ token: ctx.config.ghToken(),
35
+ repo: values.repo,
36
+ });
29
37
  const result = await gh.downloadTrace(args[0], {
30
38
  dir: values.dir,
31
39
  name: values.artifact,
@@ -7,7 +7,9 @@
7
7
  */
8
8
 
9
9
  import { Writable } from "node:stream";
10
+ import { resolve } from "node:path";
10
11
  import { createAgentRunner } from "./agent-runner.js";
12
+ import { composeProfilePrompt } from "./profile-prompt.js";
11
13
  import { SequenceCounter } from "./sequence-counter.js";
12
14
  import { createMessageBus } from "./message-bus.js";
13
15
  import {
@@ -415,7 +417,8 @@ const devNull = new Writable({
415
417
  * @param {import("stream").Writable} deps.output
416
418
  * @param {string} [deps.model]
417
419
  * @param {number} [deps.maxTurns]
418
- * @param {string} [deps.facilitatorProfile]
420
+ * @param {string} [deps.facilitatorProfile] - Facilitator profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
421
+ * @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<facilitatorCwd>/.claude/agents`. Resolved once from the facilitator's cwd so profiles travel with the project, not with per-agent sandboxes.
419
422
  * @returns {Facilitator}
420
423
  */
421
424
  export function createFacilitator({
@@ -426,7 +429,19 @@ export function createFacilitator({
426
429
  model,
427
430
  maxTurns,
428
431
  facilitatorProfile,
432
+ profilesDir,
429
433
  }) {
434
+ const resolvedProfilesDir =
435
+ profilesDir ?? resolve(facilitatorCwd, ".claude/agents");
436
+ const systemPromptFor = (profile, trailer) => {
437
+ if (!trailer) throw new Error("trailer is required");
438
+ return profile
439
+ ? composeProfilePrompt(profile, {
440
+ profilesDir: resolvedProfilesDir,
441
+ trailer,
442
+ })
443
+ : { type: "preset", preset: "claude_code", append: trailer };
444
+ };
430
445
  const ctx = createOrchestrationContext();
431
446
  const messageBus = createMessageBus({
432
447
  participants: ["facilitator", ...agentConfigs.map((a) => a.name)],
@@ -471,12 +486,10 @@ export function createFacilitator({
471
486
  onLine: (line) => facilitator.emitLine(config.name, line),
472
487
  mcpServers: { orchestration: agentServer },
473
488
  settingSources: ["project"],
474
- agentProfile: config.agentProfile,
475
- systemPrompt: {
476
- type: "preset",
477
- preset: "claude_code",
478
- append: FACILITATED_AGENT_SYSTEM_PROMPT,
479
- },
489
+ systemPrompt: systemPromptFor(
490
+ config.agentProfile,
491
+ FACILITATED_AGENT_SYSTEM_PROMPT,
492
+ ),
480
493
  });
481
494
 
482
495
  return { name: config.name, role: config.role, runner };
@@ -491,12 +504,10 @@ export function createFacilitator({
491
504
  onLine: (line) => facilitator.emitLine("facilitator", line),
492
505
  mcpServers: { orchestration: facilitatorServer },
493
506
  settingSources: ["project"],
494
- agentProfile: facilitatorProfile,
495
- systemPrompt: {
496
- type: "preset",
497
- preset: "claude_code",
498
- append: FACILITATOR_SYSTEM_PROMPT,
499
- },
507
+ systemPrompt: systemPromptFor(
508
+ facilitatorProfile,
509
+ FACILITATOR_SYSTEM_PROMPT,
510
+ ),
500
511
  });
501
512
 
502
513
  facilitator = new Facilitator({
package/src/index.js CHANGED
@@ -6,6 +6,7 @@ export {
6
6
  parseGitRemote,
7
7
  } from "./trace-github.js";
8
8
  export { AgentRunner, createAgentRunner } from "./agent-runner.js";
9
+ export { composeProfilePrompt } from "./profile-prompt.js";
9
10
  export {
10
11
  Supervisor,
11
12
  createSupervisor,
package/src/profile-prompt.js ADDED
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Compose an SDK `systemPrompt` value from a `.claude/agents/<name>.md` file.
3
+ *
4
+ * Pure function. Reads the profile file, strips YAML frontmatter, and returns
5
+ * the SDK-shaped `{ type: "preset", preset: "claude_code", append }` object
6
+ * with the profile body — plus an optional mode-specific trailer — in the
7
+ * `append` slot. Callers in libeval pass the result straight into an
8
+ * `AgentRunner`'s `systemPrompt` input so the profile reaches the main-thread
9
+ * system prompt without going through the SDK's top-level `agent` option.
10
+ */
11
+
12
+ import { readFileSync } from "node:fs";
13
+ import { join } from "node:path";
14
+
15
+ /**
16
+ * @param {string} name - Profile basename (no `.md` suffix)
17
+ * @param {object} opts
18
+ * @param {string} opts.profilesDir - Directory containing `<name>.md`
19
+ * @param {string} [opts.trailer] - Optional mode-specific trailer appended after a blank line
20
+ * @returns {{type: "preset", preset: "claude_code", append: string}}
21
+ */
22
+ export function composeProfilePrompt(name, { profilesDir, trailer }) {
23
+ const path = join(profilesDir, `${name}.md`);
24
+ const raw = readFileSync(path, "utf8");
25
+ const body = stripFrontmatter(raw).trim();
26
+ const append = trailer && trailer.length > 0 ? `${body}\n\n${trailer}` : body;
27
+ return { type: "preset", preset: "claude_code", append };
28
+ }
29
+
30
+ /**
31
+ * Strip a leading YAML frontmatter fence (`---\n…\n---\n`) from a markdown
32
+ * string. Returns the input unchanged when no frontmatter is present.
33
+ * @param {string} raw
34
+ * @returns {string}
35
+ */
36
+ function stripFrontmatter(raw) {
37
+ if (!raw.startsWith("---\n")) return raw;
38
+ const end = raw.indexOf("\n---\n", 4);
39
+ if (end === -1) return raw;
40
+ return raw.slice(end + 5);
41
+ }
package/src/supervisor.js CHANGED
@@ -11,7 +11,9 @@
11
11
  */
12
12
 
13
13
  import { Writable } from "node:stream";
14
+ import { resolve } from "node:path";
14
15
  import { createAgentRunner } from "./agent-runner.js";
16
+ import { composeProfilePrompt } from "./profile-prompt.js";
15
17
  import { TraceCollector } from "./trace-collector.js";
16
18
  import { SequenceCounter } from "./sequence-counter.js";
17
19
  import {
@@ -355,8 +357,9 @@ const devNull = new Writable({
355
357
  * @param {string[]} [deps.allowedTools]
356
358
  * @param {string[]} [deps.supervisorAllowedTools]
357
359
  * @param {string[]} [deps.supervisorDisallowedTools]
358
- * @param {string} [deps.supervisorProfile]
359
- * @param {string} [deps.agentProfile]
360
+ * @param {string} [deps.supervisorProfile] - Supervisor profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
361
+ * @param {string} [deps.agentProfile] - Agent profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
362
+ * @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<supervisorCwd>/.claude/agents`. Resolved once from the orchestrator's cwd so profiles travel with the project, not with a per-agent sandbox.
360
363
  * @returns {Supervisor}
361
364
  */
362
365
  export function createSupervisor({
@@ -371,7 +374,19 @@ export function createSupervisor({
371
374
  supervisorAllowedTools,
372
375
  supervisorProfile,
373
376
  agentProfile,
377
+ profilesDir,
374
378
  }) {
379
+ const resolvedProfilesDir =
380
+ profilesDir ?? resolve(supervisorCwd, ".claude/agents");
381
+ const systemPromptFor = (profile, trailer) => {
382
+ if (!trailer) throw new Error("trailer is required");
383
+ return profile
384
+ ? composeProfilePrompt(profile, {
385
+ profilesDir: resolvedProfilesDir,
386
+ trailer,
387
+ })
388
+ : { type: "preset", preset: "claude_code", append: trailer };
389
+ };
375
390
  let supervisor;
376
391
  let supervisorRunner;
377
392
 
@@ -402,12 +417,7 @@ export function createSupervisor({
402
417
  allowedTools,
403
418
  onLine,
404
419
  settingSources: ["project"],
405
- agentProfile,
406
- systemPrompt: {
407
- type: "preset",
408
- preset: "claude_code",
409
- append: AGENT_SYSTEM_PROMPT,
410
- },
420
+ systemPrompt: systemPromptFor(agentProfile, AGENT_SYSTEM_PROMPT),
411
421
  mcpServers: { orchestration: agentServer },
412
422
  });
413
423
 
@@ -433,12 +443,7 @@ export function createSupervisor({
433
443
  disallowedTools,
434
444
  onLine,
435
445
  settingSources: ["project"],
436
- agentProfile: supervisorProfile,
437
- systemPrompt: {
438
- type: "preset",
439
- preset: "claude_code",
440
- append: SUPERVISOR_SYSTEM_PROMPT,
441
- },
446
+ systemPrompt: systemPromptFor(supervisorProfile, SUPERVISOR_SYSTEM_PROMPT),
442
447
  mcpServers: { orchestration: supervisorServer },
443
448
  });
444
449
 
package/src/trace-github.js CHANGED
@@ -186,21 +186,30 @@ export function parseGitRemote(remote) {
186
186
  }
187
187
 
188
188
  /**
189
- * Create a TraceGitHub instance using libconfig for the token and
190
- * git remote for the repo.
189
+ * Create a TraceGitHub instance. The caller is responsible for resolving
190
+ * the GitHub token typically via `Config.ghToken()` — so credential
191
+ * loading stays at the CLI entry point.
191
192
  *
192
- * @param {object} [opts]
193
+ * Breaking change from the prior signature: `token` is now a required
194
+ * caller input. Construct a `Config` via `@forwardimpact/libconfig` and
195
+ * pass `config.ghToken()`.
196
+ *
197
+ * @param {object} opts
198
+ * @param {string} opts.token - GitHub token (e.g. from `Config.ghToken()`)
193
199
  * @param {string} [opts.repo] - "owner/repo" override (default: detect from git remote)
194
200
  * @returns {Promise<TraceGitHub>}
195
201
  */
196
202
  export async function createTraceGitHub(opts = {}) {
197
- const { createScriptConfig } = await import("@forwardimpact/libconfig");
198
- const config = await createScriptConfig("eval");
199
- const token = config.ghToken();
203
+ const { token, repo: repoOverride } = opts;
204
+ if (!token) {
205
+ throw new Error(
206
+ "createTraceGitHub: token is required (pass Config.ghToken())",
207
+ );
208
+ }
200
209
 
201
210
  let owner, repo;
202
- if (opts.repo) {
203
- ({ owner, repo } = parseGitRemote(opts.repo));
211
+ if (repoOverride) {
212
+ ({ owner, repo } = parseGitRemote(repoOverride));
204
213
  } else {
205
214
  const { execSync } = await import("node:child_process");
206
215
  const remote = execSync("git remote get-url origin", {