npm - @forwardimpact/libeval - Versions diffs - 0.1.16 → 0.1.18 - Mend

@forwardimpact/libeval 0.1.16 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/bin/fit-trace.js CHANGED Viewed

@@ -2,6 +2,7 @@
 import { readFileSync } from "node:fs";
 import { createCli } from "@forwardimpact/libcli";
+import { createScriptConfig } from "@forwardimpact/libconfig";
 import { createLogger } from "@forwardimpact/libtelemetry";
 import {
@@ -188,7 +189,8 @@ async function main() {
     process.exit(2);
   }
-  await handler(values, args);
+  const config = await createScriptConfig("eval");
+  await handler(values, args, { config });
 }
 main().catch((error) => {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@forwardimpact/libeval",
-  "version": "0.1.16",
+  "version": "0.1.18",
   "description": "Process Claude Code stream-json output into structured traces",
   "license": "Apache-2.0",
   "author": "D. Olsson <hi@senzilla.io>",
@@ -32,7 +32,7 @@
     "@forwardimpact/libcli": "^0.1.0",
     "@forwardimpact/libconfig": "^0.1.0",
     "@forwardimpact/libtelemetry": "^0.1.22",
-    "zod": "^3.23.0"
+    "zod": "^4.3.6"
   },
   "publishConfig": {
     "access": "public"

package/src/agent-runner.js CHANGED Viewed

@@ -21,7 +21,6 @@ function applyDefaults(deps) {
     onBatch: deps.onBatch ?? null,
     batchSize: deps.batchSize ?? 3,
     settingSources: deps.settingSources ?? [],
-    agentProfile: deps.agentProfile ?? null,
     systemPrompt: deps.systemPrompt ?? null,
     disallowedTools: deps.disallowedTools ?? [],
     mcpServers: deps.mcpServers ?? null,
@@ -42,7 +41,6 @@ export class AgentRunner {
    * @param {function} [deps.onBatch] - Async callback invoked with a batch of NDJSON lines at flush boundaries: every `batchSize` assistant text blocks, the terminal `result` message, and — on iterator crash/abort — once more in a final flush carrying any lines that never reached a boundary. Receives `(lines, { abort })` where calling `abort()` stops the in-flight SDK session via the AbortController. Optional; assignable at runtime so the Supervisor can swap it per turn.
    * @param {number} [deps.batchSize] - Assistant text-block messages to accumulate before firing onBatch. Tool-only assistant messages ride along without counting. Default 3: the supervisor reviews the agent every three text turns instead of every turn. The terminal `result` always flushes regardless of count.
    * @param {string[]} [deps.settingSources] - SDK setting sources (e.g. ['project'] to load CLAUDE.md)
-   * @param {string} [deps.agentProfile] - Agent profile name to pass as --agent to the Claude CLI
    * @param {string|object} [deps.systemPrompt] - SDK system prompt (string replaces default; {type:'preset', preset:'claude_code', append} appends)
    * @param {string[]} [deps.disallowedTools] - Tools to explicitly remove from the model's context
    * @param {Record<string, object>} [deps.mcpServers] - MCP server configs to pass to the SDK query
@@ -82,7 +80,6 @@ export class AgentRunner {
             disallowedTools: this.disallowedTools,
           }),
           ...(this.systemPrompt && { systemPrompt: this.systemPrompt }),
-          ...(this.agentProfile && { extraArgs: { agent: this.agentProfile } }),
           ...(this.mcpServers && { mcpServers: this.mcpServers }),
         },
       });

package/src/commands/run.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { readFileSync, createWriteStream } from "node:fs";
 import { Writable } from "node:stream";
 import { resolve } from "node:path";
 import { createAgentRunner } from "../agent-runner.js";
+import { composeProfilePrompt } from "../profile-prompt.js";
 import { createTeeWriter } from "../tee-writer.js";
 import { SequenceCounter } from "../sequence-counter.js";
@@ -76,6 +77,12 @@ export async function runRunCommand(values, _args) {
     );
   };
+  const systemPrompt = agentProfile
+    ? composeProfilePrompt(agentProfile, {
+        profilesDir: resolve(cwd, ".claude/agents"),
+      })
+    : undefined;
   const { query } = await import("@anthropic-ai/claude-agent-sdk");
   const runner = createAgentRunner({
     cwd,
@@ -86,7 +93,7 @@ export async function runRunCommand(values, _args) {
     allowedTools,
     onLine,
     settingSources: ["project"],
-    agentProfile,
+    systemPrompt,
   });
   const result = await runner.run(taskContent);

package/src/commands/trace.js CHANGED Viewed

@@ -10,9 +10,13 @@ import { createTraceGitHub } from "../trace-github.js";
  * List recent workflow runs matching a pattern.
  * @param {object} values - Parsed option values
  * @param {string[]} args - [pattern?]
+ * @param {{config: import("@forwardimpact/libconfig").Config}} ctx
  */
-export async function runRunsCommand(values, args) {
-  const gh = await createTraceGitHub({ repo: values.repo });
+export async function runRunsCommand(values, args, ctx) {
+  const gh = await createTraceGitHub({
+    token: ctx.config.ghToken(),
+    repo: values.repo,
+  });
   const pattern = args[0] ?? "agent";
   const lookback = values.lookback ?? "7d";
   const runs = await gh.listRuns({ pattern, lookback });
@@ -23,9 +27,13 @@ export async function runRunsCommand(values, args) {
  * Download a trace artifact and auto-convert to structured JSON.
  * @param {object} values - Parsed option values
  * @param {string[]} args - [run-id]
+ * @param {{config: import("@forwardimpact/libconfig").Config}} ctx
  */
-export async function runDownloadCommand(values, args) {
-  const gh = await createTraceGitHub({ repo: values.repo });
+export async function runDownloadCommand(values, args, ctx) {
+  const gh = await createTraceGitHub({
+    token: ctx.config.ghToken(),
+    repo: values.repo,
+  });
   const result = await gh.downloadTrace(args[0], {
     dir: values.dir,
     name: values.artifact,

package/src/facilitator.js CHANGED Viewed

@@ -7,7 +7,9 @@
  */
 import { Writable } from "node:stream";
+import { resolve } from "node:path";
 import { createAgentRunner } from "./agent-runner.js";
+import { composeProfilePrompt } from "./profile-prompt.js";
 import { SequenceCounter } from "./sequence-counter.js";
 import { createMessageBus } from "./message-bus.js";
 import {
@@ -415,7 +417,8 @@ const devNull = new Writable({
  * @param {import("stream").Writable} deps.output
  * @param {string} [deps.model]
  * @param {number} [deps.maxTurns]
- * @param {string} [deps.facilitatorProfile]
+ * @param {string} [deps.facilitatorProfile] - Facilitator profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
+ * @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<facilitatorCwd>/.claude/agents`. Resolved once from the facilitator's cwd so profiles travel with the project, not with per-agent sandboxes.
  * @returns {Facilitator}
  */
 export function createFacilitator({
@@ -426,7 +429,19 @@ export function createFacilitator({
   model,
   maxTurns,
   facilitatorProfile,
+  profilesDir,
 }) {
+  const resolvedProfilesDir =
+    profilesDir ?? resolve(facilitatorCwd, ".claude/agents");
+  const systemPromptFor = (profile, trailer) => {
+    if (!trailer) throw new Error("trailer is required");
+    return profile
+      ? composeProfilePrompt(profile, {
+          profilesDir: resolvedProfilesDir,
+          trailer,
+        })
+      : { type: "preset", preset: "claude_code", append: trailer };
+  };
   const ctx = createOrchestrationContext();
   const messageBus = createMessageBus({
     participants: ["facilitator", ...agentConfigs.map((a) => a.name)],
@@ -471,12 +486,10 @@ export function createFacilitator({
       onLine: (line) => facilitator.emitLine(config.name, line),
       mcpServers: { orchestration: agentServer },
       settingSources: ["project"],
-      agentProfile: config.agentProfile,
-      systemPrompt: {
-        type: "preset",
-        preset: "claude_code",
-        append: FACILITATED_AGENT_SYSTEM_PROMPT,
-      },
+      systemPrompt: systemPromptFor(
+        config.agentProfile,
+        FACILITATED_AGENT_SYSTEM_PROMPT,
+      ),
     });
     return { name: config.name, role: config.role, runner };
@@ -491,12 +504,10 @@ export function createFacilitator({
     onLine: (line) => facilitator.emitLine("facilitator", line),
     mcpServers: { orchestration: facilitatorServer },
     settingSources: ["project"],
-    agentProfile: facilitatorProfile,
-    systemPrompt: {
-      type: "preset",
-      preset: "claude_code",
-      append: FACILITATOR_SYSTEM_PROMPT,
-    },
+    systemPrompt: systemPromptFor(
+      facilitatorProfile,
+      FACILITATOR_SYSTEM_PROMPT,
+    ),
   });
   facilitator = new Facilitator({

package/src/index.js CHANGED Viewed

@@ -6,6 +6,7 @@ export {
   parseGitRemote,
 } from "./trace-github.js";
 export { AgentRunner, createAgentRunner } from "./agent-runner.js";
+export { composeProfilePrompt } from "./profile-prompt.js";
 export {
   Supervisor,
   createSupervisor,

package/src/profile-prompt.js ADDED Viewed

@@ -0,0 +1,41 @@
+/**
+ * Compose an SDK `systemPrompt` value from a `.claude/agents/<name>.md` file.
+ *
+ * Not a pure function: it synchronously reads the profile file from disk (and
+ * throws if the file cannot be read). It strips YAML frontmatter and returns
+ * the SDK-shaped `{ type: "preset", preset: "claude_code", append }` object
+ * with the profile body — plus an optional mode-specific trailer — in the
+ * `append` slot. Callers in libeval pass the result straight into an
+ * `AgentRunner`'s `systemPrompt` input so the profile reaches the main-thread
+ * system prompt without going through the SDK's top-level `agent` option.
+ */
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+/**
+ * Build the SDK `systemPrompt` preset object for a named agent profile.
+ *
+ * Synchronously reads `<profilesDir>/<name>.md`, removes any leading YAML
+ * frontmatter, trims the remainder, and places it in the `append` slot. A
+ * non-empty `trailer` follows the profile body after one blank line. An empty
+ * profile body is skipped so `append` never starts with stray blank lines.
+ *
+ * @param {string} name - Profile basename (no `.md` suffix)
+ * @param {object} opts
+ * @param {string} opts.profilesDir - Directory containing `<name>.md`
+ * @param {string} [opts.trailer] - Optional mode-specific trailer appended after a blank line
+ * @returns {{type: "preset", preset: "claude_code", append: string}}
+ * @throws {Error} Propagates the `readFileSync` error when the profile file cannot be read
+ */
+export function composeProfilePrompt(name, { profilesDir, trailer }) {
+  const profilePath = join(profilesDir, `${name}.md`);
+  const body = stripFrontmatter(readFileSync(profilePath, "utf8")).trim();
+  // Join only the non-empty parts: a frontmatter-only profile or a missing
+  // trailer must not leave dangling blank lines in the prompt.
+  const append = [body, trailer].filter(Boolean).join("\n\n");
+  return { type: "preset", preset: "claude_code", append };
+}
+/**
+ * Strip a leading YAML frontmatter block from a markdown string.
+ *
+ * The opening fence must be the very first line (`---`). The closing fence is
+ * the next line consisting solely of `---`, terminated by a newline or by the
+ * end of the input. Both LF and CRLF line endings are accepted, and an empty
+ * frontmatter block (`---\n---\n`) is handled. Returns the input unchanged
+ * when no opening fence or no closing fence is present.
+ * @param {string} raw
+ * @returns {string} The text following the closing fence, or `raw` as-is
+ */
+function stripFrontmatter(raw) {
+  const opening = /^---\r?\n/.exec(raw);
+  if (!opening) return raw;
+  // Fresh regex per call: the `g` flag makes `lastIndex` stateful. With `m`,
+  // `^` matches right after the opening fence's newline, so an immediately
+  // following closing fence (empty frontmatter) is found.
+  const closingFence = /^---(?:\r?\n|(?![\s\S]))/gm;
+  closingFence.lastIndex = opening[0].length;
+  const closing = closingFence.exec(raw);
+  if (!closing) return raw;
+  return raw.slice(closing.index + closing[0].length);
+}

package/src/supervisor.js CHANGED Viewed

@@ -11,7 +11,9 @@
  */
 import { Writable } from "node:stream";
+import { resolve } from "node:path";
 import { createAgentRunner } from "./agent-runner.js";
+import { composeProfilePrompt } from "./profile-prompt.js";
 import { TraceCollector } from "./trace-collector.js";
 import { SequenceCounter } from "./sequence-counter.js";
 import {
@@ -355,8 +357,9 @@ const devNull = new Writable({
  * @param {string[]} [deps.allowedTools]
  * @param {string[]} [deps.supervisorAllowedTools]
  * @param {string[]} [deps.supervisorDisallowedTools]
- * @param {string} [deps.supervisorProfile]
- * @param {string} [deps.agentProfile]
+ * @param {string} [deps.supervisorProfile] - Supervisor profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
+ * @param {string} [deps.agentProfile] - Agent profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
+ * @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<supervisorCwd>/.claude/agents`. Resolved once from the orchestrator's cwd so profiles travel with the project, not with a per-agent sandbox.
  * @returns {Supervisor}
  */
 export function createSupervisor({
@@ -371,7 +374,19 @@ export function createSupervisor({
   supervisorAllowedTools,
   supervisorProfile,
   agentProfile,
+  profilesDir,
 }) {
+  const resolvedProfilesDir =
+    profilesDir ?? resolve(supervisorCwd, ".claude/agents");
+  const systemPromptFor = (profile, trailer) => {
+    if (!trailer) throw new Error("trailer is required");
+    return profile
+      ? composeProfilePrompt(profile, {
+          profilesDir: resolvedProfilesDir,
+          trailer,
+        })
+      : { type: "preset", preset: "claude_code", append: trailer };
+  };
   let supervisor;
   let supervisorRunner;
@@ -402,12 +417,7 @@ export function createSupervisor({
     allowedTools,
     onLine,
     settingSources: ["project"],
-    agentProfile,
-    systemPrompt: {
-      type: "preset",
-      preset: "claude_code",
-      append: AGENT_SYSTEM_PROMPT,
-    },
+    systemPrompt: systemPromptFor(agentProfile, AGENT_SYSTEM_PROMPT),
     mcpServers: { orchestration: agentServer },
   });
@@ -433,12 +443,7 @@ export function createSupervisor({
     disallowedTools,
     onLine,
     settingSources: ["project"],
-    agentProfile: supervisorProfile,
-    systemPrompt: {
-      type: "preset",
-      preset: "claude_code",
-      append: SUPERVISOR_SYSTEM_PROMPT,
-    },
+    systemPrompt: systemPromptFor(supervisorProfile, SUPERVISOR_SYSTEM_PROMPT),
     mcpServers: { orchestration: supervisorServer },
   });

package/src/trace-github.js CHANGED Viewed

@@ -186,21 +186,30 @@ export function parseGitRemote(remote) {
 }
 /**
- * Create a TraceGitHub instance using libconfig for the token and
- * git remote for the repo.
+ * Create a TraceGitHub instance. The caller is responsible for resolving
+ * the GitHub token — typically via `Config.ghToken()` — so credential
+ * loading stays at the CLI entry point.
  *
- * @param {object} [opts]
+ * Breaking change from the prior signature: `token` is now a required
+ * caller input. Construct a `Config` via `@forwardimpact/libconfig` and
+ * pass `config.ghToken()`.
+ *
+ * @param {object} opts
+ * @param {string} opts.token - GitHub token (e.g. from `Config.ghToken()`)
  * @param {string} [opts.repo] - "owner/repo" override (default: detect from git remote)
  * @returns {Promise<TraceGitHub>}
  */
 export async function createTraceGitHub(opts = {}) {
-  const { createScriptConfig } = await import("@forwardimpact/libconfig");
-  const config = await createScriptConfig("eval");
-  const token = config.ghToken();
+  const { token, repo: repoOverride } = opts;
+  if (!token) {
+    throw new Error(
+      "createTraceGitHub: token is required (pass Config.ghToken())",
+    );
+  }
   let owner, repo;
-  if (opts.repo) {
-    ({ owner, repo } = parseGitRemote(opts.repo));
+  if (repoOverride) {
+    ({ owner, repo } = parseGitRemote(repoOverride));
   } else {
     const { execSync } = await import("node:child_process");
     const remote = execSync("git remote get-url origin", {