npm - @forwardimpact/libeval - Versions diffs - 0.1.1 → 0.1.3 - Mend

@forwardimpact/libeval 0.1.1 → 0.1.3

This diff shows the changes between publicly available versions of the package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (8)

package/bin/fit-eval.js +1 -1
package/package.json +1 -1
package/src/agent-runner.js +14 -2
package/src/commands/run.js +1 -0
package/src/commands/tee.js +13 -75
package/src/supervisor.js +37 -16
package/test/agent-runner.test.js +25 -0
package/test/supervisor.test.js +13 -4

package/bin/fit-eval.js CHANGED Viewed

@@ -1,4 +1,4 @@
-#!/usr/bin/env bun
+#!/usr/bin/env node
 import { runOutputCommand } from "../src/commands/output.js";
 import { runTeeCommand } from "../src/commands/tee.js";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@forwardimpact/libeval",
-  "version": "0.1.1",
+  "version": "0.1.3",
   "description": "Process Claude Code stream-json output into structured traces",
   "license": "Apache-2.0",
   "author": "D. Olsson <hi@senzilla.io>",

package/src/agent-runner.js CHANGED Viewed

@@ -16,6 +16,8 @@ export class AgentRunner {
    * @param {number} [deps.maxTurns] - Maximum agentic turns
    * @param {string[]} [deps.allowedTools] - Tools the agent may use
    * @param {string} [deps.permissionMode] - SDK permission mode
+   * @param {function} [deps.onLine] - Callback invoked with each NDJSON line as it's produced
+   * @param {string[]} [deps.settingSources] - SDK setting sources (e.g. ['project'] to load CLAUDE.md)
    */
   constructor({
     cwd,
@@ -25,6 +27,8 @@ export class AgentRunner {
     maxTurns,
     allowedTools,
     permissionMode,
+    onLine,
+    settingSources,
   }) {
     if (!cwd) throw new Error("cwd is required");
     if (!query) throw new Error("query is required");
@@ -43,6 +47,8 @@ export class AgentRunner {
       "Edit",
     ];
     this.permissionMode = permissionMode ?? "bypassPermissions";
+    this.onLine = onLine ?? null;
+    this.settingSources = settingSources ?? [];
     this.sessionId = null;
     this.buffer = [];
   }
@@ -67,11 +73,13 @@ export class AgentRunner {
           model: this.model,
           permissionMode: this.permissionMode,
           allowDangerouslySkipPermissions: true,
+          settingSources: this.settingSources,
         },
       })) {
         const line = JSON.stringify(message);
         this.output.write(line + "\n");
         this.buffer.push(line);
+        if (this.onLine) this.onLine(line);
         if (message.type === "system" && message.subtype === "init") {
           this.sessionId = message.session_id;
@@ -85,7 +93,10 @@ export class AgentRunner {
       error = err;
     }
-    const success = !error && stopReason === "success";
+    // If the SDK already emitted a successful result, honour it even when the
+    // stream throws afterwards (e.g. "Credit balance is too low" during
+    // cleanup). Only treat errors as fatal when no result was received yet.
+    const success = stopReason === "success";
     return { success, text, sessionId: this.sessionId, error };
   }
@@ -107,6 +118,7 @@ export class AgentRunner {
         const line = JSON.stringify(message);
         this.output.write(line + "\n");
         this.buffer.push(line);
+        if (this.onLine) this.onLine(line);
         if (message.type === "result") {
           text = message.result ?? "";
@@ -117,7 +129,7 @@ export class AgentRunner {
       error = err;
     }
-    const success = !error && stopReason === "success";
+    const success = stopReason === "success";
     return { success, text, error };
   }

package/src/commands/run.js CHANGED Viewed

@@ -62,6 +62,7 @@ export async function runRunCommand(args) {
     model,
     maxTurns,
     allowedTools,
+    settingSources: ["project"],
   });
   const result = await runner.run(taskContent);

package/src/commands/tee.js CHANGED Viewed

@@ -1,5 +1,7 @@
 import { createWriteStream } from "fs";
-import { createTraceCollector } from "@forwardimpact/libeval";
+import { PassThrough } from "node:stream";
+import { pipeline } from "node:stream/promises";
+import { createTeeWriter } from "../tee-writer.js";
 /**
  * Tee command — stream text output to stdout while optionally saving the raw
@@ -12,46 +14,18 @@ import { createTraceCollector } from "@forwardimpact/libeval";
 export async function runTeeCommand(args) {
   const outputPath = args.find((a) => !a.startsWith("-")) ?? null;
   const fileStream = outputPath ? createWriteStream(outputPath) : null;
-  const collector = createTraceCollector();
-  const turnsEmitted = { count: 0 };
-  try {
-    let buffer = "";
-    for await (const chunk of process.stdin) {
-      buffer += chunk.toString("utf8");
-      let newlineIdx;
-      while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
-        const line = buffer.slice(0, newlineIdx);
-        buffer = buffer.slice(newlineIdx + 1);
-        if (fileStream) {
-          fileStream.write(line + "\n");
-        }
-        collector.addLine(line);
-        flushNewTurns(collector, turnsEmitted);
-      }
-    }
+  // TeeWriter requires a fileStream; when no output file is specified,
+  // use a PassThrough as a no-op sink (NDJSON is not saved).
+  const sink = fileStream ?? new PassThrough();
+  const tee = createTeeWriter({
+    fileStream: sink,
+    textStream: process.stdout,
+    mode: "raw",
+  });
-    // Process any remaining data without a trailing newline
-    if (buffer.trim()) {
-      if (fileStream) {
-        fileStream.write(buffer + "\n");
-      }
-      collector.addLine(buffer);
-      flushNewTurns(collector, turnsEmitted);
-    }
-    // Emit the result summary at the end
-    if (collector.result) {
-      const text = collector.toText();
-      const lastNewline = text.lastIndexOf("\n---");
-      if (lastNewline !== -1) {
-        process.stdout.write(text.slice(lastNewline) + "\n");
-      }
-    }
+  try {
+    await pipeline(process.stdin, tee);
   } finally {
     if (fileStream) {
       await new Promise((resolve, reject) => {
@@ -61,39 +35,3 @@ export async function runTeeCommand(args) {
     }
   }
 }
-/**
- * Write text for any new turns that haven't been emitted yet.
- * @param {import("@forwardimpact/libeval").TraceCollector} collector
- * @param {{ count: number }} turnsEmitted
- */
-function flushNewTurns(collector, turnsEmitted) {
-  const turns = collector.turns;
-  while (turnsEmitted.count < turns.length) {
-    const turn = turns[turnsEmitted.count];
-    turnsEmitted.count++;
-    if (turn.role === "assistant") {
-      for (const block of turn.content) {
-        if (block.type === "text") {
-          process.stdout.write(block.text + "\n");
-        } else if (block.type === "tool_use") {
-          const inputSummary = summarizeInput(block.input);
-          process.stdout.write(`> Tool: ${block.name} ${inputSummary}\n`);
-        }
-      }
-    }
-  }
-}
-/**
- * Summarize tool input for text display, truncated to keep logs readable.
- * @param {object} input - Tool input object
- * @returns {string} Truncated summary
- */
-function summarizeInput(input) {
-  if (!input || typeof input !== "object") return "";
-  const json = JSON.stringify(input);
-  if (json.length <= 200) return json;
-  return json.slice(0, 197) + "...";
-}

package/src/supervisor.js CHANGED Viewed

@@ -36,6 +36,10 @@ export class Supervisor {
     this.supervisorRunner = supervisorRunner;
     this.output = output;
     this.maxTurns = maxTurns ?? 20;
+    /** @type {"agent"|"supervisor"} */
+    this.currentSource = "agent";
+    /** @type {number} */
+    this.currentTurn = 0;
   }
   /**
@@ -45,8 +49,9 @@ export class Supervisor {
    */
   async run(task) {
     // Turn 0: Agent receives the task and starts working
+    this.currentSource = "agent";
+    this.currentTurn = 0;
     let agentResult = await this.agentRunner.run(task);
-    this.emitTagged("agent", 0);
     if (agentResult.error) {
       this.emitSummary({ success: false, turns: 0 });
@@ -59,13 +64,14 @@ export class Supervisor {
         `The agent reported:\n\n${agentResult.text}\n\n` +
         `Decide: provide guidance, answer a question, or say EVALUATION_COMPLETE on its own line.`;
+      this.currentSource = "supervisor";
+      this.currentTurn = turn;
       let supervisorResult;
       if (turn === 1) {
         supervisorResult = await this.supervisorRunner.run(supervisorPrompt);
       } else {
         supervisorResult = await this.supervisorRunner.resume(supervisorPrompt);
       }
-      this.emitTagged("supervisor", turn);
       if (supervisorResult.error) {
         this.emitSummary({ success: false, turns: turn });
@@ -78,8 +84,9 @@ export class Supervisor {
       }
       // Supervisor's response becomes the agent's next input
+      this.currentSource = "agent";
+      this.currentTurn = turn;
       agentResult = await this.agentRunner.resume(supervisorResult.text);
-      this.emitTagged("agent", turn);
       if (agentResult.error) {
         this.emitSummary({ success: false, turns: turn });
@@ -92,19 +99,18 @@ export class Supervisor {
   }
   /**
-   * Drain a runner's buffered output and re-emit each line tagged with
-   * source and turn metadata.
-   * @param {"agent"|"supervisor"} source
-   * @param {number} turn
+   * Emit a single NDJSON line tagged with the current source and turn.
+   * Called in real-time via the AgentRunner onLine callback.
+   * @param {string} line - Raw NDJSON line from the runner
    */
-  emitTagged(source, turn) {
-    const runner =
-      source === "agent" ? this.agentRunner : this.supervisorRunner;
-    for (const line of runner.drainOutput()) {
-      const event = JSON.parse(line);
-      const tagged = { source, turn, event };
-      this.output.write(JSON.stringify(tagged) + "\n");
-    }
+  emitLine(line) {
+    const event = JSON.parse(line);
+    const tagged = {
+      source: this.currentSource,
+      turn: this.currentTurn,
+      event,
+    };
+    this.output.write(JSON.stringify(tagged) + "\n");
   }
   /**
@@ -143,6 +149,11 @@ export function createSupervisor({
   maxTurns,
   allowedTools,
 }) {
+  // Forward-reference: onLine captures `supervisor` before construction completes.
+  // This is safe because onLine is only called during run(), after construction.
+  let supervisor;
+  const onLine = (line) => supervisor.emitLine(line);
   const agentRunner = createAgentRunner({
     cwd: agentCwd,
     query,
@@ -150,6 +161,8 @@ export function createSupervisor({
     model,
     maxTurns: 50,
     allowedTools,
+    onLine,
+    settingSources: ["project"],
   });
   const supervisorRunner = createAgentRunner({
@@ -159,7 +172,15 @@ export function createSupervisor({
     model,
     maxTurns: 10,
     allowedTools: ["Read", "Glob", "Grep"],
+    onLine,
+    settingSources: ["project"],
   });
-  return new Supervisor({ agentRunner, supervisorRunner, output, maxTurns });
+  supervisor = new Supervisor({
+    agentRunner,
+    supervisorRunner,
+    output,
+    maxTurns,
+  });
+  return supervisor;
 }

package/test/agent-runner.test.js CHANGED Viewed

@@ -81,6 +81,7 @@ describe("AgentRunner", () => {
       "Edit",
     ]);
     assert.strictEqual(runner.permissionMode, "bypassPermissions");
+    assert.deepStrictEqual(runner.settingSources, []);
     assert.strictEqual(runner.sessionId, null);
   });
@@ -145,6 +146,7 @@ describe("AgentRunner", () => {
       maxTurns: 10,
       allowedTools: ["Read", "Grep"],
       permissionMode: "plan",
+      settingSources: ["project"],
     });
     await runner.run("My task");
@@ -156,6 +158,7 @@ describe("AgentRunner", () => {
     assert.deepStrictEqual(captured.options.allowedTools, ["Read", "Grep"]);
     assert.strictEqual(captured.options.permissionMode, "plan");
     assert.strictEqual(captured.options.allowDangerouslySkipPermissions, true);
+    assert.deepStrictEqual(captured.options.settingSources, ["project"]);
   });
   test("run() returns success=false on non-success subtype", async () => {
@@ -281,6 +284,28 @@ describe("AgentRunner", () => {
     assert.match(result.error.message, /Process crashed/);
   });
+  test("run() succeeds when SDK throws after emitting successful result", async () => {
+    async function* creditExhaustedQuery() {
+      yield { type: "system", subtype: "init", session_id: "sess-credit" };
+      yield { type: "assistant", content: "Analysis complete." };
+      yield { type: "result", subtype: "success", result: "Done." };
+      throw new Error("Credit balance is too low");
+    }
+    const output = new PassThrough();
+    const runner = new AgentRunner({
+      cwd: "/tmp",
+      query: () => creditExhaustedQuery(),
+      output,
+    });
+    const result = await runner.run("Task");
+    assert.strictEqual(result.success, true);
+    assert.strictEqual(result.text, "Done.");
+    assert.ok(result.error);
+    assert.match(result.error.message, /Credit balance/);
+  });
   test("createAgentRunner factory returns an AgentRunner instance", () => {
     const runner = createAgentRunner({
       cwd: "/tmp",

package/test/supervisor.test.js CHANGED Viewed

@@ -29,12 +29,13 @@ function createMockRunner(responses, messages) {
   // Override run and resume to return scripted responses
   runner.run = async (_task) => {
     const resp = responses[callIndex++];
-    // Buffer messages for drainOutput
     const msgs = messages?.[callIndex - 1] ?? [
       { type: "assistant", content: resp.text },
     ];
     for (const m of msgs) {
-      runner.buffer.push(JSON.stringify(m));
+      const line = JSON.stringify(m);
+      runner.buffer.push(line);
+      if (runner.onLine) runner.onLine(line);
     }
     runner.sessionId = "mock-session";
     return {
@@ -50,7 +51,9 @@ function createMockRunner(responses, messages) {
       { type: "assistant", content: resp.text },
     ];
     for (const m of msgs) {
-      runner.buffer.push(JSON.stringify(m));
+      const line = JSON.stringify(m);
+      runner.buffer.push(line);
+      if (runner.onLine) runner.onLine(line);
     }
     return { success: resp.success ?? true, text: resp.text };
   };
@@ -211,6 +214,8 @@ describe("Supervisor", () => {
       output,
       maxTurns: 10,
     });
+    agentRunner.onLine = (line) => supervisor.emitLine(line);
+    supervisorRunner.onLine = (line) => supervisor.emitLine(line);
     await supervisor.run("Task");
@@ -258,6 +263,8 @@ describe("Supervisor", () => {
       output,
       maxTurns: 10,
     });
+    agentRunner.onLine = (line) => supervisor.emitLine(line);
+    supervisorRunner.onLine = (line) => supervisor.emitLine(line);
     await supervisor.run("Task");
@@ -273,7 +280,7 @@ describe("Supervisor", () => {
     assert.strictEqual(tagged.event.source, "sdk-internal");
   });
-  test("drains agent output and emits summary when agent errors on turn 0", async () => {
+  test("emits agent output and summary when agent errors on turn 0", async () => {
     const agentMessages = [[{ type: "assistant", content: "Partial work" }]];
     const agentRunner = createMockRunner(
       [{ text: "Partial work", success: false }],
@@ -296,6 +303,8 @@ describe("Supervisor", () => {
       output,
       maxTurns: 10,
     });
+    agentRunner.onLine = (line) => supervisor.emitLine(line);
+    supervisorRunner.onLine = (line) => supervisor.emitLine(line);
     const result = await supervisor.run("Task");