@forwardimpact/libeval 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "Process Claude Code stream-json output into structured traces",
5
5
  "license": "Apache-2.0",
6
6
  "author": "D. Olsson <hi@senzilla.io>",
@@ -16,6 +16,8 @@ export class AgentRunner {
16
16
  * @param {number} [deps.maxTurns] - Maximum agentic turns
17
17
  * @param {string[]} [deps.allowedTools] - Tools the agent may use
18
18
  * @param {string} [deps.permissionMode] - SDK permission mode
19
+ * @param {function} [deps.onLine] - Callback invoked with each NDJSON line as it's produced
20
+ * @param {string[]} [deps.settingSources] - SDK setting sources (e.g. ['project'] to load CLAUDE.md)
19
21
  */
20
22
  constructor({
21
23
  cwd,
@@ -25,6 +27,8 @@ export class AgentRunner {
25
27
  maxTurns,
26
28
  allowedTools,
27
29
  permissionMode,
30
+ onLine,
31
+ settingSources,
28
32
  }) {
29
33
  if (!cwd) throw new Error("cwd is required");
30
34
  if (!query) throw new Error("query is required");
@@ -43,6 +47,8 @@ export class AgentRunner {
43
47
  "Edit",
44
48
  ];
45
49
  this.permissionMode = permissionMode ?? "bypassPermissions";
50
+ this.onLine = onLine ?? null;
51
+ this.settingSources = settingSources ?? [];
46
52
  this.sessionId = null;
47
53
  this.buffer = [];
48
54
  }
@@ -67,11 +73,13 @@ export class AgentRunner {
67
73
  model: this.model,
68
74
  permissionMode: this.permissionMode,
69
75
  allowDangerouslySkipPermissions: true,
76
+ settingSources: this.settingSources,
70
77
  },
71
78
  })) {
72
79
  const line = JSON.stringify(message);
73
80
  this.output.write(line + "\n");
74
81
  this.buffer.push(line);
82
+ if (this.onLine) this.onLine(line);
75
83
 
76
84
  if (message.type === "system" && message.subtype === "init") {
77
85
  this.sessionId = message.session_id;
@@ -85,7 +93,10 @@ export class AgentRunner {
85
93
  error = err;
86
94
  }
87
95
 
88
- const success = !error && stopReason === "success";
96
+ // If the SDK already emitted a successful result, honour it even when the
97
+ // stream throws afterwards (e.g. "Credit balance is too low" during
98
+ // cleanup). Only treat errors as fatal when no result was received yet.
99
+ const success = stopReason === "success";
89
100
  return { success, text, sessionId: this.sessionId, error };
90
101
  }
91
102
 
@@ -107,6 +118,7 @@ export class AgentRunner {
107
118
  const line = JSON.stringify(message);
108
119
  this.output.write(line + "\n");
109
120
  this.buffer.push(line);
121
+ if (this.onLine) this.onLine(line);
110
122
 
111
123
  if (message.type === "result") {
112
124
  text = message.result ?? "";
@@ -117,7 +129,7 @@ export class AgentRunner {
117
129
  error = err;
118
130
  }
119
131
 
120
- const success = !error && stopReason === "success";
132
+ const success = stopReason === "success";
121
133
  return { success, text, error };
122
134
  }
123
135
 
@@ -62,6 +62,7 @@ export async function runRunCommand(args) {
62
62
  model,
63
63
  maxTurns,
64
64
  allowedTools,
65
+ settingSources: ["project"],
65
66
  });
66
67
 
67
68
  const result = await runner.run(taskContent);
@@ -1,5 +1,7 @@
1
1
  import { createWriteStream } from "fs";
2
- import { createTraceCollector } from "@forwardimpact/libeval";
2
+ import { PassThrough } from "node:stream";
3
+ import { pipeline } from "node:stream/promises";
4
+ import { createTeeWriter } from "../tee-writer.js";
3
5
 
4
6
  /**
5
7
  * Tee command — stream text output to stdout while optionally saving the raw
@@ -12,46 +14,18 @@ import { createTraceCollector } from "@forwardimpact/libeval";
12
14
  export async function runTeeCommand(args) {
13
15
  const outputPath = args.find((a) => !a.startsWith("-")) ?? null;
14
16
  const fileStream = outputPath ? createWriteStream(outputPath) : null;
15
- const collector = createTraceCollector();
16
- const turnsEmitted = { count: 0 };
17
17
 
18
- try {
19
- let buffer = "";
20
-
21
- for await (const chunk of process.stdin) {
22
- buffer += chunk.toString("utf8");
23
-
24
- let newlineIdx;
25
- while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
26
- const line = buffer.slice(0, newlineIdx);
27
- buffer = buffer.slice(newlineIdx + 1);
28
-
29
- if (fileStream) {
30
- fileStream.write(line + "\n");
31
- }
32
-
33
- collector.addLine(line);
34
- flushNewTurns(collector, turnsEmitted);
35
- }
36
- }
18
+ // TeeWriter requires a fileStream; when no output file is specified,
19
+ // use a PassThrough as a no-op sink (NDJSON is not saved).
20
+ const sink = fileStream ?? new PassThrough();
21
+ const tee = createTeeWriter({
22
+ fileStream: sink,
23
+ textStream: process.stdout,
24
+ mode: "raw",
25
+ });
37
26
 
38
- // Process any remaining data without a trailing newline
39
- if (buffer.trim()) {
40
- if (fileStream) {
41
- fileStream.write(buffer + "\n");
42
- }
43
- collector.addLine(buffer);
44
- flushNewTurns(collector, turnsEmitted);
45
- }
46
-
47
- // Emit the result summary at the end
48
- if (collector.result) {
49
- const text = collector.toText();
50
- const lastNewline = text.lastIndexOf("\n---");
51
- if (lastNewline !== -1) {
52
- process.stdout.write(text.slice(lastNewline) + "\n");
53
- }
54
- }
27
+ try {
28
+ await pipeline(process.stdin, tee);
55
29
  } finally {
56
30
  if (fileStream) {
57
31
  await new Promise((resolve, reject) => {
@@ -61,39 +35,3 @@ export async function runTeeCommand(args) {
61
35
  }
62
36
  }
63
37
  }
64
-
65
- /**
66
- * Write text for any new turns that haven't been emitted yet.
67
- * @param {import("@forwardimpact/libeval").TraceCollector} collector
68
- * @param {{ count: number }} turnsEmitted
69
- */
70
- function flushNewTurns(collector, turnsEmitted) {
71
- const turns = collector.turns;
72
- while (turnsEmitted.count < turns.length) {
73
- const turn = turns[turnsEmitted.count];
74
- turnsEmitted.count++;
75
-
76
- if (turn.role === "assistant") {
77
- for (const block of turn.content) {
78
- if (block.type === "text") {
79
- process.stdout.write(block.text + "\n");
80
- } else if (block.type === "tool_use") {
81
- const inputSummary = summarizeInput(block.input);
82
- process.stdout.write(`> Tool: ${block.name} ${inputSummary}\n`);
83
- }
84
- }
85
- }
86
- }
87
- }
88
-
89
- /**
90
- * Summarize tool input for text display, truncated to keep logs readable.
91
- * @param {object} input - Tool input object
92
- * @returns {string} Truncated summary
93
- */
94
- function summarizeInput(input) {
95
- if (!input || typeof input !== "object") return "";
96
- const json = JSON.stringify(input);
97
- if (json.length <= 200) return json;
98
- return json.slice(0, 197) + "...";
99
- }
package/src/supervisor.js CHANGED
@@ -36,6 +36,10 @@ export class Supervisor {
36
36
  this.supervisorRunner = supervisorRunner;
37
37
  this.output = output;
38
38
  this.maxTurns = maxTurns ?? 20;
39
+ /** @type {"agent"|"supervisor"} */
40
+ this.currentSource = "agent";
41
+ /** @type {number} */
42
+ this.currentTurn = 0;
39
43
  }
40
44
 
41
45
  /**
@@ -45,8 +49,9 @@ export class Supervisor {
45
49
  */
46
50
  async run(task) {
47
51
  // Turn 0: Agent receives the task and starts working
52
+ this.currentSource = "agent";
53
+ this.currentTurn = 0;
48
54
  let agentResult = await this.agentRunner.run(task);
49
- this.emitTagged("agent", 0);
50
55
 
51
56
  if (agentResult.error) {
52
57
  this.emitSummary({ success: false, turns: 0 });
@@ -59,13 +64,14 @@ export class Supervisor {
59
64
  `The agent reported:\n\n${agentResult.text}\n\n` +
60
65
  `Decide: provide guidance, answer a question, or say EVALUATION_COMPLETE on its own line.`;
61
66
 
67
+ this.currentSource = "supervisor";
68
+ this.currentTurn = turn;
62
69
  let supervisorResult;
63
70
  if (turn === 1) {
64
71
  supervisorResult = await this.supervisorRunner.run(supervisorPrompt);
65
72
  } else {
66
73
  supervisorResult = await this.supervisorRunner.resume(supervisorPrompt);
67
74
  }
68
- this.emitTagged("supervisor", turn);
69
75
 
70
76
  if (supervisorResult.error) {
71
77
  this.emitSummary({ success: false, turns: turn });
@@ -78,8 +84,9 @@ export class Supervisor {
78
84
  }
79
85
 
80
86
  // Supervisor's response becomes the agent's next input
87
+ this.currentSource = "agent";
88
+ this.currentTurn = turn;
81
89
  agentResult = await this.agentRunner.resume(supervisorResult.text);
82
- this.emitTagged("agent", turn);
83
90
 
84
91
  if (agentResult.error) {
85
92
  this.emitSummary({ success: false, turns: turn });
@@ -92,19 +99,18 @@ export class Supervisor {
92
99
  }
93
100
 
94
101
  /**
95
- * Drain a runner's buffered output and re-emit each line tagged with
96
- * source and turn metadata.
97
- * @param {"agent"|"supervisor"} source
98
- * @param {number} turn
102
+ * Emit a single NDJSON line tagged with the current source and turn.
103
+ * Called in real-time via the AgentRunner onLine callback.
104
+ * @param {string} line - Raw NDJSON line from the runner
99
105
  */
100
- emitTagged(source, turn) {
101
- const runner =
102
- source === "agent" ? this.agentRunner : this.supervisorRunner;
103
- for (const line of runner.drainOutput()) {
104
- const event = JSON.parse(line);
105
- const tagged = { source, turn, event };
106
- this.output.write(JSON.stringify(tagged) + "\n");
107
- }
106
+ emitLine(line) {
107
+ const event = JSON.parse(line);
108
+ const tagged = {
109
+ source: this.currentSource,
110
+ turn: this.currentTurn,
111
+ event,
112
+ };
113
+ this.output.write(JSON.stringify(tagged) + "\n");
108
114
  }
109
115
 
110
116
  /**
@@ -143,6 +149,11 @@ export function createSupervisor({
143
149
  maxTurns,
144
150
  allowedTools,
145
151
  }) {
152
+ // Forward-reference: onLine captures `supervisor` before construction completes.
153
+ // This is safe because onLine is only called during run(), after construction.
154
+ let supervisor;
155
+ const onLine = (line) => supervisor.emitLine(line);
156
+
146
157
  const agentRunner = createAgentRunner({
147
158
  cwd: agentCwd,
148
159
  query,
@@ -150,6 +161,8 @@ export function createSupervisor({
150
161
  model,
151
162
  maxTurns: 50,
152
163
  allowedTools,
164
+ onLine,
165
+ settingSources: ["project"],
153
166
  });
154
167
 
155
168
  const supervisorRunner = createAgentRunner({
@@ -159,7 +172,15 @@ export function createSupervisor({
159
172
  model,
160
173
  maxTurns: 10,
161
174
  allowedTools: ["Read", "Glob", "Grep"],
175
+ onLine,
176
+ settingSources: ["project"],
162
177
  });
163
178
 
164
- return new Supervisor({ agentRunner, supervisorRunner, output, maxTurns });
179
+ supervisor = new Supervisor({
180
+ agentRunner,
181
+ supervisorRunner,
182
+ output,
183
+ maxTurns,
184
+ });
185
+ return supervisor;
165
186
  }
@@ -81,6 +81,7 @@ describe("AgentRunner", () => {
81
81
  "Edit",
82
82
  ]);
83
83
  assert.strictEqual(runner.permissionMode, "bypassPermissions");
84
+ assert.deepStrictEqual(runner.settingSources, []);
84
85
  assert.strictEqual(runner.sessionId, null);
85
86
  });
86
87
 
@@ -145,6 +146,7 @@ describe("AgentRunner", () => {
145
146
  maxTurns: 10,
146
147
  allowedTools: ["Read", "Grep"],
147
148
  permissionMode: "plan",
149
+ settingSources: ["project"],
148
150
  });
149
151
 
150
152
  await runner.run("My task");
@@ -156,6 +158,7 @@ describe("AgentRunner", () => {
156
158
  assert.deepStrictEqual(captured.options.allowedTools, ["Read", "Grep"]);
157
159
  assert.strictEqual(captured.options.permissionMode, "plan");
158
160
  assert.strictEqual(captured.options.allowDangerouslySkipPermissions, true);
161
+ assert.deepStrictEqual(captured.options.settingSources, ["project"]);
159
162
  });
160
163
 
161
164
  test("run() returns success=false on non-success subtype", async () => {
@@ -281,6 +284,28 @@ describe("AgentRunner", () => {
281
284
  assert.match(result.error.message, /Process crashed/);
282
285
  });
283
286
 
287
+ test("run() succeeds when SDK throws after emitting successful result", async () => {
288
+ async function* creditExhaustedQuery() {
289
+ yield { type: "system", subtype: "init", session_id: "sess-credit" };
290
+ yield { type: "assistant", content: "Analysis complete." };
291
+ yield { type: "result", subtype: "success", result: "Done." };
292
+ throw new Error("Credit balance is too low");
293
+ }
294
+
295
+ const output = new PassThrough();
296
+ const runner = new AgentRunner({
297
+ cwd: "/tmp",
298
+ query: () => creditExhaustedQuery(),
299
+ output,
300
+ });
301
+
302
+ const result = await runner.run("Task");
303
+ assert.strictEqual(result.success, true);
304
+ assert.strictEqual(result.text, "Done.");
305
+ assert.ok(result.error);
306
+ assert.match(result.error.message, /Credit balance/);
307
+ });
308
+
284
309
  test("createAgentRunner factory returns an AgentRunner instance", () => {
285
310
  const runner = createAgentRunner({
286
311
  cwd: "/tmp",
@@ -29,12 +29,13 @@ function createMockRunner(responses, messages) {
29
29
  // Override run and resume to return scripted responses
30
30
  runner.run = async (_task) => {
31
31
  const resp = responses[callIndex++];
32
- // Buffer messages for drainOutput
33
32
  const msgs = messages?.[callIndex - 1] ?? [
34
33
  { type: "assistant", content: resp.text },
35
34
  ];
36
35
  for (const m of msgs) {
37
- runner.buffer.push(JSON.stringify(m));
36
+ const line = JSON.stringify(m);
37
+ runner.buffer.push(line);
38
+ if (runner.onLine) runner.onLine(line);
38
39
  }
39
40
  runner.sessionId = "mock-session";
40
41
  return {
@@ -50,7 +51,9 @@ function createMockRunner(responses, messages) {
50
51
  { type: "assistant", content: resp.text },
51
52
  ];
52
53
  for (const m of msgs) {
53
- runner.buffer.push(JSON.stringify(m));
54
+ const line = JSON.stringify(m);
55
+ runner.buffer.push(line);
56
+ if (runner.onLine) runner.onLine(line);
54
57
  }
55
58
  return { success: resp.success ?? true, text: resp.text };
56
59
  };
@@ -211,6 +214,8 @@ describe("Supervisor", () => {
211
214
  output,
212
215
  maxTurns: 10,
213
216
  });
217
+ agentRunner.onLine = (line) => supervisor.emitLine(line);
218
+ supervisorRunner.onLine = (line) => supervisor.emitLine(line);
214
219
 
215
220
  await supervisor.run("Task");
216
221
 
@@ -258,6 +263,8 @@ describe("Supervisor", () => {
258
263
  output,
259
264
  maxTurns: 10,
260
265
  });
266
+ agentRunner.onLine = (line) => supervisor.emitLine(line);
267
+ supervisorRunner.onLine = (line) => supervisor.emitLine(line);
261
268
 
262
269
  await supervisor.run("Task");
263
270
 
@@ -273,7 +280,7 @@ describe("Supervisor", () => {
273
280
  assert.strictEqual(tagged.event.source, "sdk-internal");
274
281
  });
275
282
 
276
- test("drains agent output and emits summary when agent errors on turn 0", async () => {
283
+ test("emits agent output and summary when agent errors on turn 0", async () => {
277
284
  const agentMessages = [[{ type: "assistant", content: "Partial work" }]];
278
285
  const agentRunner = createMockRunner(
279
286
  [{ text: "Partial work", success: false }],
@@ -296,6 +303,8 @@ describe("Supervisor", () => {
296
303
  output,
297
304
  maxTurns: 10,
298
305
  });
306
+ agentRunner.onLine = (line) => supervisor.emitLine(line);
307
+ supervisorRunner.onLine = (line) => supervisor.emitLine(line);
299
308
 
300
309
  const result = await supervisor.run("Task");
301
310