@forwardimpact/libeval 0.1.9 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-eval.js +103 -72
- package/package.json +14 -3
- package/src/commands/output.js +7 -26
- package/src/commands/run.js +16 -41
- package/src/commands/supervise.js +19 -47
- package/src/commands/tee.js +3 -2
- package/src/index.js +11 -0
- package/index.js +0 -11
- package/test/agent-runner-batching.test.js +0 -271
- package/test/agent-runner.test.js +0 -317
- package/test/fixtures/stream.ndjson +0 -7
- package/test/mock-runner.js +0 -113
- package/test/supervisor-batching.test.js +0 -175
- package/test/supervisor-intervention.test.js +0 -365
- package/test/supervisor-output.test.js +0 -369
- package/test/supervisor-run.test.js +0 -310
- package/test/tee-writer.test.js +0 -324
- package/test/trace-collector.test.js +0 -424
package/test/tee-writer.test.js
DELETED
|
@@ -1,324 +0,0 @@
|
|
|
1
|
-
import { describe, test } from "node:test";
|
|
2
|
-
import assert from "node:assert";
|
|
3
|
-
import { PassThrough } from "node:stream";
|
|
4
|
-
|
|
5
|
-
import { TeeWriter, createTeeWriter } from "@forwardimpact/libeval";
|
|
6
|
-
|
|
7
|
-
/**
 * Drain whatever is currently buffered in a PassThrough stream and return it
 * as text.
 * @param {PassThrough} stream - Stream to read the buffered data from
 * @returns {string} Buffered content, or an empty string when nothing is buffered
 */
function collect(stream) {
  const buffered = stream.read();
  // read() yields null when the internal buffer is empty.
  if (!buffered) {
    return "";
  }
  return buffered.toString();
}
|
|
16
|
-
|
|
17
|
-
/**
 * Write lines to a TeeWriter and wait for it to finish.
 *
 * Every entry is written with a trailing newline, then the writer is ended.
 * If the writer reports an error through the end callback, the returned
 * promise rejects with it rather than resolving as if the write succeeded.
 * @param {TeeWriter} writer - Writable stream receiving the lines
 * @param {string[]} lines - JSON lines to write
 * @returns {Promise<void>} Settles once the writer has finished or failed
 */
async function writeLines(writer, lines) {
  for (const line of lines) {
    writer.write(`${line}\n`);
  }
  await new Promise((resolve, reject) => {
    // The end callback receives an error when the stream failed; passing
    // `resolve` directly would swallow it.
    writer.end((err) => {
      if (err) {
        reject(err);
      } else {
        resolve();
      }
    });
  });
}
|
|
28
|
-
|
|
29
|
-
// Test suite for TeeWriter. Each test feeds NDJSON lines into a writer backed
// by two PassThrough streams and inspects what ends up on the file side
// (raw NDJSON) and on the text side (human-readable rendering).
describe("TeeWriter", () => {
  test("constructor throws on missing fileStream", () => {
    assert.throws(
      () => new TeeWriter({ textStream: new PassThrough() }),
      /fileStream is required/,
    );
  });

  test("constructor throws on missing textStream", () => {
    assert.throws(
      () => new TeeWriter({ fileStream: new PassThrough() }),
      /textStream is required/,
    );
  });

  test("writes NDJSON to fileStream and text to textStream in raw mode", async () => {
    const fileStream = new PassThrough();
    const textStream = new PassThrough();
    const writer = new TeeWriter({ fileStream, textStream, mode: "raw" });

    const events = [
      JSON.stringify({
        type: "system",
        subtype: "init",
        session_id: "s1",
        model: "opus",
      }),
      JSON.stringify({
        type: "assistant",
        message: {
          content: [{ type: "text", text: "Hello world" }],
          usage: { input_tokens: 10, output_tokens: 5 },
        },
      }),
      JSON.stringify({
        type: "assistant",
        message: {
          content: [
            {
              type: "tool_use",
              id: "t1",
              name: "Bash",
              input: { command: "ls" },
            },
          ],
          usage: { input_tokens: 20, output_tokens: 10 },
        },
      }),
      JSON.stringify({
        type: "result",
        subtype: "success",
        duration_ms: 5000,
        num_turns: 2,
        total_cost_usd: 0.05,
        usage: { input_tokens: 30, output_tokens: 15 },
      }),
    ];

    await writeLines(writer, events);

    const fileData = collect(fileStream);
    const textData = collect(textStream);

    // File should contain all NDJSON lines
    const fileLines = fileData.trim().split("\n");
    assert.strictEqual(fileLines.length, 4);
    assert.strictEqual(JSON.parse(fileLines[0]).type, "system");
    assert.strictEqual(JSON.parse(fileLines[3]).type, "result");

    // Text should contain human-readable output
    assert.ok(textData.includes("Hello world"));
    assert.ok(textData.includes("> Tool: Bash"));
    assert.ok(textData.includes("--- Result: success"));
  });

  test("streams text incrementally as events arrive", async () => {
    const fileStream = new PassThrough();
    const textStream = new PassThrough();
    const writer = new TeeWriter({ fileStream, textStream, mode: "raw" });

    // Write first assistant message
    writer.write(
      JSON.stringify({
        type: "assistant",
        message: {
          content: [{ type: "text", text: "First message" }],
          usage: { input_tokens: 10, output_tokens: 5 },
        },
      }) + "\n",
    );

    // Text should be available before stream ends
    const firstText = collect(textStream);
    assert.ok(firstText.includes("First message"));

    writer.write(
      JSON.stringify({
        type: "assistant",
        message: {
          content: [{ type: "text", text: "Second message" }],
          usage: { input_tokens: 20, output_tokens: 10 },
        },
      }) + "\n",
    );

    const secondText = collect(textStream);
    assert.ok(secondText.includes("Second message"));

    await new Promise((resolve) => writer.end(resolve));
  });

  test("supervised mode shows source labels and unwraps events", async () => {
    const fileStream = new PassThrough();
    const textStream = new PassThrough();
    const writer = new TeeWriter({
      fileStream,
      textStream,
      mode: "supervised",
    });

    const events = [
      JSON.stringify({
        source: "agent",
        turn: 0,
        event: {
          type: "assistant",
          message: {
            content: [{ type: "text", text: "Working on it" }],
            usage: { input_tokens: 10, output_tokens: 5 },
          },
        },
      }),
      JSON.stringify({
        source: "supervisor",
        turn: 1,
        event: {
          type: "assistant",
          message: {
            content: [{ type: "text", text: "Looks good" }],
            usage: { input_tokens: 20, output_tokens: 10 },
          },
        },
      }),
      JSON.stringify({
        source: "orchestrator",
        type: "summary",
        success: true,
        turns: 1,
      }),
    ];

    await writeLines(writer, events);

    const fileData = collect(fileStream);
    const textData = collect(textStream);

    // File should contain all raw tagged NDJSON
    const fileLines = fileData.trim().split("\n");
    assert.strictEqual(fileLines.length, 3);
    assert.strictEqual(JSON.parse(fileLines[0]).source, "agent");

    // Text should show source prefixes on content lines
    assert.ok(textData.includes("[agent] Working on it"));
    assert.ok(textData.includes("[supervisor] Looks good"));
    assert.ok(textData.includes("Evaluation completed after 1 turns"));
  });

  test("supervised mode shows incomplete status on failure", async () => {
    const fileStream = new PassThrough();
    const textStream = new PassThrough();
    const writer = new TeeWriter({
      fileStream,
      textStream,
      mode: "supervised",
    });

    await writeLines(writer, [
      JSON.stringify({
        source: "orchestrator",
        type: "summary",
        success: false,
        turns: 5,
      }),
    ]);

    const textData = collect(textStream);
    assert.ok(textData.includes("Evaluation incomplete after 5 turns"));
  });

  // Title matches what is asserted below: both consecutive agent lines carry
  // the "[agent]" prefix, not only the first one.
  test("supervised mode shows source label on each content line", async () => {
    const fileStream = new PassThrough();
    const textStream = new PassThrough();
    const writer = new TeeWriter({
      fileStream,
      textStream,
      mode: "supervised",
    });

    const events = [
      JSON.stringify({
        source: "agent",
        turn: 0,
        event: {
          type: "assistant",
          message: {
            content: [{ type: "text", text: "Step 1" }],
            usage: { input_tokens: 10, output_tokens: 5 },
          },
        },
      }),
      JSON.stringify({
        source: "agent",
        turn: 0,
        event: {
          type: "assistant",
          message: {
            content: [{ type: "text", text: "Step 2" }],
            usage: { input_tokens: 10, output_tokens: 5 },
          },
        },
      }),
    ];

    await writeLines(writer, events);

    const textData = collect(textStream);
    // [agent] prefix should appear on each content line
    assert.ok(textData.includes("[agent] Step 1"));
    assert.ok(textData.includes("[agent] Step 2"));
  });

  test("handles partial lines across chunks", async () => {
    const fileStream = new PassThrough();
    const textStream = new PassThrough();
    const writer = new TeeWriter({ fileStream, textStream, mode: "raw" });

    const fullLine = JSON.stringify({
      type: "assistant",
      message: {
        content: [{ type: "text", text: "Split message" }],
        usage: { input_tokens: 10, output_tokens: 5 },
      },
    });

    // Split the line across two chunks
    const mid = Math.floor(fullLine.length / 2);
    writer.write(fullLine.slice(0, mid));
    writer.write(fullLine.slice(mid) + "\n");
    await new Promise((resolve) => writer.end(resolve));

    const textData = collect(textStream);
    assert.ok(textData.includes("Split message"));
  });

  test("truncates long tool input", async () => {
    const fileStream = new PassThrough();
    const textStream = new PassThrough();
    const writer = new TeeWriter({ fileStream, textStream, mode: "raw" });

    const longInput = { command: "x".repeat(300) };
    const event = JSON.stringify({
      type: "assistant",
      message: {
        content: [
          { type: "tool_use", id: "t1", name: "Bash", input: longInput },
        ],
        usage: { input_tokens: 10, output_tokens: 5 },
      },
    });

    await writeLines(writer, [event]);

    const textData = collect(textStream);
    assert.ok(textData.includes("> Tool: Bash"));
    assert.ok(textData.includes("..."));
    // Truncated to ~200 chars
    const toolLine = textData.split("\n").find((l) => l.startsWith("> Tool:"));
    // Fail with an assertion, not a TypeError, when no line starts with the marker.
    assert.ok(toolLine, "expected a line starting with '> Tool:'");
    assert.ok(toolLine.length < 250);
  });

  test("defaults to raw mode", () => {
    const writer = new TeeWriter({
      fileStream: new PassThrough(),
      textStream: new PassThrough(),
    });
    assert.strictEqual(writer.mode, "raw");
  });

  test("createTeeWriter factory returns a TeeWriter instance", () => {
    const writer = createTeeWriter({
      fileStream: new PassThrough(),
      textStream: new PassThrough(),
    });
    assert.ok(writer instanceof TeeWriter);
  });
});
|