offgrid-ai 0.8.7 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/benchmark.mjs +96 -16
package/package.json
CHANGED
package/src/benchmark.mjs
CHANGED
|
@@ -262,7 +262,36 @@ function formatToolCall(toolCall) {
|
|
|
262
262
|
return `[toolCall] ${toolCall.name}${summary}`;
|
|
263
263
|
}
|
|
264
264
|
|
|
265
|
-
function
|
|
265
|
+
/**
 * Format a token count as a short human-readable string.
 *
 * - below 1,000: the rounded integer ("742")
 * - below ~1M: whole thousands with a "k" suffix ("12k")
 * - otherwise: millions with one decimal and an "M" suffix ("1.5M")
 *
 * @param {number} n - Token count.
 * @returns {string} Compact representation of the count.
 */
function formatTokens(n) {
  // Switch to the "M" form at 999,500 rather than 1,000,000: anything at or
  // above that would round to 1000 thousands and render as "1000k".
  if (n >= 999_500) return `${(n / 1_000_000).toFixed(1)}M`;
  if (n >= 1_000) return `${Math.round(n / 1_000)}k`;
  return String(Math.round(n));
}
|
|
270
|
+
|
|
271
|
+
/**
 * Roughly estimate how many tokens a piece of text occupies.
 *
 * Uses a fixed ratio of about four characters per token (a heuristic for
 * code/English) and never reports fewer than one token.
 *
 * @param {string} text - Text to measure (its `length` is used).
 * @returns {number} Estimated token count, at least 1.
 */
function estimatedTokensFromText(text) {
  const charsPerToken = 4;
  const estimate = Math.ceil(text.length / charsPerToken);
  return estimate < 1 ? 1 : estimate;
}
|
|
275
|
+
|
|
276
|
+
/**
 * Erase the current terminal line by writing a carriage return followed by
 * the "erase to end of line" escape sequence. Does nothing when stdout is
 * not a TTY.
 */
function clearStatusLine() {
  const { stdout } = process;
  if (!stdout.isTTY) return;
  stdout.write("\r\x1b[K");
}
|
|
281
|
+
|
|
282
|
+
/**
 * Overwrite the current terminal line with `text`, without a trailing
 * newline, so later calls can replace it in place. Does nothing when stdout
 * is not a TTY.
 *
 * @param {string} text - Status text to display.
 */
function printStatusLine(text) {
  const out = process.stdout;
  if (!out.isTTY) return;
  out.write(`\r\x1b[K${text}`);
}
|
|
287
|
+
|
|
288
|
+
/**
 * Print a permanent log line.
 *
 * Clears the status line first, then emits `text` through `console.log`
 * (which appends the newline).
 *
 * @param {string} text - Line to print.
 */
function printFinalLine(text) {
  // Clear before logging so the new line does not land after status text.
  clearStatusLine();

  console.log(text);
}
|
|
292
|
+
|
|
293
|
+
function renderStreamEvent(parsed, state, opts = {}) {
|
|
294
|
+
const verbose = Boolean(opts.verbose);
|
|
266
295
|
const type = parsed.type;
|
|
267
296
|
|
|
268
297
|
switch (type) {
|
|
@@ -274,7 +303,11 @@ function renderStreamEvent(parsed, state) {
|
|
|
274
303
|
break;
|
|
275
304
|
case "turn_start": {
|
|
276
305
|
state.turn += 1;
|
|
277
|
-
|
|
306
|
+
state.status.mode = "thinking";
|
|
307
|
+
state.status.toolName = null;
|
|
308
|
+
state.status.bytes = 0;
|
|
309
|
+
state.status.tokens = 0;
|
|
310
|
+
printFinalLine(BENCH_COLORS.info(`[turn ${state.turn}]`));
|
|
278
311
|
break;
|
|
279
312
|
}
|
|
280
313
|
case "message_start": {
|
|
@@ -289,15 +322,21 @@ function renderStreamEvent(parsed, state) {
|
|
|
289
322
|
if (!evt) return;
|
|
290
323
|
const subtype = String(evt.type ?? "").replace(/_/gu, "");
|
|
291
324
|
if (subtype === "thinkingstart" || subtype === "thinkingdelta") {
|
|
292
|
-
process.stdout.write(BENCH_COLORS.thinking(evt.delta || ""));
|
|
325
|
+
if (verbose) process.stdout.write(BENCH_COLORS.thinking(evt.delta || ""));
|
|
326
|
+
state.status.mode = "thinking";
|
|
327
|
+
updateStatusFromDelta(state, evt.delta);
|
|
293
328
|
} else if (subtype === "textstart" || subtype === "textdelta") {
|
|
294
|
-
process.stdout.write(BENCH_COLORS.text(evt.delta || ""));
|
|
329
|
+
if (verbose) process.stdout.write(BENCH_COLORS.text(evt.delta || ""));
|
|
330
|
+
state.status.mode = "text";
|
|
331
|
+
updateStatusFromDelta(state, evt.delta);
|
|
295
332
|
} else if (subtype === "toolcallstart") {
|
|
296
|
-
|
|
333
|
+
if (!verbose) printFinalLine(BENCH_COLORS.tool("[tool_call_start]"));
|
|
297
334
|
} else if (subtype === "toolcalldelta") {
|
|
298
|
-
process.stdout.write(BENCH_COLORS.tool(evt.delta || ""));
|
|
335
|
+
if (verbose) process.stdout.write(BENCH_COLORS.tool(evt.delta || ""));
|
|
336
|
+
state.status.mode = "tool";
|
|
337
|
+
updateStatusFromDelta(state, evt.delta);
|
|
299
338
|
} else if (subtype === "toolcallend") {
|
|
300
|
-
|
|
339
|
+
if (!verbose) printFinalLine(BENCH_COLORS.tool("[tool_call_end]"));
|
|
301
340
|
}
|
|
302
341
|
break;
|
|
303
342
|
}
|
|
@@ -306,36 +345,76 @@ function renderStreamEvent(parsed, state) {
|
|
|
306
345
|
if (msg?.role === "assistant" && Array.isArray(msg.content)) {
|
|
307
346
|
for (const item of msg.content) {
|
|
308
347
|
if (item.type === "toolCall") {
|
|
309
|
-
|
|
348
|
+
const toolLine = formatToolCall(item);
|
|
349
|
+
state.status.toolName = item.name;
|
|
350
|
+
if (!verbose) printFinalLine(BENCH_COLORS.tool(toolLine));
|
|
310
351
|
}
|
|
311
352
|
}
|
|
312
353
|
}
|
|
313
354
|
break;
|
|
314
355
|
}
|
|
315
356
|
case "tool_execution_start":
|
|
316
|
-
|
|
357
|
+
state.status.mode = "exec";
|
|
358
|
+
state.status.toolName = parsed.toolName;
|
|
359
|
+
state.status.bytes = 0;
|
|
360
|
+
state.status.tokens = 0;
|
|
361
|
+
printFinalLine(BENCH_COLORS.tool(`[exec] ${parsed.toolName}`));
|
|
317
362
|
break;
|
|
318
|
-
case "tool_execution_update":
|
|
363
|
+
case "tool_execution_update": {
|
|
319
364
|
if (parsed.content) {
|
|
320
|
-
process.stdout.write(BENCH_COLORS.toolOutput(parsed.content));
|
|
365
|
+
if (verbose) process.stdout.write(BENCH_COLORS.toolOutput(parsed.content));
|
|
366
|
+
state.status.mode = "exec";
|
|
367
|
+
updateStatusFromDelta(state, parsed.content);
|
|
321
368
|
}
|
|
322
369
|
break;
|
|
370
|
+
}
|
|
323
371
|
case "tool_execution_end":
|
|
324
|
-
|
|
372
|
+
printFinalLine(BENCH_COLORS.tool(`[exec done] ${state.status.toolName || parsed.toolName}`));
|
|
325
373
|
break;
|
|
326
374
|
case "toolResult": {
|
|
327
375
|
const errorFlag = parsed.isError ? BENCH_COLORS.error(" error") : "";
|
|
328
|
-
|
|
376
|
+
printFinalLine(BENCH_COLORS.tool(`[result] ${parsed.toolName}${errorFlag}`));
|
|
377
|
+
break;
|
|
378
|
+
}
|
|
379
|
+
case "turn_end": {
|
|
380
|
+
const usage = parsed.message?.usage;
|
|
381
|
+
if (usage) {
|
|
382
|
+
const exact = usage.output ?? usage.totalTokens ?? 0;
|
|
383
|
+
printFinalLine(BENCH_COLORS.info(`[turn ${state.turn}] completed · ${formatTokens(exact)} tokens`));
|
|
384
|
+
} else {
|
|
385
|
+
printFinalLine(BENCH_COLORS.info(`[turn ${state.turn}] completed`));
|
|
386
|
+
}
|
|
329
387
|
break;
|
|
330
388
|
}
|
|
331
389
|
case "agent_end":
|
|
332
|
-
|
|
390
|
+
clearStatusLine();
|
|
391
|
+
console.log(BENCH_COLORS.dim("[agent_end]"));
|
|
333
392
|
break;
|
|
334
393
|
default:
|
|
335
394
|
break;
|
|
336
395
|
}
|
|
337
396
|
}
|
|
338
397
|
|
|
398
|
+
/**
 * Account for a newly streamed chunk and redraw the status line.
 *
 * Adds the chunk's UTF-8 byte length to `state.status.bytes`, recomputes the
 * approximate token count, and prints a one-line summary via
 * `printStatusLine`. A falsy `delta` (empty string, null, undefined) is
 * ignored and leaves the state untouched.
 *
 * @param {{ turn: number, status: { mode: string, toolName: ?string, bytes: number, tokens: number } }} state
 *   Mutable render state; `status.bytes` and `status.tokens` are updated.
 * @param {string} delta - Streamed chunk to account for.
 */
function updateStatusFromDelta(state, delta) {
  if (!delta) return;

  const status = state.status;
  status.bytes += Buffer.byteLength(delta, "utf8");

  // Estimate from the accumulated byte count itself (~4 bytes per token, at
  // least 1). Estimating from the count rendered as a string would measure
  // its number of digits, so the figure would never get past a few tokens.
  status.tokens = Math.max(1, Math.ceil(status.bytes / 4));

  const label = status.toolName ? ` · ${status.toolName}` : "";
  // Any mode other than the three streaming modes is displayed as "exec".
  const streamingModes = ["thinking", "text", "tool"];
  const modeLabel = streamingModes.includes(status.mode) ? status.mode : "exec";
  const bytes = formatBytes(status.bytes);
  const tokens = formatTokens(status.tokens);
  printStatusLine(BENCH_COLORS.dim(`[turn ${state.turn}] ${modeLabel}${label} · ${bytes} (~${tokens} tokens)`));
}
|
|
408
|
+
|
|
409
|
+
/**
 * Render a byte count using binary (1024-based) units from B up to TB.
 *
 * Plain bytes are shown without decimals; larger units use two decimals.
 * Values that are not finite numbers yield "unknown".
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Formatted size such as "512 B" or "1.50 KB".
 */
function formatBytes(bytes) {
  if (!Number.isFinite(bytes)) {
    return "unknown";
  }

  const suffixes = ["B", "KB", "MB", "GB", "TB"];
  const lastIndex = suffixes.length - 1;
  let index = 0;
  let scaled = bytes;
  // Step up one unit at a time, stopping at the largest known unit.
  for (; index < lastIndex && scaled >= 1024; index += 1) {
    scaled /= 1024;
  }

  const decimals = index > 0 ? 2 : 0;
  return `${scaled.toFixed(decimals)} ${suffixes[index]}`;
}
|
|
417
|
+
|
|
339
418
|
/**
 * Resolve the model string for a profile.
 *
 * Returns `profile.harnesses.pi.model` when it is set (not null/undefined);
 * otherwise falls back to `"<providerId>/<modelAlias>"`.
 *
 * @param {object} profile - Profile with optional `harnesses.pi.model`, plus
 *   `providerId` and `modelAlias` used for the fallback.
 * @returns {string} The model identifier.
 */
export function piModelString(profile) {
  const explicitModel = profile.harnesses?.pi?.model;
  if (explicitModel !== null && explicitModel !== undefined) {
    return explicitModel;
  }
  const { providerId, modelAlias } = profile;
  return `${providerId}/${modelAlias}`;
}
|
|
@@ -382,7 +461,8 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
|
|
|
382
461
|
const streamHandle = await openFileHandle(streamPath, "w");
|
|
383
462
|
const stderrHandle = await openFileHandle(stderrPath, "w");
|
|
384
463
|
|
|
385
|
-
const
|
|
464
|
+
const verbose = Boolean(process.env.OFFGRID_BENCHMARK_VERBOSE);
|
|
465
|
+
const renderState = { turn: 0, status: { mode: "idle", toolName: null, bytes: 0, tokens: 0 } };
|
|
386
466
|
|
|
387
467
|
function appendResponse(text) {
|
|
388
468
|
responseBuffer += text;
|
|
@@ -436,7 +516,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
|
|
|
436
516
|
const timestamp = extractTimestamp(parsed);
|
|
437
517
|
updateTimeBounds(timestamp);
|
|
438
518
|
|
|
439
|
-
renderStreamEvent(parsed, renderState);
|
|
519
|
+
renderStreamEvent(parsed, renderState, { verbose });
|
|
440
520
|
|
|
441
521
|
if (parsed.type === "session" || parsed.type === "agent_start") {
|
|
442
522
|
if (timestamp && runStartMs === null) runStartMs = timestamp;
|