offgrid-ai 0.8.7 → 0.8.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/src/benchmark.mjs +104 -21
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.8.7",
3
+ "version": "0.8.9",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
package/src/benchmark.mjs CHANGED
@@ -262,7 +262,36 @@ function formatToolCall(toolCall) {
262
262
  return `[toolCall] ${toolCall.name}${summary}`;
263
263
  }
264
264
 
265
- function renderStreamEvent(parsed, state) {
265
/**
 * Formats a token count as a short human-readable string.
 *
 * - Values that round to at least one million use one decimal and an "M" suffix.
 * - Values that round to at least one thousand use a whole number and a "k" suffix.
 * - Smaller values are rounded to the nearest integer.
 *
 * The thresholds sit half a unit below each boundary so that a value which
 * rounds up to the next unit is shown in that unit ("1.0M" rather than "1000k",
 * "1k" rather than "1000").
 *
 * @param {number} n - Token count.
 * @returns {string} Compact representation such as "512", "13k" or "2.5M".
 */
function formatTokens(n) {
  // 999_500 / 1_000 rounds to 1000, which belongs in the "M" bucket.
  if (n >= 999_500) return `${(n / 1_000_000).toFixed(1)}M`;
  // 999.5 rounds to 1000, which belongs in the "k" bucket.
  if (n >= 999.5) return `${Math.round(n / 1_000)}k`;
  return String(Math.round(n));
}
270
+
271
/**
 * Gives a rough token estimate for a piece of text.
 *
 * Heuristic: about four characters per token for code/English, rounded up,
 * and never less than one token (so an empty string still reports 1).
 *
 * @param {string} text - Text to measure; only its `length` is used.
 * @returns {number} Estimated token count (>= 1).
 */
function estimatedTokensFromText(text) {
  const CHARS_PER_TOKEN = 4;
  const estimate = Math.ceil(text.length / CHARS_PER_TOKEN);
  return estimate < 1 ? 1 : estimate;
}
275
+
276
/**
 * Erases the in-place status line on an interactive terminal.
 *
 * Writes a carriage return followed by the ANSI "erase to end of line"
 * sequence. Does nothing when stdout is not a TTY.
 */
function clearStatusLine() {
  const out = process.stdout;
  if (!out.isTTY) return;
  out.write("\r\x1b[K");
}
281
+
282
/**
 * Draws (or redraws) the transient status line on an interactive terminal.
 *
 * The current line is cleared first and no newline is appended, so the next
 * call overwrites this one. Does nothing when stdout is not a TTY.
 *
 * @param {string} text - Status text to display.
 */
function printStatusLine(text) {
  const out = process.stdout;
  if (!out.isTTY) return;
  out.write(`\r\x1b[K${text}`);
}
287
+
288
/**
 * Prints a permanent log line.
 *
 * Any transient status line is cleared first so the logged text starts on a
 * clean line, then the text is emitted through `console.log`.
 *
 * @param {string} text - Line to log.
 */
function printFinalLine(text) {
  // Clear before logging so the status line does not bleed into the output.
  clearStatusLine();
  console.log(text);
}
292
+
293
+ function renderStreamEvent(parsed, state, opts = {}) {
294
+ const verbose = Boolean(opts.verbose);
266
295
  const type = parsed.type;
267
296
 
268
297
  switch (type) {
@@ -274,7 +303,11 @@ function renderStreamEvent(parsed, state) {
274
303
  break;
275
304
  case "turn_start": {
276
305
  state.turn += 1;
277
- console.log(BENCH_COLORS.info(`\n[turn ${state.turn}]`));
306
+ state.status.mode = "thinking";
307
+ state.status.toolName = null;
308
+ state.status.bytes = 0;
309
+ state.status.tokens = 0;
310
+ printFinalLine(BENCH_COLORS.info(`[turn ${state.turn}]`));
278
311
  break;
279
312
  }
280
313
  case "message_start": {
@@ -289,15 +322,21 @@ function renderStreamEvent(parsed, state) {
289
322
  if (!evt) return;
290
323
  const subtype = String(evt.type ?? "").replace(/_/gu, "");
291
324
  if (subtype === "thinkingstart" || subtype === "thinkingdelta") {
292
- process.stdout.write(BENCH_COLORS.thinking(evt.delta || ""));
325
+ if (verbose) process.stdout.write(BENCH_COLORS.thinking(evt.delta || ""));
326
+ state.status.mode = "thinking";
327
+ updateStatusFromDelta(state, evt.delta);
293
328
  } else if (subtype === "textstart" || subtype === "textdelta") {
294
- process.stdout.write(BENCH_COLORS.text(evt.delta || ""));
329
+ if (verbose) process.stdout.write(BENCH_COLORS.text(evt.delta || ""));
330
+ state.status.mode = "text";
331
+ updateStatusFromDelta(state, evt.delta);
295
332
  } else if (subtype === "toolcallstart") {
296
- console.log(BENCH_COLORS.tool("\n[tool_call_start]"));
333
+ if (!verbose) printFinalLine(BENCH_COLORS.tool("[tool_call_start]"));
297
334
  } else if (subtype === "toolcalldelta") {
298
- process.stdout.write(BENCH_COLORS.tool(evt.delta || ""));
335
+ if (verbose) process.stdout.write(BENCH_COLORS.tool(evt.delta || ""));
336
+ state.status.mode = "tool";
337
+ updateStatusFromDelta(state, evt.delta);
299
338
  } else if (subtype === "toolcallend") {
300
- console.log(BENCH_COLORS.tool("[tool_call_end]"));
339
+ if (!verbose) printFinalLine(BENCH_COLORS.tool("[tool_call_end]"));
301
340
  }
302
341
  break;
303
342
  }
@@ -306,36 +345,76 @@ function renderStreamEvent(parsed, state) {
306
345
  if (msg?.role === "assistant" && Array.isArray(msg.content)) {
307
346
  for (const item of msg.content) {
308
347
  if (item.type === "toolCall") {
309
- console.log(BENCH_COLORS.tool(`\n${formatToolCall(item)}`));
348
+ const toolLine = formatToolCall(item);
349
+ state.status.toolName = item.name;
350
+ if (!verbose) printFinalLine(BENCH_COLORS.tool(toolLine));
310
351
  }
311
352
  }
312
353
  }
313
354
  break;
314
355
  }
315
356
  case "tool_execution_start":
316
- console.log(BENCH_COLORS.tool(`\n[exec] ${parsed.toolName}`));
357
+ state.status.mode = "exec";
358
+ state.status.toolName = parsed.toolName;
359
+ state.status.bytes = 0;
360
+ state.status.tokens = 0;
361
+ printFinalLine(BENCH_COLORS.tool(`[exec] ${parsed.toolName}`));
317
362
  break;
318
- case "tool_execution_update":
363
+ case "tool_execution_update": {
319
364
  if (parsed.content) {
320
- process.stdout.write(BENCH_COLORS.toolOutput(parsed.content));
365
+ if (verbose) process.stdout.write(BENCH_COLORS.toolOutput(parsed.content));
366
+ state.status.mode = "exec";
367
+ updateStatusFromDelta(state, parsed.content);
321
368
  }
322
369
  break;
370
+ }
323
371
  case "tool_execution_end":
324
- console.log(BENCH_COLORS.tool(`[exec done] ${parsed.toolName}`));
372
+ printFinalLine(BENCH_COLORS.tool(`[exec done] ${state.status.toolName || parsed.toolName}`));
325
373
  break;
326
374
  case "toolResult": {
327
375
  const errorFlag = parsed.isError ? BENCH_COLORS.error(" error") : "";
328
- console.log(BENCH_COLORS.tool(`\n[result] ${parsed.toolName}${errorFlag}`));
376
+ printFinalLine(BENCH_COLORS.tool(`[result] ${parsed.toolName}${errorFlag}`));
377
+ break;
378
+ }
379
+ case "turn_end": {
380
+ const usage = parsed.message?.usage;
381
+ if (usage) {
382
+ const exact = usage.output ?? usage.totalTokens ?? 0;
383
+ printFinalLine(BENCH_COLORS.info(`[turn ${state.turn}] completed · ${formatTokens(exact)} tokens`));
384
+ } else {
385
+ printFinalLine(BENCH_COLORS.info(`[turn ${state.turn}] completed`));
386
+ }
329
387
  break;
330
388
  }
331
389
  case "agent_end":
332
- console.log(BENCH_COLORS.dim("\n[agent_end]"));
390
+ clearStatusLine();
391
+ console.log(BENCH_COLORS.dim("[agent_end]"));
333
392
  break;
334
393
  default:
335
394
  break;
336
395
  }
337
396
  }
338
397
 
398
/**
 * Accounts for a newly streamed chunk and redraws the status line.
 *
 * Adds the chunk's UTF-8 byte length to `state.status.bytes`, refreshes the
 * estimated token count in `state.status.tokens`, and passes a summary of the
 * form `[turn N] <mode>[ · <tool>] · <bytes> (~<tokens> tokens)` to
 * `printStatusLine`.
 *
 * @param {{turn: number, status: {mode: string, toolName: ?string, bytes: number, tokens: number}}} state
 *   Render state; `state.status` is mutated in place.
 * @param {*} delta - Streamed chunk. Falsy values are ignored; non-string
 *   values are stringified before being measured.
 * @returns {void}
 */
function updateStatusFromDelta(state, delta) {
  if (!delta) return;

  // Buffer.byteLength throws on values that are neither strings nor buffers.
  const chunk = typeof delta === "string" ? delta : String(delta);
  const status = state.status;
  status.bytes += Buffer.byteLength(chunk, "utf8");

  // Estimate from the accumulated byte count (~4 bytes per token, minimum 1).
  // Measuring String(bytes) would only count the digits of the counter.
  status.tokens = Math.max(1, Math.ceil(status.bytes / 4));

  const streamingModes = ["thinking", "text", "tool"];
  // Any other mode (including "exec" itself) is displayed as "exec".
  const modeLabel = streamingModes.includes(status.mode) ? status.mode : "exec";
  const toolLabel = status.toolName ? ` · ${status.toolName}` : "";
  const bytes = formatBytes(status.bytes);
  const tokens = formatTokens(status.tokens);

  printStatusLine(BENCH_COLORS.dim(`[turn ${state.turn}] ${modeLabel}${toolLabel} · ${bytes} (~${tokens} tokens)`));
}
408
+
409
/**
 * Formats a byte count using binary (1024-based) units from B up to TB.
 *
 * Plain bytes are shown without decimals; larger units use two decimals.
 * Values beyond the largest unit stay expressed in TB.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} e.g. "512 B", "1.50 KB"; "unknown" when `bytes` is not a
 *   finite number.
 */
function formatBytes(bytes) {
  if (!Number.isFinite(bytes)) {
    return "unknown";
  }

  const unitLabels = ["B", "KB", "MB", "GB", "TB"];
  const lastIndex = unitLabels.length - 1;
  let index = 0;
  let scaled = bytes;
  // Step up one unit at a time until the value fits or units run out.
  for (; index < lastIndex && scaled >= 1024; index += 1) {
    scaled /= 1024;
  }

  const decimals = index === 0 ? 0 : 2;
  return `${scaled.toFixed(decimals)} ${unitLabels[index]}`;
}
417
+
339
418
  export function piModelString(profile) {
340
419
  return profile.harnesses?.pi?.model ?? `${profile.providerId}/${profile.modelAlias}`;
341
420
  }
@@ -382,7 +461,8 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
382
461
  const streamHandle = await openFileHandle(streamPath, "w");
383
462
  const stderrHandle = await openFileHandle(stderrPath, "w");
384
463
 
385
- const renderState = { turn: 0 };
464
+ const verbose = Boolean(process.env.OFFGRID_BENCHMARK_VERBOSE);
465
+ const renderState = { turn: 0, status: { mode: "idle", toolName: null, bytes: 0, tokens: 0 } };
386
466
 
387
467
  function appendResponse(text) {
388
468
  responseBuffer += text;
@@ -436,7 +516,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
436
516
  const timestamp = extractTimestamp(parsed);
437
517
  updateTimeBounds(timestamp);
438
518
 
439
- renderStreamEvent(parsed, renderState);
519
+ renderStreamEvent(parsed, renderState, { verbose });
440
520
 
441
521
  if (parsed.type === "session" || parsed.type === "agent_start") {
442
522
  if (timestamp && runStartMs === null) runStartMs = timestamp;
@@ -450,7 +530,6 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
450
530
  const usage = parsed.message.usage;
451
531
  runResult.promptTokens += usage.input ?? 0;
452
532
  runResult.completionTokens += usage.output ?? 0;
453
- runResult.totalTokens += usage.totalTokens ?? 0;
454
533
  runResult.cacheRead += usage.cacheRead ?? 0;
455
534
  runResult.cacheWrite += usage.cacheWrite ?? 0;
456
535
  endTurn(usage, timestamp);
@@ -786,20 +865,24 @@ export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics
786
865
  }
787
866
 
788
867
  const success = existsSync(requiredPath) && (await readFile(requiredPath, "utf8")).trim().length > 0;
868
+ const hasTurns = runResult.agentTurns > 0;
869
+ const failed = runResult.error || !success || !hasTurns;
789
870
 
790
- metadata.status = runResult.error ? "failed" : "completed";
871
+ metadata.status = failed ? "failed" : "completed";
791
872
  metadata.updatedAt = timestamp;
792
- if (runResult.error) {
873
+ if (failed) {
793
874
  metadata.failedAt = timestamp;
794
875
  } else {
795
876
  metadata.completedAt = timestamp;
796
877
  }
797
878
 
879
+ const totalTokens = runResult.promptTokens + runResult.completionTokens;
880
+
798
881
  metadata.runner.tokenMetrics = {
799
- reported: true,
882
+ reported: hasTurns,
800
883
  promptTokens: runResult.promptTokens,
801
884
  completionTokens: runResult.completionTokens,
802
- totalTokens: runResult.totalTokens,
885
+ totalTokens,
803
886
  };
804
887
 
805
888
  metadata.runner.speedMetrics = speedMetrics;