npm - @agjs/tsforge - Versions diffs - 0.1.10 → 0.1.11 - Mend

@agjs/tsforge 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json +1 -1
package/src/cli.ts +17 -0
package/src/loop/loop.types.ts +3 -0
package/src/loop/session.ts +64 -2
package/src/render/ansi.ts +4 -0
package/src/render/render.types.ts +3 -0

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@agjs/tsforge",
   "type": "module",
-  "version": "0.1.10",
+  "version": "0.1.11",
   "license": "MIT",
   "description": "TypeScript coding harness with a deterministic gate, stack-aware guardrails, and stream-level correction.",
   "repository": {

package/src/cli.ts CHANGED Viewed

@@ -629,6 +629,7 @@ const HELP = [
   "  /model [name]    list configured models (★ active), or switch to <name>",
   "  /sessions        list saved sessions (resume one with: tsforge --resume <id>)",
   "  /cost            rough conversation size (messages + ~tokens)",
+  "  /metrics         token totals + generation rate (tok/s) this session",
   "  /exit, /quit     leave the session",
   "",
   "Anything else is sent to the agent. It works with its tools; when it stops,",
@@ -1197,6 +1198,21 @@ async function repl(args: ICliArgs): Promise<number> {
         break;
       }
+      case "metrics": {
+        const m = session.metrics;
+        if (m.calls === 0) {
+          process.stdout.write("  no model calls yet\n");
+        } else {
+          process.stdout.write(
+            `  ${String(m.calls)} call(s) · ${String(m.promptTokens)} in / ${String(m.completionTokens)} out · ` +
+              `${String(m.lastTokensPerSecond)} tok/s last · ${String(m.avgTokensPerSecond)} tok/s avg\n`
+          );
+        }
+        break;
+      }
       default:
         process.stdout.write(`unknown command: ${line} (try /help)\n`);
     }
@@ -1217,6 +1233,7 @@ async function repl(args: ICliArgs): Promise<number> {
         elapsedMs: lastElapsedMs,
         status: lastStatus,
         scope: scopeLabel(session.scope) + (planMode ? " · PLAN" : ""),
+        tokensPerSecond: session.metrics.lastTokensPerSecond,
       })
     );
     process.stdout.write("› ");

package/src/loop/loop.types.ts CHANGED Viewed

@@ -51,6 +51,9 @@ export interface ILoopEvent {
   promptTokens?: number;
   completionTokens?: number;
   totalTokens?: number;
+  /** For `usage` events: output generation rate (completion tokens / second,
+   *  rounded to an integer), measured from the first streamed token to the
+   *  call's end — or from the start of the call when no token was streamed. */
+  tokensPerSecond?: number;
   /** For `usage` (and salvage-warning `tool`) events: whether THIS model call
    *  ran with thinking enabled — lets the analyzer correlate malformed-tool-call
    *  rate with the thinking mode (see analyze-malformed). */

package/src/loop/session.ts CHANGED Viewed

@@ -114,6 +114,20 @@ export interface ISendResult {
   turns: number;
 }
+/** Cumulative model-call metrics for a session — the basis for `/metrics`.
+ *  A read-only snapshot: every field is a plain number, and the two rates are
+ *  already rounded to whole tokens/second by the `metrics` getter. */
+export interface ISessionMetrics {
+  /** Number of model calls counted — one per call whose response carried
+   *  usage data (calls without usage are not recorded). */
+  readonly calls: number;
+  /** Sum of the prompt (input) token counts reported across those calls. */
+  readonly promptTokens: number;
+  /** Sum of the completion (output) token counts reported across those calls. */
+  readonly completionTokens: number;
+  /** Overall output generation rate (tokens/second): total completion tokens
+   *  divided by total generation time, so long calls weigh more than short
+   *  ones. Rounded to an integer; 0 while no generation time is recorded. */
+  readonly avgTokensPerSecond: number;
+  /** Output generation rate of the most recent recorded call (tokens/second),
+   *  rounded to an integer; 0 before the first call or when that call's
+   *  measured generation time was zero. */
+  readonly lastTokensPerSecond: number;
+}
 export interface ISendOptions {
   /** Caller cancellation (Ctrl-C). */
   signal?: AbortSignal;
@@ -339,6 +353,15 @@ export class Session {
    *  size of the context the model last saw (drives the status gauge and, soon,
    *  auto-compaction). */
   private lastUsage?: ITokenUsage;
+  /** Running totals behind the `metrics` getter, updated in place by
+   *  `recordUsage` (the binding is readonly, the fields are not).
+   *  genMs is the summed generation time in milliseconds — first-token→end, or
+   *  the whole call when no token was streamed — so the average rate is
+   *  tokens/total-gen-seconds. lastTokensPerSecond is kept unrounded here and
+   *  rounded only when the getter builds its snapshot. */
+  private readonly metricsTotals = {
+    calls: 0,
+    promptTokens: 0,
+    completionTokens: 0,
+    genMs: 0,
+    lastTokensPerSecond: 0,
+  };
   /** Fast check run every few edits while building (e.g. tsc); "" = off. */
   private incrementalCheck: string;
   /** Per-send thinking override, set from ISendOptions for the duration of a
@@ -507,6 +530,31 @@ export class Session {
     return this.lastUsage;
   }
+  /** Cumulative model-call metrics (tokens + generation rate) for this session.
+   *  Builds a fresh snapshot object from the running totals on every read;
+   *  both rates are rounded to whole tokens/second.
+   *  @returns counts and rates as of the last recorded call (all zeros before
+   *  any call has been recorded). */
+  get metrics(): ISessionMetrics {
+    const t = this.metricsTotals;
+    return {
+      calls: t.calls,
+      promptTokens: t.promptTokens,
+      completionTokens: t.completionTokens,
+      avgTokensPerSecond: // total tokens over total time, not a mean of per-call rates
+        t.genMs > 0 ? Math.round((t.completionTokens / t.genMs) * 1000) : 0, // ×1000: per-ms → per-second; 0 avoids dividing by zero
+      lastTokensPerSecond: Math.round(t.lastTokensPerSecond),
+    };
+  }
+  /** Fold one call's usage + generation time into the running metrics totals.
+   *  Also stores `usage` as `lastUsage`, then bumps the call count, adds the
+   *  call's prompt/completion tokens and generation time to the totals, and
+   *  recomputes the most-recent-call rate.
+   *  @param usage token counts reported for the call just completed.
+   *  @param genMs generation time of that call, in milliseconds. */
+  private recordUsage(usage: ITokenUsage, genMs: number): void {
+    this.lastUsage = usage;
+    this.metricsTotals.calls += 1;
+    this.metricsTotals.promptTokens += usage.promptTokens;
+    this.metricsTotals.completionTokens += usage.completionTokens;
+    this.metricsTotals.genMs += genMs;
+    this.metricsTotals.lastTokensPerSecond =
+      genMs > 0 ? (usage.completionTokens / genMs) * 1000 : 0; // tokens per ms → per second; a zero-length call reports 0 instead of Infinity/NaN
+  }
   /** The real size of the context the model is currently holding — the prompt
    *  tokens of the last call (what auto-compaction watches), 0 before any call. */
   get contextTokens(): number {
@@ -957,6 +1005,8 @@ export class Session {
     const mcpSchemas = this.ctx.mcpRegistry?.toolSchemas() ?? [];
     const offeredTools =
       mcpSchemas.length > 0 ? [...baseTools, ...mcpSchemas] : baseTools;
+    const callStart = performance.now();
+    let firstTokenAt = 0;
     const res = await this.provider.complete(ctx.messages, {
       tools: offeredTools,
       temperature: this.cfg.temperature ?? 0,
@@ -967,6 +1017,12 @@ export class Session {
         : { thinkingTokenBudget: this.cfg.thinkingTokenBudget }),
       ...(signal === undefined ? {} : { signal }),
       onToken: (token, channel) => {
+        // Stamp the first token so tokens/sec measures generation rate (excluding
+        // prompt-processing / time-to-first-token), not total wall time.
+        if (firstTokenAt === 0) {
+          firstTokenAt = performance.now();
+        }
         // Stream EVERYTHING live — thinking, the tool calls being written, and
         // the answer itself (channel `content`), so the user watches the reply
         // arrive instead of staring at a frozen indicator. The renderer formats
@@ -977,17 +1033,23 @@ export class Session {
     });
     if (res.usage !== undefined) {
-      this.lastUsage = res.usage;
+      const ended = performance.now();
+      const genMs = firstTokenAt > 0 ? ended - firstTokenAt : ended - callStart;
+      const tps = genMs > 0 ? (res.usage.completionTokens / genMs) * 1000 : 0;
+      this.recordUsage(res.usage, genMs);
       // Logged (not shown) so the --log analyzer can compute tokens-to-solution.
       // `thinking` records THIS call's mode, so malformed-call rates can be
       // correlated with it (analyze-malformed).
       report({
         kind: "usage",
         task: SESSION_ID,
-        message: `tokens ${res.usage.promptTokens} in / ${res.usage.completionTokens} out`,
+        message: `tokens ${res.usage.promptTokens} in / ${res.usage.completionTokens} out · ${Math.round(tps)} tok/s`,
         promptTokens: res.usage.promptTokens,
         completionTokens: res.usage.completionTokens,
         totalTokens: res.usage.totalTokens,
+        tokensPerSecond: Math.round(tps),
+        ms: Math.round(genMs),
         ...(enableThinking === undefined ? {} : { thinking: enableThinking }),
       });
     }

package/src/render/ansi.ts CHANGED Viewed

@@ -71,6 +71,10 @@ export function renderStatus(
     );
   }
+  if (info.tokensPerSecond !== undefined && info.tokensPerSecond > 0) {
+    bits.push(`${info.tokensPerSecond} tok/s`);
+  }
   bits.push(info.status, info.scope);
   return `${paint(`  ⎯ ${bits.join(" · ")}`, STYLE.dim, color)}\n`;

package/src/render/render.types.ts CHANGED Viewed

@@ -18,4 +18,7 @@ export interface IStatusInfo {
   status: string;
   /** Editable scope label. */
   scope: string;
+  /** Output generation rate of the last model call (tokens/second); omitted or
+   *  0 before the first call. */
+  tokensPerSecond?: number;
 }