@agjs/tsforge 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@agjs/tsforge",
3
3
  "type": "module",
4
- "version": "0.1.10",
4
+ "version": "0.1.11",
5
5
  "license": "MIT",
6
6
  "description": "TypeScript coding harness with a deterministic gate, stack-aware guardrails, and stream-level correction.",
7
7
  "repository": {
package/src/cli.ts CHANGED
@@ -629,6 +629,7 @@ const HELP = [
629
629
  " /model [name] list configured models (★ active), or switch to <name>",
630
630
  " /sessions list saved sessions (resume one with: tsforge --resume <id>)",
631
631
  " /cost rough conversation size (messages + ~tokens)",
632
+ " /metrics token totals + generation rate (tok/s) this session",
632
633
  " /exit, /quit leave the session",
633
634
  "",
634
635
  "Anything else is sent to the agent. It works with its tools; when it stops,",
@@ -1197,6 +1198,21 @@ async function repl(args: ICliArgs): Promise<number> {
1197
1198
  break;
1198
1199
  }
1199
1200
 
1201
+ case "metrics": {
1202
+ const m = session.metrics;
1203
+
1204
+ if (m.calls === 0) {
1205
+ process.stdout.write(" no model calls yet\n");
1206
+ } else {
1207
+ process.stdout.write(
1208
+ ` ${String(m.calls)} call(s) · ${String(m.promptTokens)} in / ${String(m.completionTokens)} out · ` +
1209
+ `${String(m.lastTokensPerSecond)} tok/s last · ${String(m.avgTokensPerSecond)} tok/s avg\n`
1210
+ );
1211
+ }
1212
+
1213
+ break;
1214
+ }
1215
+
1200
1216
  default:
1201
1217
  process.stdout.write(`unknown command: ${line} (try /help)\n`);
1202
1218
  }
@@ -1217,6 +1233,7 @@ async function repl(args: ICliArgs): Promise<number> {
1217
1233
  elapsedMs: lastElapsedMs,
1218
1234
  status: lastStatus,
1219
1235
  scope: scopeLabel(session.scope) + (planMode ? " · PLAN" : ""),
1236
+ tokensPerSecond: session.metrics.lastTokensPerSecond,
1220
1237
  })
1221
1238
  );
1222
1239
  process.stdout.write("› ");
@@ -51,6 +51,9 @@ export interface ILoopEvent {
51
51
  promptTokens?: number;
52
52
  completionTokens?: number;
53
53
  totalTokens?: number;
54
+ /** For `usage` events: output generation rate (completion tokens / second),
55
+ * measured from the first streamed token (or from the call's start when no token was streamed) to the call's end. */
56
+ tokensPerSecond?: number;
54
57
  /** For `usage` (and salvage-warning `tool`) events: whether THIS model call
55
58
  * ran with thinking enabled — lets the analyzer correlate malformed-tool-call
56
59
  * rate with the thinking mode (see analyze-malformed). */
@@ -114,6 +114,20 @@ export interface ISendResult {
114
114
  turns: number;
115
115
  }
116
116
 
117
+ /** Cumulative model-call metrics for a session — the basis for `/metrics`. */
118
+ export interface ISessionMetrics {
119
+ /** Number of model calls made. */
120
+ readonly calls: number;
121
+ /** Total prompt (input) tokens billed across all calls. */
122
+ readonly promptTokens: number;
123
+ /** Total completion (output) tokens generated across all calls. */
124
+ readonly completionTokens: number;
125
+ /** Overall output generation rate: total completion tokens divided by total generation time across all calls (tokens/second). */
126
+ readonly avgTokensPerSecond: number;
127
+ /** Output generation rate of the most recent call (tokens/second). */
128
+ readonly lastTokensPerSecond: number;
129
+ }
130
+
117
131
  export interface ISendOptions {
118
132
  /** Caller cancellation (Ctrl-C). */
119
133
  signal?: AbortSignal;
@@ -339,6 +353,15 @@ export class Session {
339
353
  * size of the context the model last saw (drives the status gauge and, soon,
340
354
  * auto-compaction). */
341
355
  private lastUsage?: ITokenUsage;
356
+ /** Running totals behind the `metrics` getter. genMs is the summed generation
357
+ * time (first-token→end) so the average rate is tokens/total-gen-seconds. */
358
+ private readonly metricsTotals = {
359
+ calls: 0,
360
+ promptTokens: 0,
361
+ completionTokens: 0,
362
+ genMs: 0,
363
+ lastTokensPerSecond: 0,
364
+ };
342
365
  /** Fast check run every few edits while building (e.g. tsc); "" = off. */
343
366
  private incrementalCheck: string;
344
367
  /** Per-send thinking override, set from ISendOptions for the duration of a
@@ -507,6 +530,31 @@ export class Session {
507
530
  return this.lastUsage;
508
531
  }
509
532
 
533
+ /** Cumulative model-call metrics (tokens + generation rate) for this session. */
534
+ get metrics(): ISessionMetrics {
535
+ const t = this.metricsTotals;
536
+
537
+ return {
538
+ calls: t.calls,
539
+ promptTokens: t.promptTokens,
540
+ completionTokens: t.completionTokens,
541
+ avgTokensPerSecond:
542
+ t.genMs > 0 ? Math.round((t.completionTokens / t.genMs) * 1000) : 0,
543
+ lastTokensPerSecond: Math.round(t.lastTokensPerSecond),
544
+ };
545
+ }
546
+
547
+ /** Fold one call's usage + generation time into the running metrics totals. */
548
+ private recordUsage(usage: ITokenUsage, genMs: number): void {
549
+ this.lastUsage = usage;
550
+ this.metricsTotals.calls += 1;
551
+ this.metricsTotals.promptTokens += usage.promptTokens;
552
+ this.metricsTotals.completionTokens += usage.completionTokens;
553
+ this.metricsTotals.genMs += genMs;
554
+ this.metricsTotals.lastTokensPerSecond =
555
+ genMs > 0 ? (usage.completionTokens / genMs) * 1000 : 0;
556
+ }
557
+
510
558
  /** The real size of the context the model is currently holding — the prompt
511
559
  * tokens of the last call (what auto-compaction watches), 0 before any call. */
512
560
  get contextTokens(): number {
@@ -957,6 +1005,8 @@ export class Session {
957
1005
  const mcpSchemas = this.ctx.mcpRegistry?.toolSchemas() ?? [];
958
1006
  const offeredTools =
959
1007
  mcpSchemas.length > 0 ? [...baseTools, ...mcpSchemas] : baseTools;
1008
+ const callStart = performance.now();
1009
+ let firstTokenAt = 0;
960
1010
  const res = await this.provider.complete(ctx.messages, {
961
1011
  tools: offeredTools,
962
1012
  temperature: this.cfg.temperature ?? 0,
@@ -967,6 +1017,12 @@ export class Session {
967
1017
  : { thinkingTokenBudget: this.cfg.thinkingTokenBudget }),
968
1018
  ...(signal === undefined ? {} : { signal }),
969
1019
  onToken: (token, channel) => {
1020
+ // Stamp the first token so tokens/sec measures generation rate (excluding
1021
+ // prompt-processing / time-to-first-token), not total wall time.
1022
+ if (firstTokenAt === 0) {
1023
+ firstTokenAt = performance.now();
1024
+ }
1025
+
970
1026
  // Stream EVERYTHING live — thinking, the tool calls being written, and
971
1027
  // the answer itself (channel `content`), so the user watches the reply
972
1028
  // arrive instead of staring at a frozen indicator. The renderer formats
@@ -977,17 +1033,23 @@ export class Session {
977
1033
  });
978
1034
 
979
1035
  if (res.usage !== undefined) {
980
- this.lastUsage = res.usage;
1036
+ const ended = performance.now();
1037
+ const genMs = firstTokenAt > 0 ? ended - firstTokenAt : ended - callStart;
1038
+ const tps = genMs > 0 ? (res.usage.completionTokens / genMs) * 1000 : 0;
1039
+
1040
+ this.recordUsage(res.usage, genMs);
981
1041
  // Logged (not shown) so the --log analyzer can compute tokens-to-solution.
982
1042
  // `thinking` records THIS call's mode, so malformed-call rates can be
983
1043
  // correlated with it (analyze-malformed).
984
1044
  report({
985
1045
  kind: "usage",
986
1046
  task: SESSION_ID,
987
- message: `tokens ${res.usage.promptTokens} in / ${res.usage.completionTokens} out`,
1047
+ message: `tokens ${res.usage.promptTokens} in / ${res.usage.completionTokens} out · ${Math.round(tps)} tok/s`,
988
1048
  promptTokens: res.usage.promptTokens,
989
1049
  completionTokens: res.usage.completionTokens,
990
1050
  totalTokens: res.usage.totalTokens,
1051
+ tokensPerSecond: Math.round(tps),
1052
+ ms: Math.round(genMs),
991
1053
  ...(enableThinking === undefined ? {} : { thinking: enableThinking }),
992
1054
  });
993
1055
  }
@@ -71,6 +71,10 @@ export function renderStatus(
71
71
  );
72
72
  }
73
73
 
74
+ if (info.tokensPerSecond !== undefined && info.tokensPerSecond > 0) {
75
+ bits.push(`${info.tokensPerSecond} tok/s`);
76
+ }
77
+
74
78
  bits.push(info.status, info.scope);
75
79
 
76
80
  return `${paint(` ⎯ ${bits.join(" · ")}`, STYLE.dim, color)}\n`;
@@ -18,4 +18,7 @@ export interface IStatusInfo {
18
18
  status: string;
19
19
  /** Editable scope label. */
20
20
  scope: string;
21
+ /** Output generation rate of the last model call (tokens/second); omitted or
22
+ * 0 before the first call. */
23
+ tokensPerSecond?: number;
21
24
  }