@agjs/tsforge 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.ts +17 -0
- package/src/loop/loop.types.ts +3 -0
- package/src/loop/session.ts +64 -2
- package/src/render/ansi.ts +4 -0
- package/src/render/render.types.ts +3 -0
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -629,6 +629,7 @@ const HELP = [
|
|
|
629
629
|
" /model [name] list configured models (★ active), or switch to <name>",
|
|
630
630
|
" /sessions list saved sessions (resume one with: tsforge --resume <id>)",
|
|
631
631
|
" /cost rough conversation size (messages + ~tokens)",
|
|
632
|
+
" /metrics token totals + generation rate (tok/s) this session",
|
|
632
633
|
" /exit, /quit leave the session",
|
|
633
634
|
"",
|
|
634
635
|
"Anything else is sent to the agent. It works with its tools; when it stops,",
|
|
@@ -1197,6 +1198,21 @@ async function repl(args: ICliArgs): Promise<number> {
|
|
|
1197
1198
|
break;
|
|
1198
1199
|
}
|
|
1199
1200
|
|
|
1201
|
+
case "metrics": {
|
|
1202
|
+
const m = session.metrics;
|
|
1203
|
+
|
|
1204
|
+
if (m.calls === 0) {
|
|
1205
|
+
process.stdout.write(" no model calls yet\n");
|
|
1206
|
+
} else {
|
|
1207
|
+
process.stdout.write(
|
|
1208
|
+
` ${String(m.calls)} call(s) · ${String(m.promptTokens)} in / ${String(m.completionTokens)} out · ` +
|
|
1209
|
+
`${String(m.lastTokensPerSecond)} tok/s last · ${String(m.avgTokensPerSecond)} tok/s avg\n`
|
|
1210
|
+
);
|
|
1211
|
+
}
|
|
1212
|
+
|
|
1213
|
+
break;
|
|
1214
|
+
}
|
|
1215
|
+
|
|
1200
1216
|
default:
|
|
1201
1217
|
process.stdout.write(`unknown command: ${line} (try /help)\n`);
|
|
1202
1218
|
}
|
|
@@ -1217,6 +1233,7 @@ async function repl(args: ICliArgs): Promise<number> {
|
|
|
1217
1233
|
elapsedMs: lastElapsedMs,
|
|
1218
1234
|
status: lastStatus,
|
|
1219
1235
|
scope: scopeLabel(session.scope) + (planMode ? " · PLAN" : ""),
|
|
1236
|
+
tokensPerSecond: session.metrics.lastTokensPerSecond,
|
|
1220
1237
|
})
|
|
1221
1238
|
);
|
|
1222
1239
|
process.stdout.write("› ");
|
package/src/loop/loop.types.ts
CHANGED
|
@@ -51,6 +51,9 @@ export interface ILoopEvent {
|
|
|
51
51
|
promptTokens?: number;
|
|
52
52
|
completionTokens?: number;
|
|
53
53
|
totalTokens?: number;
|
|
54
|
+
/** For `usage` events: output generation rate (completion tokens / second),
|
|
55
|
+
* measured from the first streamed token to the call's end (or from the start of the call when no token was streamed). */
|
|
56
|
+
tokensPerSecond?: number;
|
|
54
57
|
/** For `usage` (and salvage-warning `tool`) events: whether THIS model call
|
|
55
58
|
* ran with thinking enabled — lets the analyzer correlate malformed-tool-call
|
|
56
59
|
* rate with the thinking mode (see analyze-malformed). */
|
package/src/loop/session.ts
CHANGED
|
@@ -114,6 +114,20 @@ export interface ISendResult {
|
|
|
114
114
|
turns: number;
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
+
/** Cumulative model-call metrics for a session — the basis for `/metrics`. */
|
|
118
|
+
export interface ISessionMetrics {
|
|
119
|
+
/** Number of model calls made. */
|
|
120
|
+
readonly calls: number;
|
|
121
|
+
/** Total prompt (input) tokens billed across all calls. */
|
|
122
|
+
readonly promptTokens: number;
|
|
123
|
+
/** Total completion (output) tokens generated across all calls. */
|
|
124
|
+
readonly completionTokens: number;
|
|
125
|
+
/** Output generation rate averaged over all calls (tokens/second). */
|
|
126
|
+
readonly avgTokensPerSecond: number;
|
|
127
|
+
/** Output generation rate of the most recent call (tokens/second). */
|
|
128
|
+
readonly lastTokensPerSecond: number;
|
|
129
|
+
}
|
|
130
|
+
|
|
117
131
|
export interface ISendOptions {
|
|
118
132
|
/** Caller cancellation (Ctrl-C). */
|
|
119
133
|
signal?: AbortSignal;
|
|
@@ -339,6 +353,15 @@ export class Session {
|
|
|
339
353
|
* size of the context the model last saw (drives the status gauge and, soon,
|
|
340
354
|
* auto-compaction). */
|
|
341
355
|
private lastUsage?: ITokenUsage;
|
|
356
|
+
/** Running totals behind the `metrics` getter. genMs is the summed generation
|
|
357
|
+
* time (first-token→end, or call-start→end for a call that streamed no tokens) so the average rate is tokens/total-gen-seconds. */
|
|
358
|
+
private readonly metricsTotals = {
|
|
359
|
+
calls: 0,
|
|
360
|
+
promptTokens: 0,
|
|
361
|
+
completionTokens: 0,
|
|
362
|
+
genMs: 0,
|
|
363
|
+
lastTokensPerSecond: 0,
|
|
364
|
+
};
|
|
342
365
|
/** Fast check run every few edits while building (e.g. tsc); "" = off. */
|
|
343
366
|
private incrementalCheck: string;
|
|
344
367
|
/** Per-send thinking override, set from ISendOptions for the duration of a
|
|
@@ -507,6 +530,31 @@ export class Session {
|
|
|
507
530
|
return this.lastUsage;
|
|
508
531
|
}
|
|
509
532
|
|
|
533
|
+
/** Cumulative model-call metrics (tokens + generation rate) for this session. */
|
|
534
|
+
get metrics(): ISessionMetrics {
|
|
535
|
+
const t = this.metricsTotals;
|
|
536
|
+
|
|
537
|
+
return {
|
|
538
|
+
calls: t.calls,
|
|
539
|
+
promptTokens: t.promptTokens,
|
|
540
|
+
completionTokens: t.completionTokens,
|
|
541
|
+
avgTokensPerSecond:
|
|
542
|
+
t.genMs > 0 ? Math.round((t.completionTokens / t.genMs) * 1000) : 0,
|
|
543
|
+
lastTokensPerSecond: Math.round(t.lastTokensPerSecond),
|
|
544
|
+
};
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
/** Fold one call's usage + generation time into the running metrics totals. */
|
|
548
|
+
private recordUsage(usage: ITokenUsage, genMs: number): void {
|
|
549
|
+
this.lastUsage = usage;
|
|
550
|
+
this.metricsTotals.calls += 1;
|
|
551
|
+
this.metricsTotals.promptTokens += usage.promptTokens;
|
|
552
|
+
this.metricsTotals.completionTokens += usage.completionTokens;
|
|
553
|
+
this.metricsTotals.genMs += genMs;
|
|
554
|
+
this.metricsTotals.lastTokensPerSecond =
|
|
555
|
+
genMs > 0 ? (usage.completionTokens / genMs) * 1000 : 0;
|
|
556
|
+
}
|
|
557
|
+
|
|
510
558
|
/** The real size of the context the model is currently holding — the prompt
|
|
511
559
|
* tokens of the last call (what auto-compaction watches), 0 before any call. */
|
|
512
560
|
get contextTokens(): number {
|
|
@@ -957,6 +1005,8 @@ export class Session {
|
|
|
957
1005
|
const mcpSchemas = this.ctx.mcpRegistry?.toolSchemas() ?? [];
|
|
958
1006
|
const offeredTools =
|
|
959
1007
|
mcpSchemas.length > 0 ? [...baseTools, ...mcpSchemas] : baseTools;
|
|
1008
|
+
const callStart = performance.now();
|
|
1009
|
+
let firstTokenAt = 0;
|
|
960
1010
|
const res = await this.provider.complete(ctx.messages, {
|
|
961
1011
|
tools: offeredTools,
|
|
962
1012
|
temperature: this.cfg.temperature ?? 0,
|
|
@@ -967,6 +1017,12 @@ export class Session {
|
|
|
967
1017
|
: { thinkingTokenBudget: this.cfg.thinkingTokenBudget }),
|
|
968
1018
|
...(signal === undefined ? {} : { signal }),
|
|
969
1019
|
onToken: (token, channel) => {
|
|
1020
|
+
// Stamp the first token so tokens/sec measures generation rate (excluding
|
|
1021
|
+
// prompt-processing / time-to-first-token), not total wall time.
|
|
1022
|
+
if (firstTokenAt === 0) {
|
|
1023
|
+
firstTokenAt = performance.now();
|
|
1024
|
+
}
|
|
1025
|
+
|
|
970
1026
|
// Stream EVERYTHING live — thinking, the tool calls being written, and
|
|
971
1027
|
// the answer itself (channel `content`), so the user watches the reply
|
|
972
1028
|
// arrive instead of staring at a frozen indicator. The renderer formats
|
|
@@ -977,17 +1033,23 @@ export class Session {
|
|
|
977
1033
|
});
|
|
978
1034
|
|
|
979
1035
|
if (res.usage !== undefined) {
|
|
980
|
-
|
|
1036
|
+
const ended = performance.now();
|
|
1037
|
+
const genMs = firstTokenAt > 0 ? ended - firstTokenAt : ended - callStart;
|
|
1038
|
+
const tps = genMs > 0 ? (res.usage.completionTokens / genMs) * 1000 : 0;
|
|
1039
|
+
|
|
1040
|
+
this.recordUsage(res.usage, genMs);
|
|
981
1041
|
// Logged (not shown) so the --log analyzer can compute tokens-to-solution.
|
|
982
1042
|
// `thinking` records THIS call's mode, so malformed-call rates can be
|
|
983
1043
|
// correlated with it (analyze-malformed).
|
|
984
1044
|
report({
|
|
985
1045
|
kind: "usage",
|
|
986
1046
|
task: SESSION_ID,
|
|
987
|
-
message: `tokens ${res.usage.promptTokens} in / ${res.usage.completionTokens} out`,
|
|
1047
|
+
message: `tokens ${res.usage.promptTokens} in / ${res.usage.completionTokens} out · ${Math.round(tps)} tok/s`,
|
|
988
1048
|
promptTokens: res.usage.promptTokens,
|
|
989
1049
|
completionTokens: res.usage.completionTokens,
|
|
990
1050
|
totalTokens: res.usage.totalTokens,
|
|
1051
|
+
tokensPerSecond: Math.round(tps),
|
|
1052
|
+
ms: Math.round(genMs),
|
|
991
1053
|
...(enableThinking === undefined ? {} : { thinking: enableThinking }),
|
|
992
1054
|
});
|
|
993
1055
|
}
|
package/src/render/ansi.ts
CHANGED
|
@@ -71,6 +71,10 @@ export function renderStatus(
|
|
|
71
71
|
);
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
+
if (info.tokensPerSecond !== undefined && info.tokensPerSecond > 0) {
|
|
75
|
+
bits.push(`${info.tokensPerSecond} tok/s`);
|
|
76
|
+
}
|
|
77
|
+
|
|
74
78
|
bits.push(info.status, info.scope);
|
|
75
79
|
|
|
76
80
|
return `${paint(` ⎯ ${bits.join(" · ")}`, STYLE.dim, color)}\n`;
|