@monotykamary/pi-tps 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,356 @@
1
+ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
2
+ import type { AssistantMessage } from '@earendil-works/pi-ai';
3
+ import { createTestFixture, activateExtension } from './helpers';
4
+
5
+ describe('pi-tps extension — dynamic TPS cap', () => {
6
+ let fixture: ReturnType<typeof createTestFixture>;
7
+
8
+ beforeEach(async () => {
9
+ fixture = createTestFixture();
10
+ await activateExtension(fixture);
11
+ });
12
+
13
+ afterEach(() => {
14
+ vi.restoreAllMocks();
15
+ });
16
+
17
+ /**
18
+ * Drive a turn with mocked performance.now() timestamps.
19
+ * Set `isToolCall: true` to simulate a tool_execution_start during the turn.
20
+ */
21
+ function driveTurn(clocks: {
22
+ turnStart: number;
23
+ messageStart: number;
24
+ firstUpdate: number;
25
+ streamUpdates: number[];
26
+ messageEnd: number;
27
+ turnEnd?: number;
28
+ isToolCall?: boolean;
29
+ }) {
30
+ const { handlers, notifySpy, appendEntrySpy } = fixture;
31
+
32
+ const timestamps = [
33
+ clocks.turnStart,
34
+ clocks.turnStart,
35
+ clocks.messageStart,
36
+ clocks.firstUpdate,
37
+ ...clocks.streamUpdates,
38
+ clocks.messageEnd,
39
+ clocks.turnEnd ?? clocks.messageEnd,
40
+ ];
41
+
42
+ let callIdx = 0;
43
+ const spy = vi.spyOn(performance, 'now').mockImplementation(() => {
44
+ return timestamps[Math.min(callIdx++, timestamps.length - 1)];
45
+ });
46
+
47
+ const assistantMessage: AssistantMessage = {
48
+ role: 'assistant',
49
+ content: [{ type: 'text', text: 'Response' }],
50
+ api: 'openai-completions',
51
+ provider: 'openai',
52
+ model: 'gpt-4',
53
+ usage: {
54
+ input: 50,
55
+ output: 20,
56
+ cacheRead: 0,
57
+ cacheWrite: 0,
58
+ totalTokens: 70,
59
+ cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
60
+ },
61
+ stopReason: clocks.isToolCall ? 'toolUse' : 'stop',
62
+ timestamp: Date.now(),
63
+ };
64
+
65
+ handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
66
+ handlers['message_start']?.({ type: 'message_start', message: assistantMessage });
67
+ handlers['message_update']?.({
68
+ type: 'message_update',
69
+ message: assistantMessage,
70
+ assistantMessageEvent: { type: 'text_delta', delta: 't' },
71
+ });
72
+ for (const _ts of clocks.streamUpdates) {
73
+ handlers['message_update']?.({
74
+ type: 'message_update',
75
+ message: assistantMessage,
76
+ assistantMessageEvent: { type: 'text_delta', delta: 't' },
77
+ });
78
+ }
79
+
80
+ // Simulate tool_execution_start if this is a tool call turn
81
+ if (clocks.isToolCall) {
82
+ handlers['tool_execution_start']?.({
83
+ type: 'tool_execution_start',
84
+ toolCallId: 'call_123',
85
+ toolName: 'bash',
86
+ args: { command: 'ls' },
87
+ });
88
+ }
89
+
90
+ handlers['message_end']?.({ type: 'message_end', message: assistantMessage });
91
+ handlers['turn_end']?.(
92
+ { type: 'turn_end', turnIndex: 0, message: assistantMessage, toolResults: [] },
93
+ fixture.mockCtx
94
+ );
95
+
96
+ spy.mockRestore();
97
+ return { notifySpy, appendEntrySpy };
98
+ }
99
+
100
+ // ── Cap is set by reliable streaming turns ────────────────────────────────
101
+
102
+ it('should set the TPS cap from a reliable streaming turn (primary branch, no tool call)', () => {
103
+ // 20 tokens / 0.4s = 50 TPS from primary branch
104
+ const { appendEntrySpy } = driveTurn({
105
+ turnStart: 0,
106
+ messageStart: 200,
107
+ firstUpdate: 200.123,
108
+ streamUpdates: [400, 500, 600, 700, 800],
109
+ messageEnd: 900,
110
+ });
111
+
112
+ const [, data] = appendEntrySpy.mock.calls[0];
113
+ // TPS should be ~50, and isPrimaryBranch should be true
114
+ expect(data.tps).toBeGreaterThanOrEqual(40);
115
+ expect(data.tps).toBeLessThanOrEqual(60);
116
+ expect(data.isPrimaryBranch).toBe(true);
117
+ });
118
+
119
+ // ── Cap is applied to tool-call turns ─────────────────────────────────────
120
+
121
+ it('should clamp tool-call TPS to the cap set by a prior streaming turn', () => {
122
+ // Turn 1: reliable streaming response → sets cap at ~50 TPS
123
+ driveTurn({
124
+ turnStart: 0,
125
+ messageStart: 200,
126
+ firstUpdate: 200.123,
127
+ streamUpdates: [400, 500, 600, 700, 800],
128
+ messageEnd: 900,
129
+ });
130
+
131
+ // Turn 2: tool call with fallback TPS (2 updates, 100ms generationMs)
132
+ // Without cap: 20 tokens / 0.055s ≈ 363 TPS (inflated)
133
+ // With cap: min(363, 50) = 50 TPS
134
+ const { appendEntrySpy, notifySpy } = driveTurn({
135
+ turnStart: 0,
136
+ messageStart: 100,
137
+ firstUpdate: 100.1,
138
+ streamUpdates: [100.15, 100.3],
139
+ messageEnd: 200,
140
+ isToolCall: true,
141
+ });
142
+
143
+ const [, data] = appendEntrySpy.mock.calls[1];
144
+ expect(data.tps).not.toBeNull();
145
+ // Must be clamped to the ~50 TPS cap, not the inflated fallback value
146
+ expect(data.tps).toBeLessThanOrEqual(55);
147
+ expect(data.tps).toBeGreaterThan(0);
148
+ });
149
+
150
+ // ── Tool calls do not set the cap ────────────────────────────────────────
151
+
152
+ it('should not let tool-call turns set the cap', () => {
153
+ // Turn 1: tool call → no cap exists yet, TPS is null
154
+ const { appendEntrySpy: spy1 } = driveTurn({
155
+ turnStart: 0,
156
+ messageStart: 100,
157
+ firstUpdate: 100.1,
158
+ streamUpdates: [100.15, 100.3],
159
+ messageEnd: 200,
160
+ isToolCall: true,
161
+ });
162
+ const [, data1] = spy1.mock.calls[0];
163
+ // No cap → tool call TPS is null
164
+ expect(data1.tps).toBeNull();
165
+
166
+ // Turn 2: reliable streaming response at ~50 TPS → sets the cap
167
+ const { appendEntrySpy: spy2 } = driveTurn({
168
+ turnStart: 0,
169
+ messageStart: 200,
170
+ firstUpdate: 200.123,
171
+ streamUpdates: [400, 500, 600, 700, 800],
172
+ messageEnd: 900,
173
+ });
174
+ const [, data2] = spy2.mock.calls[1];
175
+ expect(data2.tps).toBeGreaterThanOrEqual(40);
176
+ expect(data2.tps).toBeLessThanOrEqual(60);
177
+
178
+ // Turn 3: another tool call — should now be clamped to 50
179
+ const { appendEntrySpy: spy3 } = driveTurn({
180
+ turnStart: 0,
181
+ messageStart: 100,
182
+ firstUpdate: 100.1,
183
+ streamUpdates: [100.15, 100.3],
184
+ messageEnd: 200,
185
+ isToolCall: true,
186
+ });
187
+ const [, data3] = spy3.mock.calls[2];
188
+ expect(data3.tps).not.toBeNull();
189
+ expect(data3.tps).toBeLessThanOrEqual(55);
190
+ });
191
+
192
+ // ── Cold start: no cap yet ────────────────────────────────────────────────
193
+
194
+ it('should show null TPS for tool calls when no cap exists yet', () => {
195
+ const { notifySpy, appendEntrySpy } = driveTurn({
196
+ turnStart: 0,
197
+ messageStart: 100,
198
+ firstUpdate: 100.1,
199
+ streamUpdates: [100.15, 100.3],
200
+ messageEnd: 200,
201
+ isToolCall: true,
202
+ });
203
+
204
+ const notification = notifySpy.mock.calls[0][0] as string;
205
+ // No streaming turn has set the cap yet → tool call TPS is null
206
+ expect(notification).toContain('TPS —');
207
+
208
+ const [, data] = appendEntrySpy.mock.calls[0];
209
+ expect(data.tps).toBeNull();
210
+ });
211
+
212
+ // ── Non-tool-call fallback turns are not clamped ──────────────────────────
213
+
214
+ it('should not clamp non-tool-call fallback TPS', () => {
215
+ // Turn 1: set cap at ~50 TPS from a reliable streaming turn
216
+ driveTurn({
217
+ turnStart: 0,
218
+ messageStart: 200,
219
+ firstUpdate: 200.123,
220
+ streamUpdates: [400, 500, 600, 700, 800],
221
+ messageEnd: 900,
222
+ });
223
+
224
+ // Turn 2: non-tool-call fallback (e.g. short burst response)
225
+ // This should NOT be clamped — only tool calls get capped
226
+ const { appendEntrySpy } = driveTurn({
227
+ turnStart: 0,
228
+ messageStart: 100,
229
+ firstUpdate: 100.1,
230
+ streamUpdates: [100.15, 100.3],
231
+ messageEnd: 200,
232
+ isToolCall: false,
233
+ });
234
+
235
+ const [, data] = appendEntrySpy.mock.calls[1];
236
+ expect(data.tps).not.toBeNull();
237
+ // Non-tool-call fallback TPS is uncapped — may be high
238
+ expect(data.tps).toBeGreaterThan(50);
239
+ });
240
+
241
+ // ── Cap is per-model ──────────────────────────────────────────────────────
242
+
243
+ it('should maintain separate caps per model', () => {
244
+ // Turn 1: openai/gpt-4 streaming → sets cap at ~50 TPS
245
+ driveTurn({
246
+ turnStart: 0,
247
+ messageStart: 200,
248
+ firstUpdate: 200.123,
249
+ streamUpdates: [400, 500, 600, 700, 800],
250
+ messageEnd: 900,
251
+ });
252
+
253
+ // Turn 2: deepseek/deepseek-v3 tool call → no cap recorded for deepseek yet, so its TPS is null
254
+ // Drive the handlers manually (driveTurn hard-codes openai/gpt-4) with a different provider/model so the gpt-4 cap does not apply
255
+ const { handlers, appendEntrySpy } = fixture;
256
+ const deepseek: AssistantMessage = {
257
+ role: 'assistant',
258
+ content: [{ type: 'text', text: 'Hi' }],
259
+ api: 'openai-completions',
260
+ provider: 'deepseek',
261
+ model: 'deepseek-v3',
262
+ usage: {
263
+ input: 50,
264
+ output: 20,
265
+ cacheRead: 0,
266
+ cacheWrite: 0,
267
+ totalTokens: 70,
268
+ cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
269
+ },
270
+ stopReason: 'toolUse',
271
+ timestamp: Date.now(),
272
+ };
273
+
274
+ let callIdx = 0;
275
+ const timestamps = [0, 0, 100, 100.1, 100.15, 100.3, 300, 300];
276
+ const spy = vi.spyOn(performance, 'now').mockImplementation(() => {
277
+ return timestamps[Math.min(callIdx++, timestamps.length - 1)];
278
+ });
279
+
280
+ handlers['turn_start']?.({ type: 'turn_start', turnIndex: 1, timestamp: Date.now() });
281
+ handlers['message_start']?.({ type: 'message_start', message: deepseek });
282
+ handlers['message_update']?.({
283
+ type: 'message_update',
284
+ message: deepseek,
285
+ assistantMessageEvent: { type: 'text_delta', delta: 't' },
286
+ });
287
+ handlers['message_update']?.({
288
+ type: 'message_update',
289
+ message: deepseek,
290
+ assistantMessageEvent: { type: 'text_delta', delta: 't' },
291
+ });
292
+ handlers['message_update']?.({
293
+ type: 'message_update',
294
+ message: deepseek,
295
+ assistantMessageEvent: { type: 'text_delta', delta: 't' },
296
+ });
297
+ handlers['tool_execution_start']?.({
298
+ type: 'tool_execution_start',
299
+ toolCallId: 'call_1',
300
+ toolName: 'bash',
301
+ args: {},
302
+ });
303
+ handlers['message_end']?.({ type: 'message_end', message: deepseek });
304
+ handlers['turn_end']?.(
305
+ { type: 'turn_end', turnIndex: 1, message: deepseek, toolResults: [] },
306
+ fixture.mockCtx
307
+ );
308
+ spy.mockRestore();
309
+
310
+ const [, data2] = appendEntrySpy.mock.calls[1];
311
+ // DeepSeek has no cap yet → tool call TPS is null
312
+ expect(data2.tps).toBeNull();
313
+ });
314
+
315
+ // ── Cap only goes up ──────────────────────────────────────────────────────
316
+
317
+ it('should only raise the cap, never lower it', () => {
318
+ // Turn 1: sets cap at ~50 TPS
319
+ driveTurn({
320
+ turnStart: 0,
321
+ messageStart: 200,
322
+ firstUpdate: 200.123,
323
+ streamUpdates: [400, 500, 600, 700, 800],
324
+ messageEnd: 900,
325
+ });
326
+
327
+ // Turn 2: slower streaming response at ~25 TPS → cap stays at 50
328
+ const { appendEntrySpy } = driveTurn({
329
+ turnStart: 0,
330
+ messageStart: 200,
331
+ firstUpdate: 200.123,
332
+ streamUpdates: [600, 800, 1000, 1200, 1400],
333
+ messageEnd: 1500,
334
+ });
335
+
336
+ const [, data2] = appendEntrySpy.mock.calls[1];
337
+ // This turn's TPS is 25, but the cap should still be 50
338
+ expect(data2.tps).toBeGreaterThanOrEqual(15);
339
+ expect(data2.tps).toBeLessThanOrEqual(35);
340
+
341
+ // Turn 3: tool call → should be capped at 50, not 25
342
+ const { appendEntrySpy: spy3 } = driveTurn({
343
+ turnStart: 0,
344
+ messageStart: 100,
345
+ firstUpdate: 100.1,
346
+ streamUpdates: [100.15, 100.3],
347
+ messageEnd: 200,
348
+ isToolCall: true,
349
+ });
350
+
351
+ const [, data3] = spy3.mock.calls[2];
352
+ expect(data3.tps).not.toBeNull();
353
+ // Capped at 50 (the higher of the two streaming measurements)
354
+ expect(data3.tps).toBeLessThanOrEqual(55);
355
+ });
356
+ });
@@ -60,6 +60,13 @@ interface SessionTreeEvent {
60
60
  oldLeafId: string | null;
61
61
  }
62
62
 
63
+ interface ToolExecutionStartEvent {
64
+ type: 'tool_execution_start';
65
+ toolCallId: string;
66
+ toolName: string;
67
+ args: unknown;
68
+ }
69
+
63
70
  // ─── Constants ──────────────────────────────────────────────────────────────
64
71
 
65
72
  /** Minimum gap between token updates to count as a stall (ms) */
@@ -81,6 +88,7 @@ interface TurnTelemetry {
81
88
  messageCount: number; // assistant messages in this turn
82
89
  };
83
90
  tps: number | null; // output / (streamMs / 1000), null when burst/degenerate
91
+ isPrimaryBranch: boolean; // TPS came from primary-branch (reliable) measurement
84
92
  cost: {
85
93
  input: number;
86
94
  output: number;
@@ -108,6 +116,8 @@ interface TurnTiming {
108
116
  stallCount: number;
109
117
  inStall: boolean;
110
118
  messageCount: number;
119
+ isToolCall: boolean; // tool_execution_start fired during this turn
120
+ isPrimaryBranch: boolean; // TPS came from primary-branch (reliable) measurement
111
121
  }
112
122
 
113
123
  // ─── Helpers ────────────────────────────────────────────────────────────────
@@ -336,6 +346,7 @@ function buildTelemetry(timing: TurnTiming, turnEndMs: number): TurnTelemetry |
336
346
  // Includes TTFT, underestimates, but never overshoots.
337
347
  // Else: null — structurally unidentifiable.
338
348
  let tps: number | null = null;
349
+ let isPrimaryBranch = false;
339
350
  if (
340
351
  streamMs !== null &&
341
352
  streamMs >= MIN_STREAM_MS &&
@@ -351,6 +362,7 @@ function buildTelemetry(timing: TurnTiming, turnEndMs: number): TurnTelemetry |
351
362
  const effectiveStreamMs = streamMs - timing.stallMs;
352
363
  const raw = output / (effectiveStreamMs / 1000);
353
364
  tps = Math.round(raw * 10) / 10;
365
+ isPrimaryBranch = true;
354
366
  } else if (timing.updateCount >= 2 && timing.totalGenerationMs >= MIN_GENERATION_MS) {
355
367
  // Fallback: use generationMs (message_start → message_end) minus
356
368
  // stalls. This includes TTFT, so it underestimates generation speed,
@@ -386,6 +398,7 @@ function buildTelemetry(timing: TurnTiming, turnEndMs: number): TurnTelemetry |
386
398
  messageCount: timing.messageCount,
387
399
  },
388
400
  tps,
401
+ isPrimaryBranch,
389
402
  cost: hasCost
390
403
  ? {
391
404
  input: costInput,
@@ -405,6 +418,10 @@ export default function tpsExtension(pi: ExtensionAPI) {
405
418
  // Current turn timing state
406
419
  let currentTiming: TurnTiming | null = null;
407
420
 
421
+ // Per-model TPS cap: highest reliable (primary-branch, non-tool-call) TPS observed.
422
+ // Tool-call turns get clamped to this value. Only set by reliable streaming measurements.
423
+ const tpsCaps = new Map<string, number>(); // "provider:modelId" → cap
424
+
408
425
  // Cached session entries for argument completion (captured on session_start / session_tree)
409
426
  let cachedEntries: Array<{ type?: string; customType?: string; data?: unknown }> = [];
410
427
 
@@ -473,6 +490,8 @@ export default function tpsExtension(pi: ExtensionAPI) {
473
490
  stallCount: 0,
474
491
  inStall: false,
475
492
  messageCount: 0,
493
+ isToolCall: false,
494
+ isPrimaryBranch: false,
476
495
  };
477
496
  });
478
497
 
@@ -537,6 +556,13 @@ export default function tpsExtension(pi: ExtensionAPI) {
537
556
  currentTiming.lastUpdateMs = now;
538
557
  });
539
558
 
559
+ // Track when a tool starts executing — marks this turn as a tool call
560
+ // for the dynamic TPS cap (tool-call turns only get capped, never set the cap).
561
+ pi.on('tool_execution_start', (_event: ToolExecutionStartEvent) => {
562
+ if (!currentTiming) return;
563
+ currentTiming.isToolCall = true;
564
+ });
565
+
540
566
  // Track when a message ends
541
567
  pi.on('message_end', (event: MessageEndEvent) => {
542
568
  if (!currentTiming) return;
@@ -569,6 +595,28 @@ export default function tpsExtension(pi: ExtensionAPI) {
569
595
  const telemetry = buildTelemetry(timing, turnEndMs);
570
596
  if (!telemetry) return;
571
597
 
598
+ // ── Dynamic TPS cap ────────────────────────────────────────────────
599
+ // Only non-tool-call, primary-branch (reliable) measurements set the cap.
600
+ // Tool-call turns get clamped to the cap to prevent inflation from
601
+ // short outputs over tiny time windows; if no cap has been recorded yet for the model, the tool-call TPS is reported as null.
602
+ const modelKey = `${telemetry.model.provider}:${telemetry.model.modelId}`;
603
+
604
+ if (telemetry.isPrimaryBranch && !timing.isToolCall && telemetry.tps !== null) {
605
+ const currentCap = tpsCaps.get(modelKey);
606
+ if (currentCap === undefined || telemetry.tps > currentCap) {
607
+ tpsCaps.set(modelKey, telemetry.tps);
608
+ }
609
+ }
610
+
611
+ if (timing.isToolCall && telemetry.tps !== null) {
612
+ const cap = tpsCaps.get(modelKey);
613
+ if (cap !== undefined) {
614
+ telemetry.tps = Math.min(telemetry.tps, cap);
615
+ } else {
616
+ telemetry.tps = null;
617
+ }
618
+ }
619
+
572
620
  // Persist structured telemetry to session for export and rehydration
573
621
  pi.appendEntry('tps', telemetry);
574
622
 
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "pi-tps",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "pi-tps",
9
- "version": "1.0.0",
9
+ "version": "1.1.0",
10
10
  "hasInstallScript": true,
11
11
  "license": "MIT",
12
12
  "devDependencies": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@monotykamary/pi-tps",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "Tokens-per-second tracker for pi — see your LLM generation speed after every agent turn",
5
5
  "keywords": [
6
6
  "pi-package"