@monotykamary/pi-tps 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
|
2
|
+
import type { AssistantMessage } from '@earendil-works/pi-ai';
|
|
3
|
+
import { createTestFixture, activateExtension } from './helpers';
|
|
4
|
+
|
|
5
|
+
describe('pi-tps extension — dynamic TPS cap', () => {
|
|
6
|
+
let fixture: ReturnType<typeof createTestFixture>;
|
|
7
|
+
|
|
8
|
+
beforeEach(async () => {
|
|
9
|
+
fixture = createTestFixture();
|
|
10
|
+
await activateExtension(fixture);
|
|
11
|
+
});
|
|
12
|
+
|
|
13
|
+
afterEach(() => {
|
|
14
|
+
vi.restoreAllMocks();
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Drive a turn with mocked performance.now() timestamps.
|
|
19
|
+
* Set `isToolCall: true` to simulate a tool_execution_start during the turn.
|
|
20
|
+
*/
|
|
21
|
+
function driveTurn(clocks: {
|
|
22
|
+
turnStart: number;
|
|
23
|
+
messageStart: number;
|
|
24
|
+
firstUpdate: number;
|
|
25
|
+
streamUpdates: number[];
|
|
26
|
+
messageEnd: number;
|
|
27
|
+
turnEnd?: number;
|
|
28
|
+
isToolCall?: boolean;
|
|
29
|
+
}) {
|
|
30
|
+
const { handlers, notifySpy, appendEntrySpy } = fixture;
|
|
31
|
+
|
|
32
|
+
const timestamps = [
|
|
33
|
+
clocks.turnStart,
|
|
34
|
+
clocks.turnStart,
|
|
35
|
+
clocks.messageStart,
|
|
36
|
+
clocks.firstUpdate,
|
|
37
|
+
...clocks.streamUpdates,
|
|
38
|
+
clocks.messageEnd,
|
|
39
|
+
clocks.turnEnd ?? clocks.messageEnd,
|
|
40
|
+
];
|
|
41
|
+
|
|
42
|
+
let callIdx = 0;
|
|
43
|
+
const spy = vi.spyOn(performance, 'now').mockImplementation(() => {
|
|
44
|
+
return timestamps[Math.min(callIdx++, timestamps.length - 1)];
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
const assistantMessage: AssistantMessage = {
|
|
48
|
+
role: 'assistant',
|
|
49
|
+
content: [{ type: 'text', text: 'Response' }],
|
|
50
|
+
api: 'openai-completions',
|
|
51
|
+
provider: 'openai',
|
|
52
|
+
model: 'gpt-4',
|
|
53
|
+
usage: {
|
|
54
|
+
input: 50,
|
|
55
|
+
output: 20,
|
|
56
|
+
cacheRead: 0,
|
|
57
|
+
cacheWrite: 0,
|
|
58
|
+
totalTokens: 70,
|
|
59
|
+
cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
|
|
60
|
+
},
|
|
61
|
+
stopReason: clocks.isToolCall ? 'toolUse' : 'stop',
|
|
62
|
+
timestamp: Date.now(),
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
|
|
66
|
+
handlers['message_start']?.({ type: 'message_start', message: assistantMessage });
|
|
67
|
+
handlers['message_update']?.({
|
|
68
|
+
type: 'message_update',
|
|
69
|
+
message: assistantMessage,
|
|
70
|
+
assistantMessageEvent: { type: 'text_delta', delta: 't' },
|
|
71
|
+
});
|
|
72
|
+
for (const _ts of clocks.streamUpdates) {
|
|
73
|
+
handlers['message_update']?.({
|
|
74
|
+
type: 'message_update',
|
|
75
|
+
message: assistantMessage,
|
|
76
|
+
assistantMessageEvent: { type: 'text_delta', delta: 't' },
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Simulate tool_execution_start if this is a tool call turn
|
|
81
|
+
if (clocks.isToolCall) {
|
|
82
|
+
handlers['tool_execution_start']?.({
|
|
83
|
+
type: 'tool_execution_start',
|
|
84
|
+
toolCallId: 'call_123',
|
|
85
|
+
toolName: 'bash',
|
|
86
|
+
args: { command: 'ls' },
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
handlers['message_end']?.({ type: 'message_end', message: assistantMessage });
|
|
91
|
+
handlers['turn_end']?.(
|
|
92
|
+
{ type: 'turn_end', turnIndex: 0, message: assistantMessage, toolResults: [] },
|
|
93
|
+
fixture.mockCtx
|
|
94
|
+
);
|
|
95
|
+
|
|
96
|
+
spy.mockRestore();
|
|
97
|
+
return { notifySpy, appendEntrySpy };
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// ── Cap is set by reliable streaming turns ────────────────────────────────
|
|
101
|
+
|
|
102
|
+
it('should set the TPS cap from a reliable streaming turn (primary branch, no tool call)', () => {
|
|
103
|
+
// 20 tokens / 0.4s = 50 TPS from primary branch
|
|
104
|
+
const { appendEntrySpy } = driveTurn({
|
|
105
|
+
turnStart: 0,
|
|
106
|
+
messageStart: 200,
|
|
107
|
+
firstUpdate: 200.123,
|
|
108
|
+
streamUpdates: [400, 500, 600, 700, 800],
|
|
109
|
+
messageEnd: 900,
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
const [, data] = appendEntrySpy.mock.calls[0];
|
|
113
|
+
// TPS should be ~50, and isPrimaryBranch should be true
|
|
114
|
+
expect(data.tps).toBeGreaterThanOrEqual(40);
|
|
115
|
+
expect(data.tps).toBeLessThanOrEqual(60);
|
|
116
|
+
expect(data.isPrimaryBranch).toBe(true);
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
// ── Cap is applied to tool-call turns ─────────────────────────────────────
|
|
120
|
+
|
|
121
|
+
it('should clamp tool-call TPS to the cap set by a prior streaming turn', () => {
|
|
122
|
+
// Turn 1: reliable streaming response → sets cap at ~50 TPS
|
|
123
|
+
driveTurn({
|
|
124
|
+
turnStart: 0,
|
|
125
|
+
messageStart: 200,
|
|
126
|
+
firstUpdate: 200.123,
|
|
127
|
+
streamUpdates: [400, 500, 600, 700, 800],
|
|
128
|
+
messageEnd: 900,
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
// Turn 2: tool call with fallback TPS (2 updates, 250ms generationMs)
|
|
132
|
+
// Without cap: 20 tokens / 0.25s ≈ 80 TPS (feasible but from short window)
|
|
133
|
+
// With cap: min(80, 50) = 50 TPS
|
|
134
|
+
const { appendEntrySpy, notifySpy } = driveTurn({
|
|
135
|
+
turnStart: 0,
|
|
136
|
+
messageStart: 100,
|
|
137
|
+
firstUpdate: 100.1,
|
|
138
|
+
streamUpdates: [100.15, 100.3],
|
|
139
|
+
messageEnd: 350,
|
|
140
|
+
isToolCall: true,
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
const [, data] = appendEntrySpy.mock.calls[1];
|
|
144
|
+
expect(data.tps).not.toBeNull();
|
|
145
|
+
// Must be clamped to the ~50 TPS cap, not the inflated fallback value
|
|
146
|
+
expect(data.tps).toBeLessThanOrEqual(55);
|
|
147
|
+
expect(data.tps).toBeGreaterThan(0);
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
// ── Tool calls do not set the cap from fallback ─────────────────────────────
|
|
151
|
+
|
|
152
|
+
it('should not let fallback-branch tool-call turns set the cap', () => {
|
|
153
|
+
// Turn 1: tool call with fallback TPS — should NOT set the cap
|
|
154
|
+
const { appendEntrySpy: spy1 } = driveTurn({
|
|
155
|
+
turnStart: 0,
|
|
156
|
+
messageStart: 100,
|
|
157
|
+
firstUpdate: 100.1,
|
|
158
|
+
streamUpdates: [100.15, 100.3],
|
|
159
|
+
messageEnd: 350,
|
|
160
|
+
isToolCall: true,
|
|
161
|
+
});
|
|
162
|
+
const [, data1] = spy1.mock.calls[0];
|
|
163
|
+
// No cap → fallback tool call TPS is null
|
|
164
|
+
expect(data1.tps).toBeNull();
|
|
165
|
+
|
|
166
|
+
// Turn 2: reliable streaming response at ~50 TPS → sets the cap
|
|
167
|
+
const { appendEntrySpy: spy2 } = driveTurn({
|
|
168
|
+
turnStart: 0,
|
|
169
|
+
messageStart: 200,
|
|
170
|
+
firstUpdate: 200.123,
|
|
171
|
+
streamUpdates: [400, 500, 600, 700, 800],
|
|
172
|
+
messageEnd: 900,
|
|
173
|
+
});
|
|
174
|
+
const [, data2] = spy2.mock.calls[1];
|
|
175
|
+
expect(data2.tps).toBeGreaterThanOrEqual(40);
|
|
176
|
+
expect(data2.tps).toBeLessThanOrEqual(60);
|
|
177
|
+
|
|
178
|
+
// Turn 3: another fallback tool call — should now be clamped to 50
|
|
179
|
+
const { appendEntrySpy: spy3 } = driveTurn({
|
|
180
|
+
turnStart: 0,
|
|
181
|
+
messageStart: 100,
|
|
182
|
+
firstUpdate: 100.1,
|
|
183
|
+
streamUpdates: [100.15, 100.3],
|
|
184
|
+
messageEnd: 350,
|
|
185
|
+
isToolCall: true,
|
|
186
|
+
});
|
|
187
|
+
const [, data3] = spy3.mock.calls[2];
|
|
188
|
+
expect(data3.tps).not.toBeNull();
|
|
189
|
+
expect(data3.tps).toBeLessThanOrEqual(55);
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
// ── Primary-branch tool calls (reasoning) set the cap ──────────────────────
|
|
193
|
+
|
|
194
|
+
it('should let primary-branch tool-call turns set the cap (e.g. reasoning before tool call)', () => {
|
|
195
|
+
// Turn 1: tool call with PRIMARY-branch TPS (reasoning + tool call, enough updates/time)
|
|
196
|
+
// 20 tokens / 0.4s = 50 TPS from primary branch, isToolCall = true
|
|
197
|
+
const { appendEntrySpy: spy1 } = driveTurn({
|
|
198
|
+
turnStart: 0,
|
|
199
|
+
messageStart: 200,
|
|
200
|
+
firstUpdate: 200.123,
|
|
201
|
+
streamUpdates: [400, 500, 600, 700, 800],
|
|
202
|
+
messageEnd: 900,
|
|
203
|
+
isToolCall: true,
|
|
204
|
+
});
|
|
205
|
+
const [, data1] = spy1.mock.calls[0];
|
|
206
|
+
// Primary branch + isToolCall → TPS is still computed (not null/capped)
|
|
207
|
+
expect(data1.tps).toBeGreaterThanOrEqual(40);
|
|
208
|
+
expect(data1.tps).toBeLessThanOrEqual(60);
|
|
209
|
+
expect(data1.isPrimaryBranch).toBe(true);
|
|
210
|
+
|
|
211
|
+
// Turn 2: fallback tool call — should be clamped to the cap from turn 1
|
|
212
|
+
const { appendEntrySpy: spy2 } = driveTurn({
|
|
213
|
+
turnStart: 0,
|
|
214
|
+
messageStart: 100,
|
|
215
|
+
firstUpdate: 100.1,
|
|
216
|
+
streamUpdates: [100.15, 100.3],
|
|
217
|
+
messageEnd: 350,
|
|
218
|
+
isToolCall: true,
|
|
219
|
+
});
|
|
220
|
+
const [, data2] = spy2.mock.calls[1];
|
|
221
|
+
expect(data2.tps).not.toBeNull();
|
|
222
|
+
// Clamped to ~50 cap set by the primary-branch tool call in turn 1
|
|
223
|
+
expect(data2.tps).toBeLessThanOrEqual(55);
|
|
224
|
+
});
|
|
225
|
+
|
|
226
|
+
// ── Cold start: no cap yet ────────────────────────────────────────────────
|
|
227
|
+
|
|
228
|
+
it('should show null TPS for tool calls when no cap exists yet', () => {
|
|
229
|
+
const { notifySpy, appendEntrySpy } = driveTurn({
|
|
230
|
+
turnStart: 0,
|
|
231
|
+
messageStart: 100,
|
|
232
|
+
firstUpdate: 100.1,
|
|
233
|
+
streamUpdates: [100.15, 100.3],
|
|
234
|
+
messageEnd: 350,
|
|
235
|
+
isToolCall: true,
|
|
236
|
+
});
|
|
237
|
+
|
|
238
|
+
const notification = notifySpy.mock.calls[0][0] as string;
|
|
239
|
+
// No streaming turn has set the cap yet → tool call TPS is null
|
|
240
|
+
expect(notification).toContain('TPS —');
|
|
241
|
+
|
|
242
|
+
const [, data] = appendEntrySpy.mock.calls[0];
|
|
243
|
+
expect(data.tps).toBeNull();
|
|
244
|
+
});
|
|
245
|
+
|
|
246
|
+
// ── Non-tool-call fallback turns are not clamped ──────────────────────────
|
|
247
|
+
|
|
248
|
+
it('should not clamp non-tool-call fallback TPS', () => {
|
|
249
|
+
// Turn 1: set cap at ~50 TPS from a reliable streaming turn
|
|
250
|
+
driveTurn({
|
|
251
|
+
turnStart: 0,
|
|
252
|
+
messageStart: 200,
|
|
253
|
+
firstUpdate: 200.123,
|
|
254
|
+
streamUpdates: [400, 500, 600, 700, 800],
|
|
255
|
+
messageEnd: 900,
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
// Turn 2: non-tool-call fallback (e.g. short burst response)
|
|
259
|
+
// This should NOT be clamped — only tool calls get capped
|
|
260
|
+
const { appendEntrySpy } = driveTurn({
|
|
261
|
+
turnStart: 0,
|
|
262
|
+
messageStart: 100,
|
|
263
|
+
firstUpdate: 100.1,
|
|
264
|
+
streamUpdates: [100.15, 100.3],
|
|
265
|
+
messageEnd: 350,
|
|
266
|
+
isToolCall: false,
|
|
267
|
+
});
|
|
268
|
+
|
|
269
|
+
const [, data] = appendEntrySpy.mock.calls[1];
|
|
270
|
+
expect(data.tps).not.toBeNull();
|
|
271
|
+
// Non-tool-call fallback TPS is uncapped — may be high
|
|
272
|
+
expect(data.tps).toBeGreaterThan(50);
|
|
273
|
+
});
|
|
274
|
+
|
|
275
|
+
// ── Cap is per-model ──────────────────────────────────────────────────────
|
|
276
|
+
|
|
277
|
+
it('should maintain separate caps per model', () => {
|
|
278
|
+
// Turn 1: openai/gpt-4 streaming → sets cap at ~50 TPS
|
|
279
|
+
driveTurn({
|
|
280
|
+
turnStart: 0,
|
|
281
|
+
messageStart: 200,
|
|
282
|
+
firstUpdate: 200.123,
|
|
283
|
+
streamUpdates: [400, 500, 600, 700, 800],
|
|
284
|
+
messageEnd: 900,
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
// Turn 2: deepseek/deepseek-v3 tool call → no cap for deepseek yet, so TPS is reported as null
|
|
288
|
+
// Drive the turn by hand (driveTurn hard-codes openai/gpt-4) with a different provider/model to avoid the gpt-4 cap
|
|
289
|
+
const { handlers, appendEntrySpy } = fixture;
|
|
290
|
+
const deepseek: AssistantMessage = {
|
|
291
|
+
role: 'assistant',
|
|
292
|
+
content: [{ type: 'text', text: 'Hi' }],
|
|
293
|
+
api: 'openai-completions',
|
|
294
|
+
provider: 'deepseek',
|
|
295
|
+
model: 'deepseek-v3',
|
|
296
|
+
usage: {
|
|
297
|
+
input: 50,
|
|
298
|
+
output: 20,
|
|
299
|
+
cacheRead: 0,
|
|
300
|
+
cacheWrite: 0,
|
|
301
|
+
totalTokens: 70,
|
|
302
|
+
cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
|
|
303
|
+
},
|
|
304
|
+
stopReason: 'toolUse',
|
|
305
|
+
timestamp: Date.now(),
|
|
306
|
+
};
|
|
307
|
+
|
|
308
|
+
let callIdx = 0;
|
|
309
|
+
const timestamps = [0, 0, 100, 100.1, 100.15, 100.3, 300, 300];
|
|
310
|
+
const spy = vi.spyOn(performance, 'now').mockImplementation(() => {
|
|
311
|
+
return timestamps[Math.min(callIdx++, timestamps.length - 1)];
|
|
312
|
+
});
|
|
313
|
+
|
|
314
|
+
handlers['turn_start']?.({ type: 'turn_start', turnIndex: 1, timestamp: Date.now() });
|
|
315
|
+
handlers['message_start']?.({ type: 'message_start', message: deepseek });
|
|
316
|
+
handlers['message_update']?.({
|
|
317
|
+
type: 'message_update',
|
|
318
|
+
message: deepseek,
|
|
319
|
+
assistantMessageEvent: { type: 'text_delta', delta: 't' },
|
|
320
|
+
});
|
|
321
|
+
handlers['message_update']?.({
|
|
322
|
+
type: 'message_update',
|
|
323
|
+
message: deepseek,
|
|
324
|
+
assistantMessageEvent: { type: 'text_delta', delta: 't' },
|
|
325
|
+
});
|
|
326
|
+
handlers['message_update']?.({
|
|
327
|
+
type: 'message_update',
|
|
328
|
+
message: deepseek,
|
|
329
|
+
assistantMessageEvent: { type: 'text_delta', delta: 't' },
|
|
330
|
+
});
|
|
331
|
+
handlers['tool_execution_start']?.({
|
|
332
|
+
type: 'tool_execution_start',
|
|
333
|
+
toolCallId: 'call_1',
|
|
334
|
+
toolName: 'bash',
|
|
335
|
+
args: {},
|
|
336
|
+
});
|
|
337
|
+
handlers['message_end']?.({ type: 'message_end', message: deepseek });
|
|
338
|
+
handlers['turn_end']?.(
|
|
339
|
+
{ type: 'turn_end', turnIndex: 1, message: deepseek, toolResults: [] },
|
|
340
|
+
fixture.mockCtx
|
|
341
|
+
);
|
|
342
|
+
spy.mockRestore();
|
|
343
|
+
|
|
344
|
+
const [, data2] = appendEntrySpy.mock.calls[1];
|
|
345
|
+
// DeepSeek has no cap yet → tool call TPS is null
|
|
346
|
+
expect(data2.tps).toBeNull();
|
|
347
|
+
});
|
|
348
|
+
|
|
349
|
+
// ── Cap only goes up ──────────────────────────────────────────────────────
|
|
350
|
+
|
|
351
|
+
it('should only raise the cap, never lower it', () => {
|
|
352
|
+
// Turn 1: sets cap at ~50 TPS
|
|
353
|
+
driveTurn({
|
|
354
|
+
turnStart: 0,
|
|
355
|
+
messageStart: 200,
|
|
356
|
+
firstUpdate: 200.123,
|
|
357
|
+
streamUpdates: [400, 500, 600, 700, 800],
|
|
358
|
+
messageEnd: 900,
|
|
359
|
+
});
|
|
360
|
+
|
|
361
|
+
// Turn 2: slower streaming response at ~25 TPS → cap stays at 50
|
|
362
|
+
const { appendEntrySpy } = driveTurn({
|
|
363
|
+
turnStart: 0,
|
|
364
|
+
messageStart: 200,
|
|
365
|
+
firstUpdate: 200.123,
|
|
366
|
+
streamUpdates: [600, 800, 1000, 1200, 1400],
|
|
367
|
+
messageEnd: 1500,
|
|
368
|
+
});
|
|
369
|
+
|
|
370
|
+
const [, data2] = appendEntrySpy.mock.calls[1];
|
|
371
|
+
// This turn's TPS is 25, but the cap should still be 50
|
|
372
|
+
expect(data2.tps).toBeGreaterThanOrEqual(15);
|
|
373
|
+
expect(data2.tps).toBeLessThanOrEqual(35);
|
|
374
|
+
|
|
375
|
+
// Turn 3: tool call → should be capped at 50, not 25
|
|
376
|
+
const { appendEntrySpy: spy3 } = driveTurn({
|
|
377
|
+
turnStart: 0,
|
|
378
|
+
messageStart: 100,
|
|
379
|
+
firstUpdate: 100.1,
|
|
380
|
+
streamUpdates: [100.15, 100.3],
|
|
381
|
+
messageEnd: 350,
|
|
382
|
+
isToolCall: true,
|
|
383
|
+
});
|
|
384
|
+
|
|
385
|
+
const [, data3] = spy3.mock.calls[2];
|
|
386
|
+
expect(data3.tps).not.toBeNull();
|
|
387
|
+
// Capped at 50 (the higher of the two streaming measurements)
|
|
388
|
+
expect(data3.tps).toBeLessThanOrEqual(55);
|
|
389
|
+
});
|
|
390
|
+
});
|
|
@@ -60,6 +60,13 @@ interface SessionTreeEvent {
|
|
|
60
60
|
oldLeafId: string | null;
|
|
61
61
|
}
|
|
62
62
|
|
|
63
|
+
interface ToolExecutionStartEvent {
|
|
64
|
+
type: 'tool_execution_start';
|
|
65
|
+
toolCallId: string;
|
|
66
|
+
toolName: string;
|
|
67
|
+
args: unknown;
|
|
68
|
+
}
|
|
69
|
+
|
|
63
70
|
// ─── Constants ──────────────────────────────────────────────────────────────
|
|
64
71
|
|
|
65
72
|
/** Minimum gap between token updates to count as a stall (ms) */
|
|
@@ -81,6 +88,7 @@ interface TurnTelemetry {
|
|
|
81
88
|
messageCount: number; // assistant messages in this turn
|
|
82
89
|
};
|
|
83
90
|
tps: number | null; // output / (streamMs / 1000), null when burst/degenerate
|
|
91
|
+
isPrimaryBranch: boolean; // TPS came from primary-branch (reliable) measurement
|
|
84
92
|
cost: {
|
|
85
93
|
input: number;
|
|
86
94
|
output: number;
|
|
@@ -108,6 +116,8 @@ interface TurnTiming {
|
|
|
108
116
|
stallCount: number;
|
|
109
117
|
inStall: boolean;
|
|
110
118
|
messageCount: number;
|
|
119
|
+
isToolCall: boolean; // tool_execution_start fired during this turn
|
|
120
|
+
isPrimaryBranch: boolean; // TPS came from primary-branch (reliable) measurement
|
|
111
121
|
}
|
|
112
122
|
|
|
113
123
|
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
@@ -293,7 +303,7 @@ function buildTelemetry(timing: TurnTiming, turnEndMs: number): TurnTelemetry |
|
|
|
293
303
|
const MIN_STREAM_MS = 1;
|
|
294
304
|
const MIN_STREAM_UPDATES = 5;
|
|
295
305
|
const MIN_INTER_CHUNK_MS = 1;
|
|
296
|
-
const MIN_GENERATION_MS =
|
|
306
|
+
const MIN_GENERATION_MS = 200;
|
|
297
307
|
const ACTIVE_TIME_THRESHOLD_MS = 200;
|
|
298
308
|
const STALL_REDUCTION_DENOM = 2;
|
|
299
309
|
const STALL_DOMINANCE_RATIO = 0.85;
|
|
@@ -336,6 +346,7 @@ function buildTelemetry(timing: TurnTiming, turnEndMs: number): TurnTelemetry |
|
|
|
336
346
|
// Includes TTFT, underestimates, but never overshoots.
|
|
337
347
|
// Else: null — structurally unidentifiable.
|
|
338
348
|
let tps: number | null = null;
|
|
349
|
+
let isPrimaryBranch = false;
|
|
339
350
|
if (
|
|
340
351
|
streamMs !== null &&
|
|
341
352
|
streamMs >= MIN_STREAM_MS &&
|
|
@@ -351,6 +362,7 @@ function buildTelemetry(timing: TurnTiming, turnEndMs: number): TurnTelemetry |
|
|
|
351
362
|
const effectiveStreamMs = streamMs - timing.stallMs;
|
|
352
363
|
const raw = output / (effectiveStreamMs / 1000);
|
|
353
364
|
tps = Math.round(raw * 10) / 10;
|
|
365
|
+
isPrimaryBranch = true;
|
|
354
366
|
} else if (timing.updateCount >= 2 && timing.totalGenerationMs >= MIN_GENERATION_MS) {
|
|
355
367
|
// Fallback: use generationMs (message_start → message_end) minus
|
|
356
368
|
// stalls. This includes TTFT, so it underestimates generation speed,
|
|
@@ -386,6 +398,7 @@ function buildTelemetry(timing: TurnTiming, turnEndMs: number): TurnTelemetry |
|
|
|
386
398
|
messageCount: timing.messageCount,
|
|
387
399
|
},
|
|
388
400
|
tps,
|
|
401
|
+
isPrimaryBranch,
|
|
389
402
|
cost: hasCost
|
|
390
403
|
? {
|
|
391
404
|
input: costInput,
|
|
@@ -405,6 +418,10 @@ export default function tpsExtension(pi: ExtensionAPI) {
|
|
|
405
418
|
// Current turn timing state
|
|
406
419
|
let currentTiming: TurnTiming | null = null;
|
|
407
420
|
|
|
421
|
+
// Per-model TPS cap: highest reliable (primary-branch) TPS observed, including primary-branch tool-call turns.
|
|
422
|
+
// Tool-call turns get clamped to this value. Only set by reliable streaming measurements.
|
|
423
|
+
const tpsCaps = new Map<string, number>(); // "provider:modelId" → cap
|
|
424
|
+
|
|
408
425
|
// Cached session entries for argument completion (captured on session_start / session_tree)
|
|
409
426
|
let cachedEntries: Array<{ type?: string; customType?: string; data?: unknown }> = [];
|
|
410
427
|
|
|
@@ -473,6 +490,8 @@ export default function tpsExtension(pi: ExtensionAPI) {
|
|
|
473
490
|
stallCount: 0,
|
|
474
491
|
inStall: false,
|
|
475
492
|
messageCount: 0,
|
|
493
|
+
isToolCall: false,
|
|
494
|
+
isPrimaryBranch: false,
|
|
476
495
|
};
|
|
477
496
|
});
|
|
478
497
|
|
|
@@ -537,6 +556,13 @@ export default function tpsExtension(pi: ExtensionAPI) {
|
|
|
537
556
|
currentTiming.lastUpdateMs = now;
|
|
538
557
|
});
|
|
539
558
|
|
|
559
|
+
// Track when a tool starts executing — marks this turn as a tool call
|
|
560
|
+
// for the dynamic TPS cap (tool-call turns are clamped to the cap; they only raise it when measured on the primary branch).
|
|
561
|
+
pi.on('tool_execution_start', (_event: ToolExecutionStartEvent) => {
|
|
562
|
+
if (!currentTiming) return;
|
|
563
|
+
currentTiming.isToolCall = true;
|
|
564
|
+
});
|
|
565
|
+
|
|
540
566
|
// Track when a message ends
|
|
541
567
|
pi.on('message_end', (event: MessageEndEvent) => {
|
|
542
568
|
if (!currentTiming) return;
|
|
@@ -569,6 +595,28 @@ export default function tpsExtension(pi: ExtensionAPI) {
|
|
|
569
595
|
const telemetry = buildTelemetry(timing, turnEndMs);
|
|
570
596
|
if (!telemetry) return;
|
|
571
597
|
|
|
598
|
+
// ── Dynamic TPS cap ────────────────────────────────────────────────
|
|
599
|
+
// Only primary-branch (reliable) measurements set the cap, whether or not the turn made a tool call.
|
|
600
|
+
// Tool-call turns get clamped to the cap to prevent inflation from
|
|
601
|
+
// short outputs over tiny time windows.
|
|
602
|
+
const modelKey = `${telemetry.model.provider}:${telemetry.model.modelId}`;
|
|
603
|
+
|
|
604
|
+
if (telemetry.isPrimaryBranch && telemetry.tps !== null) {
|
|
605
|
+
const currentCap = tpsCaps.get(modelKey);
|
|
606
|
+
if (currentCap === undefined || telemetry.tps > currentCap) {
|
|
607
|
+
tpsCaps.set(modelKey, telemetry.tps);
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
if (timing.isToolCall && telemetry.tps !== null) {
|
|
612
|
+
const cap = tpsCaps.get(modelKey);
|
|
613
|
+
if (cap !== undefined) {
|
|
614
|
+
telemetry.tps = Math.min(telemetry.tps, cap);
|
|
615
|
+
} else {
|
|
616
|
+
telemetry.tps = null;
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
|
|
572
620
|
// Persist structured telemetry to session for export and rehydration
|
|
573
621
|
pi.appendEntry('tps', telemetry);
|
|
574
622
|
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-tps",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "pi-tps",
|
|
9
|
-
"version": "1.0.0",
|
|
9
|
+
"version": "1.1.1",
|
|
10
10
|
"hasInstallScript": true,
|
|
11
11
|
"license": "MIT",
|
|
12
12
|
"devDependencies": {
|