@monotykamary/pi-tps 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/FUNDING.yml +4 -0
- package/.github/workflows/test.yml +55 -0
- package/.pi/autoresearch/session-id +1 -0
- package/.prettierrc +7 -0
- package/LICENSE +21 -0
- package/README.md +237 -0
- package/commitlint.config.cjs +1 -0
- package/extensions/pi-tps/__tests__/export-command.test.ts +307 -0
- package/extensions/pi-tps/__tests__/extension-setup.test.ts +41 -0
- package/extensions/pi-tps/__tests__/format-duration.test.ts +83 -0
- package/extensions/pi-tps/__tests__/helpers.ts +154 -0
- package/extensions/pi-tps/__tests__/precision-timing.test.ts +701 -0
- package/extensions/pi-tps/__tests__/rehydration.test.ts +266 -0
- package/extensions/pi-tps/__tests__/session-export.test.ts +204 -0
- package/extensions/pi-tps/__tests__/stall-detection.test.ts +209 -0
- package/extensions/pi-tps/__tests__/stall-reduction.test.ts +139 -0
- package/extensions/pi-tps/__tests__/telemetry-flow.test.ts +654 -0
- package/extensions/pi-tps/index.ts +734 -0
- package/knip.json +10 -0
- package/npm-shrinkwrap.json +6923 -0
- package/package.json +54 -0
- package/tsconfig.json +24 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,654 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import type { AssistantMessage } from '@earendil-works/pi-ai';
import { createTestFixture, activateExtension, tick, makeAssistantMessage } from './helpers';
import type {
  TurnStartEvent,
  TurnEndEvent,
  MessageStartEvent,
  MessageUpdateEvent,
  MessageEndEvent,
} from './helpers';
|
|
11
|
+
|
|
12
|
+
describe('pi-tps extension — telemetry flow', () => {
|
|
13
|
+
// Shared per-test fixture; re-created before every test case.
let fixture: ReturnType<typeof createTestFixture>;

// Build a fresh fixture and activate the extension against it before each test.
beforeEach(async () => {
  const freshFixture = createTestFixture();
  fixture = freshFixture;
  await activateExtension(freshFixture);
});

// Restore every mock after each test.
afterEach(() => {
  vi.restoreAllMocks();
});
|
|
23
|
+
|
|
24
|
+
// ── Basic telemetry flow ─────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
it('should show notification with TPS, TTFT (1 decimal), and total time', async () => {
  // Single assistant message streamed over six deltas: expects exactly one
  // notification, one persisted 'tps' entry and one emitted 'tps:telemetry' event.
  const { handlers, notifySpy, appendEntrySpy } = fixture;
  const now = Date.now();
  const assistantMessage: AssistantMessage = {
    role: 'assistant',
    content: [{ type: 'text', text: 'Hello world' }],
    api: 'openai-completions',
    provider: 'openai',
    model: 'gpt-4',
    usage: {
      input: 100,
      output: 200,
      cacheRead: 50,
      cacheWrite: 25,
      totalTokens: 375,
      cost: {
        input: 0.001,
        output: 0.002,
        cacheRead: 0.0005,
        cacheWrite: 0.00025,
        total: 0.00375,
      },
    },
    stopReason: 'stop',
    timestamp: Date.now(),
  };

  // Produces a fresh text-delta update event for the message on every call.
  const helloDelta = (): MessageUpdateEvent => ({
    type: 'message_update',
    message: assistantMessage,
    assistantMessageEvent: { type: 'text_delta', delta: 'Hello' },
  });

  handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: now });
  await tick(100);
  handlers['message_start']?.({ type: 'message_start', message: assistantMessage });

  // Update #1 is the TTFT delta; updates #2-#6 are streaming deltas 1-5.
  for (let updateNo = 1; updateNo <= 6; updateNo += 1) {
    await tick(50);
    handlers['message_update']?.(helloDelta());
  }

  await tick(300);
  handlers['message_end']?.({ type: 'message_end', message: assistantMessage });
  handlers['turn_end']?.(
    { type: 'turn_end', turnIndex: 0, message: assistantMessage, toolResults: [] },
    fixture.mockCtx
  );

  // Notification text
  expect(notifySpy).toHaveBeenCalledOnce();
  const notification = notifySpy.mock.calls[0][0] as string;
  expect(notification).toMatch(/TPS \d+\.\d tok\/s/);
  expect(notification).toMatch(/TTFT \d+\.\ds/);
  expect(notification).toMatch(/out 200/);
  expect(notification).toMatch(/in 100/);

  // Persisted entry
  expect(appendEntrySpy).toHaveBeenCalledOnce();
  const [entryType, telemetry] = appendEntrySpy.mock.calls[0];
  expect(entryType).toBe('tps');
  expect(telemetry.model).toEqual({ provider: 'openai', modelId: 'gpt-4' });
  expect(telemetry.tokens).toEqual({
    input: 100,
    output: 200,
    cacheRead: 50,
    cacheWrite: 25,
    total: 375,
  });
  expect(telemetry.timing.ttftMs).toBeGreaterThan(0);
  expect(telemetry.timing.totalMs).toBeGreaterThan(0);
  expect(telemetry.timing.generationMs).toBeGreaterThan(0);
  expect(telemetry.timing.messageCount).toBe(1);
  expect(telemetry.tps).toBeGreaterThan(0);
  expect(telemetry.timestamp).toBeTypeOf('number');

  // The emitted event must carry the same telemetry that was persisted.
  expect(fixture.eventsEmitSpy).toHaveBeenCalledOnce();
  const [eventName, eventPayload] = fixture.eventsEmitSpy.mock.calls[0];
  expect(eventName).toBe('tps:telemetry');
  expect(eventPayload).toEqual(telemetry);
});
|
|
129
|
+
|
|
130
|
+
// ── Token aggregation across multiple messages per turn ──────────────────
|
|
131
|
+
|
|
132
|
+
it('should aggregate tokens from multiple assistant messages in current turn only', async () => {
  // Two assistant messages inside one turn: the notification must report the
  // summed output (100 + 80) and input (50 + 30) token counts.
  const { handlers, notifySpy } = fixture;

  const firstMessage: AssistantMessage = {
    role: 'assistant',
    content: [{ type: 'text', text: 'First' }],
    api: 'openai-completions',
    provider: 'openai',
    model: 'gpt-4',
    usage: {
      input: 50,
      output: 100,
      cacheRead: 25,
      cacheWrite: 10,
      totalTokens: 185,
      cost: {
        input: 0.001,
        output: 0.002,
        cacheRead: 0.0005,
        cacheWrite: 0.00025,
        total: 0.00375,
      },
    },
    stopReason: 'toolUse',
    timestamp: Date.now(),
  };

  const secondMessage: AssistantMessage = {
    role: 'assistant',
    content: [{ type: 'text', text: 'Second' }],
    api: 'openai-completions',
    provider: 'openai',
    model: 'gpt-4',
    usage: {
      input: 30,
      output: 80,
      cacheRead: 15,
      cacheWrite: 5,
      totalTokens: 130,
      cost: {
        input: 0.001,
        output: 0.002,
        cacheRead: 0.0005,
        cacheWrite: 0.00025,
        total: 0.00375,
      },
    },
    stopReason: 'stop',
    timestamp: Date.now(),
  };

  const firstDelta: MessageUpdateEvent = {
    type: 'message_update',
    message: firstMessage,
    assistantMessageEvent: { type: 'text_delta', delta: 't' },
  };
  const secondDelta: MessageUpdateEvent = {
    type: 'message_update',
    message: secondMessage,
    assistantMessageEvent: { type: 'text_delta', delta: 't' },
  };

  handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
  await tick(100);

  // First message
  handlers['message_start']?.({ type: 'message_start', message: firstMessage });
  await tick(50);
  handlers['message_update']?.(firstDelta);
  await tick(200);
  handlers['message_end']?.({ type: 'message_end', message: firstMessage });
  await tick(50);

  // Second message
  handlers['message_start']?.({ type: 'message_start', message: secondMessage });
  await tick(50);
  handlers['message_update']?.(secondDelta);
  await tick(150);
  handlers['message_end']?.({ type: 'message_end', message: secondMessage });

  handlers['turn_end']?.(
    { type: 'turn_end', turnIndex: 0, message: secondMessage, toolResults: [] },
    fixture.mockCtx
  );

  expect(notifySpy).toHaveBeenCalledOnce();
  const summary = notifySpy.mock.calls[0][0] as string;
  expect(summary).toContain('out 180');
  expect(summary).toContain('in 80');
});
|
|
216
|
+
|
|
217
|
+
// ── Model tracking ──────────────────────────────────────────────────────
|
|
218
|
+
|
|
219
|
+
it('should capture model info from first assistant message', async () => {
  // The persisted telemetry must carry the provider and model id taken from
  // the assistant message.
  const { handlers, appendEntrySpy } = fixture;

  const assistantMessage: AssistantMessage = {
    role: 'assistant',
    content: [{ type: 'text', text: 'Hello' }],
    api: 'openai-completions',
    provider: 'neuralwatt',
    model: 'moonshotai/Kimi-K2.5',
    usage: {
      input: 10,
      output: 20,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 30,
      cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
    },
    stopReason: 'stop',
    timestamp: Date.now(),
  };

  handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
  await tick(100);
  handlers['message_start']?.({ type: 'message_start', message: assistantMessage });

  // 'H' is the TTFT delta; the five 'i' deltas are the streaming updates.
  for (const delta of ['H', 'i', 'i', 'i', 'i', 'i']) {
    await tick(50);
    handlers['message_update']?.({
      type: 'message_update',
      message: assistantMessage,
      assistantMessageEvent: { type: 'text_delta', delta },
    });
  }

  await tick(50);
  handlers['message_end']?.({ type: 'message_end', message: assistantMessage });
  handlers['turn_end']?.(
    { type: 'turn_end', turnIndex: 0, message: assistantMessage, toolResults: [] },
    fixture.mockCtx
  );

  // Guard first: without it a missing call surfaces as an opaque
  // "undefined is not iterable" TypeError from the destructuring below.
  expect(appendEntrySpy).toHaveBeenCalled();
  const [, data] = appendEntrySpy.mock.calls[0];
  expect(data.model).toEqual({ provider: 'neuralwatt', modelId: 'moonshotai/Kimi-K2.5' });
});
|
|
289
|
+
|
|
290
|
+
// ── UI-less mode ─────────────────────────────────────────────────────────
|
|
291
|
+
|
|
292
|
+
it('should persist and emit telemetry but skip notification when hasUI is false', async () => {
  // Same flow as a normal turn, but turn_end receives a context with hasUI=false.
  const { handlers, notifySpy, appendEntrySpy, eventsEmitSpy } = fixture;
  const headlessCtx = { ...fixture.mockCtx, hasUI: false };

  const assistantMessage = makeAssistantMessage({ output: 20, input: 10 });

  handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
  await tick(50);
  handlers['message_start']?.({ type: 'message_start', message: assistantMessage });
  await tick(50);
  handlers['message_update']?.({
    type: 'message_update',
    message: assistantMessage,
    assistantMessageEvent: { type: 'text_delta', delta: 'H' },
  });
  await tick(50);
  handlers['message_end']?.({ type: 'message_end', message: assistantMessage });
  handlers['turn_end']?.(
    { type: 'turn_end', turnIndex: 0, message: assistantMessage, toolResults: [] },
    headlessCtx
  );

  // No UI notification...
  expect(notifySpy).not.toHaveBeenCalled();

  // ...but the entry is still persisted,
  expect(appendEntrySpy).toHaveBeenCalledOnce();
  const [entryType, persisted] = appendEntrySpy.mock.calls[0];
  expect(entryType).toBe('tps');

  // and the emitted event carries the same payload.
  expect(eventsEmitSpy).toHaveBeenCalledOnce();
  const [eventName, emitted] = eventsEmitSpy.mock.calls[0];
  expect(eventName).toBe('tps:telemetry');
  expect(emitted).toEqual(persisted);
});
|
|
321
|
+
|
|
322
|
+
// ── Zero output ──────────────────────────────────────────────────────────
|
|
323
|
+
|
|
324
|
+
it('should skip when no output tokens', async () => {
  // A message reporting zero output tokens must produce neither a notification
  // nor a persisted entry.
  const { handlers, notifySpy, appendEntrySpy } = fixture;

  const emptyReply = makeAssistantMessage({ output: 0, input: 10 });

  handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
  await tick(50);
  handlers['message_start']?.({ type: 'message_start', message: emptyReply });
  await tick(50);
  handlers['message_end']?.({ type: 'message_end', message: emptyReply });
  handlers['turn_end']?.(
    { type: 'turn_end', turnIndex: 0, message: emptyReply, toolResults: [] },
    fixture.mockCtx
  );

  expect(notifySpy).not.toHaveBeenCalled();
  expect(appendEntrySpy).not.toHaveBeenCalled();
});
|
|
342
|
+
|
|
343
|
+
// ── Non-assistant message filtering ──────────────────────────────────────
|
|
344
|
+
|
|
345
|
+
it('should ignore non-assistant messages for timing', async () => {
  // Only user and system messages occur in this turn, so no telemetry is
  // expected to be shown or persisted.
  const { handlers, notifySpy, appendEntrySpy } = fixture;

  handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });

  // User message
  handlers['message_start']?.({
    type: 'message_start',
    message: { role: 'user', content: 'Hello' },
  });
  await tick(100);
  handlers['message_end']?.({
    type: 'message_end',
    message: { role: 'user', content: 'Hello' },
  });

  // System message
  handlers['message_start']?.({
    type: 'message_start',
    message: { role: 'system', content: 'System' },
  });
  await tick(50);
  handlers['message_end']?.({
    type: 'message_end',
    message: { role: 'system', content: 'System' },
  });

  handlers['turn_end']?.(
    { type: 'turn_end', turnIndex: 0, message: {}, toolResults: [] },
    fixture.mockCtx
  );

  expect(notifySpy).not.toHaveBeenCalled();
  expect(appendEntrySpy).not.toHaveBeenCalled();
});
|
|
372
|
+
|
|
373
|
+
// ── True generation TPS (excluding TTFT and tool gaps) ──────────────────
|
|
374
|
+
|
|
375
|
+
it('should calculate true generation TPS excluding TTFT and tool gaps', async () => {
  // Two streamed assistant messages separated by a 1000ms pause that stands in
  // for tool execution. The reported TPS must fall in a sane range and the
  // token totals must be summed across both messages.
  const { handlers, notifySpy } = fixture;

  const firstMessage: AssistantMessage = {
    role: 'assistant',
    content: [{ type: 'text', text: 'Let me check that...' }],
    api: 'openai-completions',
    provider: 'openai',
    model: 'gpt-4',
    usage: {
      input: 100,
      output: 200,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 300,
      cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
    },
    stopReason: 'toolUse',
    timestamp: Date.now(),
  };

  const secondMessage: AssistantMessage = {
    role: 'assistant',
    content: [{ type: 'text', text: 'Here is the detailed answer...' }],
    api: 'openai-completions',
    provider: 'openai',
    model: 'gpt-4',
    usage: {
      input: 500,
      output: 800,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 1300,
      cost: { input: 0.005, output: 0.008, cacheRead: 0, cacheWrite: 0, total: 0.013 },
    },
    stopReason: 'stop',
    timestamp: Date.now(),
  };

  // The first message reuses one event object for all of its updates.
  const firstDelta: MessageUpdateEvent = {
    type: 'message_update',
    message: firstMessage,
    assistantMessageEvent: { type: 'text_delta', delta: 't' },
  };
  // The second message gets a fresh event object per update.
  const secondDelta = (): MessageUpdateEvent => ({
    type: 'message_update',
    message: secondMessage,
    assistantMessageEvent: { type: 'text_delta', delta: 't' },
  });

  handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
  await tick(100); // latency before the first message starts

  // First message: update #1 is the TTFT delta, #2-#4 are streaming deltas.
  handlers['message_start']?.({ type: 'message_start', message: firstMessage });
  for (let updateNo = 1; updateNo <= 4; updateNo += 1) {
    await tick(50);
    handlers['message_update']?.(firstDelta);
  }
  await tick(50);
  handlers['message_end']?.({ type: 'message_end', message: firstMessage });

  // Tool execution gap: 1000ms between the two messages.
  await tick(1000);

  // Second message: update #1 is the TTFT delta, #2-#4 are streaming deltas.
  handlers['message_start']?.({ type: 'message_start', message: secondMessage });
  for (let updateNo = 1; updateNo <= 4; updateNo += 1) {
    await tick(100);
    handlers['message_update']?.(secondDelta());
  }
  await tick(100);
  handlers['message_end']?.({ type: 'message_end', message: secondMessage });

  handlers['turn_end']?.(
    { type: 'turn_end', turnIndex: 0, message: secondMessage, toolResults: [] },
    fixture.mockCtx
  );

  expect(notifySpy).toHaveBeenCalledOnce();
  const notification = notifySpy.mock.calls[0][0] as string;

  const tpsMatch = notification.match(/TPS (\d+(?:\.\d+)?) tok\/s/);
  expect(tpsMatch).toBeTruthy();
  const tps = Number.parseFloat(tpsMatch![1]);
  // Inter-update TPS: 3+3=6 streaming updates across two messages.
  // NOTE(review): the test title says tool gaps are excluded, whereas the
  // original author's comment here stated the span includes the tool gap —
  // confirm against the implementation. Either way the value must stay far
  // below burst-artifact magnitudes.
  expect(tps).toBeGreaterThan(50);
  expect(tps).toBeLessThan(2000);

  expect(notification).toContain('out 1K');
  expect(notification).toContain('in 600');
});
|
|
485
|
+
|
|
486
|
+
// ── Inter-update TPS ──────────────────────────────────────────────────
|
|
487
|
+
|
|
488
|
+
it('should produce null TPS for burst delivery (all updates in same tick)', async () => {
  // All message_update events are dispatched synchronously with no tick between
  // them, so the span between updates is effectively zero and no meaningful
  // rate can be derived.
  const { handlers, notifySpy, appendEntrySpy } = fixture;

  const assistantMessage: AssistantMessage = {
    role: 'assistant',
    content: [{ type: 'text', text: 'Done' }],
    api: 'openai-completions',
    provider: 'openai',
    model: 'gpt-4',
    usage: {
      input: 291,
      output: 46,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 337,
      cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
    },
    stopReason: 'stop',
    timestamp: Date.now(),
  };

  const updateEvent = {
    type: 'message_update' as const,
    message: assistantMessage,
    assistantMessageEvent: { type: 'text_delta' as const, delta: 't' },
  };

  handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
  await tick(50);
  handlers['message_start']?.({ type: 'message_start', message: assistantMessage });

  // Burst: update #1 is the TTFT delta, #2-#4 are streaming deltas, all fired
  // back-to-back in the same tick.
  for (let updateNo = 1; updateNo <= 4; updateNo += 1) {
    handlers['message_update']?.(updateEvent);
  }

  handlers['message_end']?.({ type: 'message_end', message: assistantMessage });
  handlers['turn_end']?.(
    { type: 'turn_end', turnIndex: 0, message: assistantMessage, toolResults: [] },
    fixture.mockCtx
  );

  expect(notifySpy).toHaveBeenCalledOnce();
  const notification = notifySpy.mock.calls[0][0] as string;
  // No TPS number displayed — the notification shows a dash placeholder instead.
  expect(notification).toContain('TPS —');

  // Guard first: without it a missing call surfaces as an opaque
  // "undefined is not iterable" TypeError from the destructuring below.
  expect(appendEntrySpy).toHaveBeenCalled();
  const [, data] = appendEntrySpy.mock.calls[0];
  expect(data.tps).toBeNull();
  expect(data.timing.streamMs).toBeLessThan(1); // burst: sub-ms span

  // The rest of the telemetry is still reported.
  expect(notification).toContain('TTFT');
  expect(notification).toContain('out 46');
});
|
|
543
|
+
|
|
544
|
+
it('should produce null TPS with only TTFT update and no streaming updates', async () => {
  // Exactly one message_update arrives (the TTFT delta), so both the TPS and
  // the stream span are expected to be null.
  const { handlers, notifySpy, appendEntrySpy } = fixture;

  const assistantMessage = makeAssistantMessage({ output: 20, input: 10 });

  handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
  await tick(50);
  handlers['message_start']?.({ type: 'message_start', message: assistantMessage });
  await tick(50);

  // The one and only update — nothing streams after it.
  handlers['message_update']?.({
    type: 'message_update',
    message: assistantMessage,
    assistantMessageEvent: { type: 'text_delta', delta: 'H' },
  });
  await tick(50);

  handlers['message_end']?.({ type: 'message_end', message: assistantMessage });
  handlers['turn_end']?.(
    { type: 'turn_end', turnIndex: 0, message: assistantMessage, toolResults: [] },
    fixture.mockCtx
  );

  expect(notifySpy).toHaveBeenCalledOnce();
  const notification = notifySpy.mock.calls[0][0] as string;
  expect(notification).toContain('TPS —');

  // Guard first: without it a missing call surfaces as an opaque
  // "undefined is not iterable" TypeError from the destructuring below.
  expect(appendEntrySpy).toHaveBeenCalled();
  const [, data] = appendEntrySpy.mock.calls[0];
  expect(data.tps).toBeNull();
  expect(data.timing.streamMs).toBeNull(); // no streaming updates at all
});
|
|
574
|
+
|
|
575
|
+
it('should calculate inter-update TPS from streaming updates', async () => {
  // One TTFT delta followed by five streaming deltas spaced ~100ms apart; the
  // reported TPS must land in a sane range rather than a burst-sized value.
  const { handlers, notifySpy, appendEntrySpy } = fixture;

  const assistantMessage: AssistantMessage = {
    role: 'assistant',
    content: [{ type: 'text', text: 'A longer response' }],
    api: 'openai-completions',
    provider: 'openai',
    model: 'gpt-4',
    usage: {
      input: 100,
      output: 500,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 600,
      cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
    },
    stopReason: 'stop',
    timestamp: Date.now(),
  };

  const updateEvent = {
    type: 'message_update' as const,
    message: assistantMessage,
    assistantMessageEvent: { type: 'text_delta' as const, delta: 't' },
  };

  handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
  await tick(100);
  handlers['message_start']?.({ type: 'message_start', message: assistantMessage });

  await tick(50); // delay before the TTFT delta
  handlers['message_update']?.(updateEvent); // TTFT

  // Streaming updates #1-#5, each ~100ms after the previous update.
  for (let streamNo = 1; streamNo <= 5; streamNo += 1) {
    await tick(100);
    handlers['message_update']?.(updateEvent);
  }

  await tick(100);
  handlers['message_end']?.({ type: 'message_end', message: assistantMessage });
  handlers['turn_end']?.(
    { type: 'turn_end', turnIndex: 0, message: assistantMessage, toolResults: [] },
    fixture.mockCtx
  );

  expect(notifySpy).toHaveBeenCalledOnce();
  const notification = notifySpy.mock.calls[0][0] as string;
  const tpsMatch = notification.match(/TPS (\d+(?:\.\d+)?) tok\/s/);
  expect(tpsMatch).toBeTruthy();
  const tps = Number.parseFloat(tpsMatch![1]);
  // Inter-update span ~500ms (5 gaps of ~100ms between streaming updates)
  // 500 tokens / 0.5s ≈ 1000 TPS (real timer is approximate, so just check sane range)
  expect(tps).toBeGreaterThan(400);
  expect(tps).toBeLessThan(5000); // no 452K burst artifact

  // Guard first: without it a missing call surfaces as an opaque
  // "undefined is not iterable" TypeError from the destructuring below.
  expect(appendEntrySpy).toHaveBeenCalled();
  const [, data] = appendEntrySpy.mock.calls[0];
  expect(data.tps).toBeGreaterThan(0);
  expect(data.timing.streamMs).toBeGreaterThan(0);
});
|
|
638
|
+
|
|
639
|
+
// ── Missing turn_start ───────────────────────────────────────────────────
|
|
640
|
+
|
|
641
|
+
it('should skip when turn_start was not called', () => {
  // turn_end arrives without any preceding turn_start: nothing may be shown
  // or persisted.
  const { handlers, notifySpy, appendEntrySpy } = fixture;

  const orphanMessage = makeAssistantMessage({ output: 20, input: 10 });
  const turnEndEvent = {
    type: 'turn_end' as const,
    turnIndex: 0,
    message: orphanMessage,
    toolResults: [],
  };

  handlers['turn_end']?.(turnEndEvent, fixture.mockCtx);

  expect(notifySpy).not.toHaveBeenCalled();
  expect(appendEntrySpy).not.toHaveBeenCalled();
});
|
|
654
|
+
});
|