@monotykamary/pi-tps 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/FUNDING.yml +4 -0
- package/.github/workflows/test.yml +55 -0
- package/.pi/autoresearch/session-id +1 -0
- package/.prettierrc +7 -0
- package/LICENSE +21 -0
- package/README.md +237 -0
- package/commitlint.config.cjs +1 -0
- package/extensions/pi-tps/__tests__/export-command.test.ts +307 -0
- package/extensions/pi-tps/__tests__/extension-setup.test.ts +41 -0
- package/extensions/pi-tps/__tests__/format-duration.test.ts +83 -0
- package/extensions/pi-tps/__tests__/helpers.ts +154 -0
- package/extensions/pi-tps/__tests__/precision-timing.test.ts +701 -0
- package/extensions/pi-tps/__tests__/rehydration.test.ts +266 -0
- package/extensions/pi-tps/__tests__/session-export.test.ts +204 -0
- package/extensions/pi-tps/__tests__/stall-detection.test.ts +209 -0
- package/extensions/pi-tps/__tests__/stall-reduction.test.ts +139 -0
- package/extensions/pi-tps/__tests__/telemetry-flow.test.ts +654 -0
- package/extensions/pi-tps/index.ts +734 -0
- package/knip.json +10 -0
- package/npm-shrinkwrap.json +6923 -0
- package/package.json +54 -0
- package/tsconfig.json +24 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
|
2
|
+
import type { AssistantMessage } from '@earendil-works/pi-ai';
|
|
3
|
+
import { createTestFixture, activateExtension } from './helpers';
|
|
4
|
+
import type { MessageUpdateEvent } from './helpers';
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Tests partial stall reduction in the fallback branch.
|
|
8
|
+
*
|
|
9
|
+
* When stalls dominate the effective generation window (effectiveGenMs < 200ms
|
|
10
|
+
* OR stallMs > 85% of generationMs), the raw TPS would explode because the
|
|
11
|
+
* denominator is tiny. Partial reduction divides stallMs by 2 before subtracting
|
|
12
|
+
* it, giving a much larger safe denominator and a saner TPS.
|
|
13
|
+
*/
|
|
14
|
+
describe('pi-tps extension — partial stall reduction', () => {
|
|
15
|
+
// Shared per-test fixture; reassigned in `beforeEach` so every test starts
// from a newly created one.
let fixture: ReturnType<typeof createTestFixture>;

beforeEach(async () => {
  // Build the fixture first, then activate the extension against it.
  const created = createTestFixture();
  fixture = created;
  await activateExtension(created);
});

afterEach(() => {
  // Restore anything spied on during the test (e.g. `performance.now`).
  vi.restoreAllMocks();
});
|
|
25
|
+
|
|
26
|
+
/**
 * Replays one complete assistant turn against the fixture's handlers while
 * `performance.now()` is stubbed, so each clock reading comes from a fixed
 * script instead of the real clock.
 *
 * The script is: `turnStart` twice, `messageStart`, `firstUpdate`, every entry
 * of `streamUpdates`, `messageEnd`, then `turnEnd` (falling back to
 * `messageEnd` when omitted). Once the script is exhausted, its last value is
 * returned for any further reads.
 *
 * One `message_update` is emitted for the first update plus one per entry in
 * `streamUpdates`; the entries themselves are only consumed via the stubbed
 * clock.
 *
 * @param clocks - Scripted clock readings for the turn.
 * @returns The fixture's `appendEntrySpy`, for asserting on recorded entries.
 */
function driveTurn(clocks: {
  turnStart: number;
  messageStart: number;
  firstUpdate: number;
  streamUpdates: number[];
  messageEnd: number;
  turnEnd?: number;
}) {
  const { handlers, appendEntrySpy } = fixture;
  const { turnStart, messageStart, firstUpdate, streamUpdates, messageEnd } = clocks;

  // Ordered readings handed out by the stubbed clock, one per call.
  const script: number[] = [turnStart, turnStart, messageStart, firstUpdate];
  script.push(...streamUpdates, messageEnd, clocks.turnEnd ?? messageEnd);
  const lastSlot = script.length - 1;

  let reads = 0;
  const clockSpy = vi.spyOn(performance, 'now').mockImplementation(() => {
    // Clamp to the final reading once the script has been used up.
    const slot = reads < lastSlot ? reads : lastSlot;
    reads += 1;
    return script[slot];
  });

  const assistantMessage: AssistantMessage = {
    role: 'assistant',
    content: [{ type: 'text' as const, text: 'x'.repeat(5000) }],
    api: 'openai-completions',
    provider: 'openai',
    model: 'gpt-4',
    usage: {
      input: 100,
      output: 5000,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 5100,
      cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
    },
    stopReason: 'stop',
    timestamp: Date.now(),
  };

  // Every streamed update uses the same payload; a new event object is built
  // for each emission.
  const emitUpdate = (): void => {
    handlers['message_update']?.({
      type: 'message_update',
      message: assistantMessage,
      assistantMessageEvent: { type: 'text_delta', delta: 't' },
    });
  };

  handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
  handlers['message_start']?.({ type: 'message_start', message: assistantMessage });

  // First update, followed by one update per scripted stream reading.
  for (let sent = 0; sent <= streamUpdates.length; sent += 1) {
    emitUpdate();
  }

  handlers['message_end']?.({ type: 'message_end', message: assistantMessage });
  handlers['turn_end']?.(
    { type: 'turn_end', turnIndex: 0, message: assistantMessage, toolResults: [] },
    fixture.mockCtx
  );

  clockSpy.mockRestore();
  return { appendEntrySpy };
}
|
|
96
|
+
|
|
97
|
+
it('partially reduces stalls when effectiveGenMs is tiny', () => {
  // generationMs ~ 50_300, stallMs ~ 50_120
  // effectiveGenMs = 50_300 - 50_120 = 180 (< ACTIVE_TIME_THRESHOLD=200)
  // stallMs (50_120) > effectiveGenMs (180) → partial reduction kicks in
  // safeGenMs = max(50_300 - 50_120/2, 50) = max(25_240, 50) = 25_240
  // raw = 5_000 / 25.24 = 198.1
  // Without partial reduction: 5_000 / 0.18 = 27_778
  const { appendEntrySpy } = driveTurn({
    turnStart: 0,
    messageStart: 100,
    firstUpdate: 200,
    streamUpdates: [250, 300],
    messageEnd: 50400,
  });

  // Guard: if no entry was recorded, fail here with a readable assertion
  // instead of a TypeError from destructuring `undefined` below.
  expect(appendEntrySpy).toHaveBeenCalled();

  const [, data] = appendEntrySpy.mock.calls[0];
  expect(data.tps).not.toBeNull();
  // Bounds bracket the ~198 expected above and sit far below the ~27_778
  // that an unreduced stall subtraction would produce.
  expect(data.tps).toBeLessThan(300);
  expect(data.tps).toBeGreaterThan(50);
});
|
|
117
|
+
|
|
118
|
+
it('partially reduces stalls when stallMs dominates generationMs', () => {
  // generationMs = 15_000, stallMs = 13_500
  // effectiveGenMs = 1_500 (>200 so absolute threshold NOT hit)
  // BUT stallMs/genMs ratio = 90% > STALL_DOMINANCE_RATIO (85%)
  // → partial reduction kicks in via ratio branch
  // safeGenMs = max(15_000 - 13_500/2, 50) = max(8_250, 50) = 8_250
  // raw = 5_000 / 8.25 = 606
  // Without partial reduction: 5_000 / 1.5 = 3_333
  const { appendEntrySpy } = driveTurn({
    turnStart: 0,
    messageStart: 50,
    firstUpdate: 150,
    streamUpdates: [300, 800],
    messageEnd: 15050,
  });

  // Guard: if no entry was recorded, fail here with a readable assertion
  // instead of a TypeError from destructuring `undefined` below.
  expect(appendEntrySpy).toHaveBeenCalled();

  const [, data] = appendEntrySpy.mock.calls[0];
  expect(data.tps).not.toBeNull();
  // Bounds bracket the ~606 expected above and sit well below the ~3_333
  // that an unreduced stall subtraction would produce.
  expect(data.tps).toBeLessThan(700);
  expect(data.tps).toBeGreaterThan(100);
});
|
|
139
|
+
});
|