@monotykamary/pi-tps 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,139 @@
1
+ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
2
+ import type { AssistantMessage } from '@earendil-works/pi-ai';
3
+ import { createTestFixture, activateExtension } from './helpers';
4
+ import type { MessageUpdateEvent } from './helpers';
5
+
6
/**
 * Tests partial stall reduction in the fallback branch.
 *
 * When stalls dominate the effective generation window (effectiveGenMs < 200ms
 * OR stallMs > 85% of generationMs), the raw TPS would explode because the
 * denominator is tiny. Partial reduction divides stallMs by 2 before subtracting
 * it, giving a much larger safe denominator and a saner TPS.
 */
describe('pi-tps extension — partial stall reduction', () => {
  let fixture: ReturnType<typeof createTestFixture>;

  // A fresh fixture with the extension activated is built for every test.
  beforeEach(async () => {
    fixture = createTestFixture();
    await activateExtension(fixture);
  });

  // Safety net: undo any spy that a test or helper left installed.
  afterEach(() => {
    vi.restoreAllMocks();
  });

  /**
   * Drive a full turn with mocked performance.now() so every
   * timestamp is deterministic (no real timers needed).
   *
   * Events are dispatched in this order: `turn_start`, `message_start`, one
   * `message_update`, one further `message_update` per entry of
   * `clocks.streamUpdates`, `message_end`, `turn_end` (the latter also receives
   * `fixture.mockCtx`). A handler that is not registered is silently skipped.
   *
   * @param clocks Values handed out, in order, by successive `performance.now()`
   *   calls. `turnEnd` defaults to `messageEnd` when omitted.
   * @returns The fixture's `appendEntrySpy`, for inspecting what was recorded.
   */
  function driveTurn(clocks: {
    turnStart: number;
    messageStart: number;
    firstUpdate: number;
    streamUpdates: number[];
    messageEnd: number;
    turnEnd?: number;
  }) {
    const { handlers, appendEntrySpy } = fixture;

    // One value per expected performance.now() call.
    // NOTE(review): turnStart is listed twice — presumably the turn_start handler
    // reads the clock twice; confirm against the extension.
    const timestamps = [
      clocks.turnStart,
      clocks.turnStart,
      clocks.messageStart,
      clocks.firstUpdate,
      ...clocks.streamUpdates,
      clocks.messageEnd,
      clocks.turnEnd ?? clocks.messageEnd,
    ];

    // Hand the timestamps out sequentially; once exhausted, keep returning the
    // last one so extra clock reads do not yield undefined.
    let callIdx = 0;
    const spy = vi.spyOn(performance, 'now').mockImplementation(() => {
      return timestamps[Math.min(callIdx++, timestamps.length - 1)];
    });

    // A finished assistant message reporting 5000 output tokens.
    const assistantMessage: AssistantMessage = {
      role: 'assistant',
      content: [{ type: 'text' as const, text: 'x'.repeat(5000) }],
      api: 'openai-completions',
      provider: 'openai',
      model: 'gpt-4',
      usage: {
        input: 100,
        output: 5000,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 5100,
        cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
      },
      stopReason: 'stop',
      timestamp: Date.now(),
    };

    // Every streamed update carries the same single-character text delta.
    const emitTextDelta = () =>
      handlers['message_update']?.({
        type: 'message_update',
        message: assistantMessage,
        assistantMessageEvent: { type: 'text_delta', delta: 't' },
      });

    try {
      handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
      handlers['message_start']?.({ type: 'message_start', message: assistantMessage });
      // First update, followed by one update per configured stream timestamp.
      emitTextDelta();
      clocks.streamUpdates.forEach(() => {
        emitTextDelta();
      });
      handlers['message_end']?.({ type: 'message_end', message: assistantMessage });
      handlers['turn_end']?.(
        { type: 'turn_end', turnIndex: 0, message: assistantMessage, toolResults: [] },
        fixture.mockCtx
      );
    } finally {
      // Restore the real clock even when a handler throws, so the mock cannot
      // leak past this helper.
      spy.mockRestore();
    }

    return { appendEntrySpy };
  }

  it('partially reduces stalls when effectiveGenMs is tiny', () => {
    // generationMs ~ 50_300, stallMs ~ 50_120
    // effectiveGenMs = 50_300 - 50_120 = 180 (< ACTIVE_TIME_THRESHOLD=200)
    // stallMs (50_120) > effectiveGenMs (180) → partial reduction kicks in
    // safeGenMs = max(50_300 - 50_120/2, 50) = max(25_240, 50) = 25_240
    // raw = 5_000 / 25.24 = 198.1
    // Without partial reduction: 5_000 / 0.18 = 27_778
    const { appendEntrySpy } = driveTurn({
      turnStart: 0,
      messageStart: 100,
      firstUpdate: 200,
      streamUpdates: [250, 300],
      messageEnd: 50400,
    });

    // Fail with a readable message if nothing was recorded, rather than with a
    // TypeError from destructuring undefined.
    expect(appendEntrySpy).toHaveBeenCalled();
    const [, data] = appendEntrySpy.mock.calls[0];
    expect(data.tps).not.toBeNull();
    expect(data.tps).toBeLessThan(300);
    expect(data.tps).toBeGreaterThan(50);
  });

  it('partially reduces stalls when stallMs dominates generationMs', () => {
    // generationMs = 15_000, stallMs = 13_500
    // effectiveGenMs = 1_500 (>200 so absolute threshold NOT hit)
    // BUT stallMs/genMs ratio = 90% > STALL_DOMINANCE_RATIO (85%)
    // → partial reduction kicks in via ratio branch
    // safeGenMs = max(15_000 - 13_500/2, 50) = max(8_250, 50) = 8_250
    // raw = 5_000 / 8.25 = 606
    // Without partial reduction: 5_000 / 1.5 = 3_333
    //
    // NOTE(review): the largest gap in the clocks below is 800 → 15_050 =
    // 14_250 ms, not 13_500, so the figures above look illustrative — confirm
    // against the extension's stall accounting. The bounds asserted here are
    // loose enough to hold either way.
    const { appendEntrySpy } = driveTurn({
      turnStart: 0,
      messageStart: 50,
      firstUpdate: 150,
      streamUpdates: [300, 800],
      messageEnd: 15050,
    });

    // Fail with a readable message if nothing was recorded, rather than with a
    // TypeError from destructuring undefined.
    expect(appendEntrySpy).toHaveBeenCalled();
    const [, data] = appendEntrySpy.mock.calls[0];
    expect(data.tps).not.toBeNull();
    expect(data.tps).toBeLessThan(700);
    expect(data.tps).toBeGreaterThan(100);
  });
});