tokengolf 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.husky/pre-commit +4 -0
- package/.prettierignore +2 -0
- package/.prettierrc +6 -0
- package/.vscode/settings.json +15 -0
- package/CHANGELOG.md +254 -0
- package/CLAUDE.md +136 -10
- package/README.md +89 -47
- package/assets/demo-hud.png +0 -0
- package/assets/scorecard.png +0 -0
- package/dist/cli.js +790 -103
- package/docs/assets/demo-hud.png +0 -0
- package/docs/assets/scorecard.png +0 -0
- package/docs/assets/tokengolf-bg-min.jpg +0 -0
- package/docs/index.html +1080 -0
- package/eslint.config.js +39 -0
- package/hooks/post-tool-use-failure.js +27 -0
- package/hooks/post-tool-use.js +11 -7
- package/hooks/pre-compact.js +9 -3
- package/hooks/session-end.js +168 -42
- package/hooks/session-start.js +31 -11
- package/hooks/session-stop.js +6 -2
- package/hooks/statusline.sh +16 -7
- package/hooks/stop.js +27 -0
- package/hooks/subagent-start.js +27 -0
- package/hooks/user-prompt-submit.js +8 -6
- package/package.json +16 -3
- package/src/cli.js +23 -6
- package/src/components/ActiveRun.js +76 -24
- package/src/components/ScoreCard.js +132 -37
- package/src/components/StartRun.js +156 -53
- package/src/components/StatsView.js +89 -37
- package/src/lib/__tests__/score.test.js +596 -0
- package/src/lib/cost.js +84 -21
- package/src/lib/demo.js +186 -0
- package/src/lib/install.js +92 -62
- package/src/lib/score.js +433 -136
- package/src/lib/store.js +11 -11
- package/.claude/settings.local.json +0 -36
|
@@ -0,0 +1,596 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
calculateAchievements,
|
|
4
|
+
getTier,
|
|
5
|
+
getEfficiencyRating,
|
|
6
|
+
getModelClass,
|
|
7
|
+
MODEL_CLASSES,
|
|
8
|
+
} from '../score.js';
|
|
9
|
+
|
|
10
|
+
// ── helpers ──────────────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
/**
 * Lists the `key` of every achievement that `calculateAchievements` returns
 * for the given run, preserving the returned order.
 *
 * @param {object} run - Run record handed straight to `calculateAchievements`.
 * @returns {Array} The `key` property of each returned achievement.
 */
function keys(run) {
  const achievements = calculateAchievements(run);
  return achievements.map(({ key }) => key);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Builds a baseline run fixture with `status: 'won'`.
 *
 * The defaults describe a single-session sonnet run that spent 0.1 against a
 * budget of 0.5, started 15 minutes before "now" and ended "now". Properties
 * in `overrides` are spread last, so they replace defaults of the same name
 * and may add extra fields.
 *
 * @param {object} [overrides={}] - Properties to replace in or add to the defaults.
 * @returns {object} A freshly built run object.
 */
function wonRun(overrides = {}) {
  const FIFTEEN_MINUTES_MS = 15 * 60 * 1000;

  const defaults = {
    status: 'won',
    spent: 0.1,
    budget: 0.5,
    model: 'claude-sonnet-4-6',
    promptCount: 5,
    totalToolCalls: 10,
    toolCalls: { Read: 5, Edit: 3, Bash: 2 },
    compactionEvents: [],
    thinkingInvocations: 0,
    sessionCount: 1,
    fainted: false,
    startedAt: new Date(Date.now() - FIFTEEN_MINUTES_MS).toISOString(),
    endedAt: new Date().toISOString(),
  };

  return { ...defaults, ...overrides };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Builds a baseline run fixture with `status: 'died'`.
 *
 * The defaults describe a sonnet run that spent 0.55 against a budget of 0.5.
 * Properties in `overrides` are spread last, so they replace defaults of the
 * same name and may add extra fields.
 *
 * @param {object} [overrides={}] - Properties to replace in or add to the defaults.
 * @returns {object} A freshly built run object.
 */
function diedRun(overrides = {}) {
  const defaults = {
    status: 'died',
    spent: 0.55,
    budget: 0.5,
    model: 'claude-sonnet-4-6',
    promptCount: 5,
    totalToolCalls: 10,
    toolCalls: {},
    compactionEvents: [],
    thinkingInvocations: 0,
  };

  return { ...defaults, ...overrides };
}
|
|
49
|
+
|
|
50
|
+
// ── pure functions ────────────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
// getTier: one sample spend per expected tier label.
describe('getTier', () => {
  const tierLabel = (spent) => getTier(spent).label;

  it('Diamond under $0.10', () => {
    expect(tierLabel(0.05)).toBe('Diamond');
  });
  it('Gold under $0.30', () => {
    expect(tierLabel(0.2)).toBe('Gold');
  });
  it('Silver under $1.00', () => {
    expect(tierLabel(0.8)).toBe('Silver');
  });
  it('Bronze under $3.00', () => {
    expect(tierLabel(2.0)).toBe('Bronze');
  });
  it('Reckless above $3.00', () => {
    expect(tierLabel(5.0)).toBe('Reckless');
  });
});
|
|
59
|
+
|
|
60
|
+
// getEfficiencyRating: every case passes 1.0 as the second argument, so the
// first argument reads directly as the percentage named in the test title.
describe('getEfficiencyRating', () => {
  const ratingAt = (fraction) => getEfficiencyRating(fraction, 1.0).label;

  it('LEGENDARY at 24%', () => {
    expect(ratingAt(0.24)).toBe('LEGENDARY');
  });
  it('EFFICIENT at 49%', () => {
    expect(ratingAt(0.49)).toBe('EFFICIENT');
  });
  it('SOLID at 74%', () => {
    expect(ratingAt(0.74)).toBe('SOLID');
  });
  it('CLOSE CALL at 99%', () => {
    expect(ratingAt(0.99)).toBe('CLOSE CALL');
  });
  it('BUSTED over 100%', () => {
    expect(ratingAt(1.01)).toBe('BUSTED');
  });
});
|
|
67
|
+
|
|
68
|
+
// getModelClass: each model id must resolve to the matching MODEL_CLASSES
// entry (checked with toBe); an unrecognised id resolves to the sonnet entry.
describe('getModelClass', () => {
  it('haiku', () => {
    const resolved = getModelClass('claude-haiku-4-5');
    expect(resolved).toBe(MODEL_CLASSES.haiku);
  });
  it('sonnet', () => {
    const resolved = getModelClass('claude-sonnet-4-6');
    expect(resolved).toBe(MODEL_CLASSES.sonnet);
  });
  it('opus', () => {
    const resolved = getModelClass('claude-opus-4-6');
    expect(resolved).toBe(MODEL_CLASSES.opus);
  });
  it('unknown defaults to sonnet', () => {
    const resolved = getModelClass('unknown-model');
    expect(resolved).toBe(MODEL_CLASSES.sonnet);
  });
});
|
|
75
|
+
|
|
76
|
+
// ── model class achievements ──────────────────────────────────────────────────
|
|
77
|
+
|
|
78
|
+
// Achievements tied to the model a won run was played with.
describe('model class achievements', () => {
  const HAIKU = 'claude-haiku-4-5-20251001';

  it('haiku win → gold_haiku', () => {
    const earned = keys(wonRun({ model: HAIKU }));
    expect(earned).toContain('gold_haiku');
  });
  it('haiku win under $0.10 → diamond', () => {
    const earned = keys(wonRun({ model: HAIKU, spent: 0.08 }));
    expect(earned).toContain('diamond');
  });
  it('haiku win at $0.12 → no diamond', () => {
    const earned = keys(wonRun({ model: HAIKU, spent: 0.12 }));
    expect(earned).not.toContain('diamond');
  });
  it('sonnet win → silver_sonnet', () => {
    // The default wonRun() fixture is a sonnet run.
    const earned = keys(wonRun());
    expect(earned).toContain('silver_sonnet');
  });
  it('opus win → bronze_opus', () => {
    const earned = keys(wonRun({ model: 'claude-opus-4-6' }));
    expect(earned).toContain('bronze_opus');
  });
});
|
|
97
|
+
|
|
98
|
+
// ── budget efficiency achievements ───────────────────────────────────────────
|
|
99
|
+
|
|
100
|
+
// Achievements driven by spend relative to budget, or by absolute spend.
describe('budget efficiency', () => {
  it('sniper at 20% budget', () => {
    const earned = keys(wonRun({ spent: 0.1, budget: 0.5 }));
    expect(earned).toContain('sniper');
  });
  it('efficient at 40% budget', () => {
    const earned = keys(wonRun({ spent: 0.2, budget: 0.5 }));
    expect(earned).toContain('efficient');
  });
  it('no sniper at 30% budget', () => {
    const earned = keys(wonRun({ spent: 0.15, budget: 0.5 }));
    expect(earned).not.toContain('sniper');
  });
  it('penny pincher under $0.10', () => {
    const earned = keys(wonRun({ spent: 0.08 }));
    expect(earned).toContain('penny');
  });
  it('no penny at $0.12', () => {
    const earned = keys(wonRun({ spent: 0.12 }));
    expect(earned).not.toContain('penny');
  });
  it('no sniper/efficient without budget', () => {
    const earned = keys(wonRun({ budget: null, spent: 0.05 }));
    for (const ratioKey of ['sniper', 'efficient']) {
      expect(earned).not.toContain(ratioKey);
    }
  });
});
|
|
122
|
+
|
|
123
|
+
// ── prompting skill ───────────────────────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
// Achievements driven by prompt count and the tool-calls-per-prompt ratio.
describe('prompting skill achievements', () => {
  it('one_shot on single prompt', () => {
    const earned = keys(wonRun({ promptCount: 1 }));
    expect(earned).toContain('one_shot');
  });
  it('no one_shot on 2 prompts', () => {
    const earned = keys(wonRun({ promptCount: 2 }));
    expect(earned).not.toContain('one_shot');
  });
  it('conversationalist at 20+ prompts', () => {
    const earned = keys(wonRun({ promptCount: 20 }));
    expect(earned).toContain('conversationalist');
  });
  it('terse: ≤3 prompts and ≥10 tool calls', () => {
    const earned = keys(wonRun({ promptCount: 2, totalToolCalls: 12 }));
    expect(earned).toContain('terse');
  });
  it('no terse if tool calls < 10', () => {
    const earned = keys(wonRun({ promptCount: 2, totalToolCalls: 8 }));
    expect(earned).not.toContain('terse');
  });
  it('high_leverage: 5+ tools per prompt', () => {
    const earned = keys(wonRun({ promptCount: 2, totalToolCalls: 10 }));
    expect(earned).toContain('high_leverage');
  });
  it('backseat_driver: 15+ prompts, <1 tool per prompt', () => {
    const earned = keys(wonRun({ promptCount: 15, totalToolCalls: 10 }));
    expect(earned).toContain('backseat_driver');
  });
});
|
|
148
|
+
|
|
149
|
+
// ── tool mastery ──────────────────────────────────────────────────────────────
|
|
150
|
+
|
|
151
|
+
// Achievements driven by the per-tool call breakdown of a won run.
describe('tool mastery achievements', () => {
  // Achievement keys for a won run with the given tool breakdown and total;
  // `extra` carries any further overrides, applied after the two tool fields.
  const toolKeys = (toolCalls, totalToolCalls, extra = {}) =>
    keys(wonRun({ toolCalls, totalToolCalls, ...extra }));

  it('read_only: reads but no edits/writes', () => {
    expect(toolKeys({ Read: 5 }, 5)).toContain('read_only');
  });
  it('no read_only if Edit present', () => {
    expect(toolKeys({ Read: 5, Edit: 1 }, 6)).not.toContain('read_only');
  });
  it('editor: 10+ Edit calls', () => {
    expect(toolKeys({ Edit: 10 }, 10)).toContain('editor');
  });
  it('bash_warrior: 10+ Bash, ≥50% of tools', () => {
    expect(toolKeys({ Bash: 10, Read: 5 }, 15)).toContain('bash_warrior'); // 67%
    expect(toolKeys({ Bash: 10, Read: 9 }, 19)).toContain('bash_warrior'); // 53%
    expect(toolKeys({ Bash: 10 }, 10)).toContain('bash_warrior'); // 100%
    expect(toolKeys({ Bash: 5, Read: 6 }, 11)).not.toContain('bash_warrior'); // <10 Bash
  });
  it('scout: ≥60% Read, ≥5 total', () => {
    expect(toolKeys({ Read: 7, Edit: 3 }, 10)).toContain('scout');
  });
  it('no scout if <60% Read', () => {
    expect(toolKeys({ Read: 5, Edit: 5 }, 10)).not.toContain('scout');
  });
  it('surgeon: 1-3 Edits, under budget', () => {
    const earned = toolKeys({ Read: 5, Edit: 2 }, 7, { spent: 0.1, budget: 0.5 });
    expect(earned).toContain('surgeon');
  });
  it('no surgeon if over budget', () => {
    const earned = toolKeys({ Edit: 2 }, 2, { spent: 0.6, budget: 0.5 });
    expect(earned).not.toContain('surgeon');
  });
  it('toolbox: 5+ distinct tools', () => {
    const fiveTools = { Read: 1, Edit: 1, Bash: 1, Glob: 1, Grep: 1 };
    expect(toolKeys(fiveTools, 5)).toContain('toolbox');
  });
});
|
|
203
|
+
|
|
204
|
+
// ── cost per prompt ───────────────────────────────────────────────────────────
|
|
205
|
+
|
|
206
|
+
// Achievements driven by average spend per prompt.
describe('cost per prompt', () => {
  it('cheap_shots: <$0.01 per prompt, ≥3 prompts', () => {
    const earned = keys(wonRun({ spent: 0.02, promptCount: 3 }));
    expect(earned).toContain('cheap_shots');
  });
  it('expensive_taste on won run: ≥$0.50 per prompt, ≥3 prompts', () => {
    // Budget is raised to 2.0 so the 1.6 spend stays under budget.
    const earned = keys(wonRun({ spent: 1.6, promptCount: 3, budget: 2.0 }));
    expect(earned).toContain('expensive_taste');
  });
});
|
|
214
|
+
|
|
215
|
+
// ── time-based ────────────────────────────────────────────────────────────────
|
|
216
|
+
|
|
217
|
+
// Achievements driven by the span between startedAt and endedAt.
describe('time-based achievements', () => {
  const MINUTE_MS = 60 * 1000;
  const HOUR_MS = 60 * MINUTE_MS;

  // Achievement keys for a won run that started `elapsedMs` ago and ends now.
  const keysForElapsed = (elapsedMs) => {
    const startedAt = new Date(Date.now() - elapsedMs).toISOString();
    return keys(wonRun({ startedAt, endedAt: new Date().toISOString() }));
  };

  it('speedrun under 5 minutes', () => {
    expect(keysForElapsed(3 * MINUTE_MS)).toContain('speedrun');
  });
  it('no speedrun at 10 minutes', () => {
    expect(keysForElapsed(10 * MINUTE_MS)).not.toContain('speedrun');
  });
  it('marathon over 60 minutes', () => {
    expect(keysForElapsed(90 * MINUTE_MS)).toContain('marathon');
  });
  it('endurance over 3 hours', () => {
    expect(keysForElapsed(4 * HOUR_MS)).toContain('endurance');
  });
  it('endurance but not marathon at 4 hours', () => {
    const earned = keysForElapsed(4 * HOUR_MS);
    expect(earned).toContain('endurance');
    expect(earned).not.toContain('marathon');
  });
});
|
|
249
|
+
|
|
250
|
+
// ── death marks ───────────────────────────────────────────────────────────────
|
|
251
|
+
|
|
252
|
+
// Marks awarded to runs with status 'died'.
describe('death marks', () => {
  it('blowout at 2× budget', () => {
    const marks = keys(diedRun({ spent: 1.0, budget: 0.5 }));
    expect(marks).toContain('blowout');
  });
  it('no blowout at 1.5×', () => {
    const marks = keys(diedRun({ spent: 0.75, budget: 0.5 }));
    expect(marks).not.toContain('blowout');
  });
  it('so_close between 100-110% budget', () => {
    const marks = keys(diedRun({ spent: 0.52, budget: 0.5 }));
    expect(marks).toContain('so_close');
  });
  it('no so_close at 115%', () => {
    const marks = keys(diedRun({ spent: 0.575, budget: 0.5 }));
    expect(marks).not.toContain('so_close');
  });
  it('tool_happy at 30+ tool calls', () => {
    const marks = keys(diedRun({ totalToolCalls: 30 }));
    expect(marks).toContain('tool_happy');
  });
  it('no tool_happy at 29', () => {
    const marks = keys(diedRun({ totalToolCalls: 29 }));
    expect(marks).not.toContain('tool_happy');
  });
  it('silent_death at ≤2 prompts', () => {
    const marks = keys(diedRun({ promptCount: 2 }));
    expect(marks).toContain('silent_death');
  });
  it('no silent_death at 3 prompts', () => {
    const marks = keys(diedRun({ promptCount: 3 }));
    expect(marks).not.toContain('silent_death');
  });
  it('fumble at 5+ failed tool calls on death', () => {
    const marks = keys(diedRun({ failedToolCalls: 5 }));
    expect(marks).toContain('fumble');
  });
});
|
|
281
|
+
|
|
282
|
+
// ── death marks don't fire on won runs ───────────────────────────────────────
|
|
283
|
+
|
|
284
|
+
// Inputs that would earn a death mark on a died run must not earn it on a won run.
describe("death marks don't fire on won runs", () => {
  it('no blowout on won', () => {
    const earned = keys(wonRun({ spent: 2.0, budget: 0.5 }));
    expect(earned).not.toContain('blowout');
  });
  it('no tool_happy on won', () => {
    const earned = keys(wonRun({ totalToolCalls: 35 }));
    expect(earned).not.toContain('tool_happy');
  });
  it('no silent_death on won', () => {
    const earned = keys(wonRun({ promptCount: 1 }));
    expect(earned).not.toContain('silent_death');
  });
});
|
|
295
|
+
|
|
296
|
+
// ── hubris / ultrathink ───────────────────────────────────────────────────────
|
|
297
|
+
|
|
298
|
+
// Achievements driven by thinkingInvocations, on both died and won runs.
describe('hubris + ultrathink', () => {
  it('hubris fires on death with thinking', () => {
    const marks = keys(diedRun({ thinkingInvocations: 2 }));
    expect(marks).toContain('hubris');
  });
  it('no hubris on won with thinking', () => {
    const earned = keys(wonRun({ thinkingInvocations: 2 }));
    expect(earned).not.toContain('hubris');
  });
  it('spell_cast on won with thinking', () => {
    const earned = keys(wonRun({ thinkingInvocations: 1 }));
    expect(earned).toContain('spell_cast');
  });
  it('deep_thinker at 3+ invocations', () => {
    const earned = keys(wonRun({ thinkingInvocations: 3 }));
    expect(earned).toContain('deep_thinker');
  });
  it('silent_run on won with 0 thinking and SOLID budget', () => {
    const earned = keys(wonRun({ thinkingInvocations: 0, spent: 0.3, budget: 0.5 }));
    expect(earned).toContain('silent_run');
  });
  it('no silent_run when thinking undefined', () => {
    // wonRun() defaults thinkingInvocations to 0, so the field has to be
    // removed to exercise the "undefined" case.
    const withoutThinking = wonRun();
    delete withoutThinking.thinkingInvocations;
    expect(keys(withoutThinking)).not.toContain('silent_run');
  });
});
|
|
322
|
+
|
|
323
|
+
// ── phase 2: new hook fields ──────────────────────────────────────────────────
|
|
324
|
+
|
|
325
|
+
// Achievements driven by failedToolCalls on won runs.
describe('failed tool call achievements', () => {
  it('clean_run: 0 failures, ≥5 tool calls', () => {
    expect(keys(wonRun({ failedToolCalls: 0, totalToolCalls: 10 }))).toContain('clean_run');
  });
  it('no clean_run if <5 tool calls', () => {
    expect(keys(wonRun({ failedToolCalls: 0, totalToolCalls: 4 }))).not.toContain('clean_run');
  });
  // Title fixed: it used to read "no clean_run if failedToolCalls undefined…"
  // while the assertion expects clean_run to be present.
  it('clean_run still fires if failedToolCalls undefined (treated as 0)', () => {
    // undefined ?? 0 = 0 → should still fire
    const run = wonRun({ totalToolCalls: 10 });
    // wonRun() sets no failedToolCalls; the delete keeps the precondition
    // explicit should the fixture ever gain that default.
    delete run.failedToolCalls;
    expect(keys(run)).toContain('clean_run');
  });
  it('stubborn: 10+ failures, still won', () => {
    expect(keys(wonRun({ failedToolCalls: 10 }))).toContain('stubborn');
  });
});
|
|
342
|
+
|
|
343
|
+
// Achievements driven by subagentSpawns on won runs.
describe('subagent achievements', () => {
  it('lone_wolf: 0 subagents', () => {
    expect(keys(wonRun({ subagentSpawns: 0 }))).toContain('lone_wolf');
  });
  // Title fixed: it used to read "no lone_wolf if subagents undefined…"
  // while the assertion expects lone_wolf to be present.
  it('lone_wolf still fires if subagentSpawns undefined (defaults to 0)', () => {
    const run = wonRun();
    // wonRun() sets no subagentSpawns; the delete keeps the precondition
    // explicit should the fixture ever gain that default.
    delete run.subagentSpawns;
    expect(keys(run)).toContain('lone_wolf');
  });
  it('summoner: 5+ subagents', () => {
    expect(keys(wonRun({ subagentSpawns: 5 }))).toContain('summoner');
  });
  it('army: 10+ subagents, <50% budget', () => {
    expect(keys(wonRun({ subagentSpawns: 10, spent: 0.1, budget: 0.5 }))).toContain('army');
  });
  it('no army if ≥50% budget', () => {
    expect(keys(wonRun({ subagentSpawns: 10, spent: 0.3, budget: 0.5 }))).not.toContain('army');
  });
});
|
|
362
|
+
|
|
363
|
+
// Achievements driven by how turnCount compares with promptCount.
describe('turn count achievements', () => {
  // Achievement keys for a won run with the given turn and prompt counts.
  const turnKeys = (turnCount, promptCount) => keys(wonRun({ turnCount, promptCount }));

  it('agentic: 3+ turns per prompt', () => {
    expect(turnKeys(15, 5)).toContain('agentic');
  });
  it('no agentic below 3× ratio', () => {
    expect(turnKeys(10, 5)).not.toContain('agentic');
  });
  it('obedient: turnCount === promptCount, ≥3 prompts', () => {
    expect(turnKeys(5, 5)).toContain('obedient');
  });
  it('no obedient if mismatch', () => {
    expect(turnKeys(6, 5)).not.toContain('obedient');
  });
  it('no obedient if <3 prompts', () => {
    expect(turnKeys(2, 2)).not.toContain('obedient');
  });
});
|
|
380
|
+
|
|
381
|
+
// ── session / rest achievements ───────────────────────────────────────────────
|
|
382
|
+
|
|
383
|
+
// Achievements driven by sessionCount and the fainted flag.
describe('session achievements', () => {
  it('no_rest on single session', () => {
    const earned = keys(wonRun({ sessionCount: 1 }));
    expect(earned).toContain('no_rest');
  });
  it('made_camp on 2+ sessions', () => {
    const earned = keys(wonRun({ sessionCount: 2 }));
    expect(earned).toContain('made_camp');
  });
  it('came_back if fainted', () => {
    const earned = keys(wonRun({ fainted: true, sessionCount: 2 }));
    expect(earned).toContain('came_back');
  });
});
|
|
394
|
+
|
|
395
|
+
// ── getModelClass opusplan ────────────────────────────────────────────────────
|
|
396
|
+
|
|
397
|
+
// The literal model string 'opusplan' resolves to its own MODEL_CLASSES entry.
describe('getModelClass opusplan', () => {
  it('opusplan string → MODEL_CLASSES.opusplan', () => {
    const resolved = getModelClass('opusplan');
    expect(resolved).toBe(MODEL_CLASSES.opusplan);
  });
});
|
|
402
|
+
|
|
403
|
+
// ── Paladin achievements ──────────────────────────────────────────────────────
|
|
404
|
+
|
|
405
|
+
// Achievements for won runs whose model is 'opusplan'.
describe('Paladin achievements', () => {
  // Achievement keys for a won opusplan run that spent 0.1, with the given
  // per-model spend breakdown.
  const splitKeys = (modelBreakdown) =>
    keys(wonRun({ model: 'opusplan', spent: 0.1, modelBreakdown }));

  it('paladin: opusplan win', () => {
    const earned = keys(wonRun({ model: 'opusplan' }));
    expect(earned).toContain('paladin');
  });
  it('grand_strategist: opusplan win at ≤25% budget', () => {
    const earned = keys(wonRun({ model: 'opusplan', spent: 0.1, budget: 0.5 }));
    expect(earned).toContain('grand_strategist');
  });
  it('no grand_strategist at 30% budget', () => {
    const earned = keys(wonRun({ model: 'opusplan', spent: 0.15, budget: 0.5 }));
    expect(earned).not.toContain('grand_strategist');
  });
  it('architect: opus pct > 60%', () => {
    const earned = splitKeys({ 'claude-opus-4-6': 0.07, 'claude-sonnet-4-6': 0.03 });
    expect(earned).toContain('architect');
  });
  it('blitz: opus pct < 25%', () => {
    const earned = splitKeys({ 'claude-opus-4-6': 0.02, 'claude-sonnet-4-6': 0.08 });
    expect(earned).toContain('blitz');
  });
  it('equilibrium: opus pct 40–60%', () => {
    const earned = splitKeys({ 'claude-opus-4-6': 0.05, 'claude-sonnet-4-6': 0.05 });
    expect(earned).toContain('equilibrium');
  });
  it('Paladin does not fire purist/committed/chameleon', () => {
    const earned = keys(wonRun({ model: 'opusplan', spent: 0.1, budget: 0.5 }));
    for (const switchingKey of ['purist', 'committed', 'chameleon']) {
      expect(earned).not.toContain(switchingKey);
    }
  });
});
|
|
442
|
+
|
|
443
|
+
// ── effort-based achievements ─────────────────────────────────────────────────
|
|
444
|
+
|
|
445
|
+
// Achievements driven by the run's `effort` setting.
describe('effort-based achievements', () => {
  it('speedrunner: low effort, completed under budget', () => {
    const earned = keys(wonRun({ effort: 'low', spent: 0.1, budget: 0.5 }));
    expect(earned).toContain('speedrunner');
  });
  it('no speedrunner if over budget', () => {
    const earned = keys(wonRun({ effort: 'low', spent: 0.6, budget: 0.5 }));
    expect(earned).not.toContain('speedrunner');
  });
  it('tryhard: high effort, ≤25% budget', () => {
    const earned = keys(wonRun({ effort: 'high', spent: 0.1, budget: 0.5 }));
    expect(earned).toContain('tryhard');
  });
  it('no tryhard at 30%', () => {
    const earned = keys(wonRun({ effort: 'high', spent: 0.15, budget: 0.5 }));
    expect(earned).not.toContain('tryhard');
  });
  it('archmagus: max effort, opus model', () => {
    const earned = keys(wonRun({ effort: 'max', model: 'claude-opus-4-6' }));
    expect(earned).toContain('archmagus');
  });
  it('no archmagus: max effort, non-opus', () => {
    const earned = keys(wonRun({ effort: 'max', model: 'claude-sonnet-4-6' }));
    expect(earned).not.toContain('archmagus');
  });
});
|
|
465
|
+
|
|
466
|
+
// ── fast mode achievements ────────────────────────────────────────────────────
|
|
467
|
+
|
|
468
|
+
// Achievements for won runs with fastMode enabled.
describe('fast mode achievements', () => {
  // Achievement keys for a won fastMode run on `model` that spent 0.1 of 0.5.
  const fastKeys = (model) => keys(wonRun({ fastMode: true, model, spent: 0.1, budget: 0.5 }));

  it('lightning: opus fastMode, under budget', () => {
    expect(fastKeys('claude-opus-4-6')).toContain('lightning');
  });
  it('daredevil: opus fastMode, ≤25% budget', () => {
    expect(fastKeys('claude-opus-4-6')).toContain('daredevil');
  });
  it('no lightning for non-opus fastMode', () => {
    expect(fastKeys('claude-sonnet-4-6')).not.toContain('lightning');
  });
});
|
|
485
|
+
|
|
486
|
+
// ── compaction / gear achievements ────────────────────────────────────────────
|
|
487
|
+
|
|
488
|
+
// Achievements driven by compactionEvents (trigger plus context percentage).
describe('compaction achievements', () => {
  // Achievement keys for a won run with exactly one compaction event.
  const compactionKeys = (trigger, contextPct) =>
    keys(wonRun({ compactionEvents: [{ trigger, contextPct }] }));

  it('overencumbered: auto-compaction event', () => {
    expect(compactionKeys('auto', 92)).toContain('overencumbered');
  });
  it('no overencumbered with only manual compaction', () => {
    expect(compactionKeys('manual', 45)).not.toContain('overencumbered');
  });
  it('ghost_run: manual compact at ≤30% context', () => {
    expect(compactionKeys('manual', 28)).toContain('ghost_run');
  });
  it('ultralight: manual compact at 31–40% context', () => {
    expect(compactionKeys('manual', 35)).toContain('ultralight');
  });
  it('traveling_light: manual compact at 41–50% context', () => {
    expect(compactionKeys('manual', 48)).toContain('traveling_light');
  });
  it('no compaction achievement above 50%', () => {
    const earned = compactionKeys('manual', 60);
    for (const gearKey of ['ghost_run', 'ultralight', 'traveling_light']) {
      expect(earned).not.toContain(gearKey);
    }
  });
});
|
|
517
|
+
|
|
518
|
+
// ── calculated_risk ───────────────────────────────────────────────────────────
|
|
519
|
+
|
|
520
|
+
// calculated_risk: one thinking invocation, checked at two spend levels
// against a budget of 0.5.
describe('calculated_risk', () => {
  const riskKeys = (spent) => keys(wonRun({ thinkingInvocations: 1, spent, budget: 0.5 }));

  it('fires with thinking + ≤25% budget', () => {
    expect(riskKeys(0.1)).toContain('calculated_risk');
  });
  it('no calculated_risk at 30% budget', () => {
    expect(riskKeys(0.15)).not.toContain('calculated_risk');
  });
});
|
|
532
|
+
|
|
533
|
+
// ── multi-model achievements ──────────────────────────────────────────────────
|
|
534
|
+
|
|
535
|
+
// Achievements driven by haiku's share of spend in modelBreakdown.
describe('frugal + rogue_run', () => {
  // Achievement keys for a won run that spent 0.1, split between haiku and
  // sonnet as given.
  const haikuSplitKeys = (haikuSpend, sonnetSpend) => {
    const modelBreakdown = {
      'claude-haiku-4-5-20251001': haikuSpend,
      'claude-sonnet-4-6': sonnetSpend,
    };
    return keys(wonRun({ modelBreakdown, spent: 0.1 }));
  };

  it('frugal: haiku ≥50% of spend', () => {
    expect(haikuSplitKeys(0.06, 0.04)).toContain('frugal');
  });
  it('rogue_run: haiku ≥75% of spend', () => {
    expect(haikuSplitKeys(0.08, 0.02)).toContain('rogue_run');
  });
  it('no frugal when haiku < 50%', () => {
    expect(haikuSplitKeys(0.04, 0.06)).not.toContain('frugal');
  });
  it('no rogue_run when haiku < 75%', () => {
    expect(haikuSplitKeys(0.06, 0.04)).not.toContain('rogue_run');
  });
});
|
|
553
|
+
|
|
554
|
+
// ── model switching achievements ──────────────────────────────────────────────
|
|
555
|
+
|
|
556
|
+
// Achievements driven by modelSwitches / distinctModels, and by spend landing
// on a different model than the one declared for the run.
describe('model switching achievements', () => {
  const HAIKU = 'claude-haiku-4-5-20251001';
  const SONNET = 'claude-sonnet-4-6';
  const OPUS = 'claude-opus-4-6';

  it('purist: single distinct model', () => {
    const earned = keys(wonRun({ modelSwitches: 0, distinctModels: 1 }));
    expect(earned).toContain('purist');
  });
  it('committed: 0 switches, ≤1 distinct', () => {
    const earned = keys(wonRun({ modelSwitches: 0, distinctModels: 1 }));
    expect(earned).toContain('committed');
  });
  it('chameleon: 2+ distinct models, under budget', () => {
    const run = wonRun({ modelSwitches: 2, distinctModels: 2, spent: 0.1, budget: 0.5 });
    expect(keys(run)).toContain('chameleon');
  });
  it('no chameleon if over budget', () => {
    const run = wonRun({ modelSwitches: 2, distinctModels: 2, spent: 0.6, budget: 0.5 });
    expect(keys(run)).not.toContain('chameleon');
  });
  it('tactical_switch: exactly 1 switch, under budget', () => {
    const run = wonRun({ modelSwitches: 1, distinctModels: 2, spent: 0.1, budget: 0.5 });
    expect(keys(run)).toContain('tactical_switch');
  });
  it('class_defection: declared haiku but >50% on heavier models', () => {
    const modelBreakdown = { [HAIKU]: 0.04, [SONNET]: 0.06 };
    const run = wonRun({ model: HAIKU, modelBreakdown, spent: 0.1 });
    expect(keys(run)).toContain('class_defection');
  });
  it('no class_defection: declared haiku, haiku dominant', () => {
    const modelBreakdown = { [HAIKU]: 0.07, [SONNET]: 0.03 };
    const run = wonRun({ model: HAIKU, modelBreakdown, spent: 0.1 });
    expect(keys(run)).not.toContain('class_defection');
  });
  it('class_defection: declared sonnet but >40% on opus', () => {
    const modelBreakdown = { [SONNET]: 0.05, [OPUS]: 0.05 };
    const run = wonRun({ model: SONNET, modelBreakdown, spent: 0.1 });
    expect(keys(run)).toContain('class_defection');
  });
});
|