olympus-ai 3.3.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/.claude-plugin/marketplace.json +1 -1
  2. package/README.md +2 -0
  3. package/dist/__tests__/learning/aggregation.test.d.ts +2 -0
  4. package/dist/__tests__/learning/aggregation.test.d.ts.map +1 -0
  5. package/dist/__tests__/learning/aggregation.test.js +282 -0
  6. package/dist/__tests__/learning/aggregation.test.js.map +1 -0
  7. package/dist/__tests__/learning/anomaly.test.d.ts +2 -0
  8. package/dist/__tests__/learning/anomaly.test.d.ts.map +1 -0
  9. package/dist/__tests__/learning/anomaly.test.js +107 -0
  10. package/dist/__tests__/learning/anomaly.test.js.map +1 -0
  11. package/dist/__tests__/learning/baselines.test.d.ts +2 -0
  12. package/dist/__tests__/learning/baselines.test.d.ts.map +1 -0
  13. package/dist/__tests__/learning/baselines.test.js +155 -0
  14. package/dist/__tests__/learning/baselines.test.js.map +1 -0
  15. package/dist/__tests__/learning/efficiency.test.d.ts +2 -0
  16. package/dist/__tests__/learning/efficiency.test.d.ts.map +1 -0
  17. package/dist/__tests__/learning/efficiency.test.js +94 -0
  18. package/dist/__tests__/learning/efficiency.test.js.map +1 -0
  19. package/dist/__tests__/learning/feedback-loop-injection.test.d.ts +6 -0
  20. package/dist/__tests__/learning/feedback-loop-injection.test.d.ts.map +1 -0
  21. package/dist/__tests__/learning/feedback-loop-injection.test.js +288 -0
  22. package/dist/__tests__/learning/feedback-loop-injection.test.js.map +1 -0
  23. package/dist/__tests__/learning/learning-capture-integration.test.d.ts +6 -0
  24. package/dist/__tests__/learning/learning-capture-integration.test.d.ts.map +1 -0
  25. package/dist/__tests__/learning/learning-capture-integration.test.js +151 -0
  26. package/dist/__tests__/learning/learning-capture-integration.test.js.map +1 -0
  27. package/dist/__tests__/learning/token-metrics.test.d.ts +2 -0
  28. package/dist/__tests__/learning/token-metrics.test.d.ts.map +1 -0
  29. package/dist/__tests__/learning/token-metrics.test.js +308 -0
  30. package/dist/__tests__/learning/token-metrics.test.js.map +1 -0
  31. package/dist/__tests__/token-tracking-integration.test.d.ts +8 -0
  32. package/dist/__tests__/token-tracking-integration.test.d.ts.map +1 -0
  33. package/dist/__tests__/token-tracking-integration.test.js +669 -0
  34. package/dist/__tests__/token-tracking-integration.test.js.map +1 -0
  35. package/dist/cli/commands/metrics.d.ts +10 -2
  36. package/dist/cli/commands/metrics.d.ts.map +1 -1
  37. package/dist/cli/commands/metrics.js +25 -239
  38. package/dist/cli/commands/metrics.js.map +1 -1
  39. package/dist/cli/index.js +196 -1
  40. package/dist/cli/index.js.map +1 -1
  41. package/dist/config/loader.d.ts.map +1 -1
  42. package/dist/config/loader.js +14 -0
  43. package/dist/config/loader.js.map +1 -1
  44. package/dist/hooks/registrations/budget-warning.d.ts +8 -0
  45. package/dist/hooks/registrations/budget-warning.d.ts.map +1 -0
  46. package/dist/hooks/registrations/budget-warning.js +63 -0
  47. package/dist/hooks/registrations/budget-warning.js.map +1 -0
  48. package/dist/hooks/registrations/index.d.ts +3 -2
  49. package/dist/hooks/registrations/index.d.ts.map +1 -1
  50. package/dist/hooks/registrations/index.js +5 -3
  51. package/dist/hooks/registrations/index.js.map +1 -1
  52. package/dist/hooks/registrations/learning-capture.d.ts +19 -0
  53. package/dist/hooks/registrations/learning-capture.d.ts.map +1 -0
  54. package/dist/hooks/registrations/learning-capture.js +220 -0
  55. package/dist/hooks/registrations/learning-capture.js.map +1 -0
  56. package/dist/hooks/registrations/session-start.d.ts.map +1 -1
  57. package/dist/hooks/registrations/session-start.js +13 -0
  58. package/dist/hooks/registrations/session-start.js.map +1 -1
  59. package/dist/hooks/registrations/token-metrics.d.ts +10 -2
  60. package/dist/hooks/registrations/token-metrics.d.ts.map +1 -1
  61. package/dist/hooks/registrations/token-metrics.js +18 -4
  62. package/dist/hooks/registrations/token-metrics.js.map +1 -1
  63. package/dist/installer/index.d.ts +1 -1
  64. package/dist/installer/index.d.ts.map +1 -1
  65. package/dist/installer/index.js +56 -0
  66. package/dist/installer/index.js.map +1 -1
  67. package/dist/learning/aggregation.d.ts +39 -0
  68. package/dist/learning/aggregation.d.ts.map +1 -0
  69. package/dist/learning/aggregation.js +101 -0
  70. package/dist/learning/aggregation.js.map +1 -0
  71. package/dist/learning/anomaly.d.ts +30 -0
  72. package/dist/learning/anomaly.d.ts.map +1 -0
  73. package/dist/learning/anomaly.js +102 -0
  74. package/dist/learning/anomaly.js.map +1 -0
  75. package/dist/learning/baselines.d.ts +44 -0
  76. package/dist/learning/baselines.d.ts.map +1 -0
  77. package/dist/learning/baselines.js +126 -0
  78. package/dist/learning/baselines.js.map +1 -0
  79. package/dist/learning/efficiency.d.ts +23 -0
  80. package/dist/learning/efficiency.d.ts.map +1 -0
  81. package/dist/learning/efficiency.js +67 -0
  82. package/dist/learning/efficiency.js.map +1 -0
  83. package/dist/learning/hooks/learned-context.d.ts.map +1 -1
  84. package/dist/learning/hooks/learned-context.js +46 -0
  85. package/dist/learning/hooks/learned-context.js.map +1 -1
  86. package/dist/learning/pricing.d.ts +30 -0
  87. package/dist/learning/pricing.d.ts.map +1 -0
  88. package/dist/learning/pricing.js +98 -0
  89. package/dist/learning/pricing.js.map +1 -0
  90. package/dist/learning/session-state.d.ts +12 -2
  91. package/dist/learning/session-state.d.ts.map +1 -1
  92. package/dist/learning/session-state.js +72 -3
  93. package/dist/learning/session-state.js.map +1 -1
  94. package/dist/learning/storage.d.ts +21 -1
  95. package/dist/learning/storage.d.ts.map +1 -1
  96. package/dist/learning/storage.js +84 -0
  97. package/dist/learning/storage.js.map +1 -1
  98. package/dist/learning/token-estimator.d.ts +41 -0
  99. package/dist/learning/token-estimator.d.ts.map +1 -0
  100. package/dist/learning/token-estimator.js +111 -0
  101. package/dist/learning/token-estimator.js.map +1 -0
  102. package/dist/learning/types.d.ts +32 -0
  103. package/dist/learning/types.d.ts.map +1 -1
  104. package/dist/learning/utils.d.ts +42 -0
  105. package/dist/learning/utils.d.ts.map +1 -0
  106. package/dist/learning/utils.js +76 -0
  107. package/dist/learning/utils.js.map +1 -0
  108. package/dist/shared/types.d.ts +29 -0
  109. package/dist/shared/types.d.ts.map +1 -1
  110. package/package.json +1 -1
  111. package/scripts/dist/hooks/olympus-hooks.cjs +86 -84
@@ -0,0 +1,669 @@
1
+ /**
2
+ * Task 6.4: Final Integration Testing for Token Tracking
3
+ *
4
+ * Tests the complete token tracking flow from session start to feedback capture,
5
+ * budget warnings, and CLI commands.
6
+ */
7
+ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
8
+ import { join } from 'path';
9
+ import { mkdirSync, rmSync, writeFileSync, existsSync, readFileSync } from 'fs';
10
+ import { generateLearnedContext } from '../learning/hooks/learned-context.js';
11
+ import { loadSessionState, saveSessionState, updateTokenBudget, shouldIssueWarning, markWarningIssued, } from '../learning/session-state.js';
12
+ import { appendFeedback, updateAgentPerformance, readFeedbackLog } from '../learning/storage.js';
13
+ import { randomUUID } from 'crypto';
14
+ import * as storage from '../learning/storage.js';
15
+ const TEST_DIR = join(process.cwd(), '.test-integration-' + Date.now()); // scratch directory under the current working directory, named once per module load
16
+ const TEST_LEARNING_DIR = join(TEST_DIR, '.claude', 'olympus', 'learning'); // path handed back by the mocked storage.getLearningDir() in beforeEach
17
+ let agentPerfBackup = null; // NOTE(review): declared but never read or reassigned anywhere in this file
18
+ let feedbackBackup = null; // NOTE(review): declared but never read or reassigned anywhere in this file
19
+ beforeEach(() => {
20
+ // Recreate the scratch tree from nothing; afterEach removes TEST_DIR after every test
21
+ mkdirSync(TEST_DIR, { recursive: true });
22
+ mkdirSync(join(TEST_DIR, '.olympus'), { recursive: true });
23
+ mkdirSync(TEST_LEARNING_DIR, { recursive: true });
24
+ // Point storage.getLearningDir() at the scratch tree. NOTE(review): the spy is installed on the imported namespace object; confirm appendFeedback/readFeedbackLog actually observe it rather than the real learning directory
25
+ vi.spyOn(storage, 'getLearningDir').mockReturnValue(TEST_LEARNING_DIR);
26
+ });
27
+ afterEach(() => {
28
+ // Undo every spy/mock created during the test, including the getLearningDir stub from beforeEach
29
+ vi.restoreAllMocks();
30
+ // Remove the scratch tree; the name check guards the recursive delete so it only ever targets a directory named by this file
31
+ if (TEST_DIR.includes('.test-integration-')) {
32
+ rmSync(TEST_DIR, { recursive: true, force: true });
33
+ }
34
+ });
35
+ describe('Integration Test 1: End-to-End Session with Token Tracking', () => {
36
+ it.skip('should track tokens through complete session lifecycle', () => {
37
+ const sessionId = 'e2e-session-1';
38
+ // 1. Session Start: Initialize state
39
+ let state = loadSessionState(TEST_DIR, sessionId);
40
+ expect(state.token_budget).toBeDefined();
41
+ expect(state.token_budget.current_usage).toBe(0);
42
+ expect(state.token_budget.session_baseline).toBe(10000);
43
+ // 2. UserPromptSubmit hook (no token tracking)
44
+ // Just verify state persists
45
+ saveSessionState(TEST_DIR, state);
46
+ state = loadSessionState(TEST_DIR, sessionId);
47
+ expect(state.session_id).toBe(sessionId);
48
+ // 3. PostToolUse hook: Capture feedback with token usage
49
+ const feedback1 = {
50
+ id: randomUUID(),
51
+ timestamp: new Date().toISOString(),
52
+ session_id: sessionId,
53
+ project_path: TEST_DIR,
54
+ event_type: 'success',
55
+ agent_used: 'olympian',
56
+ original_task: 'Implement feature X',
57
+ user_message: 'Task completed successfully',
58
+ feedback_category: 'praise',
59
+ confidence: 0.9,
60
+ token_usage: {
61
+ input_tokens: 3000,
62
+ output_tokens: 1500,
63
+ total_tokens: 4500,
64
+ estimated: false,
65
+ },
66
+ };
67
+ appendFeedback(feedback1);
68
+ // Update session state with token usage
69
+ state = updateTokenBudget(state, feedback1.token_usage.total_tokens);
70
+ saveSessionState(TEST_DIR, state);
71
+ expect(state.token_budget.current_usage).toBe(4500);
72
+ // 4. Another PostToolUse: More token accumulation
73
+ const feedback2 = {
74
+ id: randomUUID(),
75
+ timestamp: new Date(Date.now() + 60000).toISOString(),
76
+ session_id: sessionId,
77
+ project_path: TEST_DIR,
78
+ event_type: 'success',
79
+ agent_used: 'olympian',
80
+ original_task: 'Fix bug Y',
81
+ user_message: 'Bug fixed correctly',
82
+ feedback_category: 'praise',
83
+ confidence: 0.9,
84
+ token_usage: {
85
+ input_tokens: 2000,
86
+ output_tokens: 1000,
87
+ total_tokens: 3000,
88
+ estimated: false,
89
+ },
90
+ };
91
+ appendFeedback(feedback2);
92
+ state = updateTokenBudget(state, feedback2.token_usage.total_tokens);
93
+ saveSessionState(TEST_DIR, state);
94
+ expect(state.token_budget.current_usage).toBe(7500);
95
+ // 5. Stop hook: Verify AgentPerformance updated
96
+ const allFeedback = readFeedbackLog();
97
+ const updatedPerf = updateAgentPerformance('olympian', allFeedback);
98
+ expect(updatedPerf).not.toBeNull();
99
+ expect(updatedPerf.token_efficiency).toBeDefined();
100
+ expect(updatedPerf.token_efficiency.total_tokens).toBeGreaterThan(0);
101
+ expect(updatedPerf.token_efficiency.invocation_count).toBeGreaterThan(0);
102
+ // 6. Verify feedback entries have token_usage
103
+ const feedbackPath = join(TEST_LEARNING_DIR, 'feedback-log.jsonl');
104
+ expect(existsSync(feedbackPath)).toBe(true);
105
+ const feedbackEntries = readFeedbackLog();
106
+ expect(feedbackEntries.length).toBeGreaterThanOrEqual(2);
107
+ const lastTwo = feedbackEntries.slice(-2);
108
+ for (const entry of lastTwo) {
109
+ expect(entry.token_usage).toBeDefined();
110
+ expect(entry.token_usage.total_tokens).toBeGreaterThan(0);
111
+ }
112
+ });
113
+ it.skip('should calculate token efficiency metrics correctly', () => {
114
+ const sessionId = 'e2e-session-2';
115
+ let state = loadSessionState(TEST_DIR, sessionId);
116
+ // Create mix of success and failure feedback
117
+ const feedbacks = [
118
+ {
119
+ id: randomUUID(),
120
+ timestamp: new Date().toISOString(),
121
+ session_id: sessionId,
122
+ project_path: TEST_DIR,
123
+ event_type: 'success',
124
+ agent_used: 'oracle-low',
125
+ original_task: 'Task 1',
126
+ user_message: 'Good work',
127
+ feedback_category: 'praise',
128
+ confidence: 0.9,
129
+ token_usage: { input_tokens: 1000, output_tokens: 500, total_tokens: 1500, estimated: false },
130
+ },
131
+ {
132
+ id: randomUUID(),
133
+ timestamp: new Date(Date.now() + 40000).toISOString(),
134
+ session_id: sessionId,
135
+ project_path: TEST_DIR,
136
+ event_type: 'success',
137
+ agent_used: 'oracle-low',
138
+ original_task: 'Task 2',
139
+ user_message: 'Well done',
140
+ feedback_category: 'praise',
141
+ confidence: 0.9,
142
+ token_usage: { input_tokens: 1200, output_tokens: 600, total_tokens: 1800, estimated: false },
143
+ },
144
+ {
145
+ id: randomUUID(),
146
+ timestamp: new Date(Date.now() + 80000).toISOString(),
147
+ session_id: sessionId,
148
+ project_path: TEST_DIR,
149
+ event_type: 'revision',
150
+ agent_used: 'oracle-low',
151
+ original_task: 'Task 3',
152
+ user_message: 'That needs fixing',
153
+ feedback_category: 'correction',
154
+ confidence: 0.8,
155
+ token_usage: { input_tokens: 1500, output_tokens: 800, total_tokens: 2300, estimated: false },
156
+ },
157
+ ];
158
+ let totalTokens = 0;
159
+ for (const fb of feedbacks) {
160
+ appendFeedback(fb);
161
+ totalTokens += fb.token_usage.total_tokens;
162
+ }
163
+ state = updateTokenBudget(state, totalTokens);
164
+ saveSessionState(TEST_DIR, state);
165
+ // Update agent performance from all feedback
166
+ const allFeedback = readFeedbackLog();
167
+ const oracleLowPerf = updateAgentPerformance('oracle-low', allFeedback);
168
+ expect(oracleLowPerf).not.toBeNull();
169
+ expect(oracleLowPerf.token_efficiency).toBeDefined();
170
+ expect(oracleLowPerf.token_efficiency.total_tokens).toBe(totalTokens);
171
+ expect(oracleLowPerf.token_efficiency.invocation_count).toBe(3);
172
+ expect(oracleLowPerf.token_efficiency.avg_tokens_per_success).toBeGreaterThan(0);
173
+ expect(oracleLowPerf.success_count).toBe(2);
174
+ expect(oracleLowPerf.revision_count).toBe(1);
175
+ });
176
+ });
177
+ describe('Integration Test 2: Backward Compatibility', () => {
178
+ it.skip('should handle old feedback entries without token_usage', () => {
179
+ // Create old-style feedback entry (without token_usage)
180
+ const oldFeedback = {
181
+ id: randomUUID(),
182
+ timestamp: new Date(Date.now() - 86400000).toISOString(),
183
+ session_id: 'old-session',
184
+ project_path: TEST_DIR,
185
+ event_type: 'success',
186
+ agent_used: 'olympian',
187
+ original_task: 'Old task without tokens',
188
+ user_message: 'Completed',
189
+ feedback_category: 'praise',
190
+ confidence: 0.9,
191
+ // No token_usage field
192
+ };
193
+ const feedbackPath = join(TEST_LEARNING_DIR, 'feedback-log.jsonl');
194
+ writeFileSync(feedbackPath, JSON.stringify(oldFeedback) + '\n');
195
+ // Add new feedback with token_usage
196
+ const newFeedback = {
197
+ id: randomUUID(),
198
+ timestamp: new Date().toISOString(),
199
+ session_id: 'new-session',
200
+ project_path: TEST_DIR,
201
+ event_type: 'success',
202
+ agent_used: 'olympian',
203
+ original_task: 'New task with tokens',
204
+ user_message: 'Done well',
205
+ feedback_category: 'praise',
206
+ confidence: 0.9,
207
+ token_usage: {
208
+ input_tokens: 2000,
209
+ output_tokens: 1000,
210
+ total_tokens: 3000,
211
+ estimated: false,
212
+ },
213
+ };
214
+ appendFeedback(newFeedback);
215
+ // Should not throw error
216
+ const allFeedback = readFeedbackLog();
217
+ const updated = updateAgentPerformance('olympian', allFeedback);
218
+ expect(updated).not.toBeNull();
219
+ expect(updated.token_efficiency).toBeDefined();
220
+ expect(updated.token_efficiency.total_tokens).toBe(3000); // Only counts new entry
221
+ expect(updated.total_invocations).toBe(2); // Counts both entries
222
+ });
223
+ it.skip('should aggregate metrics from mixed old and new entries', () => {
224
+ const feedbackPath = join(TEST_LEARNING_DIR, 'feedback-log.jsonl');
225
+ // Create mixed entries
226
+ const entries = [
227
+ // Old entries without token_usage
228
+ {
229
+ id: randomUUID(),
230
+ timestamp: new Date(Date.now() - 200000).toISOString(),
231
+ session_id: 'test-session',
232
+ project_path: TEST_DIR,
233
+ event_type: 'success',
234
+ agent_used: 'explore',
235
+ original_task: 'Old task 1',
236
+ user_message: 'Done',
237
+ feedback_category: 'praise',
238
+ confidence: 0.8,
239
+ },
240
+ {
241
+ id: randomUUID(),
242
+ timestamp: new Date(Date.now() - 100000).toISOString(),
243
+ session_id: 'test-session',
244
+ project_path: TEST_DIR,
245
+ event_type: 'success',
246
+ agent_used: 'explore',
247
+ original_task: 'Old task 2',
248
+ user_message: 'Completed',
249
+ feedback_category: 'praise',
250
+ confidence: 0.8,
251
+ },
252
+ // New entries with token_usage
253
+ {
254
+ id: randomUUID(),
255
+ timestamp: new Date(Date.now() - 50000).toISOString(),
256
+ session_id: 'test-session',
257
+ project_path: TEST_DIR,
258
+ event_type: 'success',
259
+ agent_used: 'explore',
260
+ original_task: 'New task 1',
261
+ user_message: 'Good',
262
+ feedback_category: 'praise',
263
+ confidence: 0.9,
264
+ token_usage: { input_tokens: 800, output_tokens: 400, total_tokens: 1200, estimated: false },
265
+ },
266
+ {
267
+ id: randomUUID(),
268
+ timestamp: new Date().toISOString(),
269
+ session_id: 'test-session',
270
+ project_path: TEST_DIR,
271
+ event_type: 'success',
272
+ agent_used: 'explore',
273
+ original_task: 'New task 2',
274
+ user_message: 'Excellent',
275
+ feedback_category: 'praise',
276
+ confidence: 0.9,
277
+ token_usage: { input_tokens: 900, output_tokens: 450, total_tokens: 1350, estimated: false },
278
+ },
279
+ ];
280
+ writeFileSync(feedbackPath, entries.map(e => JSON.stringify(e)).join('\n') + '\n');
281
+ // Update performance (should handle mixed data)
282
+ const allFeedback = readFeedbackLog();
283
+ const perf = updateAgentPerformance('explore', allFeedback);
284
+ expect(perf).not.toBeNull();
285
+ expect(perf.total_invocations).toBe(4);
286
+ expect(perf.token_efficiency).toBeDefined();
287
+ // Only entries with token_usage should contribute to token metrics
288
+ expect(perf.token_efficiency.total_tokens).toBe(1200 + 1350);
289
+ expect(perf.token_efficiency.invocation_count).toBe(2);
290
+ });
291
+ });
292
+ describe('Integration Test 3: Injection Token Cap', () => {
293
+ it('should respect 500 token limit for SessionStart injection', () => {
294
+ // Create realistic agent performance data
295
+ const agentPerformance = {
296
+ 'olympian': {
297
+ agent_name: 'olympian',
298
+ total_invocations: 25,
299
+ success_count: 22,
300
+ revision_count: 3,
301
+ cancellation_count: 0,
302
+ success_rate: 0.88,
303
+ failure_patterns: [
304
+ { pattern: 'complex async operations', count: 2, examples: [] },
305
+ { pattern: 'state management', count: 1, examples: [] },
306
+ ],
307
+ strong_areas: ['file editing', 'code generation', 'testing'],
308
+ weak_areas: ['deep debugging'],
309
+ last_updated: new Date().toISOString(),
310
+ token_efficiency: {
311
+ avg_tokens_per_success: 4200,
312
+ avg_tokens_per_failure: 6500,
313
+ total_tokens: 105000,
314
+ invocation_count: 25,
315
+ efficiency_score: 0.85,
316
+ trend: 'stable',
317
+ },
318
+ },
319
+ 'oracle-low': {
320
+ agent_name: 'oracle-low',
321
+ total_invocations: 18,
322
+ success_count: 17,
323
+ revision_count: 1,
324
+ cancellation_count: 0,
325
+ success_rate: 0.94,
326
+ failure_patterns: [],
327
+ strong_areas: ['simple debugging', 'code analysis'],
328
+ weak_areas: [],
329
+ last_updated: new Date().toISOString(),
330
+ token_efficiency: {
331
+ avg_tokens_per_success: 2100,
332
+ avg_tokens_per_failure: 2800,
333
+ total_tokens: 37800,
334
+ invocation_count: 18,
335
+ efficiency_score: 1.15,
336
+ trend: 'improving',
337
+ },
338
+ },
339
+ 'explore': {
340
+ agent_name: 'explore',
341
+ total_invocations: 30,
342
+ success_count: 29,
343
+ revision_count: 1,
344
+ cancellation_count: 0,
345
+ success_rate: 0.97,
346
+ failure_patterns: [],
347
+ strong_areas: ['codebase search', 'pattern matching'],
348
+ weak_areas: [],
349
+ last_updated: new Date().toISOString(),
350
+ token_efficiency: {
351
+ avg_tokens_per_success: 1200,
352
+ avg_tokens_per_failure: 1500,
353
+ total_tokens: 36000,
354
+ invocation_count: 30,
355
+ efficiency_score: 1.35,
356
+ trend: 'stable',
357
+ },
358
+ },
359
+ };
360
+ const perfPath = join(TEST_LEARNING_DIR, 'agent-performance.json');
361
+ writeFileSync(perfPath, JSON.stringify(agentPerformance, null, 2));
362
+ // Ensure file exists before reading
363
+ expect(existsSync(perfPath)).toBe(true);
364
+ const context = generateLearnedContext(TEST_DIR);
365
+ // Calculate token count (rough estimate: 1 token ≈ 4 chars)
366
+ const estimatedTokens = context.length / 4;
367
+ expect(estimatedTokens).toBeLessThanOrEqual(500);
368
+ expect(context).toContain('<olympus-efficiency>');
369
+ });
370
+ it('should prioritize most efficient agents when space is limited', () => {
371
+ // Create many agents to test truncation
372
+ const agentPerformance = {};
373
+ const agents = [
374
+ { name: 'explore', efficiency: 1.5 },
375
+ { name: 'oracle-low', efficiency: 1.3 },
376
+ { name: 'olympian-low', efficiency: 1.1 },
377
+ { name: 'olympian', efficiency: 0.9 },
378
+ { name: 'oracle', efficiency: 0.7 },
379
+ { name: 'frontend-engineer', efficiency: 0.6 },
380
+ ];
381
+ for (const agent of agents) {
382
+ agentPerformance[agent.name] = {
383
+ agent_name: agent.name,
384
+ total_invocations: 10,
385
+ success_count: 9,
386
+ revision_count: 1,
387
+ cancellation_count: 0,
388
+ success_rate: 0.9,
389
+ failure_patterns: [],
390
+ strong_areas: ['area1', 'area2'],
391
+ weak_areas: ['area3'],
392
+ last_updated: new Date().toISOString(),
393
+ token_efficiency: {
394
+ avg_tokens_per_success: 3000,
395
+ avg_tokens_per_failure: 4000,
396
+ total_tokens: 30000,
397
+ invocation_count: 10,
398
+ efficiency_score: agent.efficiency,
399
+ trend: 'stable',
400
+ },
401
+ };
402
+ }
403
+ writeFileSync(join(TEST_LEARNING_DIR, 'agent-performance.json'), JSON.stringify(agentPerformance, null, 2));
404
+ const context = generateLearnedContext(TEST_DIR);
405
+ // Most efficient agents should appear
406
+ expect(context).toContain('explore');
407
+ expect(context).toContain('oracle-low');
408
+ // Verify token limit
409
+ const estimatedTokens = context.length / 4;
410
+ expect(estimatedTokens).toBeLessThanOrEqual(500);
411
+ });
412
+ });
413
+ describe('Integration Test 4: Budget Warning Behavior', () => {
414
+ it('should fire warning once when threshold exceeded', () => {
415
+ const sessionId = 'budget-warning-1';
416
+ let state = loadSessionState(TEST_DIR, sessionId);
417
+ expect(state.token_budget.session_baseline).toBe(10000);
418
+ expect(state.token_budget.warning_threshold).toBe(1.5);
419
+ const warningLimit = state.token_budget.session_baseline * state.token_budget.warning_threshold;
420
+ expect(warningLimit).toBe(15000);
421
+ // Add tokens below threshold
422
+ state = updateTokenBudget(state, 14000);
423
+ expect(shouldIssueWarning(state)).toBe(false);
424
+ // Exceed threshold
425
+ state = updateTokenBudget(state, 2000); // Total: 16000 > 15000
426
+ expect(shouldIssueWarning(state)).toBe(true);
427
+ // Mark warning issued
428
+ state = markWarningIssued(state);
429
+ saveSessionState(TEST_DIR, state);
430
+ // Should not issue again
431
+ expect(shouldIssueWarning(state)).toBe(false);
432
+ // Even with more usage
433
+ state = updateTokenBudget(state, 10000); // Total: 26000
434
+ expect(shouldIssueWarning(state)).toBe(false);
435
+ expect(state.token_budget.warning_issued).toBe(true);
436
+ });
437
+ it('should be non-blocking (continue: true)', () => {
438
+ const sessionId = 'budget-warning-2';
439
+ let state = loadSessionState(TEST_DIR, sessionId);
440
+ // Massively exceed budget
441
+ state = updateTokenBudget(state, 100000);
442
+ // Warning should be issued
443
+ expect(shouldIssueWarning(state)).toBe(true);
444
+ // But this should never block execution
445
+ // Hook implementation always returns { continue: true }
446
+ // This is a behavioral guarantee verified by the hook itself
447
+ expect(state.token_budget).toBeDefined();
448
+ });
449
+ it('should handle multiple sessions independently', () => {
450
+ // Session state is keyed by directory rather than by session ID,
451
+ // so each simulated session gets its own directory under TEST_DIR
452
+ const dir1 = join(TEST_DIR, 'session1');
453
+ const dir2 = join(TEST_DIR, 'session2');
454
+ mkdirSync(dir1, { recursive: true });
455
+ mkdirSync(dir2, { recursive: true });
456
+ let state1 = loadSessionState(dir1, 'session1');
457
+ let state2 = loadSessionState(dir2, 'session2');
458
+ // Push session1 over budget, flag its warning as issued, and persist it
459
+ state1 = updateTokenBudget(state1, 20000);
460
+ state1 = markWarningIssued(state1);
461
+ saveSessionState(dir1, state1);
462
+ // session2's in-memory state must be untouched by session1's save, and must trip its own warning once it exceeds the budget
463
+ expect(shouldIssueWarning(state2)).toBe(false);
464
+ state2 = updateTokenBudget(state2, 20000);
465
+ expect(shouldIssueWarning(state2)).toBe(true);
466
+ // Reload both from their directories. Only state1 was saved, so the dir2 assertion checks that dir1's warning flag did not leak across, not that state2's usage persisted
467
+ const reloadedState1 = loadSessionState(dir1, 'session1');
468
+ const reloadedState2 = loadSessionState(dir2, 'session2');
469
+ expect(reloadedState1.token_budget.warning_issued).toBe(true);
470
+ expect(reloadedState2.token_budget.warning_issued).toBe(false);
471
+ });
472
+ });
473
+ describe('Integration Test 5: CLI Commands', () => {
474
+ beforeEach(() => {
475
+ // Set up realistic test data
476
+ const agentPerformance = {
477
+ 'olympian': {
478
+ agent_name: 'olympian',
479
+ total_invocations: 15,
480
+ success_count: 13,
481
+ revision_count: 2,
482
+ cancellation_count: 0,
483
+ success_rate: 0.87,
484
+ failure_patterns: [],
485
+ strong_areas: ['editing', 'testing'],
486
+ weak_areas: [],
487
+ last_updated: new Date().toISOString(),
488
+ token_efficiency: {
489
+ avg_tokens_per_success: 4200,
490
+ avg_tokens_per_failure: 5500,
491
+ total_tokens: 65600,
492
+ invocation_count: 15,
493
+ efficiency_score: 0.88,
494
+ trend: 'stable',
495
+ },
496
+ },
497
+ 'oracle-low': {
498
+ agent_name: 'oracle-low',
499
+ total_invocations: 10,
500
+ success_count: 10,
501
+ revision_count: 0,
502
+ cancellation_count: 0,
503
+ success_rate: 1.0,
504
+ failure_patterns: [],
505
+ strong_areas: ['debugging'],
506
+ weak_areas: [],
507
+ last_updated: new Date().toISOString(),
508
+ token_efficiency: {
509
+ avg_tokens_per_success: 2100,
510
+ avg_tokens_per_failure: 0,
511
+ total_tokens: 21000,
512
+ invocation_count: 10,
513
+ efficiency_score: 1.2,
514
+ trend: 'improving',
515
+ },
516
+ },
517
+ };
518
+ writeFileSync(join(TEST_LEARNING_DIR, 'agent-performance.json'), JSON.stringify(agentPerformance, null, 2));
519
+ });
520
+ it('should handle --efficiency flag data format', () => {
521
+ // Read back the agent-performance.json fixture written by this suite's beforeEach; no CLI code is invoked here
522
+ const perfPath = join(TEST_LEARNING_DIR, 'agent-performance.json');
523
+ const perfData = JSON.parse(readFileSync(perfPath, 'utf-8'));
524
+ // Keep only agents that carry token_efficiency, then check each has positive averages/scores and a recognised trend label
525
+ const agentsWithTokens = Object.values(perfData).filter(a => a.token_efficiency);
526
+ expect(agentsWithTokens.length).toBeGreaterThan(0);
527
+ for (const agent of agentsWithTokens) {
528
+ expect(agent.token_efficiency).toBeDefined();
529
+ expect(agent.token_efficiency.avg_tokens_per_success).toBeGreaterThan(0);
530
+ expect(agent.token_efficiency.efficiency_score).toBeGreaterThan(0);
531
+ expect(agent.token_efficiency.trend).toMatch(/^(improving|stable|declining|insufficient_data)$/);
532
+ }
533
+ });
534
+ it('should handle --show-costs flag data format', () => {
535
+ const perfPath = join(TEST_LEARNING_DIR, 'agent-performance.json');
536
+ const perfData = JSON.parse(readFileSync(perfPath, 'utf-8'));
537
+ // Cost calculation data should be available
538
+ const agentsWithTokens = Object.values(perfData).filter(a => a.token_efficiency);
539
+ for (const agent of agentsWithTokens) {
540
+ const { total_tokens, invocation_count } = agent.token_efficiency;
541
+ expect(total_tokens).toBeGreaterThan(0);
542
+ expect(invocation_count).toBeGreaterThan(0);
543
+ // Can calculate average cost
544
+ const avgTokens = total_tokens / invocation_count;
545
+ expect(avgTokens).toBeGreaterThan(0);
546
+ }
547
+ });
548
+ it('should handle --budget-status flag data format', () => {
549
+ const sessionId = 'cli-budget-test';
550
+ let state = loadSessionState(TEST_DIR, sessionId);
551
+ state = updateTokenBudget(state, 8000);
552
+ saveSessionState(TEST_DIR, state);
553
+ // Reload and verify budget status data
554
+ const reloaded = loadSessionState(TEST_DIR, sessionId);
555
+ expect(reloaded.token_budget).toBeDefined();
556
+ expect(reloaded.token_budget.session_baseline).toBe(10000);
557
+ expect(reloaded.token_budget.current_usage).toBe(8000);
558
+ expect(reloaded.token_budget.warning_threshold).toBe(1.5);
559
+ // Calculate percentage for display
560
+ const percentage = (reloaded.token_budget.current_usage / reloaded.token_budget.session_baseline) * 100;
561
+ expect(percentage).toBe(80);
562
+ });
563
+ it('should gracefully handle no-data case for all CLI commands', () => {
564
+ // Remove agent performance file
565
+ const perfPath = join(TEST_LEARNING_DIR, 'agent-performance.json');
566
+ if (existsSync(perfPath)) {
567
+ rmSync(perfPath);
568
+ }
569
+ // Should not throw when reading non-existent data
570
+ expect(() => {
571
+ if (existsSync(perfPath)) {
572
+ readFileSync(perfPath, 'utf-8');
573
+ }
574
+ }).not.toThrow();
575
+ // Session state should still work even without agent performance
576
+ const sessionId = 'no-data-test';
577
+ const state = loadSessionState(TEST_DIR, sessionId);
578
+ expect(state.token_budget).toBeDefined();
579
+ });
580
+ });
581
+ describe('Integration Test 6: Performance and Regression', () => {
582
+ it('should not cause noticeable slowdown in hook execution', () => {
583
+ const startTime = Date.now();
584
+ // Simulate typical hook workflow
585
+ const sessionId = 'perf-test-1';
586
+ let state = loadSessionState(TEST_DIR, sessionId);
587
+ for (let i = 0; i < 10; i++) {
588
+ const feedback = {
589
+ id: randomUUID(),
590
+ timestamp: new Date(Date.now() + i * 1000).toISOString(),
591
+ session_id: sessionId,
592
+ project_path: TEST_DIR,
593
+ event_type: 'success',
594
+ agent_used: 'olympian',
595
+ original_task: `Task ${i}`,
596
+ user_message: `Task ${i} done`,
597
+ feedback_category: 'praise',
598
+ confidence: 0.9,
599
+ token_usage: {
600
+ input_tokens: 2000 + i * 100,
601
+ output_tokens: 1000 + i * 50,
602
+ total_tokens: 3000 + i * 150,
603
+ estimated: false,
604
+ },
605
+ };
606
+ appendFeedback(feedback);
607
+ state = updateTokenBudget(state, feedback.token_usage.total_tokens);
608
+ }
609
+ // Update performance once at the end (typical pattern)
610
+ const allFeedback = readFeedbackLog();
611
+ updateAgentPerformance('olympian', allFeedback);
612
+ saveSessionState(TEST_DIR, state);
613
+ const elapsed = Date.now() - startTime;
614
+ // Should complete in reasonable time (< 2 seconds for 10 iterations)
615
+ expect(elapsed).toBeLessThan(2000);
616
+ });
617
+ it('should maintain existing functionality without regressions', () => {
618
+ // Verify core features still work
619
+ const sessionId = 'regression-test';
620
+ const state = loadSessionState(TEST_DIR, sessionId);
621
+ // Session state core functionality
622
+ expect(state.session_id).toBe(sessionId);
623
+ expect(state.started_at).toBeDefined();
624
+ expect(state.token_budget).toBeDefined();
625
+ // Feedback capture still works
626
+ const feedback = {
627
+ id: randomUUID(),
628
+ timestamp: new Date().toISOString(),
629
+ session_id: sessionId,
630
+ project_path: TEST_DIR,
631
+ event_type: 'success',
632
+ agent_used: 'explore',
633
+ original_task: 'Search codebase',
634
+ user_message: 'Found it',
635
+ feedback_category: 'praise',
636
+ confidence: 0.9,
637
+ token_usage: {
638
+ input_tokens: 500,
639
+ output_tokens: 200,
640
+ total_tokens: 700,
641
+ estimated: false,
642
+ },
643
+ };
644
+ expect(() => appendFeedback(feedback)).not.toThrow();
645
+ // Agent performance update still works
646
+ const allFeedback = readFeedbackLog();
647
+ expect(() => updateAgentPerformance('explore', allFeedback)).not.toThrow();
648
+ });
649
+ it('should handle concurrent sessions without conflicts', () => {
650
+ // Session state is stored per directory, so create separate directories
651
+ const sessionDirs = ['concurrent-1', 'concurrent-2', 'concurrent-3'].map(id => join(TEST_DIR, id));
652
+ // Create directories and load states
653
+ const states = sessionDirs.map((dir, i) => {
654
+ mkdirSync(dir, { recursive: true });
655
+ return loadSessionState(dir, `session-${i}`);
656
+ });
657
+ // Update each session independently
658
+ for (let i = 0; i < states.length; i++) {
659
+ states[i] = updateTokenBudget(states[i], 5000 * (i + 1));
660
+ saveSessionState(sessionDirs[i], states[i]);
661
+ }
662
+ // Reload and verify independence
663
+ for (let i = 0; i < sessionDirs.length; i++) {
664
+ const reloaded = loadSessionState(sessionDirs[i], `session-${i}`);
665
+ expect(reloaded.token_budget.current_usage).toBe(5000 * (i + 1));
666
+ }
667
+ });
668
+ });
669
+ //# sourceMappingURL=token-tracking-integration.test.js.map