arbiter-ai 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/assets/jerom_16x16.png +0 -0
- package/dist/arbiter.d.ts +43 -0
- package/dist/arbiter.js +486 -0
- package/dist/context-analyzer.d.ts +15 -0
- package/dist/context-analyzer.js +603 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +165 -0
- package/dist/orchestrator.d.ts +31 -0
- package/dist/orchestrator.js +227 -0
- package/dist/router.d.ts +187 -0
- package/dist/router.js +1135 -0
- package/dist/router.test.d.ts +15 -0
- package/dist/router.test.js +95 -0
- package/dist/session-persistence.d.ts +9 -0
- package/dist/session-persistence.js +63 -0
- package/dist/session-persistence.test.d.ts +1 -0
- package/dist/session-persistence.test.js +165 -0
- package/dist/sound.d.ts +31 -0
- package/dist/sound.js +50 -0
- package/dist/state.d.ts +72 -0
- package/dist/state.js +107 -0
- package/dist/state.test.d.ts +1 -0
- package/dist/state.test.js +194 -0
- package/dist/test-headless.d.ts +1 -0
- package/dist/test-headless.js +155 -0
- package/dist/tui/index.d.ts +14 -0
- package/dist/tui/index.js +17 -0
- package/dist/tui/layout.d.ts +30 -0
- package/dist/tui/layout.js +200 -0
- package/dist/tui/render.d.ts +57 -0
- package/dist/tui/render.js +266 -0
- package/dist/tui/scene.d.ts +64 -0
- package/dist/tui/scene.js +366 -0
- package/dist/tui/screens/CharacterSelect-termkit.d.ts +18 -0
- package/dist/tui/screens/CharacterSelect-termkit.js +216 -0
- package/dist/tui/screens/ForestIntro-termkit.d.ts +15 -0
- package/dist/tui/screens/ForestIntro-termkit.js +856 -0
- package/dist/tui/screens/GitignoreCheck-termkit.d.ts +14 -0
- package/dist/tui/screens/GitignoreCheck-termkit.js +185 -0
- package/dist/tui/screens/TitleScreen-termkit.d.ts +14 -0
- package/dist/tui/screens/TitleScreen-termkit.js +132 -0
- package/dist/tui/screens/index.d.ts +9 -0
- package/dist/tui/screens/index.js +10 -0
- package/dist/tui/tileset.d.ts +97 -0
- package/dist/tui/tileset.js +237 -0
- package/dist/tui/tui-termkit.d.ts +34 -0
- package/dist/tui/tui-termkit.js +2602 -0
- package/dist/tui/types.d.ts +41 -0
- package/dist/tui/types.js +4 -0
- package/package.json +71 -0
|
@@ -0,0 +1,603 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Analyzer - Systematic SDK Context Usage Analysis
|
|
3
|
+
*
|
|
4
|
+
* This tool helps analyze how context window usage is tracked in the Claude Agent SDK.
|
|
5
|
+
* It captures all usage data in a structured format for analysis.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* npm run analyze:context -- [options]
|
|
9
|
+
*
|
|
10
|
+
* Options:
|
|
11
|
+
* --subagents Allow subagent usage (default: no subagents)
|
|
12
|
+
* --prompts=N Number of test prompts to send (default: 3)
|
|
13
|
+
* --output=FILE Output prefix for CSV/JSON files (default: context-analysis)
|
|
14
|
+
*/
|
|
15
|
+
import * as fs from 'node:fs';
|
|
16
|
+
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
17
|
+
/**
 * Type guard: reports whether `msg` is an SDK message of type "assistant".
 *
 * @param {unknown} msg - Any value yielded by the SDK message stream.
 * @returns {boolean} `true` only for non-null objects that carry a `type`
 *   property equal to `'assistant'`.
 */
function isAssistantMessage(msg) {
    // Primitives and null can never be assistant messages.
    if (msg === null || typeof msg !== 'object') {
        return false;
    }
    if (!('type' in msg)) {
        return false;
    }
    return msg.type === 'assistant';
}
|
|
24
|
+
// ============================================================================
|
|
25
|
+
// Constants
|
|
26
|
+
// ============================================================================
|
|
27
|
+
// Context window size used as the denominator for every percentage column.
const MAX_CONTEXT_TOKENS = 200_000;

// Prompts used when subagents are disabled.
const TEST_PROMPTS_NO_SUBAGENT = [
    // Short factual questions
    'What is 2 + 2? Reply with just the number.',
    'What is the capital of France? Reply with just the city name.',
    'What color is the sky? Reply with just one word.',
    'Name three programming languages.',
    'What is the largest planet in our solar system?',
    // Questions asking for a slightly longer answer
    'Explain what a variable is in programming in 2-3 sentences.',
    'What is the difference between a list and a dictionary in Python?',
    'Name the four cardinal directions.',
    'What does API stand for?',
    'Explain what recursion is in one paragraph.',
    'What is the time complexity of binary search?',
    'Name three popular JavaScript frameworks.',
];

// Prompts used when --subagents is passed; each asks about files in the project.
const TEST_PROMPTS_WITH_SUBAGENT = [
    'Read the package.json file and tell me what dependencies this project has.',
    'Look at the src/router.ts file and summarize what it does in 2-3 sentences.',
    'Find all TypeScript files in the src directory and list them.',
    'Read the CLAUDE.md file and tell me what this project is about.',
    'Look at src/state.ts and explain the AppState interface.',
    "Search for any files that mention 'context' in their name.",
    'Read src/arbiter.ts and tell me what MCP tools are defined there.',
    'Find all files in .claude/knowledge directory.',
    'Look at the tsconfig.json and tell me the TypeScript target version.',
    "Search for 'spawn_orchestrator' in the codebase and tell me where it's used.",
    'Read src/orchestrator.ts and summarize its purpose.',
    'Find any test files in the project and list them.',
];
|
|
56
|
+
// ============================================================================
|
|
57
|
+
// Parsing Utilities
|
|
58
|
+
// ============================================================================
|
|
59
|
+
/**
 * Parse a human-formatted token count such as "18.4k", "3.1k", "8",
 * "181.6k" or "1,234" into an integer number of tokens.
 *
 * A trailing `k` (x1,000) or `m` (x1,000,000) suffix is accepted in either
 * case. Thousands separators (commas) are ignored.
 *
 * @param {string|null|undefined} str - Raw token text.
 * @returns {number} The parsed token count, or 0 when the input is empty or
 *   does not start with a number (never NaN).
 */
function parseTokenCount(str) {
    if (!str) {
        return 0;
    }
    const cleaned = String(str).replace(/,/g, '').trim();
    const suffix = cleaned.slice(-1).toLowerCase();
    const multiplier = suffix === 'k' ? 1_000 : suffix === 'm' ? 1_000_000 : null;
    if (multiplier !== null) {
        const value = Number.parseFloat(cleaned.slice(0, -1));
        // A bare suffix ("k") or garbage before it must not leak NaN into the totals.
        return Number.isFinite(value) ? Math.round(value * multiplier) : 0;
    }
    return Number.parseInt(cleaned, 10) || 0;
}
|
|
71
|
+
/**
 * Parse the text produced by the `/context` command into token counts.
 *
 * Recognised input (markdown):
 *   **Tokens:** 18.4k / 200.0k (9%)
 *   | System prompt | 3.1k | 1.6% |
 *   | Messages | 8 | 0.0% |
 *
 * The context-window size after the slash may be any number with an optional
 * `k`/`m` suffix, and the percentage may be fractional.
 *
 * @param {string} output - Raw `/context` text; non-string input is treated
 *   as empty text.
 * @returns {{raw_output: *, total_tokens: number, total_percent: number,
 *   messages_tokens: number, system_tokens: number}} Parsed counts. Fields
 *   that could not be found stay 0; `raw_output` echoes the argument.
 */
function parseContextOutput(output) {
    const text = typeof output === 'string' ? output : '';
    const result = {
        raw_output: output,
        total_tokens: 0,
        total_percent: 0,
        messages_tokens: 0,
        system_tokens: 0,
    };
    // Total line: **Tokens:** <used> / <window> (<percent>%)
    const totalMatch = text.match(/\*\*Tokens:\*\*\s*([0-9,.]+k?)\s*\/\s*[0-9,.]+[km]?\s*\((\d+(?:\.\d+)?)%\)/i);
    if (totalMatch) {
        result.total_tokens = parseTokenCount(totalMatch[1]);
        result.total_percent = Number.parseFloat(totalMatch[2]);
    }
    // Table categories that are summed into system_tokens.
    const systemCategories = new Set([
        'system prompt',
        'system tools',
        'mcp tools',
        'memory',
        'custom agents',
        'memory files',
    ]);
    // Table rows: | Category | Tokens | Percentage |
    const tableRowRegex = /\|\s*([^|]+)\s*\|\s*([0-9,.]+k?)\s*\|\s*([0-9.]+)%\s*\|/g;
    let systemTotal = 0;
    for (const [, rawCategory, rawTokens] of text.matchAll(tableRowRegex)) {
        const category = rawCategory.trim().toLowerCase();
        const tokens = parseTokenCount(rawTokens);
        if (category === 'messages') {
            result.messages_tokens = tokens;
        }
        else if (systemCategories.has(category)) {
            systemTotal += tokens;
        }
    }
    if (systemTotal > 0) {
        result.system_tokens = systemTotal;
    }
    else if (result.total_tokens && result.messages_tokens) {
        // No system rows were found: infer system usage as total minus messages.
        result.system_tokens = result.total_tokens - result.messages_tokens;
    }
    return result;
}
|
|
121
|
+
/**
 * Build a short, single-line preview of an SDK message for logs and CSV.
 *
 * - assistant: the string content, or the first non-empty text block
 * - user: the string content
 * - result / system: `[result: <subtype>]` / `[system: <subtype>]`
 * - anything else (or when no text is found): `[<type>]`
 *
 * @param {object|null|undefined} message - SDK message.
 * @param {number} [maxLen=100] - Maximum number of characters to keep.
 * @returns {string} The preview; `'[unknown]'` for a null/undefined message.
 */
function getTextPreview(message, maxLen = 100) {
    // Truncate first, then flatten newlines.
    const clip = (text) => text.slice(0, maxLen).replace(/\n/g, ' ');
    try {
        const { type } = message;
        if (type === 'assistant' || type === 'user') {
            const content = message.message?.content;
            if (typeof content === 'string') {
                return clip(content);
            }
            // Only assistant messages are searched for text blocks.
            if (type === 'assistant' && Array.isArray(content)) {
                const textBlock = content.find((block) => block.type === 'text' && block.text);
                if (textBlock) {
                    return clip(textBlock.text);
                }
            }
        }
        else if (type === 'result' || type === 'system') {
            return `[${type}: ${message.subtype}]`;
        }
    }
    catch {
        // Best effort: a malformed message falls through to the generic label.
    }
    // Guarded so that a null/undefined message cannot throw outside the try.
    return message == null ? '[unknown]' : `[${message.type}]`;
}
|
|
159
|
+
// ============================================================================
|
|
160
|
+
// Core Analyzer
|
|
161
|
+
// ============================================================================
|
|
162
|
+
/**
 * Drives one analysis session: takes a `/context` snapshot, sends test
 * prompts, takes a final snapshot, and records per-message usage figures plus
 * several candidate formulas for estimating context usage.
 */
class ContextAnalyzer {
    // Working directory handed to every SDK query.
    cwd;
    // Selects which system prompt sendPrompt() uses for a brand-new session.
    useSubagents;
    // message.id values already counted; used to de-duplicate usage rows.
    seenMsgIds = new Set();
    maxCacheRead = 0;
    sumCacheCreate = 0;
    sumInput = 0;
    sumOutput = 0;
    // Combined metric tracking: cache_read + cache_create
    firstCombinedRC = 0; // (cache_read + cache_create) of the first recorded message
    maxCombinedRC = 0; // max(cache_read + cache_create) seen so far
    // One row per unique assistant message, in arrival order.
    messages = [];
    seq = 0;
    startTime;
    sessionId = null;
    /**
     * @param {string} cwd - Working directory for SDK queries.
     * @param {boolean} useSubagents - Whether the tool-using system prompt is used.
     */
    constructor(cwd, useSubagents) {
        this.cwd = cwd;
        this.useSubagents = useSubagents;
        this.startTime = Date.now();
    }
    /**
     * Build the query options shared by every call, resuming the current
     * session when one has been captured.
     */
    #buildOptions() {
        const options = {
            cwd: this.cwd,
            permissionMode: 'bypassPermissions',
        };
        if (this.sessionId) {
            options.resume = this.sessionId;
        }
        return options;
    }
    /** Remember the session id announced by a `system` / `init` message. */
    #captureSessionId(msg) {
        if (msg.type === 'system' && msg.subtype === 'init' && msg.session_id) {
            this.sessionId = msg.session_id;
        }
    }
    /**
     * Run the `/context` command and parse its output.
     * Resumes the current session when one exists.
     *
     * @param {string} label - Tag used in log lines and stored on the snapshot.
     * @returns {Promise<object>} Snapshot with timestamp, elapsed_ms, label,
     *   parsed token counts and the trimmed raw output.
     */
    async runContextCommand(label) {
        const sessionNote = this.sessionId ? ` (session: ${this.sessionId.slice(0, 8)}...)` : ' (new session)';
        console.log(`\n[${label}] Running /context command...${sessionNote}`);
        const q = query({
            prompt: '/context',
            options: this.#buildOptions(),
        });
        let rawOutput = '';
        for await (const msg of q) {
            this.#captureSessionId(msg);
            // The /context text can arrive on user, result or assistant messages.
            if (msg.type === 'user') {
                const content = msg.message?.content;
                if (typeof content === 'string') {
                    rawOutput += `${content}\n`;
                }
            }
            if (msg.type === 'result') {
                const resultContent = msg.result;
                if (typeof resultContent === 'string') {
                    rawOutput += `${resultContent}\n`;
                }
            }
            if (msg.type === 'assistant') {
                // Assistant text is only kept when it mentions "tokens".
                const content = msg.message?.content;
                if (typeof content === 'string') {
                    if (content.includes('tokens')) {
                        rawOutput += `${content}\n`;
                    }
                }
                else if (Array.isArray(content)) {
                    for (const block of content) {
                        if (block.type === 'text' && block.text?.includes('tokens')) {
                            rawOutput += `${block.text}\n`;
                        }
                    }
                }
            }
        }
        const parsed = parseContextOutput(rawOutput);
        const snapshot = {
            timestamp: new Date().toISOString(),
            elapsed_ms: Date.now() - this.startTime,
            label,
            total_tokens: parsed.total_tokens || 0,
            total_percent: parsed.total_percent || 0,
            messages_tokens: parsed.messages_tokens || 0,
            system_tokens: parsed.system_tokens || 0,
            raw_output: rawOutput.trim(),
        };
        console.log(`[${label}] Total: ${snapshot.total_tokens} tokens (${snapshot.total_percent}%)`);
        console.log(`[${label}] Messages: ${snapshot.messages_tokens}, System: ${snapshot.system_tokens}`);
        return snapshot;
    }
    /**
     * Record usage data from a single SDK message.
     *
     * @param {object} msg - SDK message.
     * @returns {object|null} The recorded row, or null when the message is not
     *   an assistant message, lacks usage or id, or its message.id was already
     *   counted.
     */
    processMessage(msg) {
        // Only assistant messages carry usage data.
        if (msg.type !== 'assistant') {
            return null;
        }
        const usage = msg.message?.usage;
        const msgId = msg.message?.id;
        const uuid = msg.uuid;
        if (!usage || !msgId) {
            return null;
        }
        // Dedupe by message.id (NOT uuid).
        if (this.seenMsgIds.has(msgId)) {
            return null;
        }
        this.seenMsgIds.add(msgId);
        // Raw values; missing fields count as 0.
        const cacheRead = usage.cache_read_input_tokens || 0;
        const cacheCreate = usage.cache_creation_input_tokens || 0;
        const input = usage.input_tokens || 0;
        const output = usage.output_tokens || 0;
        const combinedRC = cacheRead + cacheCreate;
        // Running totals.
        this.maxCacheRead = Math.max(this.maxCacheRead, cacheRead);
        this.sumCacheCreate += cacheCreate;
        this.sumInput += input;
        this.sumOutput += output;
        // The first recorded message fixes the baseline, even when its value is 0.
        // (Testing `firstCombinedRC === 0` would let a later message overwrite it.)
        if (this.messages.length === 0) {
            this.firstCombinedRC = combinedRC;
        }
        this.maxCombinedRC = Math.max(this.maxCombinedRC, combinedRC);
        // Candidate formulas.
        const formulaMaxOnly = this.maxCacheRead;
        const formulaMaxPlusSums = this.maxCacheRead + this.sumInput + this.sumOutput;
        const formulaCreatePlusSums = this.sumCacheCreate + this.sumInput + this.sumOutput;
        const formulaCombinedGrowth = this.maxCombinedRC - this.firstCombinedRC;
        const asPercent = (tokens) => (tokens / MAX_CONTEXT_TOKENS) * 100;
        const row = {
            seq: ++this.seq,
            timestamp: new Date().toISOString(),
            elapsed_ms: Date.now() - this.startTime,
            message_id: msgId,
            uuid,
            type: msg.type,
            cache_read: cacheRead,
            cache_create: cacheCreate,
            input,
            output,
            running_max_cache_read: this.maxCacheRead,
            running_sum_cache_create: this.sumCacheCreate,
            running_sum_input: this.sumInput,
            running_sum_output: this.sumOutput,
            // Combined metric tracking
            combined_rc: combinedRC,
            running_max_combined_rc: this.maxCombinedRC,
            first_combined_rc: this.firstCombinedRC,
            formula_max_only: formulaMaxOnly,
            formula_max_plus_sums: formulaMaxPlusSums,
            formula_create_plus_sums: formulaCreatePlusSums,
            formula_combined_growth: formulaCombinedGrowth,
            pct_max_only: asPercent(formulaMaxOnly),
            pct_max_plus_sums: asPercent(formulaMaxPlusSums),
            pct_create_plus_sums: asPercent(formulaCreatePlusSums),
            pct_combined_growth: asPercent(formulaCombinedGrowth),
            unique_api_calls: this.seenMsgIds.size,
            text_preview: getTextPreview(msg),
        };
        this.messages.push(row);
        // Log progress
        console.log(` [${row.seq}] ${msgId.slice(0, 12)}... r=${cacheRead}, c=${cacheCreate}, r+c=${combinedRC}`);
        console.log(` max(r+c)=${this.maxCombinedRC}, growth=${formulaCombinedGrowth}`);
        return row;
    }
    /**
     * Send one prompt and record every assistant message it produces.
     * Resumes the current session when one exists.
     *
     * @param {string} prompt - Prompt text.
     * @param {number} promptNum - 1-based index used in the log line.
     * @returns {Promise<void>}
     */
    async sendPrompt(prompt, promptNum) {
        const sessionNote = this.sessionId ? ` (session: ${this.sessionId.slice(0, 8)}...)` : '';
        console.log(`\n[Prompt ${promptNum}] Sending: "${prompt}"${sessionNote}`);
        const options = this.#buildOptions();
        // A system prompt is only supplied when starting a new session.
        // NOTE(review): run() takes the BASELINE snapshot first, which captures
        // a session id whenever the init message carries one, so within run()
        // this branch appears to be skipped - confirm that is intended.
        if (!this.sessionId) {
            options.systemPrompt = this.useSubagents
                ? 'You are a helpful assistant. Use tools as needed to complete tasks. Do NOT use non-blocking subagents - only use blocking tool calls. Be concise in your responses.'
                : 'You are a helpful assistant. Do NOT use any tools or spawn any subagents. Answer directly and concisely.';
        }
        const q = query({
            prompt,
            options,
        });
        for await (const msg of q) {
            this.#captureSessionId(msg);
            if (isAssistantMessage(msg)) {
                this.processMessage(msg);
            }
        }
    }
    /**
     * Run the full analysis: baseline snapshot, each prompt in order, final
     * snapshot.
     *
     * @param {string[]} prompts - Prompts to send, one after another.
     * @returns {Promise<object>} Session info, both snapshots, all rows and a
     *   summary of totals and formula values.
     */
    async run(prompts) {
        const rule = '='.repeat(60);
        console.log(rule);
        console.log('CONTEXT ANALYZER - SDK Usage Analysis');
        console.log(rule);
        console.log(`CWD: ${this.cwd}`);
        console.log(`Subagents: ${this.useSubagents ? 'ENABLED' : 'DISABLED'}`);
        console.log(`Prompts: ${prompts.length}`);
        console.log(rule);
        const startedAt = new Date().toISOString();
        // Step 1: baseline context
        const baseline = await this.runContextCommand('BASELINE');
        // Step 2: test prompts, sequentially (each resumes the same session)
        for (const [index, prompt] of prompts.entries()) {
            await this.sendPrompt(prompt, index + 1);
        }
        // Step 3: final context
        const final = await this.runContextCommand('FINAL');
        const endedAt = new Date().toISOString();
        return {
            session_id: this.sessionId || 'unknown',
            started_at: startedAt,
            ended_at: endedAt,
            duration_ms: Date.now() - this.startTime,
            cwd: this.cwd,
            used_subagents: this.useSubagents,
            baseline,
            final,
            messages: this.messages,
            summary: {
                total_messages: this.messages.length,
                unique_api_calls: this.seenMsgIds.size,
                final_max_cache_read: this.maxCacheRead,
                final_sum_cache_create: this.sumCacheCreate,
                final_sum_input: this.sumInput,
                final_sum_output: this.sumOutput,
                // Combined metric tracking
                first_combined_rc: this.firstCombinedRC,
                final_max_combined_rc: this.maxCombinedRC,
                // Calculated formulas
                calculated_max_only: this.maxCacheRead,
                calculated_max_plus_sums: this.maxCacheRead + this.sumInput + this.sumOutput,
                calculated_create_plus_sums: this.sumCacheCreate + this.sumInput + this.sumOutput,
                calculated_combined_growth: this.maxCombinedRC - this.firstCombinedRC,
                actual_total: final.total_tokens,
                actual_messages: final.messages_tokens,
            },
        };
    }
    /**
     * Get the current running totals (for access during analysis).
     *
     * @returns {object} Copy of the counters plus the derived combinedGrowth.
     */
    getRunningTotals() {
        return {
            maxCacheRead: this.maxCacheRead,
            sumCacheCreate: this.sumCacheCreate,
            sumInput: this.sumInput,
            sumOutput: this.sumOutput,
            firstCombinedRC: this.firstCombinedRC,
            maxCombinedRC: this.maxCombinedRC,
            combinedGrowth: this.maxCombinedRC - this.firstCombinedRC,
            uniqueApiCalls: this.seenMsgIds.size,
        };
    }
}
|
|
442
|
+
// ============================================================================
|
|
443
|
+
// Output Formatters
|
|
444
|
+
// ============================================================================
|
|
445
|
+
/**
 * Serialise recorded message rows as CSV text.
 *
 * Percentage columns are written with two decimals; `text_preview` is always
 * wrapped in double quotes with embedded quotes doubled. All other cells are
 * written as-is.
 *
 * @param {object[]} messages - Rows produced by the analyzer.
 * @returns {string} Header line followed by one line per row, joined by "\n".
 */
function toCSV(messages) {
    // Columns emitted verbatim, in output order.
    const plainColumns = [
        'seq',
        'timestamp',
        'elapsed_ms',
        'message_id',
        'uuid',
        'type',
        'cache_read',
        'cache_create',
        'input',
        'output',
        'combined_rc',
        'running_max_combined_rc',
        'first_combined_rc',
        'running_max_cache_read',
        'running_sum_cache_create',
        'running_sum_input',
        'running_sum_output',
        'formula_max_only',
        'formula_max_plus_sums',
        'formula_create_plus_sums',
        'formula_combined_growth',
    ];
    // Columns formatted with toFixed(2).
    const percentColumns = [
        'pct_max_only',
        'pct_max_plus_sums',
        'pct_create_plus_sums',
        'pct_combined_growth',
    ];
    const headerLine = [...plainColumns, ...percentColumns, 'unique_api_calls', 'text_preview'].join(',');
    const toLine = (m) => {
        const cells = plainColumns.map((key) => m[key]);
        for (const key of percentColumns) {
            cells.push(m[key].toFixed(2));
        }
        cells.push(m.unique_api_calls);
        cells.push(`"${m.text_preview.replace(/"/g, '""')}"`);
        return cells.join(',');
    };
    const lines = [headerLine];
    for (const m of messages) {
        lines.push(toLine(m));
    }
    return lines.join('\n');
}
|
|
506
|
+
/**
 * Print a human-readable summary of an analysis result to the console.
 *
 * When a baseline snapshot and at least one recorded message exist, the
 * "new" formula (baseline + max(r+c) - first(r+c) + sum(in) + sum(out)) and
 * the "old" formula (baseline + sum(create) - first(create) + in + out) are
 * compared with the total reported by the final `/context` snapshot.
 * Percentage errors are shown as `n/a` when that total is 0, because no
 * relative error can be computed.
 *
 * @param {object} result - Result object with `session_id`, `duration_ms`,
 *   `used_subagents`, `baseline`, `final`, `messages` and `summary`.
 * @returns {void}
 */
function printSummary(result) {
    const rule = '='.repeat(60);
    const s = result.summary;
    // Locale-formatted number, right-aligned.
    const pad = (value, width = 10) => value.toLocaleString().padStart(width);
    console.log(`\n${rule}`);
    console.log('ANALYSIS SUMMARY');
    console.log(rule);
    console.log('\n--- Session Info ---');
    console.log(`Session ID: ${result.session_id}`);
    console.log(`Duration: ${result.duration_ms}ms`);
    console.log(`Subagents: ${result.used_subagents ? 'ENABLED' : 'DISABLED'}`);
    console.log(`Total unique API calls: ${s.unique_api_calls}`);
    console.log('\n--- Raw Totals ---');
    console.log(`max(cache_read): ${s.final_max_cache_read}`);
    console.log(`sum(cache_create): ${s.final_sum_cache_create}`);
    console.log(`sum(input): ${s.final_sum_input}`);
    console.log(`sum(output): ${s.final_sum_output}`);
    console.log('\n--- Combined Metric (cache_read + cache_create) ---');
    console.log(`first(r+c): ${pad(s.first_combined_rc)}`);
    console.log(`max(r+c): ${pad(s.final_max_combined_rc)}`);
    console.log(`growth: ${pad(s.calculated_combined_growth)}`);
    if (result.final) {
        console.log('\n--- Actual /context Output ---');
        console.log(`Total: ${pad(result.final.total_tokens || 0)} (${result.final.total_percent}%)`);
        console.log(`Messages: ${pad(result.final.messages_tokens || 0)}`);
        console.log(`System: ${pad(result.final.system_tokens || 0)}`);
        if (result.baseline && result.messages.length > 0) {
            const baselineTotal = result.baseline.total_tokens || 0;
            const actual = result.final.total_tokens || 0;
            // Relative error against the /context total; undefined when that total is 0.
            const signedPct = (diff) => actual > 0 ? `${diff >= 0 ? '+' : ''}${((diff / actual) * 100).toFixed(2)}%` : 'n/a';
            const absPct = (diff) => actual > 0 ? `${Math.abs((diff / actual) * 100).toFixed(2)}%` : 'n/a';
            console.log(`\n${rule}`);
            console.log('★ THE FORMULA: baseline + max(r+c) - first(r+c) + sum(i+o) ★');
            console.log(rule);
            console.log(`Baseline total: ${pad(baselineTotal)}`);
            console.log(`max(cache_read+create): ${pad(s.final_max_combined_rc)}`);
            console.log(`first(cache_read+create): ${pad(s.first_combined_rc, 9)} (subtract)`);
            console.log(`sum(input): ${pad(s.final_sum_input)}`);
            console.log(`sum(output): ${pad(s.final_sum_output)}`);
            const formulaNew = baselineTotal + s.calculated_combined_growth + s.final_sum_input + s.final_sum_output;
            const diffNew = formulaNew - actual;
            console.log(`─────────────────────────────────────`);
            console.log(`Calculated: ${pad(formulaNew)}`);
            console.log(`Actual (/context): ${pad(actual)}`);
            console.log(`Difference: ${diffNew >= 0 ? '+' : ''}${pad(diffNew, 9)} (${signedPct(diffNew)})`);
            // Old formula, shown for comparison.
            console.log('\n--- Old Formula Comparison ---');
            const firstCacheCreate = result.messages[0].cache_create;
            const formulaOld = baselineTotal +
                (s.final_sum_cache_create - firstCacheCreate) +
                s.final_sum_input +
                s.final_sum_output;
            const diffOld = formulaOld - actual;
            console.log(`Old (baseline + sum(create) - first(create) + in + out):`);
            console.log(` Calculated: ${pad(formulaOld)}, Error: ${signedPct(diffOld)}`);
            console.log('\n--- Formula Accuracy Comparison ---');
            console.log(`NEW (baseline + max(r+c) - first(r+c)): ${absPct(diffNew)} error`);
            console.log(`OLD (baseline + Σcache_create - first): ${absPct(diffOld)} error`);
        }
    }
    console.log(`\n${rule}`);
}
|
|
568
|
+
// ============================================================================
|
|
569
|
+
// Main
|
|
570
|
+
// ============================================================================
|
|
571
|
+
/**
 * CLI entry point: parse arguments, run the analysis, print the summary and
 * write `<prefix>-<timestamp>.json` and `<prefix>-<timestamp>.csv`.
 *
 * Recognised arguments:
 *   --subagents            use the tool-oriented prompt set
 *   --prompts N | =N       number of prompts to send (positive integer, default 3)
 *   --output FILE | =FILE  output file prefix (default "context-analysis")
 *
 * Exits the process with code 1 when the analysis or the file output fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const args = process.argv.slice(2);
    // Read an option given as either `--name=value` or `--name value`.
    const readOption = (name) => {
        const flag = `--${name}`;
        for (let i = 0; i < args.length; i++) {
            const arg = args[i];
            if (arg.startsWith(`${flag}=`)) {
                // Keep everything after the first "=" so values may contain "=".
                return arg.slice(flag.length + 1);
            }
            const next = args[i + 1];
            if (arg === flag && next !== undefined && !next.startsWith('--')) {
                return next;
            }
        }
        return undefined;
    };
    const useSubagents = args.includes('--subagents');
    const outputPrefix = readOption('output') || 'context-analysis';
    // Anything that is not a positive integer falls back to the default of 3.
    const requestedPrompts = Number.parseInt(readOption('prompts') ?? '', 10);
    const numPrompts = Number.isInteger(requestedPrompts) && requestedPrompts > 0 ? requestedPrompts : 3;
    const promptPool = useSubagents ? TEST_PROMPTS_WITH_SUBAGENT : TEST_PROMPTS_NO_SUBAGENT;
    if (numPrompts > promptPool.length) {
        console.warn(`Only ${promptPool.length} prompts are available; sending ${promptPool.length} instead of ${numPrompts}.`);
    }
    const prompts = promptPool.slice(0, numPrompts);
    const cwd = process.cwd();
    const analyzer = new ContextAnalyzer(cwd, useSubagents);
    try {
        const result = await analyzer.run(prompts);
        // Print summary
        printSummary(result);
        // Save outputs; ":" and "." are replaced so the timestamp is filename-safe.
        const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
        const jsonFile = `${outputPrefix}-${timestamp}.json`;
        const csvFile = `${outputPrefix}-${timestamp}.csv`;
        fs.writeFileSync(jsonFile, JSON.stringify(result, null, 2));
        console.log(`\nJSON output saved to: ${jsonFile}`);
        fs.writeFileSync(csvFile, toCSV(result.messages));
        console.log(`CSV output saved to: ${csvFile}`);
    }
    catch (error) {
        console.error('Analysis failed:', error);
        process.exit(1);
    }
}
|
|
603
|
+
// Start the CLI; any rejection that escapes main() is logged to stderr.
main().catch(console.error);
|
package/dist/index.d.ts
ADDED