grov 0.2.3 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,15 +4,236 @@ import Fastify from 'fastify';
4
4
  import { config } from './config.js';
5
5
  import { forwardToAnthropic, isForwardError } from './forwarder.js';
6
6
  import { parseToolUseBlocks, extractTokenUsage } from './action-parser.js';
7
- import { createSessionState, getSessionState, updateSessionState, createStep, updateTokenCount, logDriftEvent, getRecentSteps, getValidatedSteps, updateSessionMode, markWaitingForRecovery, incrementEscalation, updateLastChecked, markCleared, getActiveSessionForUser, deleteSessionState, deleteStepsForSession, updateRecentStepsReasoning, markSessionCompleted, getCompletedSessionForProject, cleanupOldCompletedSessions, } from '../lib/store.js';
7
+ import { createSessionState, getSessionState, updateSessionState, createStep, updateTokenCount, logDriftEvent, getRecentSteps, getValidatedSteps, updateSessionMode, markWaitingForRecovery, incrementEscalation, updateLastChecked, markCleared, getActiveSessionForUser, deleteSessionState, deleteStepsForSession, updateRecentStepsReasoning, markSessionCompleted, getCompletedSessionForProject, cleanupOldCompletedSessions, getKeyDecisions, getEditedFiles, } from '../lib/store.js';
8
+ import { smartTruncate } from '../lib/utils.js';
8
9
  import { checkDrift, scoreToCorrectionLevel, shouldSkipSteps, isDriftCheckAvailable, checkRecoveryAlignment, generateForcedRecovery, } from '../lib/drift-checker-proxy.js';
9
10
  import { buildCorrection, formatCorrectionForInjection } from '../lib/correction-builder-proxy.js';
10
11
  import { generateSessionSummary, isSummaryAvailable, extractIntent, isIntentExtractionAvailable, analyzeTaskContext, isTaskAnalysisAvailable, } from '../lib/llm-extractor.js';
11
12
  import { buildTeamMemoryContext, extractFilesFromMessages } from './request-processor.js';
12
13
  import { saveToTeamMemory } from './response-processor.js';
13
14
  import { randomUUID } from 'crypto';
15
+ import * as fs from 'fs';
16
+ import * as path from 'path';
14
17
  // Store last drift result for recovery alignment check
15
18
  const lastDriftResults = new Map();
19
// Message count seen on the previous request of each session. An unchanged
// count on the next request is treated as a retry rather than a new turn.
const lastMessageCount = new Map();
// Injection text cached per session. It MUST be identical across requests so
// the prompt cache is preserved, and it is kept in memory because the session
// DB state does not exist yet on the first request.
const cachedInjections = new Map();
// Per-session record of what has already been injected (delta tracking).
const sessionInjectionTracking = new Map();
/**
 * Return the injection-tracking record for a session, creating an empty one
 * (three empty Sets: files, decisionIds, reasonings) on first use.
 *
 * @param {string} sessionId - Key into the tracking map.
 * @returns {{files: Set, decisionIds: Set, reasonings: Set}} The stored record.
 */
function getOrCreateTracking(sessionId) {
    if (sessionInjectionTracking.has(sessionId)) {
        return sessionInjectionTracking.get(sessionId);
    }
    const freshRecord = {
        files: new Set(),
        decisionIds: new Set(),
        reasonings: new Set(),
    };
    sessionInjectionTracking.set(sessionId, freshRecord);
    return sessionInjectionTracking.get(sessionId);
}
35
/**
 * Build the dynamic injection text for the last user message (DELTA only).
 *
 * Sections, in order: newly edited files, new key decisions (with reasoning),
 * a pending drift correction, and a pending forced recovery. Files and
 * decisions are added to the session's tracking sets only once they have
 * actually been injected, so nothing is repeated and nothing is dropped.
 *
 * @param {string} sessionId - Session whose edited files / decisions are read.
 * @param {object|null|undefined} sessionState - Session state; only
 *   `pending_correction` and `pending_forced_recovery` are read here.
 * @param {object|null|undefined} logger - Optional logger exposing `info()`.
 * @returns {string|null} The injection text, or null when there is nothing to inject.
 */
function buildDynamicInjection(sessionId, sessionState, logger) {
    const MAX_FILES_PER_INJECTION = 5;
    const MAX_DECISIONS_PER_INJECTION = 3;
    const tracking = getOrCreateTracking(sessionId);
    const parts = [];
    const debugInfo = {};
    // 1. Edited files (delta - not already injected)
    const allEditedFiles = getEditedFiles(sessionId);
    const newFiles = allEditedFiles.filter((f) => !tracking.files.has(f));
    debugInfo.totalEditedFiles = allEditedFiles.length;
    debugInfo.newEditedFiles = newFiles.length;
    debugInfo.alreadyTrackedFiles = tracking.files.size;
    if (newFiles.length > 0) {
        // Track only the files that are injected now. Files beyond the cap stay
        // untracked so a later call injects them instead of silently losing them
        // (this mirrors how decisions are tracked below).
        const filesToInject = newFiles.slice(0, MAX_FILES_PER_INJECTION);
        for (const file of filesToInject) {
            tracking.files.add(file);
        }
        const fileNames = filesToInject.map((f) => f.split('/').pop());
        parts.push(`[EDITED: ${fileNames.join(', ')}]`);
        debugInfo.editedFilesInjected = fileNames;
    }
    // 2. Key decisions with reasoning (delta - not already injected)
    const keyDecisions = getKeyDecisions(sessionId, 5);
    debugInfo.totalKeyDecisions = keyDecisions.length;
    debugInfo.alreadyTrackedDecisions = tracking.decisionIds.size;
    const newDecisions = keyDecisions.filter((d) => !tracking.decisionIds.has(d.id) &&
        d.reasoning &&
        !tracking.reasonings.has(d.reasoning));
    debugInfo.newKeyDecisions = newDecisions.length;
    const decisionsToInject = newDecisions.slice(0, MAX_DECISIONS_PER_INJECTION);
    for (const decision of decisionsToInject) {
        tracking.decisionIds.add(decision.id);
        tracking.reasonings.add(decision.reasoning);
        const truncated = smartTruncate(decision.reasoning, 120);
        parts.push(`[DECISION: ${truncated}]`);
        // Log the original and truncated reasoning for debugging
        if (logger) {
            logger.info({
                msg: 'Key decision reasoning extracted',
                originalLength: decision.reasoning.length,
                truncatedLength: truncated.length,
                original: decision.reasoning.substring(0, 200) + (decision.reasoning.length > 200 ? '...' : ''),
                truncated,
            });
        }
    }
    debugInfo.decisionsInjected = decisionsToInject.length;
    // 3. Drift correction, if one is pending
    if (sessionState?.pending_correction) {
        parts.push(`[DRIFT: ${sessionState.pending_correction}]`);
        debugInfo.hasDriftCorrection = true;
        debugInfo.driftCorrectionLength = sessionState.pending_correction.length;
    }
    // 4. Forced recovery, if one is pending
    if (sessionState?.pending_forced_recovery) {
        parts.push(`[RECOVERY: ${sessionState.pending_forced_recovery}]`);
        debugInfo.hasForcedRecovery = true;
        debugInfo.forcedRecoveryLength = sessionState.pending_forced_recovery.length;
    }
    if (logger) {
        logger.info({
            msg: 'Dynamic injection build details',
            ...debugInfo,
            partsCount: parts.length,
        });
    }
    if (parts.length === 0) {
        return null;
    }
    const injection = '---\n[GROV CONTEXT]\n' + parts.join('\n');
    if (logger) {
        logger.info({
            msg: 'Dynamic injection content',
            size: injection.length,
            content: injection,
        });
    }
    return injection;
}
116
/**
 * Append dynamic injection text to the last user message inside a raw JSON
 * request body, without re-serializing the body.
 *
 * Only text is inserted; every original byte is kept, so the system prompt and
 * earlier messages stay byte-identical (cache-friendly).
 *
 * - String content: "\n\n" + injection is inserted before the closing quote.
 * - Array content: a new `{"type":"text",...}` block is added as last element.
 * - Anything else (no user message, no content key, unparseable content):
 *   the body is returned unchanged.
 *
 * The lookup is textual: it takes the last `"role":"user"` in the body and the
 * first `"content":` key after it, so it relies on `role` preceding `content`
 * inside the message object.
 *
 * @param {string} rawBody - Raw JSON request body.
 * @param {string} injection - Plain (unescaped) text to append.
 * @returns {string} The body with the injection inserted, or `rawBody` as-is.
 */
function appendToLastUserMessage(rawBody, injection) {
    // Find the last "role":"user" occurrence.
    const userRolePattern = /"role"\s*:\s*"user"/g;
    let lastUserMatch = null;
    let match;
    while ((match = userRolePattern.exec(rawBody)) !== null) {
        lastUserMatch = match;
    }
    if (!lastUserMatch) {
        // No user message found, can't inject
        return rawBody;
    }
    // From that position, find the content field.
    const afterRole = rawBody.slice(lastUserMatch.index);
    const contentMatch = afterRole.match(/"content"\s*:\s*/);
    if (!contentMatch || contentMatch.index === undefined) {
        return rawBody;
    }
    const contentStartGlobal = lastUserMatch.index + contentMatch.index + contentMatch[0].length;
    const afterContent = rawBody.slice(contentStartGlobal);
    // JSON.stringify produces a fully valid JSON string body (quotes, backslashes
    // and ALL control characters such as \t and \r are escaped); strip the
    // surrounding quotes to get just the escaped payload.
    const escapedInjection = JSON.stringify(`\n\n${injection}`).slice(1, -1);
    if (afterContent.startsWith('"')) {
        // String content: walk to the closing quote, skipping escape pairs.
        let i = 1;
        while (i < afterContent.length) {
            const char = afterContent[i];
            if (char === '\\') {
                i += 2;
                continue;
            }
            if (char === '"') {
                const insertPos = contentStartGlobal + i;
                return rawBody.slice(0, insertPos) + escapedInjection + rawBody.slice(insertPos);
            }
            i++;
        }
        return rawBody;
    }
    if (afterContent.startsWith('[')) {
        // Array content: find the matching ']' while ignoring brackets that
        // appear inside string literals.
        let depth = 1;
        let inString = false;
        for (let i = 1; i < afterContent.length; i++) {
            const char = afterContent[i];
            if (inString) {
                if (char === '\\') {
                    i++; // skip the escaped character
                }
                else if (char === '"') {
                    inString = false;
                }
                continue;
            }
            if (char === '"') {
                inString = true;
            }
            else if (char === '[') {
                depth++;
            }
            else if (char === ']') {
                depth--;
                if (depth === 0) {
                    const insertPos = contentStartGlobal + i;
                    // An empty array must not get a leading comma ("[,{...}]" is invalid JSON).
                    const isEmptyArray = afterContent.slice(1, i).trim() === '';
                    const newBlock = `${isEmptyArray ? '' : ','}{"type":"text","text":"${escapedInjection}"}`;
                    return rawBody.slice(0, insertPos) + newBlock + rawBody.slice(insertPos);
                }
            }
        }
    }
    // Fallback: couldn't parse, return unchanged
    return rawBody;
}
207
// ============================================
// DEBUG MODE - Controlled via --debug flag
// ============================================
let debugMode = false;
/**
 * Switch debug mode on or off. While it is off, proxyLog() returns without
 * writing anything to the proxy log file.
 *
 * @param {boolean} enabled - New value of the module-level debug flag.
 */
export function setDebugMode(enabled) {
    debugMode = enabled;
}
214
// ============================================
// FILE LOGGER - Request/Response tracking (debug only)
// ============================================
// Log file lives in the directory the process was started from.
const PROXY_LOG_PATH = path.join(process.cwd(), 'grov-proxy.log');
// Monotonic counter used to label requests in the log.
let requestCounter = 0;
/**
 * Append one JSON line (timestamp + entry fields) to the proxy log file.
 *
 * Does nothing unless debug mode is enabled. Logging is best-effort: a
 * failure to serialize the entry or to write the file is reported on stderr
 * and never propagates, so debug logging cannot fail a proxied request.
 *
 * @param {object} entry - Fields to log; spread after `timestamp`, so an
 *   `entry.timestamp` would override the generated one.
 */
function proxyLog(entry) {
    if (!debugMode) {
        return; // Skip file logging unless --debug flag
    }
    try {
        const logEntry = {
            timestamp: new Date().toISOString(),
            ...entry,
        };
        fs.appendFileSync(PROXY_LOG_PATH, `${JSON.stringify(logEntry)}\n`);
    }
    catch (err) {
        console.error('[GROV] proxyLog failed:', err);
    }
}
229
/**
 * Print a compact one-line token/cache summary for a request to the console.
 * This is not gated by debug mode.
 *
 * The cache hit percentage is cacheRead / (cacheCreation + cacheRead), shown
 * as '0' when that sum is not positive.
 *
 * @param {number|string} requestId - Label printed in square brackets.
 * @param {object} usage - Reads inputTokens, outputTokens, cacheCreation, cacheRead.
 * @param {number} latencyMs - Latency printed with an `ms` suffix.
 */
function logTokenUsage(requestId, usage, latencyMs) {
    const cachedTotal = usage.cacheCreation + usage.cacheRead;
    let hitRatio = '0';
    if (cachedTotal > 0) {
        hitRatio = ((usage.cacheRead / cachedTotal) * 100).toFixed(0);
    }
    const summary = `[${requestId}] ${hitRatio}% cache | in:${usage.inputTokens} out:${usage.outputTokens} | create:${usage.cacheCreation} read:${usage.cacheRead} | ${latencyMs}ms`;
    console.log(summary);
}
16
237
  /**
17
238
  * Helper to append text to system prompt (handles string or array format)
18
239
  */
@@ -21,8 +242,13 @@ function appendToSystemPrompt(body, textToAppend) {
21
242
  body.system = body.system + textToAppend;
22
243
  }
23
244
  else if (Array.isArray(body.system)) {
24
- // Append as new text block
25
- body.system.push({ type: 'text', text: textToAppend });
245
+ // Append as new text block WITHOUT cache_control
246
+ // Anthropic allows max 4 cache blocks - Claude Code already uses 2+
247
+ // Grov's injections are small (~2KB) so uncached is fine
248
+ body.system.push({
249
+ type: 'text',
250
+ text: textToAppend,
251
+ });
26
252
  }
27
253
  else {
28
254
  // No system prompt yet, create as string
@@ -44,6 +270,45 @@ function getSystemPromptText(body) {
44
270
  }
45
271
  return '';
46
272
  }
273
/**
 * Inject a text block into the `system` array of a raw JSON request body
 * WITHOUT re-serializing it, so the original bytes (formatting/whitespace)
 * are preserved for cache compatibility.
 *
 * A `{"type":"text","text":...}` block (no cache_control) is added as the last
 * element of the first `"system": [` array found in the body. Only the array
 * form of `system` is supported; a string or missing system prompt yields
 * `success: false` and an unchanged body.
 *
 * @param {string} rawBody - Raw JSON request body.
 * @param {string} injectionText - Plain (unescaped) text for the new block.
 * @returns {{modified: string, success: boolean}} The resulting body and
 *   whether the injection was applied.
 */
function injectIntoRawBody(rawBody, injectionText) {
    // Find the system array in the raw JSON: "system": [ ... ]
    const systemMatch = rawBody.match(/"system"\s*:\s*\[/);
    if (!systemMatch || systemMatch.index === undefined) {
        return { modified: rawBody, success: false };
    }
    // Find the matching closing bracket. Brackets inside string literals (for
    // example "]" in the prompt text itself) must not be counted, otherwise the
    // block would be spliced into the middle of a string.
    const startIndex = systemMatch.index + systemMatch[0].length;
    let bracketCount = 1;
    let endIndex = -1;
    let inString = false;
    for (let i = startIndex; i < rawBody.length; i++) {
        const char = rawBody[i];
        if (inString) {
            if (char === '\\') {
                i++; // skip the escaped character
            }
            else if (char === '"') {
                inString = false;
            }
            continue;
        }
        if (char === '"') {
            inString = true;
        }
        else if (char === '[') {
            bracketCount++;
        }
        else if (char === ']') {
            bracketCount--;
            if (bracketCount === 0) {
                endIndex = i;
                break;
            }
        }
    }
    if (endIndex === -1) {
        return { modified: rawBody, success: false };
    }
    // Escape the injection text for JSON (strip the outer quotes).
    const escapedText = JSON.stringify(injectionText).slice(1, -1);
    // An empty array must not get a leading comma ("[,{...}]" is invalid JSON).
    const isEmptyArray = rawBody.slice(startIndex, endIndex).trim() === '';
    const newBlock = `${isEmptyArray ? '' : ','}{"type":"text","text":"${escapedText}"}`;
    // Insert before the closing bracket
    const modified = rawBody.slice(0, endIndex) + newBlock + rawBody.slice(endIndex);
    return { modified, success: true };
}
47
312
  // Session tracking (in-memory for active sessions)
48
313
  const activeSessions = new Map();
49
314
  /**
@@ -54,16 +319,24 @@ export function createServer() {
54
319
  logger: false, // Disabled - all debug goes to ~/.grov/debug.log
55
320
  bodyLimit: config.BODY_LIMIT,
56
321
  });
322
+ // Custom JSON parser that preserves raw bytes for cache preservation
323
+ fastify.addContentTypeParser('application/json', { parseAs: 'buffer' }, (req, body, done) => {
324
+ // Store raw bytes on request for later use
325
+ req.rawBody = body;
326
+ try {
327
+ const json = JSON.parse(body.toString('utf-8'));
328
+ done(null, json);
329
+ }
330
+ catch (err) {
331
+ done(err, undefined);
332
+ }
333
+ });
57
334
  // Health check endpoint
58
335
  fastify.get('/health', async () => {
59
336
  return { status: 'ok', timestamp: new Date().toISOString() };
60
337
  });
61
338
  // Main messages endpoint
62
- fastify.post('/v1/messages', {
63
- config: {
64
- rawBody: true,
65
- },
66
- }, handleMessages);
339
+ fastify.post('/v1/messages', handleMessages);
67
340
  // Catch-all for other Anthropic endpoints (pass through)
68
341
  fastify.all('/*', async (request, reply) => {
69
342
  fastify.log.warn(`Unhandled endpoint: ${request.method} ${request.url}`);
@@ -78,14 +351,12 @@ async function handleMessages(request, reply) {
78
351
  const logger = request.log;
79
352
  const startTime = Date.now();
80
353
  const model = request.body.model;
81
- // Skip Haiku subagents - forward directly without any tracking
82
- // Haiku requests are Task tool spawns for exploration, they don't make decisions
83
- // All reasoning and decisions happen in the main model (Opus/Sonnet)
84
354
  if (model.includes('haiku')) {
85
355
  logger.info({ msg: 'Skipping Haiku subagent', model });
86
356
  try {
87
- const result = await forwardToAnthropic(request.body, request.headers, logger);
88
- const latency = Date.now() - startTime;
357
+ // Force non-streaming for Haiku too
358
+ const haikusBody = { ...request.body, stream: false };
359
+ const result = await forwardToAnthropic(haikusBody, request.headers, logger);
89
360
  return reply
90
361
  .status(result.statusCode)
91
362
  .header('content-type', 'application/json')
@@ -110,6 +381,7 @@ async function handleMessages(request, reply) {
110
381
  promptCount: sessionInfo.promptCount,
111
382
  projectPath: sessionInfo.projectPath,
112
383
  });
384
+ const currentRequestId = ++requestCounter;
113
385
  logger.info({
114
386
  msg: 'Incoming request',
115
387
  sessionId: sessionInfo.sessionId.substring(0, 8),
@@ -117,27 +389,131 @@ async function handleMessages(request, reply) {
117
389
  model: request.body.model,
118
390
  messageCount: request.body.messages?.length || 0,
119
391
  });
120
- // === PRE-HANDLER: Modify request if needed ===
121
- const modifiedBody = await preProcessRequest(request.body, sessionInfo, logger);
122
- // === FORWARD TO ANTHROPIC ===
392
+ // Log REQUEST to file
393
+ const rawBodySize = request.rawBody?.length || 0;
394
+ proxyLog({
395
+ requestId: currentRequestId,
396
+ type: 'REQUEST',
397
+ sessionId: sessionInfo.sessionId.substring(0, 8),
398
+ data: {
399
+ model: request.body.model,
400
+ messageCount: request.body.messages?.length || 0,
401
+ promptCount: sessionInfo.promptCount,
402
+ rawBodySize,
403
+ },
404
+ });
405
+ // Process request to get injection text
406
+ // __grovInjection = team memory (system prompt, cached)
407
+ // __grovUserMsgInjection = dynamic content (user message, delta only)
408
+ const processedBody = await preProcessRequest(request.body, sessionInfo, logger);
409
+ const systemInjection = processedBody.__grovInjection;
410
+ const userMsgInjection = processedBody.__grovUserMsgInjection;
411
+ // Get raw body bytes
412
+ const rawBody = request.rawBody;
413
+ let rawBodyStr = rawBody?.toString('utf-8') || '';
414
+ // Track injection sizes for logging
415
+ let systemInjectionSize = 0;
416
+ let userMsgInjectionSize = 0;
417
+ let systemSuccess = false;
418
+ let userMsgSuccess = false;
419
+ // 1. Inject team memory into SYSTEM prompt (cached, constant)
420
+ if (systemInjection && rawBodyStr) {
421
+ const result = injectIntoRawBody(rawBodyStr, '\n\n' + systemInjection);
422
+ rawBodyStr = result.modified;
423
+ systemInjectionSize = systemInjection.length;
424
+ systemSuccess = result.success;
425
+ }
426
+ // 2. Inject dynamic content into LAST USER MESSAGE (delta only)
427
+ if (userMsgInjection && rawBodyStr) {
428
+ rawBodyStr = appendToLastUserMessage(rawBodyStr, userMsgInjection);
429
+ userMsgInjectionSize = userMsgInjection.length;
430
+ userMsgSuccess = true; // appendToLastUserMessage doesn't return success flag
431
+ }
432
+ // Determine final body to send
433
+ let finalBodyToSend;
434
+ if (systemInjection || userMsgInjection) {
435
+ finalBodyToSend = rawBodyStr;
436
+ // Log INJECTION to file with full details
437
+ const wasCached = processedBody.__grovInjectionCached;
438
+ proxyLog({
439
+ requestId: currentRequestId,
440
+ type: 'INJECTION',
441
+ sessionId: sessionInfo.sessionId.substring(0, 8),
442
+ data: {
443
+ systemInjectionSize,
444
+ userMsgInjectionSize,
445
+ totalInjectionSize: systemInjectionSize + userMsgInjectionSize,
446
+ originalSize: rawBody?.length || 0,
447
+ finalSize: rawBodyStr.length,
448
+ systemSuccess,
449
+ userMsgSuccess,
450
+ teamMemoryCached: wasCached,
451
+ // Include actual content for debugging (truncated for log readability)
452
+ systemInjectionPreview: systemInjection ? systemInjection.substring(0, 200) + (systemInjection.length > 200 ? '...' : '') : null,
453
+ userMsgInjectionContent: userMsgInjection || null, // Full content since it's small
454
+ },
455
+ });
456
+ }
457
+ else if (rawBody) {
458
+ // No injection, use original raw bytes
459
+ finalBodyToSend = rawBody;
460
+ }
461
+ else {
462
+ // Fallback to re-serialization (shouldn't happen normally)
463
+ finalBodyToSend = JSON.stringify(processedBody);
464
+ }
465
+ const forwardStart = Date.now();
123
466
  try {
124
- const result = await forwardToAnthropic(modifiedBody, request.headers, logger);
125
- // === POST-HANDLER: Process response with task orchestration ===
467
+ // Forward: raw bytes (with injection inserted) or original raw bytes
468
+ const result = await forwardToAnthropic(processedBody, request.headers, logger, typeof finalBodyToSend === 'string' ? Buffer.from(finalBodyToSend, 'utf-8') : finalBodyToSend);
469
+ const forwardLatency = Date.now() - forwardStart;
470
+ // FIRE-AND-FORGET: Don't block response to Claude Code
471
+ // This prevents retry loops caused by Haiku calls adding latency
126
472
  if (result.statusCode === 200 && isAnthropicResponse(result.body)) {
127
- await postProcessResponse(result.body, sessionInfo, request.body, logger);
473
+ postProcessResponse(result.body, sessionInfo, request.body, logger)
474
+ .catch(err => console.error('[GROV] postProcess error:', err));
128
475
  }
129
- // Return response to Claude Code (unmodified)
130
476
  const latency = Date.now() - startTime;
477
+ const filteredHeaders = filterResponseHeaders(result.headers);
478
+ // Log token usage (always to console, file only in debug mode)
479
+ if (isAnthropicResponse(result.body)) {
480
+ const usage = extractTokenUsage(result.body);
481
+ // Console: compact token summary (always shown)
482
+ logTokenUsage(currentRequestId, usage, latency);
483
+ // File: detailed response log (debug mode only)
484
+ proxyLog({
485
+ requestId: currentRequestId,
486
+ type: 'RESPONSE',
487
+ sessionId: sessionInfo.sessionId.substring(0, 8),
488
+ data: {
489
+ statusCode: result.statusCode,
490
+ latencyMs: latency,
491
+ forwardLatencyMs: forwardLatency,
492
+ inputTokens: usage.inputTokens,
493
+ outputTokens: usage.outputTokens,
494
+ cacheCreation: usage.cacheCreation,
495
+ cacheRead: usage.cacheRead,
496
+ cacheHitRatio: usage.cacheRead > 0 ? (usage.cacheRead / (usage.cacheRead + usage.cacheCreation)).toFixed(2) : '0.00',
497
+ wasSSE: result.wasSSE,
498
+ },
499
+ });
500
+ }
501
+ // If response was SSE, forward raw SSE to Claude Code (it expects streaming)
502
+ // Otherwise, send JSON
503
+ const isSSEResponse = result.wasSSE;
504
+ const responseContentType = isSSEResponse ? 'text/event-stream; charset=utf-8' : 'application/json';
505
+ const responseBody = isSSEResponse ? result.rawBody : JSON.stringify(result.body);
131
506
  logger.info({
132
507
  msg: 'Request complete',
133
508
  statusCode: result.statusCode,
134
509
  latencyMs: latency,
510
+ wasSSE: isSSEResponse,
135
511
  });
136
512
  return reply
137
513
  .status(result.statusCode)
138
- .header('content-type', 'application/json')
139
- .headers(filterResponseHeaders(result.headers))
140
- .send(JSON.stringify(result.body));
514
+ .header('content-type', responseContentType)
515
+ .headers(filteredHeaders)
516
+ .send(responseBody);
141
517
  }
142
518
  catch (error) {
143
519
  if (isForwardError(error)) {
@@ -218,93 +594,130 @@ async function getOrCreateSession(request, logger) {
218
594
  logger.info({ msg: 'No existing session, will create after task analysis' });
219
595
  return { ...sessionInfo, isNew: true, currentSession: null, completedSession };
220
596
  }
597
/**
 * Classify a request as 'first', 'continuation', or 'retry'.
 *
 * - retry: the message count equals the count recorded for this session on
 *   the previous call
 * - continuation: the last message is a user message whose content array
 *   contains a tool_result block
 * - first: everything else (including an empty or missing message list)
 *
 * Side effect: the current message count is recorded for the session on every
 * call, before the classification is made.
 *
 * @param {Array|undefined|null} messages - Request messages.
 * @param {string} sessionId - Key for the per-session message-count map.
 * @returns {'first'|'continuation'|'retry'} The detected request type.
 */
function detectRequestType(messages, sessionId) {
    const messageTotal = messages?.length || 0;
    const previousTotal = lastMessageCount.get(sessionId);
    lastMessageCount.set(sessionId, messageTotal);
    // Unchanged message count since the previous call means a retry.
    const isRetry = previousTotal !== undefined && previousTotal === messageTotal;
    if (isRetry) {
        return 'retry';
    }
    if (!messages || messages.length === 0) {
        return 'first';
    }
    const lastMessage = messages[messages.length - 1];
    if (lastMessage.role !== 'user' || !Array.isArray(lastMessage.content)) {
        return 'first';
    }
    // A user message carrying a tool_result block continues the current turn.
    const isToolResult = (block) => typeof block === 'object' && block !== null && block.type === 'tool_result';
    return lastMessage.content.some(isToolResult) ? 'continuation' : 'first';
}
221
626
  /**
222
627
  * Pre-process request before forwarding
223
- * - Context injection
224
- * - CLEAR operation
628
+ * - Context injection (first request only)
629
+ * - CLEAR operation (first request only)
630
+ * - Drift correction (first request only)
631
+ *
632
+ * SKIP all injections on: retry, continuation
225
633
  */
226
634
  async function preProcessRequest(body, sessionInfo, logger) {
227
635
  const modified = { ...body };
228
- // FIRST: Always inject team memory context (doesn't require sessionState)
229
- const mentionedFiles = extractFilesFromMessages(modified.messages || []);
230
- const teamContext = buildTeamMemoryContext(sessionInfo.projectPath, mentionedFiles);
231
- if (teamContext) {
232
- appendToSystemPrompt(modified, '\n\n' + teamContext);
233
- }
234
- // THEN: Session-specific operations
636
+ // Detect request type: first, continuation, or retry
637
+ const requestType = detectRequestType(modified.messages || [], sessionInfo.sessionId);
638
+ // === NEW ARCHITECTURE: Separate static and dynamic injection ===
639
+ //
640
+ // STATIC (system prompt, cached):
641
+ // - Team memory from PAST sessions only
642
+ // - CLEAR summary when triggered
643
+ // -> Uses __grovInjection + injectIntoRawBody()
644
+ //
645
+ // DYNAMIC (user message, delta only):
646
+ // - Files edited in current session
647
+ // - Key decisions with reasoning
648
+ // - Drift correction, forced recovery
649
+ // -> Uses __grovUserMsgInjection + appendToLastUserMessage()
650
+ // Get session state
235
651
  const sessionState = getSessionState(sessionInfo.sessionId);
236
- if (!sessionState) {
237
- return modified; // Injection already happened above!
652
+ // === CLEAR MODE (100% threshold) ===
653
+ // If token count exceeds threshold AND we have a pre-computed summary, apply CLEAR
654
+ if (sessionState) {
655
+ const currentTokenCount = sessionState.token_count || 0;
656
+ if (currentTokenCount > config.TOKEN_CLEAR_THRESHOLD &&
657
+ sessionState.pending_clear_summary) {
658
+ logger.info({
659
+ msg: 'CLEAR MODE ACTIVATED - resetting conversation',
660
+ tokenCount: currentTokenCount,
661
+ threshold: config.TOKEN_CLEAR_THRESHOLD,
662
+ summaryLength: sessionState.pending_clear_summary.length,
663
+ });
664
+ // 1. Empty messages array (fundamental reset)
665
+ modified.messages = [];
666
+ // 2. Inject summary into system prompt (this will cause cache miss - intentional)
667
+ appendToSystemPrompt(modified, sessionState.pending_clear_summary);
668
+ // 3. Mark session as cleared
669
+ markCleared(sessionInfo.sessionId);
670
+ // 4. Clear pending summary and invalidate team memory cache (new baseline)
671
+ updateSessionState(sessionInfo.sessionId, { pending_clear_summary: undefined });
672
+ cachedInjections.delete(sessionInfo.sessionId);
673
+ // 5. Clear tracking (fresh start after CLEAR)
674
+ sessionInjectionTracking.delete(sessionInfo.sessionId);
675
+ logger.info({ msg: 'CLEAR complete - conversation reset with summary' });
676
+ return modified; // Skip other injections - this is a complete reset
677
+ }
238
678
  }
239
- // Extract latest user message for drift checking
240
- const latestUserMessage = extractGoalFromMessages(body.messages) || '';
241
- // CLEAR operation if token threshold exceeded
242
- if ((sessionState.token_count || 0) > config.TOKEN_CLEAR_THRESHOLD) {
243
- logger.info({
244
- msg: 'Token threshold exceeded, initiating CLEAR',
245
- tokenCount: sessionState.token_count,
246
- threshold: config.TOKEN_CLEAR_THRESHOLD,
247
- });
248
- // Generate summary from session state + steps
249
- let summary;
250
- if (isSummaryAvailable()) {
251
- const steps = getValidatedSteps(sessionInfo.sessionId);
252
- summary = await generateSessionSummary(sessionState, steps);
679
+ // === STATIC INJECTION: Team memory (PAST sessions only) ===
680
+ // Cached per session - identical across all requests for cache preservation
681
+ const cachedTeamMemory = cachedInjections.get(sessionInfo.sessionId);
682
+ if (cachedTeamMemory) {
683
+ // Reuse cached team memory (constant for this session)
684
+ modified.__grovInjection = cachedTeamMemory;
685
+ modified.__grovInjectionCached = true;
686
+ logger.info({ msg: 'Using cached team memory', size: cachedTeamMemory.length });
687
+ }
688
+ else {
689
+ // First request: compute team memory from PAST sessions only
690
+ const mentionedFiles = extractFilesFromMessages(modified.messages || []);
691
+ // Pass currentSessionId to exclude current session data
692
+ const teamContext = buildTeamMemoryContext(sessionInfo.projectPath, mentionedFiles, sessionInfo.sessionId // Exclude current session
693
+ );
694
+ if (teamContext) {
695
+ modified.__grovInjection = teamContext;
696
+ modified.__grovInjectionCached = false;
697
+ // Cache for future requests (stays constant)
698
+ cachedInjections.set(sessionInfo.sessionId, teamContext);
699
+ logger.info({ msg: 'Computed and cached team memory', size: teamContext.length });
253
700
  }
254
- else {
255
- const files = getValidatedSteps(sessionInfo.sessionId).flatMap(s => s.files);
256
- summary = `PREVIOUS SESSION CONTEXT:
257
- Goal: ${sessionState.original_goal || 'Not specified'}
258
- Files worked on: ${[...new Set(files)].slice(0, 10).join(', ') || 'None'}
259
- Please continue from where you left off.`;
260
- }
261
- // Clear messages and inject summary
262
- modified.messages = [];
263
- appendToSystemPrompt(modified, '\n\n' + summary);
264
- // Update session state
265
- markCleared(sessionInfo.sessionId);
266
- logger.info({
267
- msg: 'CLEAR completed',
268
- summaryLength: summary.length,
269
- });
270
701
  }
271
- // Check if session is in drifted or forced mode
272
- if (sessionState.session_mode === 'drifted' || sessionState.session_mode === 'forced') {
273
- const recentSteps = getRecentSteps(sessionInfo.sessionId, 5);
274
- // FORCED MODE: escalation >= 3 -> Haiku generates recovery prompt
275
- if (sessionState.escalation_count >= 3 || sessionState.session_mode === 'forced') {
276
- // Update mode to forced if not already
277
- if (sessionState.session_mode !== 'forced') {
278
- updateSessionMode(sessionInfo.sessionId, 'forced');
279
- }
280
- const lastDrift = lastDriftResults.get(sessionInfo.sessionId);
281
- const driftResult = lastDrift || await checkDrift({ sessionState, recentSteps, latestUserMessage });
282
- const forcedRecovery = await generateForcedRecovery(sessionState, recentSteps.map(s => ({ actionType: s.action_type, files: s.files })), driftResult);
283
- appendToSystemPrompt(modified, forcedRecovery.injectionText);
284
- logger.info({
285
- msg: 'FORCED MODE - Injected Haiku recovery prompt',
286
- escalation: sessionState.escalation_count,
287
- mandatoryAction: forcedRecovery.mandatoryAction.substring(0, 50),
702
+ // SKIP dynamic injection for retries and continuations
703
+ if (requestType !== 'first') {
704
+ return modified;
705
+ }
706
+ // === DYNAMIC INJECTION: User message (delta only) ===
707
+ // Includes: edited files, key decisions, drift correction, forced recovery
708
+ // This goes into the LAST user message, not system prompt
709
+ const dynamicInjection = buildDynamicInjection(sessionInfo.sessionId, sessionState, logger);
710
+ if (dynamicInjection) {
711
+ modified.__grovUserMsgInjection = dynamicInjection;
712
+ logger.info({ msg: 'Dynamic injection ready for user message', size: dynamicInjection.length });
713
+ // Clear pending corrections after building injection
714
+ if (sessionState?.pending_correction || sessionState?.pending_forced_recovery) {
715
+ updateSessionState(sessionInfo.sessionId, {
716
+ pending_correction: undefined,
717
+ pending_forced_recovery: undefined,
288
718
  });
289
719
  }
290
- else {
291
- // DRIFTED MODE: normal correction injection
292
- const driftResult = await checkDrift({ sessionState, recentSteps, latestUserMessage });
293
- const correctionLevel = scoreToCorrectionLevel(driftResult.score);
294
- if (correctionLevel) {
295
- const correction = buildCorrection(driftResult, sessionState, correctionLevel);
296
- const correctionText = formatCorrectionForInjection(correction);
297
- appendToSystemPrompt(modified, correctionText);
298
- logger.info({
299
- msg: 'Injected correction',
300
- level: correctionLevel,
301
- score: driftResult.score,
302
- });
303
- }
304
- }
305
720
  }
306
- // Note: Team memory context injection is now at the TOP of preProcessRequest()
307
- // so it runs even when sessionState is null (new sessions)
308
721
  return modified;
309
722
  }
310
723
  /**
@@ -630,19 +1043,81 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
630
1043
  activeSessionId = sessionInfo.currentSession.session_id;
631
1044
  }
632
1045
  }
1046
+ // AUTO-SAVE on every end_turn (for all task types: new_task, continue, subtask, parallel)
1047
+ // task_complete and subtask_complete already save and return early, so they won't reach here
1048
+ if (isEndTurn && activeSession && activeSessionId) {
1049
+ try {
1050
+ await saveToTeamMemory(activeSessionId, 'complete');
1051
+ markSessionCompleted(activeSessionId);
1052
+ activeSessions.delete(activeSessionId);
1053
+ logger.info({ msg: 'Auto-saved task on end_turn', sessionId: activeSessionId.substring(0, 8) });
1054
+ }
1055
+ catch (err) {
1056
+ logger.info({ msg: 'Auto-save failed', error: String(err) });
1057
+ }
1058
+ }
633
1059
  // Extract token usage
634
1060
  const usage = extractTokenUsage(response);
1061
+ // Use cache metrics as actual context size (cacheCreation + cacheRead)
1062
+ // This is what Anthropic bills for and what determines CLEAR threshold
1063
+ const actualContextSize = usage.cacheCreation + usage.cacheRead;
635
1064
  if (activeSession) {
636
- updateTokenCount(activeSessionId, usage.totalTokens);
1065
+ // Set to actual context size (not cumulative - context size IS the total)
1066
+ updateTokenCount(activeSessionId, actualContextSize);
637
1067
  }
638
1068
  logger.info({
639
1069
  msg: 'Token usage',
640
1070
  input: usage.inputTokens,
641
1071
  output: usage.outputTokens,
642
1072
  total: usage.totalTokens,
1073
+ cacheCreation: usage.cacheCreation,
1074
+ cacheRead: usage.cacheRead,
1075
+ actualContextSize,
643
1076
  activeSession: activeSessionId.substring(0, 8),
644
1077
  });
1078
+ // === CLEAR MODE PRE-COMPUTE (85% threshold) ===
1079
+ // Pre-compute summary before hitting 100% threshold to avoid blocking Haiku call
1080
+ const preComputeThreshold = Math.floor(config.TOKEN_CLEAR_THRESHOLD * 0.85);
1081
+ // Use actualContextSize (cacheCreation + cacheRead) as the real context size
1082
+ if (activeSession &&
1083
+ actualContextSize > preComputeThreshold &&
1084
+ !activeSession.pending_clear_summary &&
1085
+ isSummaryAvailable()) {
1086
+ // Get all validated steps for comprehensive summary
1087
+ const allSteps = getValidatedSteps(activeSessionId);
1088
+ // Generate summary asynchronously (fire-and-forget)
1089
+ generateSessionSummary(activeSession, allSteps, 15000).then(summary => {
1090
+ updateSessionState(activeSessionId, { pending_clear_summary: summary });
1091
+ logger.info({
1092
+ msg: 'CLEAR summary pre-computed',
1093
+ actualContextSize,
1094
+ threshold: preComputeThreshold,
1095
+ summaryLength: summary.length,
1096
+ });
1097
+ }).catch(err => {
1098
+ logger.info({ msg: 'CLEAR summary generation failed', error: String(err) });
1099
+ });
1100
+ }
1101
+ // Capture final_response for ALL end_turn responses (not just Q&A)
1102
+ // This preserves Claude's analysis even when tools were used
1103
+ if (isEndTurn && textContent.length > 100 && activeSessionId) {
1104
+ updateSessionState(activeSessionId, {
1105
+ final_response: textContent.substring(0, 10000),
1106
+ });
1107
+ }
645
1108
  if (actions.length === 0) {
1109
+ // Pure Q&A (no tool calls) - auto-save as task
1110
+ if (isEndTurn && activeSessionId && activeSession) {
1111
+ try {
1112
+ await saveToTeamMemory(activeSessionId, 'complete');
1113
+ markSessionCompleted(activeSessionId);
1114
+ activeSessions.delete(activeSessionId);
1115
+ logger.info({ msg: 'Task saved on final answer', sessionId: activeSessionId.substring(0, 8) });
1116
+ }
1117
+ catch (err) {
1118
+ logger.info({ msg: 'Task save failed', error: String(err) });
1119
+ }
1120
+ }
646
1121
  return;
647
1122
  }
648
1123
  logger.info({
@@ -700,11 +1175,51 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
700
1175
  updateSessionMode(activeSessionId, 'drifted');
701
1176
  markWaitingForRecovery(activeSessionId, true);
702
1177
  incrementEscalation(activeSessionId);
1178
+ // Pre-compute correction for next request (fire-and-forget pattern)
1179
+ // This avoids blocking Haiku calls in preProcessRequest
1180
+ const correction = buildCorrection(driftResult, activeSession, correctionLevel);
1181
+ const correctionText = formatCorrectionForInjection(correction);
1182
+ updateSessionState(activeSessionId, { pending_correction: correctionText });
1183
+ logger.info({
1184
+ msg: 'Pre-computed correction saved',
1185
+ level: correctionLevel,
1186
+ correctionLength: correctionText.length,
1187
+ });
1188
+ }
1189
+ else if (correctionLevel) {
1190
+ // Nudge or correct level - still save correction but don't change mode
1191
+ const correction = buildCorrection(driftResult, activeSession, correctionLevel);
1192
+ const correctionText = formatCorrectionForInjection(correction);
1193
+ updateSessionState(activeSessionId, { pending_correction: correctionText });
1194
+ logger.info({
1195
+ msg: 'Pre-computed mild correction saved',
1196
+ level: correctionLevel,
1197
+ });
703
1198
  }
704
1199
  else if (driftScore >= 8) {
705
1200
  updateSessionMode(activeSessionId, 'normal');
706
1201
  markWaitingForRecovery(activeSessionId, false);
707
1202
  lastDriftResults.delete(activeSessionId);
1203
+ // Clear any pending correction since drift is resolved
1204
+ updateSessionState(activeSessionId, { pending_correction: undefined });
1205
+ }
1206
+ // FORCED MODE: escalation >= 3 triggers Haiku-generated recovery
1207
+ const currentEscalation = activeSession.escalation_count || 0;
1208
+ if (currentEscalation >= 3 && driftScore < 8) {
1209
+ updateSessionMode(activeSessionId, 'forced');
1210
+ // Generate forced recovery asynchronously (fire-and-forget within fire-and-forget)
1211
+ generateForcedRecovery(activeSession, recentSteps.map(s => ({ actionType: s.action_type, files: s.files })), driftResult).then(forcedRecovery => {
1212
+ updateSessionState(activeSessionId, {
1213
+ pending_forced_recovery: forcedRecovery.injectionText,
1214
+ });
1215
+ logger.info({
1216
+ msg: 'Pre-computed forced recovery saved',
1217
+ escalation: currentEscalation,
1218
+ mandatoryAction: forcedRecovery.mandatoryAction?.substring(0, 50),
1219
+ });
1220
+ }).catch(err => {
1221
+ logger.info({ msg: 'Forced recovery generation failed', error: String(err) });
1222
+ });
708
1223
  }
709
1224
  updateLastChecked(activeSessionId, Date.now());
710
1225
  if (skipSteps) {
@@ -728,6 +1243,8 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
728
1243
  }
729
1244
  // Save each action as a step (with reasoning from Claude's text)
730
1245
  for (const action of actions) {
1246
+ // Detect key decisions based on action type and reasoning content
1247
+ const isKeyDecision = detectKeyDecision(action, textContent);
731
1248
  createStep({
732
1249
  session_id: activeSessionId,
733
1250
  action_type: action.actionType,
@@ -737,9 +1254,45 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
737
1254
  reasoning: textContent.substring(0, 1000), // Claude's explanation (truncated)
738
1255
  drift_score: driftScore,
739
1256
  is_validated: !skipSteps,
1257
+ is_key_decision: isKeyDecision,
740
1258
  });
1259
+ if (isKeyDecision) {
1260
+ logger.info({
1261
+ msg: 'Key decision detected',
1262
+ actionType: action.actionType,
1263
+ files: action.files.slice(0, 3),
1264
+ });
1265
+ }
741
1266
  }
742
1267
  }
1268
/**
 * Keywords whose presence in an action's reasoning text suggests that a
 * deliberate choice was made. They are matched as case-insensitive
 * substrings, so e.g. 'prefer' also matches 'preferred'.
 */
const KEY_DECISION_KEYWORDS = Object.freeze([
    'decision', 'decided', 'chose', 'chosen', 'selected', 'picked',
    'approach', 'strategy', 'solution', 'implementation',
    'because', 'reason', 'rationale', 'trade-off', 'tradeoff',
    'instead of', 'rather than', 'prefer', 'opted',
    'conclusion', 'determined', 'resolved',
]);
/**
 * Reasoning must be strictly longer than this many characters before a
 * keyword match is allowed to count as a key decision.
 */
const KEY_DECISION_MIN_REASONING_LENGTH = 200;
/**
 * Detect if an action represents a key decision worth injecting later.
 *
 * An action is a key decision when either:
 * - it is a code modification (`actionType` is 'edit' or 'write'), or
 * - its reasoning is substantial (more than 200 characters) AND contains at
 *   least one decision-related keyword.
 *
 * Long reasoning without a keyword, or a keyword in short reasoning, is not
 * enough on its own.
 *
 * @param {{ actionType: string }} action - Parsed action; only `actionType` is read.
 * @param {string} reasoning - Text accompanying the action. A non-string value
 *   (e.g. undefined) is treated as "no reasoning" instead of throwing.
 * @returns {boolean} true when the action should be flagged as a key decision.
 */
function detectKeyDecision(action, reasoning) {
    // Code modifications are always key decisions, regardless of reasoning.
    if (action.actionType === 'edit' || action.actionType === 'write') {
        return true;
    }
    // Cheap checks first: missing or short reasoning can never qualify, so
    // skip lower-casing and scanning the text entirely.
    if (typeof reasoning !== 'string' || reasoning.length <= KEY_DECISION_MIN_REASONING_LENGTH) {
        return false;
    }
    const reasoningLower = reasoning.toLowerCase();
    return KEY_DECISION_KEYWORDS.some((kw) => reasoningLower.includes(kw));
}
743
1296
  /**
744
1297
  * Extract text content from response for analysis
745
1298
  */
@@ -852,10 +1405,16 @@ function filterResponseHeaders(headers) {
852
1405
  const allowedHeaders = [
853
1406
  'content-type',
854
1407
  'x-request-id',
1408
+ 'request-id',
1409
+ 'x-should-retry',
1410
+ 'retry-after',
1411
+ 'retry-after-ms',
855
1412
  'anthropic-ratelimit-requests-limit',
856
1413
  'anthropic-ratelimit-requests-remaining',
1414
+ 'anthropic-ratelimit-requests-reset',
857
1415
  'anthropic-ratelimit-tokens-limit',
858
1416
  'anthropic-ratelimit-tokens-remaining',
1417
+ 'anthropic-ratelimit-tokens-reset',
859
1418
  ];
860
1419
  for (const header of allowedHeaders) {
861
1420
  const value = headers[header];
@@ -878,19 +1437,23 @@ function isAnthropicResponse(body) {
878
1437
  }
879
1438
  /**
880
1439
  * Start the proxy server
1440
+ * @param options.debug - Enable debug logging to grov-proxy.log
881
1441
  */
882
- export async function startServer() {
1442
+ export async function startServer(options = {}) {
1443
+ // Set debug mode based on flag
1444
+ if (options.debug) {
1445
+ setDebugMode(true);
1446
+ console.log('[DEBUG] Logging to grov-proxy.log');
1447
+ }
883
1448
  const server = createServer();
884
1449
  // Cleanup old completed sessions (older than 24 hours)
885
- const cleanedUp = cleanupOldCompletedSessions();
886
- if (cleanedUp > 0) {
887
- }
1450
+ cleanupOldCompletedSessions();
888
1451
  try {
889
1452
  await server.listen({
890
1453
  host: config.HOST,
891
1454
  port: config.PORT,
892
1455
  });
893
- console.log(`✓ Grov Proxy: http://${config.HOST}:${config.PORT} ${config.ANTHROPIC_BASE_URL}`);
1456
+ console.log(`Grov Proxy: http://${config.HOST}:${config.PORT} -> ${config.ANTHROPIC_BASE_URL}`);
894
1457
  return server;
895
1458
  }
896
1459
  catch (err) {