grov 0.2.3 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +44 -5
  2. package/dist/cli.js +40 -2
  3. package/dist/commands/login.d.ts +1 -0
  4. package/dist/commands/login.js +115 -0
  5. package/dist/commands/logout.d.ts +1 -0
  6. package/dist/commands/logout.js +13 -0
  7. package/dist/commands/sync.d.ts +8 -0
  8. package/dist/commands/sync.js +127 -0
  9. package/dist/lib/api-client.d.ts +57 -0
  10. package/dist/lib/api-client.js +174 -0
  11. package/dist/lib/cloud-sync.d.ts +33 -0
  12. package/dist/lib/cloud-sync.js +176 -0
  13. package/dist/lib/credentials.d.ts +53 -0
  14. package/dist/lib/credentials.js +201 -0
  15. package/dist/lib/llm-extractor.d.ts +15 -39
  16. package/dist/lib/llm-extractor.js +400 -418
  17. package/dist/lib/store/convenience.d.ts +40 -0
  18. package/dist/lib/store/convenience.js +104 -0
  19. package/dist/lib/store/database.d.ts +22 -0
  20. package/dist/lib/store/database.js +375 -0
  21. package/dist/lib/store/drift.d.ts +9 -0
  22. package/dist/lib/store/drift.js +89 -0
  23. package/dist/lib/store/index.d.ts +7 -0
  24. package/dist/lib/store/index.js +13 -0
  25. package/dist/lib/store/sessions.d.ts +32 -0
  26. package/dist/lib/store/sessions.js +240 -0
  27. package/dist/lib/store/steps.d.ts +40 -0
  28. package/dist/lib/store/steps.js +161 -0
  29. package/dist/lib/store/tasks.d.ts +33 -0
  30. package/dist/lib/store/tasks.js +133 -0
  31. package/dist/lib/store/types.d.ts +167 -0
  32. package/dist/lib/store/types.js +2 -0
  33. package/dist/lib/store.d.ts +1 -406
  34. package/dist/lib/store.js +2 -1356
  35. package/dist/lib/utils.d.ts +5 -0
  36. package/dist/lib/utils.js +45 -0
  37. package/dist/proxy/action-parser.d.ts +10 -2
  38. package/dist/proxy/action-parser.js +4 -2
  39. package/dist/proxy/cache.d.ts +36 -0
  40. package/dist/proxy/cache.js +51 -0
  41. package/dist/proxy/config.d.ts +1 -0
  42. package/dist/proxy/config.js +2 -0
  43. package/dist/proxy/extended-cache.d.ts +10 -0
  44. package/dist/proxy/extended-cache.js +155 -0
  45. package/dist/proxy/forwarder.d.ts +7 -1
  46. package/dist/proxy/forwarder.js +157 -7
  47. package/dist/proxy/handlers/preprocess.d.ts +20 -0
  48. package/dist/proxy/handlers/preprocess.js +169 -0
  49. package/dist/proxy/injection/delta-tracking.d.ts +11 -0
  50. package/dist/proxy/injection/delta-tracking.js +93 -0
  51. package/dist/proxy/injection/injectors.d.ts +7 -0
  52. package/dist/proxy/injection/injectors.js +139 -0
  53. package/dist/proxy/request-processor.d.ts +18 -3
  54. package/dist/proxy/request-processor.js +151 -28
  55. package/dist/proxy/response-processor.js +116 -47
  56. package/dist/proxy/server.d.ts +4 -1
  57. package/dist/proxy/server.js +592 -253
  58. package/dist/proxy/types.d.ts +13 -0
  59. package/dist/proxy/types.js +2 -0
  60. package/dist/proxy/utils/extractors.d.ts +18 -0
  61. package/dist/proxy/utils/extractors.js +109 -0
  62. package/dist/proxy/utils/logging.d.ts +18 -0
  63. package/dist/proxy/utils/logging.js +42 -0
  64. package/package.json +22 -4
@@ -1,49 +1,26 @@
1
1
  // Grov Proxy Server - Fastify + undici
2
2
  // Intercepts Claude Code <-> Anthropic API traffic for drift detection and context injection
3
3
  import Fastify from 'fastify';
4
- import { config } from './config.js';
4
+ import { config, buildSafeHeaders } from './config.js';
5
5
  import { forwardToAnthropic, isForwardError } from './forwarder.js';
6
+ import { extendedCache, evictOldestCacheEntry, checkExtendedCache, log } from './extended-cache.js';
7
+ import { setDebugMode, getNextRequestId, taskLog, proxyLog, logTokenUsage } from './utils/logging.js';
8
+ import { detectKeyDecision, extractTextContent, extractProjectPath, extractGoalFromMessages, extractConversationHistory } from './utils/extractors.js';
9
+ import { appendToLastUserMessage, injectIntoRawBody } from './injection/injectors.js';
10
+ import { preProcessRequest, setPendingPlanClear } from './handlers/preprocess.js';
6
11
  import { parseToolUseBlocks, extractTokenUsage } from './action-parser.js';
7
- import { createSessionState, getSessionState, updateSessionState, createStep, updateTokenCount, logDriftEvent, getRecentSteps, getValidatedSteps, updateSessionMode, markWaitingForRecovery, incrementEscalation, updateLastChecked, markCleared, getActiveSessionForUser, deleteSessionState, deleteStepsForSession, updateRecentStepsReasoning, markSessionCompleted, getCompletedSessionForProject, cleanupOldCompletedSessions, } from '../lib/store.js';
12
+ import { createSessionState, getSessionState, updateSessionState, createStep, updateTokenCount, logDriftEvent, getRecentSteps, getValidatedSteps, updateSessionMode, markWaitingForRecovery, incrementEscalation, updateLastChecked, getActiveSessionForUser, deleteSessionState, deleteStepsForSession, updateRecentStepsReasoning, markSessionCompleted, getCompletedSessionForProject, cleanupOldCompletedSessions, cleanupStaleActiveSessions, } from '../lib/store.js';
8
13
  import { checkDrift, scoreToCorrectionLevel, shouldSkipSteps, isDriftCheckAvailable, checkRecoveryAlignment, generateForcedRecovery, } from '../lib/drift-checker-proxy.js';
9
14
  import { buildCorrection, formatCorrectionForInjection } from '../lib/correction-builder-proxy.js';
10
15
  import { generateSessionSummary, isSummaryAvailable, extractIntent, isIntentExtractionAvailable, analyzeTaskContext, isTaskAnalysisAvailable, } from '../lib/llm-extractor.js';
11
- import { buildTeamMemoryContext, extractFilesFromMessages } from './request-processor.js';
12
16
  import { saveToTeamMemory } from './response-processor.js';
13
17
  import { randomUUID } from 'crypto';
14
18
  // Store last drift result for recovery alignment check
15
19
  const lastDriftResults = new Map();
16
- /**
17
- * Helper to append text to system prompt (handles string or array format)
18
- */
19
- function appendToSystemPrompt(body, textToAppend) {
20
- if (typeof body.system === 'string') {
21
- body.system = body.system + textToAppend;
22
- }
23
- else if (Array.isArray(body.system)) {
24
- // Append as new text block
25
- body.system.push({ type: 'text', text: textToAppend });
26
- }
27
- else {
28
- // No system prompt yet, create as string
29
- body.system = textToAppend;
30
- }
31
- }
32
- /**
33
- * Get system prompt as string (for reading)
34
- */
35
- function getSystemPromptText(body) {
36
- if (typeof body.system === 'string') {
37
- return body.system;
38
- }
39
- else if (Array.isArray(body.system)) {
40
- return body.system
41
- .filter(block => block.type === 'text')
42
- .map(block => block.text)
43
- .join('\n');
44
- }
45
- return '';
46
- }
20
+ // Server logger reference (set in startServer)
21
+ let serverLog = null;
22
+ // Track last messageCount per session to detect retries vs new turns
23
+ const lastMessageCount = new Map();
47
24
  // Session tracking (in-memory for active sessions)
48
25
  const activeSessions = new Map();
49
26
  /**
@@ -54,16 +31,24 @@ export function createServer() {
54
31
  logger: false, // Disabled - all debug goes to ~/.grov/debug.log
55
32
  bodyLimit: config.BODY_LIMIT,
56
33
  });
34
+ // Custom JSON parser that preserves raw bytes for cache preservation
35
+ fastify.addContentTypeParser('application/json', { parseAs: 'buffer' }, (req, body, done) => {
36
+ // Store raw bytes on request for later use
37
+ req.rawBody = body;
38
+ try {
39
+ const json = JSON.parse(body.toString('utf-8'));
40
+ done(null, json);
41
+ }
42
+ catch (err) {
43
+ done(err, undefined);
44
+ }
45
+ });
57
46
  // Health check endpoint
58
47
  fastify.get('/health', async () => {
59
48
  return { status: 'ok', timestamp: new Date().toISOString() };
60
49
  });
61
50
  // Main messages endpoint
62
- fastify.post('/v1/messages', {
63
- config: {
64
- rawBody: true,
65
- },
66
- }, handleMessages);
51
+ fastify.post('/v1/messages', handleMessages);
67
52
  // Catch-all for other Anthropic endpoints (pass through)
68
53
  fastify.all('/*', async (request, reply) => {
69
54
  fastify.log.warn(`Unhandled endpoint: ${request.method} ${request.url}`);
@@ -78,14 +63,12 @@ async function handleMessages(request, reply) {
78
63
  const logger = request.log;
79
64
  const startTime = Date.now();
80
65
  const model = request.body.model;
81
- // Skip Haiku subagents - forward directly without any tracking
82
- // Haiku requests are Task tool spawns for exploration, they don't make decisions
83
- // All reasoning and decisions happen in the main model (Opus/Sonnet)
84
66
  if (model.includes('haiku')) {
85
67
  logger.info({ msg: 'Skipping Haiku subagent', model });
86
68
  try {
87
- const result = await forwardToAnthropic(request.body, request.headers, logger);
88
- const latency = Date.now() - startTime;
69
+ // Force non-streaming for Haiku too
70
+ const haikusBody = { ...request.body, stream: false };
71
+ const result = await forwardToAnthropic(haikusBody, request.headers, logger);
89
72
  return reply
90
73
  .status(result.statusCode)
91
74
  .header('content-type', 'application/json')
@@ -110,6 +93,7 @@ async function handleMessages(request, reply) {
110
93
  promptCount: sessionInfo.promptCount,
111
94
  projectPath: sessionInfo.projectPath,
112
95
  });
96
+ const currentRequestId = getNextRequestId();
113
97
  logger.info({
114
98
  msg: 'Incoming request',
115
99
  sessionId: sessionInfo.sessionId.substring(0, 8),
@@ -117,27 +101,136 @@ async function handleMessages(request, reply) {
117
101
  model: request.body.model,
118
102
  messageCount: request.body.messages?.length || 0,
119
103
  });
120
- // === PRE-HANDLER: Modify request if needed ===
121
- const modifiedBody = await preProcessRequest(request.body, sessionInfo, logger);
122
- // === FORWARD TO ANTHROPIC ===
104
+ // Log REQUEST to file
105
+ const rawBodySize = request.rawBody?.length || 0;
106
+ proxyLog({
107
+ requestId: currentRequestId,
108
+ type: 'REQUEST',
109
+ sessionId: sessionInfo.sessionId.substring(0, 8),
110
+ data: {
111
+ model: request.body.model,
112
+ messageCount: request.body.messages?.length || 0,
113
+ promptCount: sessionInfo.promptCount,
114
+ rawBodySize,
115
+ },
116
+ });
117
+ // Process request to get injection text
118
+ // __grovInjection = team memory (system prompt, cached)
119
+ // __grovUserMsgInjection = dynamic content (user message, delta only)
120
+ const processedBody = await preProcessRequest(request.body, sessionInfo, logger, detectRequestType);
121
+ const systemInjection = processedBody.__grovInjection;
122
+ const userMsgInjection = processedBody.__grovUserMsgInjection;
123
+ // Get raw body bytes
124
+ const rawBody = request.rawBody;
125
+ let rawBodyStr = rawBody?.toString('utf-8') || '';
126
+ // Track injection sizes for logging
127
+ let systemInjectionSize = 0;
128
+ let userMsgInjectionSize = 0;
129
+ let systemSuccess = false;
130
+ let userMsgSuccess = false;
131
+ // 1. Inject team memory into SYSTEM prompt (cached, constant)
132
+ if (systemInjection && rawBodyStr) {
133
+ const result = injectIntoRawBody(rawBodyStr, '\n\n' + systemInjection);
134
+ rawBodyStr = result.modified;
135
+ systemInjectionSize = systemInjection.length;
136
+ systemSuccess = result.success;
137
+ }
138
+ // 2. Inject dynamic content into LAST USER MESSAGE (delta only)
139
+ if (userMsgInjection && rawBodyStr) {
140
+ rawBodyStr = appendToLastUserMessage(rawBodyStr, userMsgInjection);
141
+ userMsgInjectionSize = userMsgInjection.length;
142
+ userMsgSuccess = true; // appendToLastUserMessage doesn't return success flag
143
+ }
144
+ // Determine final body to send
145
+ let finalBodyToSend;
146
+ if (systemInjection || userMsgInjection) {
147
+ finalBodyToSend = rawBodyStr;
148
+ // Log INJECTION to file with full details
149
+ const wasCached = processedBody.__grovInjectionCached;
150
+ proxyLog({
151
+ requestId: currentRequestId,
152
+ type: 'INJECTION',
153
+ sessionId: sessionInfo.sessionId.substring(0, 8),
154
+ data: {
155
+ systemInjectionSize,
156
+ userMsgInjectionSize,
157
+ totalInjectionSize: systemInjectionSize + userMsgInjectionSize,
158
+ originalSize: rawBody?.length || 0,
159
+ finalSize: rawBodyStr.length,
160
+ systemSuccess,
161
+ userMsgSuccess,
162
+ teamMemoryCached: wasCached,
163
+ // Include actual content for debugging (truncated for log readability)
164
+ systemInjectionPreview: systemInjection ? systemInjection.substring(0, 200) + (systemInjection.length > 200 ? '...' : '') : null,
165
+ userMsgInjectionContent: userMsgInjection || null, // Full content since it's small
166
+ },
167
+ });
168
+ }
169
+ else if (rawBody) {
170
+ // No injection, use original raw bytes
171
+ finalBodyToSend = rawBody;
172
+ }
173
+ else {
174
+ // Fallback to re-serialization (shouldn't happen normally)
175
+ finalBodyToSend = JSON.stringify(processedBody);
176
+ }
177
+ const forwardStart = Date.now();
123
178
  try {
124
- const result = await forwardToAnthropic(modifiedBody, request.headers, logger);
125
- // === POST-HANDLER: Process response with task orchestration ===
179
+ // Forward: raw bytes (with injection inserted) or original raw bytes
180
+ const result = await forwardToAnthropic(processedBody, request.headers, logger, typeof finalBodyToSend === 'string' ? Buffer.from(finalBodyToSend, 'utf-8') : finalBodyToSend);
181
+ const forwardLatency = Date.now() - forwardStart;
182
+ // FIRE-AND-FORGET: Don't block response to Claude Code
183
+ // This prevents retry loops caused by Haiku calls adding latency
126
184
  if (result.statusCode === 200 && isAnthropicResponse(result.body)) {
127
- await postProcessResponse(result.body, sessionInfo, request.body, logger);
185
+ // Prepare extended cache data (only if enabled)
186
+ const extendedCacheData = config.EXTENDED_CACHE_ENABLED ? {
187
+ headers: buildSafeHeaders(request.headers),
188
+ rawBody: typeof finalBodyToSend === 'string' ? Buffer.from(finalBodyToSend, 'utf-8') : finalBodyToSend,
189
+ } : undefined;
190
+ postProcessResponse(result.body, sessionInfo, request.body, logger, extendedCacheData)
191
+ .catch(err => console.error('[GROV] postProcess error:', err));
128
192
  }
129
- // Return response to Claude Code (unmodified)
130
193
  const latency = Date.now() - startTime;
194
+ const filteredHeaders = filterResponseHeaders(result.headers);
195
+ // Log token usage (always to console, file only in debug mode)
196
+ if (isAnthropicResponse(result.body)) {
197
+ const usage = extractTokenUsage(result.body);
198
+ // Console: compact token summary (always shown)
199
+ logTokenUsage(currentRequestId, usage, latency);
200
+ // File: detailed response log (debug mode only)
201
+ proxyLog({
202
+ requestId: currentRequestId,
203
+ type: 'RESPONSE',
204
+ sessionId: sessionInfo.sessionId.substring(0, 8),
205
+ data: {
206
+ statusCode: result.statusCode,
207
+ latencyMs: latency,
208
+ forwardLatencyMs: forwardLatency,
209
+ inputTokens: usage.inputTokens,
210
+ outputTokens: usage.outputTokens,
211
+ cacheCreation: usage.cacheCreation,
212
+ cacheRead: usage.cacheRead,
213
+ cacheHitRatio: usage.cacheRead > 0 ? (usage.cacheRead / (usage.cacheRead + usage.cacheCreation)).toFixed(2) : '0.00',
214
+ wasSSE: result.wasSSE,
215
+ },
216
+ });
217
+ }
218
+ // If response was SSE, forward raw SSE to Claude Code (it expects streaming)
219
+ // Otherwise, send JSON
220
+ const isSSEResponse = result.wasSSE;
221
+ const responseContentType = isSSEResponse ? 'text/event-stream; charset=utf-8' : 'application/json';
222
+ const responseBody = isSSEResponse ? result.rawBody : JSON.stringify(result.body);
131
223
  logger.info({
132
224
  msg: 'Request complete',
133
225
  statusCode: result.statusCode,
134
226
  latencyMs: latency,
227
+ wasSSE: isSSEResponse,
135
228
  });
136
229
  return reply
137
230
  .status(result.statusCode)
138
- .header('content-type', 'application/json')
139
- .headers(filterResponseHeaders(result.headers))
140
- .send(JSON.stringify(result.body));
231
+ .header('content-type', responseContentType)
232
+ .headers(filteredHeaders)
233
+ .send(responseBody);
141
234
  }
142
235
  catch (error) {
143
236
  if (isForwardError(error)) {
@@ -215,97 +308,38 @@ async function getOrCreateSession(request, logger) {
215
308
  projectPath,
216
309
  };
217
310
  activeSessions.set(tempSessionId, sessionInfo);
311
+ // Note: team memory is now GLOBAL (not per session), no propagation needed
218
312
  logger.info({ msg: 'No existing session, will create after task analysis' });
219
313
  return { ...sessionInfo, isNew: true, currentSession: null, completedSession };
220
314
  }
221
315
  /**
222
- * Pre-process request before forwarding
223
- * - Context injection
224
- * - CLEAR operation
316
+ * Detect request type: 'first', 'continuation', or 'retry'
317
+ * - first: new user message (messageCount changed, last msg is user without tool_result)
318
+ * - continuation: tool result (messageCount changed, last msg has tool_result)
319
+ * - retry: same messageCount as before
225
320
  */
226
- async function preProcessRequest(body, sessionInfo, logger) {
227
- const modified = { ...body };
228
- // FIRST: Always inject team memory context (doesn't require sessionState)
229
- const mentionedFiles = extractFilesFromMessages(modified.messages || []);
230
- const teamContext = buildTeamMemoryContext(sessionInfo.projectPath, mentionedFiles);
231
- if (teamContext) {
232
- appendToSystemPrompt(modified, '\n\n' + teamContext);
233
- }
234
- // THEN: Session-specific operations
235
- const sessionState = getSessionState(sessionInfo.sessionId);
236
- if (!sessionState) {
237
- return modified; // Injection already happened above!
238
- }
239
- // Extract latest user message for drift checking
240
- const latestUserMessage = extractGoalFromMessages(body.messages) || '';
241
- // CLEAR operation if token threshold exceeded
242
- if ((sessionState.token_count || 0) > config.TOKEN_CLEAR_THRESHOLD) {
243
- logger.info({
244
- msg: 'Token threshold exceeded, initiating CLEAR',
245
- tokenCount: sessionState.token_count,
246
- threshold: config.TOKEN_CLEAR_THRESHOLD,
247
- });
248
- // Generate summary from session state + steps
249
- let summary;
250
- if (isSummaryAvailable()) {
251
- const steps = getValidatedSteps(sessionInfo.sessionId);
252
- summary = await generateSessionSummary(sessionState, steps);
253
- }
254
- else {
255
- const files = getValidatedSteps(sessionInfo.sessionId).flatMap(s => s.files);
256
- summary = `PREVIOUS SESSION CONTEXT:
257
- Goal: ${sessionState.original_goal || 'Not specified'}
258
- Files worked on: ${[...new Set(files)].slice(0, 10).join(', ') || 'None'}
259
- Please continue from where you left off.`;
260
- }
261
- // Clear messages and inject summary
262
- modified.messages = [];
263
- appendToSystemPrompt(modified, '\n\n' + summary);
264
- // Update session state
265
- markCleared(sessionInfo.sessionId);
266
- logger.info({
267
- msg: 'CLEAR completed',
268
- summaryLength: summary.length,
269
- });
321
+ function detectRequestType(messages, projectPath) {
322
+ const currentCount = messages?.length || 0;
323
+ const lastCount = lastMessageCount.get(projectPath);
324
+ lastMessageCount.set(projectPath, currentCount);
325
+ // Same messageCount = retry
326
+ if (lastCount !== undefined && currentCount === lastCount) {
327
+ return 'retry';
270
328
  }
271
- // Check if session is in drifted or forced mode
272
- if (sessionState.session_mode === 'drifted' || sessionState.session_mode === 'forced') {
273
- const recentSteps = getRecentSteps(sessionInfo.sessionId, 5);
274
- // FORCED MODE: escalation >= 3 -> Haiku generates recovery prompt
275
- if (sessionState.escalation_count >= 3 || sessionState.session_mode === 'forced') {
276
- // Update mode to forced if not already
277
- if (sessionState.session_mode !== 'forced') {
278
- updateSessionMode(sessionInfo.sessionId, 'forced');
279
- }
280
- const lastDrift = lastDriftResults.get(sessionInfo.sessionId);
281
- const driftResult = lastDrift || await checkDrift({ sessionState, recentSteps, latestUserMessage });
282
- const forcedRecovery = await generateForcedRecovery(sessionState, recentSteps.map(s => ({ actionType: s.action_type, files: s.files })), driftResult);
283
- appendToSystemPrompt(modified, forcedRecovery.injectionText);
284
- logger.info({
285
- msg: 'FORCED MODE - Injected Haiku recovery prompt',
286
- escalation: sessionState.escalation_count,
287
- mandatoryAction: forcedRecovery.mandatoryAction.substring(0, 50),
288
- });
289
- }
290
- else {
291
- // DRIFTED MODE: normal correction injection
292
- const driftResult = await checkDrift({ sessionState, recentSteps, latestUserMessage });
293
- const correctionLevel = scoreToCorrectionLevel(driftResult.score);
294
- if (correctionLevel) {
295
- const correction = buildCorrection(driftResult, sessionState, correctionLevel);
296
- const correctionText = formatCorrectionForInjection(correction);
297
- appendToSystemPrompt(modified, correctionText);
298
- logger.info({
299
- msg: 'Injected correction',
300
- level: correctionLevel,
301
- score: driftResult.score,
302
- });
303
- }
329
+ // No messages or no last message = first
330
+ if (!messages || messages.length === 0)
331
+ return 'first';
332
+ const lastMessage = messages[messages.length - 1];
333
+ // Check if last message is tool_result (continuation)
334
+ if (lastMessage.role === 'user') {
335
+ const content = lastMessage.content;
336
+ if (Array.isArray(content)) {
337
+ const hasToolResult = content.some((block) => typeof block === 'object' && block !== null && block.type === 'tool_result');
338
+ if (hasToolResult)
339
+ return 'continuation';
304
340
  }
305
341
  }
306
- // Note: Team memory context injection is now at the TOP of preProcessRequest()
307
- // so it runs even when sessionState is null (new sessions)
308
- return modified;
342
+ return 'first';
309
343
  }
310
344
  /**
311
345
  * Post-process response after receiving from Anthropic
@@ -317,7 +351,7 @@ Please continue from where you left off.`;
317
351
  * - Recovery alignment check (Section 4.4)
318
352
  * - Team memory triggers (Section 4.6)
319
353
  */
320
- async function postProcessResponse(response, sessionInfo, requestBody, logger) {
354
+ async function postProcessResponse(response, sessionInfo, requestBody, logger, extendedCacheData) {
321
355
  // Parse tool_use blocks
322
356
  const actions = parseToolUseBlocks(response);
323
357
  // Extract text content for analysis
@@ -339,6 +373,29 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
339
373
  if (isWarmup) {
340
374
  return;
341
375
  }
376
+ // === EXTENDED CACHE: Capture for keep-alive ===
377
+ // Only capture on end_turn (user idle starts now, not during tool_use loops)
378
+ if (isEndTurn && extendedCacheData) {
379
+ const rawStr = extendedCacheData.rawBody.toString('utf-8');
380
+ const hasSystem = rawStr.includes('"system"');
381
+ const hasTools = rawStr.includes('"tools"');
382
+ const hasCacheCtrl = rawStr.includes('"cache_control"');
383
+ const msgMatch = rawStr.match(/"messages"\s*:\s*\[/);
384
+ const msgPos = msgMatch?.index ?? -1;
385
+ // Use projectPath as key (one entry per conversation, not per task)
386
+ const cacheKey = sessionInfo.projectPath;
387
+ // Evict oldest if at capacity (only for NEW entries, not updates)
388
+ if (!extendedCache.has(cacheKey)) {
389
+ evictOldestCacheEntry();
390
+ }
391
+ extendedCache.set(cacheKey, {
392
+ headers: extendedCacheData.headers,
393
+ rawBody: extendedCacheData.rawBody,
394
+ timestamp: Date.now(),
395
+ keepAliveCount: 0,
396
+ });
397
+ log(`Extended cache: CAPTURE project=${cacheKey.split('/').pop()} size=${rawStr.length} sys=${hasSystem} tools=${hasTools} cache_ctrl=${hasCacheCtrl} msg_pos=${msgPos}`);
398
+ }
342
399
  // If not end_turn (tool_use in progress), skip task orchestration but keep session
343
400
  if (!isEndTurn) {
344
401
  // Use existing session or create minimal one without LLM calls
@@ -361,23 +418,44 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
361
418
  promptCount: 1,
362
419
  projectPath: sessionInfo.projectPath,
363
420
  });
421
+ // Note: team memory is now GLOBAL (not per session), no propagation needed
364
422
  }
365
423
  }
366
424
  else if (isTaskAnalysisAvailable()) {
367
425
  // Use completed session for comparison if no active session
368
426
  const sessionForComparison = sessionInfo.currentSession || sessionInfo.completedSession;
427
+ // Extract conversation history for context-aware task analysis
428
+ const conversationHistory = extractConversationHistory(requestBody.messages || []);
369
429
  try {
370
- const taskAnalysis = await analyzeTaskContext(sessionForComparison, latestUserMessage, recentSteps, textContent);
430
+ const taskAnalysis = await analyzeTaskContext(sessionForComparison, latestUserMessage, recentSteps, textContent, conversationHistory);
371
431
  logger.info({
372
432
  msg: 'Task analysis',
373
433
  action: taskAnalysis.action,
374
- topic_match: taskAnalysis.topic_match,
434
+ task_type: taskAnalysis.task_type,
375
435
  goal: taskAnalysis.current_goal?.substring(0, 50),
376
436
  reasoning: taskAnalysis.reasoning,
377
437
  });
438
+ // TASK LOG: Analysis result
439
+ taskLog('TASK_ANALYSIS', {
440
+ sessionId: sessionInfo.sessionId,
441
+ action: taskAnalysis.action,
442
+ task_type: taskAnalysis.task_type,
443
+ goal: taskAnalysis.current_goal || '',
444
+ reasoning: taskAnalysis.reasoning || '',
445
+ userMessage: latestUserMessage.substring(0, 80),
446
+ hasCurrentSession: !!sessionInfo.currentSession,
447
+ hasCompletedSession: !!sessionInfo.completedSession,
448
+ });
378
449
  // Update recent steps with reasoning (backfill from end_turn response)
379
450
  if (taskAnalysis.step_reasoning && activeSessionId) {
380
451
  const updatedCount = updateRecentStepsReasoning(activeSessionId, taskAnalysis.step_reasoning);
452
+ // TASK LOG: Step reasoning update
453
+ taskLog('STEP_REASONING', {
454
+ sessionId: activeSessionId,
455
+ stepsUpdated: updatedCount,
456
+ reasoningEntries: Object.keys(taskAnalysis.step_reasoning).length,
457
+ stepIds: Object.keys(taskAnalysis.step_reasoning).join(','),
458
+ });
381
459
  }
382
460
  // Handle task orchestration based on analysis
383
461
  switch (taskAnalysis.action) {
@@ -396,6 +474,13 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
396
474
  });
397
475
  activeSession.original_goal = taskAnalysis.current_goal;
398
476
  }
477
+ // TASK LOG: Continue existing session
478
+ taskLog('ORCHESTRATION_CONTINUE', {
479
+ sessionId: activeSessionId,
480
+ source: 'current_session',
481
+ goal: activeSession.original_goal,
482
+ goalUpdated: taskAnalysis.current_goal !== activeSession.original_goal,
483
+ });
399
484
  }
400
485
  else if (sessionInfo.completedSession) {
401
486
  // Reactivate completed session (user wants to continue/add to it)
@@ -411,6 +496,13 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
411
496
  promptCount: 1,
412
497
  projectPath: sessionInfo.projectPath,
413
498
  });
499
+ // Note: team memory is now GLOBAL (not per session), no propagation needed
500
+ // TASK LOG: Reactivate completed session
501
+ taskLog('ORCHESTRATION_CONTINUE', {
502
+ sessionId: activeSessionId,
503
+ source: 'reactivated_completed',
504
+ goal: activeSession.original_goal,
505
+ });
414
506
  }
415
507
  break;
416
508
  case 'new_task': {
@@ -430,9 +522,24 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
430
522
  try {
431
523
  intentData = await extractIntent(latestUserMessage);
432
524
  logger.info({ msg: 'Intent extracted for new task', scopeCount: intentData.expected_scope.length });
525
+ // TASK LOG: Intent extraction for new_task
526
+ taskLog('INTENT_EXTRACTION', {
527
+ sessionId: sessionInfo.sessionId,
528
+ context: 'new_task',
529
+ goal: intentData.goal,
530
+ scopeCount: intentData.expected_scope.length,
531
+ scope: intentData.expected_scope.join(', '),
532
+ constraints: intentData.constraints.join(', '),
533
+ keywords: intentData.keywords.join(', '),
534
+ });
433
535
  }
434
536
  catch (err) {
435
537
  logger.info({ msg: 'Intent extraction failed, using basic goal', error: String(err) });
538
+ taskLog('INTENT_EXTRACTION_FAILED', {
539
+ sessionId: sessionInfo.sessionId,
540
+ context: 'new_task',
541
+ error: String(err),
542
+ });
436
543
  }
437
544
  }
438
545
  const newSessionId = randomUUID();
@@ -452,6 +559,42 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
452
559
  projectPath: sessionInfo.projectPath,
453
560
  });
454
561
  logger.info({ msg: 'Created new task session', sessionId: newSessionId.substring(0, 8) });
562
+ // TASK LOG: New task created
563
+ taskLog('ORCHESTRATION_NEW_TASK', {
564
+ sessionId: newSessionId,
565
+ goal: intentData.goal,
566
+ scopeCount: intentData.expected_scope.length,
567
+ keywordsCount: intentData.keywords.length,
568
+ });
569
+ // Q&A AUTO-SAVE: If this is an information request with a substantive answer
570
+ // AND no tool calls, save immediately since pure Q&A completes in a single turn.
571
+ // If there ARE tool calls (e.g., Read for "Analyze X"), wait for them to complete
572
+ // so steps get captured properly before saving.
573
+ if (taskAnalysis.task_type === 'information' && textContent.length > 100 && actions.length === 0) {
574
+ logger.info({ msg: 'Q&A detected (pure text) - saving immediately', sessionId: newSessionId.substring(0, 8) });
575
+ taskLog('QA_AUTO_SAVE', {
576
+ sessionId: newSessionId,
577
+ goal: intentData.goal,
578
+ responseLength: textContent.length,
579
+ toolCalls: 0,
580
+ });
581
+ // Store the response for reasoning extraction
582
+ updateSessionState(newSessionId, {
583
+ final_response: textContent.substring(0, 10000),
584
+ });
585
+ // Save to team memory and mark complete
586
+ await saveToTeamMemory(newSessionId, 'complete');
587
+ markSessionCompleted(newSessionId);
588
+ }
589
+ else if (taskAnalysis.task_type === 'information' && actions.length > 0) {
590
+ // Q&A with tool calls - don't auto-save, let it continue until task_complete
591
+ logger.info({ msg: 'Q&A with tool calls - waiting for completion', sessionId: newSessionId.substring(0, 8), toolCalls: actions.length });
592
+ taskLog('QA_DEFERRED', {
593
+ sessionId: newSessionId,
594
+ goal: intentData.goal,
595
+ toolCalls: actions.length,
596
+ });
597
+ }
455
598
  break;
456
599
  }
457
600
  case 'subtask': {
@@ -465,8 +608,17 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
465
608
  if (isIntentExtractionAvailable() && latestUserMessage.length > 10) {
466
609
  try {
467
610
  intentData = await extractIntent(latestUserMessage);
611
+ taskLog('INTENT_EXTRACTION', {
612
+ sessionId: sessionInfo.sessionId,
613
+ context: 'subtask',
614
+ goal: intentData.goal,
615
+ scope: intentData.expected_scope.join(', '),
616
+ keywords: intentData.keywords.join(', '),
617
+ });
618
+ }
619
+ catch (err) {
620
+ taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'subtask', error: String(err) });
468
621
  }
469
- catch { /* use fallback */ }
470
622
  }
471
623
  const parentId = sessionInfo.currentSession?.session_id || taskAnalysis.parent_task_id;
472
624
  const subtaskId = randomUUID();
@@ -487,6 +639,12 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
487
639
  projectPath: sessionInfo.projectPath,
488
640
  });
489
641
  logger.info({ msg: 'Created subtask session', sessionId: subtaskId.substring(0, 8), parent: parentId?.substring(0, 8) });
642
+ // TASK LOG: Subtask created
643
+ taskLog('ORCHESTRATION_SUBTASK', {
644
+ sessionId: subtaskId,
645
+ parentId: parentId || 'none',
646
+ goal: intentData.goal,
647
+ });
490
648
  break;
491
649
  }
492
650
  case 'parallel_task': {
@@ -500,8 +658,17 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
500
658
  if (isIntentExtractionAvailable() && latestUserMessage.length > 10) {
501
659
  try {
502
660
  intentData = await extractIntent(latestUserMessage);
661
+ taskLog('INTENT_EXTRACTION', {
662
+ sessionId: sessionInfo.sessionId,
663
+ context: 'parallel_task',
664
+ goal: intentData.goal,
665
+ scope: intentData.expected_scope.join(', '),
666
+ keywords: intentData.keywords.join(', '),
667
+ });
668
+ }
669
+ catch (err) {
670
+ taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'parallel_task', error: String(err) });
503
671
  }
504
- catch { /* use fallback */ }
505
672
  }
506
673
  const parentId = sessionInfo.currentSession?.session_id || taskAnalysis.parent_task_id;
507
674
  const parallelId = randomUUID();
@@ -522,22 +689,89 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
522
689
  projectPath: sessionInfo.projectPath,
523
690
  });
524
691
  logger.info({ msg: 'Created parallel task session', sessionId: parallelId.substring(0, 8), parent: parentId?.substring(0, 8) });
692
+ // TASK LOG: Parallel task created
693
+ taskLog('ORCHESTRATION_PARALLEL', {
694
+ sessionId: parallelId,
695
+ parentId: parentId || 'none',
696
+ goal: intentData.goal,
697
+ });
525
698
  break;
526
699
  }
527
700
  case 'task_complete': {
528
701
  // Save to team memory and mark as completed (don't delete yet - keep for new_task detection)
529
702
  if (sessionInfo.currentSession) {
530
703
  try {
704
+ // Set final_response BEFORE saving so reasoning extraction has the data
705
+ updateSessionState(sessionInfo.currentSession.session_id, {
706
+ final_response: textContent.substring(0, 10000),
707
+ });
531
708
  await saveToTeamMemory(sessionInfo.currentSession.session_id, 'complete');
532
709
  markSessionCompleted(sessionInfo.currentSession.session_id);
533
710
  activeSessions.delete(sessionInfo.currentSession.session_id);
534
711
  lastDriftResults.delete(sessionInfo.currentSession.session_id);
712
+ // TASK LOG: Task completed
713
+ taskLog('ORCHESTRATION_TASK_COMPLETE', {
714
+ sessionId: sessionInfo.currentSession.session_id,
715
+ goal: sessionInfo.currentSession.original_goal,
716
+ });
717
+ // PLANNING COMPLETE: Trigger CLEAR-like reset for implementation phase
718
+ // This ensures next request starts fresh with planning context from team memory
719
+ if (taskAnalysis.task_type === 'planning' && isSummaryAvailable()) {
720
+ try {
721
+ const allSteps = getValidatedSteps(sessionInfo.currentSession.session_id);
722
+ const planSummary = await generateSessionSummary(sessionInfo.currentSession, allSteps, 2000);
723
+ // Store for next request to trigger CLEAR
724
+ setPendingPlanClear({
725
+ projectPath: sessionInfo.projectPath,
726
+ summary: planSummary,
727
+ });
728
+ // Cache invalidation happens in response-processor.ts after syncTask completes
729
+ logger.info({
730
+ msg: 'PLANNING_CLEAR triggered',
731
+ sessionId: sessionInfo.currentSession.session_id.substring(0, 8),
732
+ summaryLen: planSummary.length,
733
+ });
734
+ }
735
+ catch {
736
+ // Silent fail - planning CLEAR is optional enhancement
737
+ }
738
+ }
535
739
  logger.info({ msg: 'Task complete - saved to team memory, marked completed' });
536
740
  }
537
741
  catch (err) {
538
742
  logger.info({ msg: 'Failed to save completed task', error: String(err) });
539
743
  }
540
744
  }
745
+ else if (textContent.length > 100) {
746
+ // NEW: Handle "instant complete" - task that's new AND immediately complete
747
+ // This happens for simple Q&A when Haiku says task_complete without existing session
748
+ // Example: user asks clarification question, answer is provided in single turn
749
+ try {
750
+ const newSessionId = randomUUID();
751
+ const instantSession = createSessionState({
752
+ session_id: newSessionId,
753
+ project_path: sessionInfo.projectPath,
754
+ original_goal: taskAnalysis.current_goal || latestUserMessage.substring(0, 500),
755
+ task_type: 'main',
756
+ });
757
+ // Set final_response for reasoning extraction
758
+ updateSessionState(newSessionId, {
759
+ final_response: textContent.substring(0, 10000),
760
+ });
761
+ await saveToTeamMemory(newSessionId, 'complete');
762
+ markSessionCompleted(newSessionId);
763
+ logger.info({ msg: 'Instant complete - new task saved immediately', sessionId: newSessionId.substring(0, 8) });
764
+ // TASK LOG: Instant complete (new task that finished in one turn)
765
+ taskLog('ORCHESTRATION_TASK_COMPLETE', {
766
+ sessionId: newSessionId,
767
+ goal: taskAnalysis.current_goal || latestUserMessage.substring(0, 80),
768
+ source: 'instant_complete',
769
+ });
770
+ }
771
+ catch (err) {
772
+ logger.info({ msg: 'Failed to save instant complete task', error: String(err) });
773
+ }
774
+ }
541
775
  return; // Done, no more processing needed
542
776
  }
543
777
  case 'subtask_complete': {
@@ -556,6 +790,12 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
556
790
  activeSessionId = parentId;
557
791
  activeSession = parentSession;
558
792
  logger.info({ msg: 'Subtask complete - returning to parent', parent: parentId.substring(0, 8) });
793
+ // TASK LOG: Subtask completed
794
+ taskLog('ORCHESTRATION_SUBTASK_COMPLETE', {
795
+ sessionId: sessionInfo.currentSession.session_id,
796
+ parentId: parentId,
797
+ goal: sessionInfo.currentSession.original_goal,
798
+ });
559
799
  }
560
800
  }
561
801
  }
@@ -580,8 +820,16 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
580
820
  if (isIntentExtractionAvailable() && latestUserMessage.length > 10) {
581
821
  try {
582
822
  intentData = await extractIntent(latestUserMessage);
823
+ taskLog('INTENT_EXTRACTION', {
824
+ sessionId: sessionInfo.sessionId,
825
+ context: 'fallback_analysis_failed',
826
+ goal: intentData.goal,
827
+ scope: intentData.expected_scope.join(', '),
828
+ });
829
+ }
830
+ catch (err) {
831
+ taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'fallback_analysis_failed', error: String(err) });
583
832
  }
584
- catch { /* use fallback */ }
585
833
  }
586
834
  const newSessionId = randomUUID();
587
835
  activeSession = createSessionState({
@@ -599,6 +847,11 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
599
847
  }
600
848
  else {
601
849
  // No task analysis available - fallback with intent extraction
850
+ taskLog('TASK_ANALYSIS_UNAVAILABLE', {
851
+ sessionId: sessionInfo.sessionId,
852
+ hasCurrentSession: !!sessionInfo.currentSession,
853
+ userMessage: latestUserMessage.substring(0, 80),
854
+ });
602
855
  if (!sessionInfo.currentSession) {
603
856
  let intentData = {
604
857
  goal: latestUserMessage.substring(0, 500),
@@ -610,8 +863,16 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
610
863
  try {
611
864
  intentData = await extractIntent(latestUserMessage);
612
865
  logger.info({ msg: 'Intent extracted (fallback)', scopeCount: intentData.expected_scope.length });
866
+ taskLog('INTENT_EXTRACTION', {
867
+ sessionId: sessionInfo.sessionId,
868
+ context: 'no_analysis_available',
869
+ goal: intentData.goal,
870
+ scope: intentData.expected_scope.join(', '),
871
+ });
872
+ }
873
+ catch (err) {
874
+ taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'no_analysis_available', error: String(err) });
613
875
  }
614
- catch { /* use fallback */ }
615
876
  }
616
877
  const newSessionId = randomUUID();
617
878
  activeSession = createSessionState({
@@ -630,19 +891,64 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
630
891
  activeSessionId = sessionInfo.currentSession.session_id;
631
892
  }
632
893
  }
894
+ // NOTE: Auto-save on every end_turn was REMOVED
895
+ // Task saving is now controlled by Haiku's task analysis:
896
+ // - task_complete: Haiku detected task is done (Q&A answered, implementation verified, planning confirmed)
897
+ // - subtask_complete: Haiku detected subtask is done
898
+ // This ensures we only save when work is actually complete, not on every Claude response.
899
+ // See analyzeTaskContext() in llm-extractor.ts for the decision logic.
633
900
  // Extract token usage
634
901
  const usage = extractTokenUsage(response);
902
+ // Use cache metrics as actual context size (cacheCreation + cacheRead)
903
+ // This is what Anthropic bills for and what determines CLEAR threshold
904
+ const actualContextSize = usage.cacheCreation + usage.cacheRead;
635
905
  if (activeSession) {
636
- updateTokenCount(activeSessionId, usage.totalTokens);
906
+ // Set to actual context size (not cumulative - context size IS the total)
907
+ updateTokenCount(activeSessionId, actualContextSize);
637
908
  }
638
909
  logger.info({
639
910
  msg: 'Token usage',
640
911
  input: usage.inputTokens,
641
912
  output: usage.outputTokens,
642
913
  total: usage.totalTokens,
914
+ cacheCreation: usage.cacheCreation,
915
+ cacheRead: usage.cacheRead,
916
+ actualContextSize,
643
917
  activeSession: activeSessionId.substring(0, 8),
644
918
  });
919
+ // === CLEAR MODE PRE-COMPUTE (85% threshold) ===
920
+ // Pre-compute summary before hitting 100% threshold to avoid blocking Haiku call
921
+ const preComputeThreshold = Math.floor(config.TOKEN_CLEAR_THRESHOLD * 0.85);
922
+ // Use actualContextSize (cacheCreation + cacheRead) as the real context size
923
+ if (activeSession &&
924
+ actualContextSize > preComputeThreshold &&
925
+ !activeSession.pending_clear_summary &&
926
+ isSummaryAvailable()) {
927
+ // Get all validated steps for comprehensive summary
928
+ const allSteps = getValidatedSteps(activeSessionId);
929
+ // Generate summary asynchronously (fire-and-forget)
930
+ generateSessionSummary(activeSession, allSteps, 15000).then(summary => {
931
+ updateSessionState(activeSessionId, { pending_clear_summary: summary });
932
+ logger.info({
933
+ msg: 'CLEAR summary pre-computed',
934
+ actualContextSize,
935
+ threshold: preComputeThreshold,
936
+ summaryLength: summary.length,
937
+ });
938
+ }).catch(err => {
939
+ logger.info({ msg: 'CLEAR summary generation failed', error: String(err) });
940
+ });
941
+ }
942
+ // Capture final_response for ALL end_turn responses (not just Q&A)
943
+ // This preserves Claude's analysis even when tools were used
944
+ if (isEndTurn && textContent.length > 100 && activeSessionId) {
945
+ updateSessionState(activeSessionId, {
946
+ final_response: textContent.substring(0, 10000),
947
+ });
948
+ }
645
949
  if (actions.length === 0) {
950
+ // Final response (no tool calls)
951
+ // NOTE: Task saving is controlled by Haiku's task analysis (see switch case 'task_complete' above)
646
952
  return;
647
953
  }
648
954
  logger.info({
@@ -700,11 +1006,51 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
700
1006
  updateSessionMode(activeSessionId, 'drifted');
701
1007
  markWaitingForRecovery(activeSessionId, true);
702
1008
  incrementEscalation(activeSessionId);
1009
+ // Pre-compute correction for next request (fire-and-forget pattern)
1010
+ // This avoids blocking Haiku calls in preProcessRequest
1011
+ const correction = buildCorrection(driftResult, activeSession, correctionLevel);
1012
+ const correctionText = formatCorrectionForInjection(correction);
1013
+ updateSessionState(activeSessionId, { pending_correction: correctionText });
1014
+ logger.info({
1015
+ msg: 'Pre-computed correction saved',
1016
+ level: correctionLevel,
1017
+ correctionLength: correctionText.length,
1018
+ });
1019
+ }
1020
+ else if (correctionLevel) {
1021
+ // Nudge or correct level - still save correction but don't change mode
1022
+ const correction = buildCorrection(driftResult, activeSession, correctionLevel);
1023
+ const correctionText = formatCorrectionForInjection(correction);
1024
+ updateSessionState(activeSessionId, { pending_correction: correctionText });
1025
+ logger.info({
1026
+ msg: 'Pre-computed mild correction saved',
1027
+ level: correctionLevel,
1028
+ });
703
1029
  }
704
1030
  else if (driftScore >= 8) {
705
1031
  updateSessionMode(activeSessionId, 'normal');
706
1032
  markWaitingForRecovery(activeSessionId, false);
707
1033
  lastDriftResults.delete(activeSessionId);
1034
+ // Clear any pending correction since drift is resolved
1035
+ updateSessionState(activeSessionId, { pending_correction: undefined });
1036
+ }
1037
+ // FORCED MODE: escalation >= 3 triggers Haiku-generated recovery
1038
+ const currentEscalation = activeSession.escalation_count || 0;
1039
+ if (currentEscalation >= 3 && driftScore < 8) {
1040
+ updateSessionMode(activeSessionId, 'forced');
1041
+ // Generate forced recovery asynchronously (fire-and-forget within fire-and-forget)
1042
+ generateForcedRecovery(activeSession, recentSteps.map(s => ({ actionType: s.action_type, files: s.files })), driftResult).then(forcedRecovery => {
1043
+ updateSessionState(activeSessionId, {
1044
+ pending_forced_recovery: forcedRecovery.injectionText,
1045
+ });
1046
+ logger.info({
1047
+ msg: 'Pre-computed forced recovery saved',
1048
+ escalation: currentEscalation,
1049
+ mandatoryAction: forcedRecovery.mandatoryAction?.substring(0, 50),
1050
+ });
1051
+ }).catch(err => {
1052
+ logger.info({ msg: 'Forced recovery generation failed', error: String(err) });
1053
+ });
708
1054
  }
709
1055
  updateLastChecked(activeSessionId, Date.now());
710
1056
  if (skipSteps) {
@@ -727,122 +1073,44 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
727
1073
  }
728
1074
  }
729
1075
  // Save each action as a step (with reasoning from Claude's text)
1076
+ // When multiple actions come from the same Claude response, they share identical reasoning.
1077
+ // We store reasoning only on the first action and set NULL for subsequent ones to avoid duplication.
1078
+ // At query time, we group steps by reasoning (non-NULL starts a group, NULLs continue it)
1079
+ // and reconstruct the full context: reasoning + all associated files/actions.
1080
+ let previousReasoning = null;
1081
+ logger.info({ msg: 'DEDUP_DEBUG', actionsCount: actions.length, textContentLen: textContent.length });
730
1082
  for (const action of actions) {
1083
+ const currentReasoning = textContent.substring(0, 1000);
1084
+ const isDuplicate = currentReasoning === previousReasoning;
1085
+ logger.info({
1086
+ msg: 'DEDUP_STEP',
1087
+ actionType: action.actionType,
1088
+ isDuplicate,
1089
+ prevLen: previousReasoning?.length || 0,
1090
+ currLen: currentReasoning.length
1091
+ });
1092
+ // Detect key decisions based on action type and reasoning content
1093
+ const isKeyDecision = !isDuplicate && detectKeyDecision(action, textContent);
731
1094
  createStep({
732
1095
  session_id: activeSessionId,
733
1096
  action_type: action.actionType,
734
1097
  files: action.files,
735
1098
  folders: action.folders,
736
1099
  command: action.command,
737
- reasoning: textContent.substring(0, 1000), // Claude's explanation (truncated)
1100
+ reasoning: isDuplicate ? undefined : currentReasoning,
738
1101
  drift_score: driftScore,
739
1102
  is_validated: !skipSteps,
1103
+ is_key_decision: isKeyDecision,
740
1104
  });
741
- }
742
- }
743
/**
 * Collect every plain-text block from an Anthropic response body.
 * @param {object} response - Parsed API response whose `content` is an array of blocks.
 * @returns {string} The text of all `type: 'text'` blocks, joined with newlines.
 */
function extractTextContent(response) {
    const textParts = [];
    for (const block of response.content) {
        if (block.type === 'text') {
            textParts.push(block.text);
        }
    }
    return textParts.join('\n');
}
752
/**
 * Heuristically detect task completion from Claude's response text.
 * Matching is case-insensitive; completion phrases are checked before
 * subtask phrases, so a text containing both reports 'complete'.
 * @param {string} text - Response text to scan.
 * @returns {'complete'|'subtask'|null} Trigger type, or null when nothing matches.
 */
function detectTaskCompletion(text) {
    const haystack = text.toLowerCase();
    // Strong completion indicators
    const completionPhrases = [
        'task is complete',
        'task complete',
        'implementation is complete',
        'implementation complete',
        'successfully implemented',
        'all changes have been made',
        'finished implementing',
        'completed the implementation',
        'done with the implementation',
        'completed all the',
        'all tests pass',
        'build succeeds',
    ];
    if (completionPhrases.some((phrase) => haystack.includes(phrase))) {
        return 'complete';
    }
    // Subtask completion indicators
    const subtaskPhrases = [
        'step complete',
        'phase complete',
        'finished this step',
        'moving on to',
        'now let\'s',
        'next step',
    ];
    if (subtaskPhrases.some((phrase) => haystack.includes(phrase))) {
        return 'subtask';
    }
    return null;
}
794
/**
 * Pull the working-directory path out of a request body's system prompt.
 * Handles both the legacy string form of `system` and the newer API format
 * where `system` is an array of { type: 'text', text: '...' } blocks.
 * @param {object} body - Proxied request body.
 * @returns {string|null} Trimmed path following "Working directory:", or null when absent.
 */
function extractProjectPath(body) {
    const { system } = body;
    let systemPrompt = '';
    if (typeof system === 'string') {
        systemPrompt = system;
    }
    else if (Array.isArray(system)) {
        // New API format: system is an array of {type: 'text', text: '...'}
        const textBlocks = system.filter((block) => block && typeof block === 'object' && block.type === 'text' && typeof block.text === 'string');
        systemPrompt = textBlocks.map((block) => block.text).join('\n');
    }
    const cwdMatch = /Working directory:\s*([^\n]+)/.exec(systemPrompt);
    return cwdMatch ? cwdMatch[1].trim() : null;
}
817
- /**
818
- * Extract goal from FIRST user message with text content
819
- * Skips tool_result blocks, filters out system-reminder tags
820
- */
821
- function extractGoalFromMessages(messages) {
822
- const userMessages = messages?.filter(m => m.role === 'user') || [];
823
- for (const userMsg of userMessages) {
824
- let rawContent = '';
825
- // Handle string content
826
- if (typeof userMsg.content === 'string') {
827
- rawContent = userMsg.content;
828
- }
829
- // Handle array content - look for text blocks (skip tool_result)
830
- if (Array.isArray(userMsg.content)) {
831
- const textBlocks = userMsg.content
832
- .filter((block) => block && typeof block === 'object' && block.type === 'text' && typeof block.text === 'string')
833
- .map(block => block.text);
834
- rawContent = textBlocks.join('\n');
835
- }
836
- // Remove <system-reminder>...</system-reminder> tags
837
- const cleanContent = rawContent
838
- .replace(/<system-reminder>[\s\S]*?<\/system-reminder>/g, '')
839
- .trim();
840
- // If we found valid text content, return it
841
- if (cleanContent && cleanContent.length >= 5) {
842
- return cleanContent.substring(0, 500);
1105
+ previousReasoning = currentReasoning;
1106
+ if (isKeyDecision) {
1107
+ logger.info({
1108
+ msg: 'Key decision detected',
1109
+ actionType: action.actionType,
1110
+ files: action.files.slice(0, 3),
1111
+ });
843
1112
  }
844
1113
  }
845
- return undefined;
846
1114
  }
847
1115
  /**
848
1116
  * Filter response headers for forwarding to client
@@ -852,10 +1120,16 @@ function filterResponseHeaders(headers) {
852
1120
  const allowedHeaders = [
853
1121
  'content-type',
854
1122
  'x-request-id',
1123
+ 'request-id',
1124
+ 'x-should-retry',
1125
+ 'retry-after',
1126
+ 'retry-after-ms',
855
1127
  'anthropic-ratelimit-requests-limit',
856
1128
  'anthropic-ratelimit-requests-remaining',
1129
+ 'anthropic-ratelimit-requests-reset',
857
1130
  'anthropic-ratelimit-tokens-limit',
858
1131
  'anthropic-ratelimit-tokens-remaining',
1132
+ 'anthropic-ratelimit-tokens-reset',
859
1133
  ];
860
1134
  for (const header of allowedHeaders) {
861
1135
  const value = headers[header];
@@ -878,19 +1152,84 @@ function isAnthropicResponse(body) {
878
1152
  }
879
1153
  /**
880
1154
  * Start the proxy server
1155
+ * @param options.debug - Enable debug logging to grov-proxy.log
881
1156
  */
882
- export async function startServer() {
1157
+ export async function startServer(options = {}) {
1158
+ // Set debug mode based on flag
1159
+ if (options.debug) {
1160
+ setDebugMode(true);
1161
+ console.log('[DEBUG] Logging to grov-proxy.log');
1162
+ }
883
1163
  const server = createServer();
1164
+ // Set server logger for background tasks
1165
+ serverLog = server.log;
884
1166
  // Cleanup old completed sessions (older than 24 hours)
885
- const cleanedUp = cleanupOldCompletedSessions();
886
- if (cleanedUp > 0) {
1167
+ cleanupOldCompletedSessions();
1168
+ // Cleanup stale active sessions (no activity for 1 hour)
1169
+ // Prevents old sessions from being reused in fresh Claude sessions
1170
+ const staleCount = cleanupStaleActiveSessions();
1171
+ if (staleCount > 0) {
1172
+ log(`Cleaned up ${staleCount} stale active session(s)`);
1173
+ }
1174
+ // Start extended cache timer if enabled
1175
+ let extendedCacheTimer = null;
1176
+ // Track active connections for graceful shutdown
1177
+ const activeConnections = new Set();
1178
+ let isShuttingDown = false;
1179
+ // Graceful shutdown handler (works with or without extended cache)
1180
+ const gracefulShutdown = () => {
1181
+ if (isShuttingDown)
1182
+ return;
1183
+ isShuttingDown = true;
1184
+ log('Shutdown initiated...');
1185
+ // 1. Stop extended cache timer if running
1186
+ if (extendedCacheTimer) {
1187
+ clearInterval(extendedCacheTimer);
1188
+ extendedCacheTimer = null;
1189
+ log('Extended cache: timer stopped');
1190
+ }
1191
+ // 2. Clear sensitive cache data
1192
+ if (extendedCache.size > 0) {
1193
+ log(`Extended cache: clearing ${extendedCache.size} entries`);
1194
+ for (const entry of extendedCache.values()) {
1195
+ for (const key of Object.keys(entry.headers)) {
1196
+ entry.headers[key] = '';
1197
+ }
1198
+ entry.rawBody = Buffer.alloc(0);
1199
+ }
1200
+ extendedCache.clear();
1201
+ }
1202
+ // 3. Stop accepting new connections
1203
+ server.close();
1204
+ // 4. Grace period (500ms) then force close remaining connections
1205
+ setTimeout(() => {
1206
+ if (activeConnections.size > 0) {
1207
+ log(`Force closing ${activeConnections.size} connection(s)`);
1208
+ for (const socket of activeConnections) {
1209
+ socket.destroy();
1210
+ }
1211
+ }
1212
+ log('Goodbye!');
1213
+ process.exit(0);
1214
+ }, 500);
1215
+ };
1216
+ process.on('SIGTERM', gracefulShutdown);
1217
+ process.on('SIGINT', gracefulShutdown);
1218
+ if (config.EXTENDED_CACHE_ENABLED) {
1219
+ extendedCacheTimer = setInterval(checkExtendedCache, 60_000);
1220
+ log('Extended cache: enabled (keep-alive timer started)');
887
1221
  }
888
1222
  try {
889
1223
  await server.listen({
890
1224
  host: config.HOST,
891
1225
  port: config.PORT,
892
1226
  });
893
- console.log(`✓ Grov Proxy: http://${config.HOST}:${config.PORT} ${config.ANTHROPIC_BASE_URL}`);
1227
+ // Track connections for graceful shutdown
1228
+ server.server.on('connection', (socket) => {
1229
+ activeConnections.add(socket);
1230
+ socket.on('close', () => activeConnections.delete(socket));
1231
+ });
1232
+ console.log(`Grov Proxy: http://${config.HOST}:${config.PORT} -> ${config.ANTHROPIC_BASE_URL}`);
894
1233
  return server;
895
1234
  }
896
1235
  catch (err) {