npm - grov - Versions diffs - 0.5.2 → 0.5.3 - Mend

grov 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

package/README.md +19 -1
package/dist/cli.js +8 -0
package/dist/lib/api-client.d.ts +18 -1
package/dist/lib/api-client.js +57 -0
package/dist/lib/llm-extractor.d.ts +14 -38
package/dist/lib/llm-extractor.js +380 -406
package/dist/lib/store/convenience.d.ts +40 -0
package/dist/lib/store/convenience.js +104 -0
package/dist/lib/store/database.d.ts +22 -0
package/dist/lib/store/database.js +375 -0
package/dist/lib/store/drift.d.ts +9 -0
package/dist/lib/store/drift.js +89 -0
package/dist/lib/store/index.d.ts +7 -0
package/dist/lib/store/index.js +13 -0
package/dist/lib/store/sessions.d.ts +32 -0
package/dist/lib/store/sessions.js +240 -0
package/dist/lib/store/steps.d.ts +40 -0
package/dist/lib/store/steps.js +161 -0
package/dist/lib/store/tasks.d.ts +33 -0
package/dist/lib/store/tasks.js +133 -0
package/dist/lib/store/types.d.ts +167 -0
package/dist/lib/store/types.js +2 -0
package/dist/lib/store.d.ts +1 -436
package/dist/lib/store.js +2 -1478
package/dist/proxy/cache.d.ts +36 -0
package/dist/proxy/cache.js +51 -0
package/dist/proxy/config.d.ts +1 -0
package/dist/proxy/config.js +2 -0
package/dist/proxy/extended-cache.d.ts +10 -0
package/dist/proxy/extended-cache.js +155 -0
package/dist/proxy/handlers/preprocess.d.ts +20 -0
package/dist/proxy/handlers/preprocess.js +169 -0
package/dist/proxy/injection/delta-tracking.d.ts +11 -0
package/dist/proxy/injection/delta-tracking.js +93 -0
package/dist/proxy/injection/injectors.d.ts +7 -0
package/dist/proxy/injection/injectors.js +139 -0
package/dist/proxy/request-processor.d.ts +18 -4
package/dist/proxy/request-processor.js +151 -30
package/dist/proxy/response-processor.js +93 -45
package/dist/proxy/server.d.ts +0 -1
package/dist/proxy/server.js +342 -566
package/dist/proxy/types.d.ts +13 -0
package/dist/proxy/types.js +2 -0
package/dist/proxy/utils/extractors.d.ts +18 -0
package/dist/proxy/utils/extractors.js +109 -0
package/dist/proxy/utils/logging.d.ts +18 -0
package/dist/proxy/utils/logging.js +42 -0
package/package.json +5 -2

package/dist/proxy/server.js CHANGED

@@ -1,314 +1,26 @@
 // Grov Proxy Server - Fastify + undici
 // Intercepts Claude Code <-> Anthropic API traffic for drift detection and context injection
 import Fastify from 'fastify';
-import { config } from './config.js';
+import { config, buildSafeHeaders } from './config.js';
 import { forwardToAnthropic, isForwardError } from './forwarder.js';
+import { extendedCache, evictOldestCacheEntry, checkExtendedCache, log } from './extended-cache.js';
+import { setDebugMode, getNextRequestId, taskLog, proxyLog, logTokenUsage } from './utils/logging.js';
+import { detectKeyDecision, extractTextContent, extractProjectPath, extractGoalFromMessages, extractConversationHistory } from './utils/extractors.js';
+import { appendToLastUserMessage, injectIntoRawBody } from './injection/injectors.js';
+import { preProcessRequest, setPendingPlanClear } from './handlers/preprocess.js';
 import { parseToolUseBlocks, extractTokenUsage } from './action-parser.js';
-import { createSessionState, getSessionState, updateSessionState, createStep, updateTokenCount, logDriftEvent, getRecentSteps, getValidatedSteps, updateSessionMode, markWaitingForRecovery, incrementEscalation, updateLastChecked, markCleared, getActiveSessionForUser, deleteSessionState, deleteStepsForSession, updateRecentStepsReasoning, markSessionCompleted, getCompletedSessionForProject, cleanupOldCompletedSessions, getKeyDecisions, getEditedFiles, } from '../lib/store.js';
-import { smartTruncate } from '../lib/utils.js';
+import { createSessionState, getSessionState, updateSessionState, createStep, updateTokenCount, logDriftEvent, getRecentSteps, getValidatedSteps, updateSessionMode, markWaitingForRecovery, incrementEscalation, updateLastChecked, getActiveSessionForUser, deleteSessionState, deleteStepsForSession, updateRecentStepsReasoning, markSessionCompleted, getCompletedSessionForProject, cleanupOldCompletedSessions, cleanupStaleActiveSessions, } from '../lib/store.js';
 import { checkDrift, scoreToCorrectionLevel, shouldSkipSteps, isDriftCheckAvailable, checkRecoveryAlignment, generateForcedRecovery, } from '../lib/drift-checker-proxy.js';
 import { buildCorrection, formatCorrectionForInjection } from '../lib/correction-builder-proxy.js';
 import { generateSessionSummary, isSummaryAvailable, extractIntent, isIntentExtractionAvailable, analyzeTaskContext, isTaskAnalysisAvailable, } from '../lib/llm-extractor.js';
-import { buildTeamMemoryContext, extractFilesFromMessages } from './request-processor.js';
 import { saveToTeamMemory } from './response-processor.js';
 import { randomUUID } from 'crypto';
-import * as fs from 'fs';
-import * as path from 'path';
 // Store last drift result for recovery alignment check
 const lastDriftResults = new Map();
+// Server logger reference (set in startServer)
+let serverLog = null;
 // Track last messageCount per session to detect retries vs new turns
 const lastMessageCount = new Map();
-// Cache injection content per session (MUST be identical across requests for cache preservation)
-// Stored in memory because session DB state doesn't exist on first request
-const cachedInjections = new Map();
-const sessionInjectionTracking = new Map();
-function getOrCreateTracking(sessionId) {
-    if (!sessionInjectionTracking.has(sessionId)) {
-        sessionInjectionTracking.set(sessionId, {
-            files: new Set(),
-            decisionIds: new Set(),
-            reasonings: new Set(),
-        });
-    }
-    return sessionInjectionTracking.get(sessionId);
-}
-/**
- * Build dynamic injection content for user message (DELTA only)
- * Includes: edited files, key decisions, drift correction, forced recovery
- * Only injects NEW content that hasn't been injected before
- */
-function buildDynamicInjection(sessionId, sessionState, logger) {
-    const tracking = getOrCreateTracking(sessionId);
-    const parts = [];
-    const debugInfo = {};
-    // 1. Get edited files (delta - not already injected)
-    const allEditedFiles = getEditedFiles(sessionId);
-    const newFiles = allEditedFiles.filter(f => !tracking.files.has(f));
-    debugInfo.totalEditedFiles = allEditedFiles.length;
-    debugInfo.newEditedFiles = newFiles.length;
-    debugInfo.alreadyTrackedFiles = tracking.files.size;
-    if (newFiles.length > 0) {
-        // Track and add to injection
-        newFiles.forEach(f => tracking.files.add(f));
-        const fileNames = newFiles.slice(0, 5).map(f => f.split('/').pop());
-        parts.push(`[EDITED: ${fileNames.join(', ')}]`);
-        debugInfo.editedFilesInjected = fileNames;
-    }
-    // 2. Get key decisions with reasoning (delta - not already injected)
-    const keyDecisions = getKeyDecisions(sessionId, 5);
-    debugInfo.totalKeyDecisions = keyDecisions.length;
-    debugInfo.alreadyTrackedDecisions = tracking.decisionIds.size;
-    const newDecisions = keyDecisions.filter(d => !tracking.decisionIds.has(d.id) &&
-        d.reasoning &&
-        !tracking.reasonings.has(d.reasoning));
-    debugInfo.newKeyDecisions = newDecisions.length;
-    for (const decision of newDecisions.slice(0, 3)) {
-        tracking.decisionIds.add(decision.id);
-        tracking.reasonings.add(decision.reasoning);
-        const truncated = smartTruncate(decision.reasoning, 120);
-        parts.push(`[DECISION: ${truncated}]`);
-        // Log the original and truncated reasoning for debugging
-        if (logger) {
-            logger.info({
-                msg: 'Key decision reasoning extracted',
-                originalLength: decision.reasoning.length,
-                truncatedLength: truncated.length,
-                original: decision.reasoning.substring(0, 200) + (decision.reasoning.length > 200 ? '...' : ''),
-                truncated,
-            });
-        }
-    }
-    debugInfo.decisionsInjected = newDecisions.slice(0, 3).length;
-    // 3. Add drift correction if pending
-    if (sessionState?.pending_correction) {
-        parts.push(`[DRIFT: ${sessionState.pending_correction}]`);
-        debugInfo.hasDriftCorrection = true;
-        debugInfo.driftCorrectionLength = sessionState.pending_correction.length;
-    }
-    // 4. Add forced recovery if pending
-    if (sessionState?.pending_forced_recovery) {
-        parts.push(`[RECOVERY: ${sessionState.pending_forced_recovery}]`);
-        debugInfo.hasForcedRecovery = true;
-        debugInfo.forcedRecoveryLength = sessionState.pending_forced_recovery.length;
-    }
-    // Log debug info
-    if (logger) {
-        logger.info({
-            msg: 'Dynamic injection build details',
-            ...debugInfo,
-            partsCount: parts.length,
-        });
-    }
-    if (parts.length === 0) {
-        return null;
-    }
-    const injection = '---\n[GROV CONTEXT]\n' + parts.join('\n');
-    // Log final injection content
-    if (logger) {
-        logger.info({
-            msg: 'Dynamic injection content',
-            size: injection.length,
-            content: injection,
-        });
-    }
-    return injection;
-}
-/**
- * Append dynamic injection to the last user message in raw body string
- * This preserves cache for system + previous messages, only the last user msg changes
- */
-function appendToLastUserMessage(rawBody, injection) {
-    // Find the last occurrence of "role":"user" followed by content
-    // We need to find the content field of the last user message and append to it
-    // Strategy: Find all user messages, get the last one, append to its content
-    // This is tricky because content can be string or array
-    // Simpler approach: Find the last user message's closing content
-    // Look for pattern: "role":"user","content":"..." or "role":"user","content":[...]
-    // Find last "role":"user"
-    const userRolePattern = /"role"\s*:\s*"user"/g;
-    let lastUserMatch = null;
-    let match;
-    while ((match = userRolePattern.exec(rawBody)) !== null) {
-        lastUserMatch = match;
-    }
-    if (!lastUserMatch) {
-        // No user message found, can't inject
-        return rawBody;
-    }
-    // From lastUserMatch position, find the content field
-    const afterRole = rawBody.slice(lastUserMatch.index);
-    // Find "content" field after role
-    const contentMatch = afterRole.match(/"content"\s*:\s*/);
-    if (!contentMatch || contentMatch.index === undefined) {
-        return rawBody;
-    }
-    const contentStartGlobal = lastUserMatch.index + contentMatch.index + contentMatch[0].length;
-    const afterContent = rawBody.slice(contentStartGlobal);
-    // Determine if content is string or array
-    if (afterContent.startsWith('"')) {
-        // String content - find closing quote (handling escapes)
-        let i = 1; // Skip opening quote
-        while (i < afterContent.length) {
-            if (afterContent[i] === '\\') {
-                i += 2; // Skip escaped char
-            }
-            else if (afterContent[i] === '"') {
-                // Found closing quote
-                const insertPos = contentStartGlobal + i;
-                // Insert before closing quote, escape the injection for JSON
-                const escapedInjection = injection
-                    .replace(/\\/g, '\\\\')
-                    .replace(/"/g, '\\"')
-                    .replace(/\n/g, '\\n');
-                return rawBody.slice(0, insertPos) + '\\n\\n' + escapedInjection + rawBody.slice(insertPos);
-            }
-            else {
-                i++;
-            }
-        }
-    }
-    else if (afterContent.startsWith('[')) {
-        // Array content - find last text block and append, or add new text block
-        // Find the closing ] of the content array
-        let depth = 1;
-        let i = 1;
-        while (i < afterContent.length && depth > 0) {
-            const char = afterContent[i];
-            if (char === '[')
-                depth++;
-            else if (char === ']')
-                depth--;
-            else if (char === '"') {
-                // Skip string
-                i++;
-                while (i < afterContent.length && afterContent[i] !== '"') {
-                    if (afterContent[i] === '\\')
-                        i++;
-                    i++;
-                }
-            }
-            i++;
-        }
-        if (depth === 0) {
-            // Found closing bracket at position i-1
-            const insertPos = contentStartGlobal + i - 1;
-            // Add new text block before closing bracket
-            const escapedInjection = injection
-                .replace(/\\/g, '\\\\')
-                .replace(/"/g, '\\"')
-                .replace(/\n/g, '\\n');
-            const newBlock = `,{"type":"text","text":"\\n\\n${escapedInjection}"}`;
-            return rawBody.slice(0, insertPos) + newBlock + rawBody.slice(insertPos);
-        }
-    }
-    // Fallback: couldn't parse, return unchanged
-    return rawBody;
-}
-// ============================================
-// DEBUG MODE - Controlled via --debug flag
-// ============================================
-let debugMode = false;
-export function setDebugMode(enabled) {
-    debugMode = enabled;
-}
-// ============================================
-// FILE LOGGER - Request/Response tracking (debug only)
-// ============================================
-const PROXY_LOG_PATH = path.join(process.cwd(), 'grov-proxy.log');
-let requestCounter = 0;
-function proxyLog(entry) {
-    if (!debugMode)
-        return; // Skip file logging unless --debug flag
-    const logEntry = {
-        timestamp: new Date().toISOString(),
-        ...entry,
-    };
-    const line = JSON.stringify(logEntry) + '\n';
-    fs.appendFileSync(PROXY_LOG_PATH, line);
-}
-/**
- * Log token usage to console (always shown, compact format)
- */
-function logTokenUsage(requestId, usage, latencyMs) {
-    const total = usage.cacheCreation + usage.cacheRead;
-    const hitRatio = total > 0 ? ((usage.cacheRead / total) * 100).toFixed(0) : '0';
-    console.log(`[${requestId}] ${hitRatio}% cache | in:${usage.inputTokens} out:${usage.outputTokens} | create:${usage.cacheCreation} read:${usage.cacheRead} | ${latencyMs}ms`);
-}
-/**
- * Helper to append text to system prompt (handles string or array format)
- */
-function appendToSystemPrompt(body, textToAppend) {
-    if (typeof body.system === 'string') {
-        body.system = body.system + textToAppend;
-    }
-    else if (Array.isArray(body.system)) {
-        // Append as new text block WITHOUT cache_control
-        // Anthropic allows max 4 cache blocks - Claude Code already uses 2+
-        // Grov's injections are small (~2KB) so uncached is fine
-        body.system.push({
-            type: 'text',
-            text: textToAppend,
-        });
-    }
-    else {
-        // No system prompt yet, create as string
-        body.system = textToAppend;
-    }
-}
-/**
- * Get system prompt as string (for reading)
- */
-function getSystemPromptText(body) {
-    if (typeof body.system === 'string') {
-        return body.system;
-    }
-    else if (Array.isArray(body.system)) {
-        return body.system
-            .filter(block => block.type === 'text')
-            .map(block => block.text)
-            .join('\n');
-    }
-    return '';
-}
-/**
- * Inject text into raw body string WITHOUT re-serializing
- * This preserves the original formatting/whitespace for cache compatibility
- *
- * Adds a new text block to the end of the system array
- */
-function injectIntoRawBody(rawBody, injectionText) {
-    // Find the system array in the raw JSON
-    // Pattern: "system": [....]
-    const systemMatch = rawBody.match(/"system"\s*:\s*\[/);
-    if (!systemMatch || systemMatch.index === undefined) {
-        return { modified: rawBody, success: false };
-    }
-    // Find the matching closing bracket for the system array
-    const startIndex = systemMatch.index + systemMatch[0].length;
-    let bracketCount = 1;
-    let endIndex = startIndex;
-    for (let i = startIndex; i < rawBody.length && bracketCount > 0; i++) {
-        const char = rawBody[i];
-        if (char === '[')
-            bracketCount++;
-        else if (char === ']')
-            bracketCount--;
-        if (bracketCount === 0) {
-            endIndex = i;
-            break;
-        }
-    }
-    if (bracketCount !== 0) {
-        return { modified: rawBody, success: false };
-    }
-    // Escape the injection text for JSON
-    const escapedText = JSON.stringify(injectionText).slice(1, -1); // Remove outer quotes
-    // Create the new block (without cache_control - will be cache_creation)
-    const newBlock = `,{"type":"text","text":"${escapedText}"}`;
-    // Insert before the closing bracket
-    const modified = rawBody.slice(0, endIndex) + newBlock + rawBody.slice(endIndex);
-    return { modified, success: true };
-}
 // Session tracking (in-memory for active sessions)
 const activeSessions = new Map();
 /**
@@ -381,7 +93,7 @@ async function handleMessages(request, reply) {
         promptCount: sessionInfo.promptCount,
         projectPath: sessionInfo.projectPath,
     });
-    const currentRequestId = ++requestCounter;
+    const currentRequestId = getNextRequestId();
     logger.info({
         msg: 'Incoming request',
         sessionId: sessionInfo.sessionId.substring(0, 8),
@@ -405,7 +117,7 @@ async function handleMessages(request, reply) {
     // Process request to get injection text
     // __grovInjection = team memory (system prompt, cached)
     // __grovUserMsgInjection = dynamic content (user message, delta only)
-    const processedBody = await preProcessRequest(request.body, sessionInfo, logger);
+    const processedBody = await preProcessRequest(request.body, sessionInfo, logger, detectRequestType);
     const systemInjection = processedBody.__grovInjection;
     const userMsgInjection = processedBody.__grovUserMsgInjection;
     // Get raw body bytes
@@ -470,7 +182,12 @@ async function handleMessages(request, reply) {
         // FIRE-AND-FORGET: Don't block response to Claude Code
         // This prevents retry loops caused by Haiku calls adding latency
         if (result.statusCode === 200 && isAnthropicResponse(result.body)) {
-            postProcessResponse(result.body, sessionInfo, request.body, logger)
+            // Prepare extended cache data (only if enabled)
+            const extendedCacheData = config.EXTENDED_CACHE_ENABLED ? {
+                headers: buildSafeHeaders(request.headers),
+                rawBody: typeof finalBodyToSend === 'string' ? Buffer.from(finalBodyToSend, 'utf-8') : finalBodyToSend,
+            } : undefined;
+            postProcessResponse(result.body, sessionInfo, request.body, logger, extendedCacheData)
                 .catch(err => console.error('[GROV] postProcess error:', err));
         }
         const latency = Date.now() - startTime;
@@ -591,6 +308,7 @@ async function getOrCreateSession(request, logger) {
         projectPath,
     };
     activeSessions.set(tempSessionId, sessionInfo);
+    // Note: team memory is now GLOBAL (not per session), no propagation needed
     logger.info({ msg: 'No existing session, will create after task analysis' });
     return { ...sessionInfo, isNew: true, currentSession: null, completedSession };
 }
@@ -600,10 +318,10 @@ async function getOrCreateSession(request, logger) {
  * - continuation: tool result (messageCount changed, last msg has tool_result)
  * - retry: same messageCount as before
  */
-function detectRequestType(messages, sessionId) {
+function detectRequestType(messages, projectPath) {
     const currentCount = messages?.length || 0;
-    const lastCount = lastMessageCount.get(sessionId);
-    lastMessageCount.set(sessionId, currentCount);
+    const lastCount = lastMessageCount.get(projectPath);
+    lastMessageCount.set(projectPath, currentCount);
     // Same messageCount = retry
     if (lastCount !== undefined && currentCount === lastCount) {
         return 'retry';
@@ -623,103 +341,6 @@ function detectRequestType(messages, sessionId) {
     }
     return 'first';
 }
-/**
- * Pre-process request before forwarding
- * - Context injection (first request only)
- * - CLEAR operation (first request only)
- * - Drift correction (first request only)
- *
- * SKIP all injections on: retry, continuation
- */
-async function preProcessRequest(body, sessionInfo, logger) {
-    const modified = { ...body };
-    // Detect request type: first, continuation, or retry
-    const requestType = detectRequestType(modified.messages || [], sessionInfo.sessionId);
-    // === NEW ARCHITECTURE: Separate static and dynamic injection ===
-    //
-    // STATIC (system prompt, cached):
-    //   - Team memory from PAST sessions only
-    //   - CLEAR summary when triggered
-    //   -> Uses __grovInjection + injectIntoRawBody()
-    //
-    // DYNAMIC (user message, delta only):
-    //   - Files edited in current session
-    //   - Key decisions with reasoning
-    //   - Drift correction, forced recovery
-    //   -> Uses __grovUserMsgInjection + appendToLastUserMessage()
-    // Get session state
-    const sessionState = getSessionState(sessionInfo.sessionId);
-    // === CLEAR MODE (100% threshold) ===
-    // If token count exceeds threshold AND we have a pre-computed summary, apply CLEAR
-    if (sessionState) {
-        const currentTokenCount = sessionState.token_count || 0;
-        if (currentTokenCount > config.TOKEN_CLEAR_THRESHOLD &&
-            sessionState.pending_clear_summary) {
-            logger.info({
-                msg: 'CLEAR MODE ACTIVATED - resetting conversation',
-                tokenCount: currentTokenCount,
-                threshold: config.TOKEN_CLEAR_THRESHOLD,
-                summaryLength: sessionState.pending_clear_summary.length,
-            });
-            // 1. Empty messages array (fundamental reset)
-            modified.messages = [];
-            // 2. Inject summary into system prompt (this will cause cache miss - intentional)
-            appendToSystemPrompt(modified, sessionState.pending_clear_summary);
-            // 3. Mark session as cleared
-            markCleared(sessionInfo.sessionId);
-            // 4. Clear pending summary and invalidate team memory cache (new baseline)
-            updateSessionState(sessionInfo.sessionId, { pending_clear_summary: undefined });
-            cachedInjections.delete(sessionInfo.sessionId);
-            // 5. Clear tracking (fresh start after CLEAR)
-            sessionInjectionTracking.delete(sessionInfo.sessionId);
-            logger.info({ msg: 'CLEAR complete - conversation reset with summary' });
-            return modified; // Skip other injections - this is a complete reset
-        }
-    }
-    // === STATIC INJECTION: Team memory (PAST sessions only) ===
-    // Cached per session - identical across all requests for cache preservation
-    const cachedTeamMemory = cachedInjections.get(sessionInfo.sessionId);
-    if (cachedTeamMemory) {
-        // Reuse cached team memory (constant for this session)
-        modified.__grovInjection = cachedTeamMemory;
-        modified.__grovInjectionCached = true;
-        logger.info({ msg: 'Using cached team memory', size: cachedTeamMemory.length });
-    }
-    else {
-        // First request: compute team memory from PAST sessions only
-        const mentionedFiles = extractFilesFromMessages(modified.messages || []);
-        // Pass currentSessionId to exclude current session data
-        const teamContext = buildTeamMemoryContext(sessionInfo.projectPath, mentionedFiles, sessionInfo.sessionId // Exclude current session
-        );
-        if (teamContext) {
-            modified.__grovInjection = teamContext;
-            modified.__grovInjectionCached = false;
-            // Cache for future requests (stays constant)
-            cachedInjections.set(sessionInfo.sessionId, teamContext);
-            logger.info({ msg: 'Computed and cached team memory', size: teamContext.length });
-        }
-    }
-    // SKIP dynamic injection for retries and continuations
-    if (requestType !== 'first') {
-        return modified;
-    }
-    // === DYNAMIC INJECTION: User message (delta only) ===
-    // Includes: edited files, key decisions, drift correction, forced recovery
-    // This goes into the LAST user message, not system prompt
-    const dynamicInjection = buildDynamicInjection(sessionInfo.sessionId, sessionState, logger);
-    if (dynamicInjection) {
-        modified.__grovUserMsgInjection = dynamicInjection;
-        logger.info({ msg: 'Dynamic injection ready for user message', size: dynamicInjection.length });
-        // Clear pending corrections after building injection
-        if (sessionState?.pending_correction || sessionState?.pending_forced_recovery) {
-            updateSessionState(sessionInfo.sessionId, {
-                pending_correction: undefined,
-                pending_forced_recovery: undefined,
-            });
-        }
-    }
-    return modified;
-}
 /**
  * Post-process response after receiving from Anthropic
  * - Task orchestration (new/continue/subtask/complete)
@@ -730,7 +351,7 @@ async function preProcessRequest(body, sessionInfo, logger) {
  * - Recovery alignment check (Section 4.4)
  * - Team memory triggers (Section 4.6)
  */
-async function postProcessResponse(response, sessionInfo, requestBody, logger) {
+async function postProcessResponse(response, sessionInfo, requestBody, logger, extendedCacheData) {
     // Parse tool_use blocks
     const actions = parseToolUseBlocks(response);
     // Extract text content for analysis
@@ -752,6 +373,29 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
     if (isWarmup) {
         return;
     }
+    // === EXTENDED CACHE: Capture for keep-alive ===
+    // Only capture on end_turn (user idle starts now, not during tool_use loops)
+    if (isEndTurn && extendedCacheData) {
+        const rawStr = extendedCacheData.rawBody.toString('utf-8');
+        const hasSystem = rawStr.includes('"system"');
+        const hasTools = rawStr.includes('"tools"');
+        const hasCacheCtrl = rawStr.includes('"cache_control"');
+        const msgMatch = rawStr.match(/"messages"\s*:\s*\[/);
+        const msgPos = msgMatch?.index ?? -1;
+        // Use projectPath as key (one entry per conversation, not per task)
+        const cacheKey = sessionInfo.projectPath;
+        // Evict oldest if at capacity (only for NEW entries, not updates)
+        if (!extendedCache.has(cacheKey)) {
+            evictOldestCacheEntry();
+        }
+        extendedCache.set(cacheKey, {
+            headers: extendedCacheData.headers,
+            rawBody: extendedCacheData.rawBody,
+            timestamp: Date.now(),
+            keepAliveCount: 0,
+        });
+        log(`Extended cache: CAPTURE project=${cacheKey.split('/').pop()} size=${rawStr.length} sys=${hasSystem} tools=${hasTools} cache_ctrl=${hasCacheCtrl} msg_pos=${msgPos}`);
+    }
     // If not end_turn (tool_use in progress), skip task orchestration but keep session
     if (!isEndTurn) {
         // Use existing session or create minimal one without LLM calls
@@ -774,23 +418,44 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                 promptCount: 1,
                 projectPath: sessionInfo.projectPath,
             });
+            // Note: team memory is now GLOBAL (not per session), no propagation needed
         }
     }
     else if (isTaskAnalysisAvailable()) {
         // Use completed session for comparison if no active session
         const sessionForComparison = sessionInfo.currentSession || sessionInfo.completedSession;
+        // Extract conversation history for context-aware task analysis
+        const conversationHistory = extractConversationHistory(requestBody.messages || []);
         try {
-            const taskAnalysis = await analyzeTaskContext(sessionForComparison, latestUserMessage, recentSteps, textContent);
+            const taskAnalysis = await analyzeTaskContext(sessionForComparison, latestUserMessage, recentSteps, textContent, conversationHistory);
             logger.info({
                 msg: 'Task analysis',
                 action: taskAnalysis.action,
-                topic_match: taskAnalysis.topic_match,
+                task_type: taskAnalysis.task_type,
                 goal: taskAnalysis.current_goal?.substring(0, 50),
                 reasoning: taskAnalysis.reasoning,
             });
+            // TASK LOG: Analysis result
+            taskLog('TASK_ANALYSIS', {
+                sessionId: sessionInfo.sessionId,
+                action: taskAnalysis.action,
+                task_type: taskAnalysis.task_type,
+                goal: taskAnalysis.current_goal || '',
+                reasoning: taskAnalysis.reasoning || '',
+                userMessage: latestUserMessage.substring(0, 80),
+                hasCurrentSession: !!sessionInfo.currentSession,
+                hasCompletedSession: !!sessionInfo.completedSession,
+            });
             // Update recent steps with reasoning (backfill from end_turn response)
             if (taskAnalysis.step_reasoning && activeSessionId) {
                 const updatedCount = updateRecentStepsReasoning(activeSessionId, taskAnalysis.step_reasoning);
+                // TASK LOG: Step reasoning update
+                taskLog('STEP_REASONING', {
+                    sessionId: activeSessionId,
+                    stepsUpdated: updatedCount,
+                    reasoningEntries: Object.keys(taskAnalysis.step_reasoning).length,
+                    stepIds: Object.keys(taskAnalysis.step_reasoning).join(','),
+                });
             }
             // Handle task orchestration based on analysis
             switch (taskAnalysis.action) {
@@ -809,6 +474,13 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                             });
                             activeSession.original_goal = taskAnalysis.current_goal;
                         }
+                        // TASK LOG: Continue existing session
+                        taskLog('ORCHESTRATION_CONTINUE', {
+                            sessionId: activeSessionId,
+                            source: 'current_session',
+                            goal: activeSession.original_goal,
+                            goalUpdated: taskAnalysis.current_goal !== activeSession.original_goal,
+                        });
                     }
                     else if (sessionInfo.completedSession) {
                         // Reactivate completed session (user wants to continue/add to it)
@@ -824,6 +496,13 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                             promptCount: 1,
                             projectPath: sessionInfo.projectPath,
                         });
+                        // Note: team memory is now GLOBAL (not per session), no propagation needed
+                        // TASK LOG: Reactivate completed session
+                        taskLog('ORCHESTRATION_CONTINUE', {
+                            sessionId: activeSessionId,
+                            source: 'reactivated_completed',
+                            goal: activeSession.original_goal,
+                        });
                     }
                     break;
                 case 'new_task': {
@@ -843,9 +522,24 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                         try {
                             intentData = await extractIntent(latestUserMessage);
                             logger.info({ msg: 'Intent extracted for new task', scopeCount: intentData.expected_scope.length });
+                            // TASK LOG: Intent extraction for new_task
+                            taskLog('INTENT_EXTRACTION', {
+                                sessionId: sessionInfo.sessionId,
+                                context: 'new_task',
+                                goal: intentData.goal,
+                                scopeCount: intentData.expected_scope.length,
+                                scope: intentData.expected_scope.join(', '),
+                                constraints: intentData.constraints.join(', '),
+                                keywords: intentData.keywords.join(', '),
+                            });
                         }
                         catch (err) {
                             logger.info({ msg: 'Intent extraction failed, using basic goal', error: String(err) });
+                            taskLog('INTENT_EXTRACTION_FAILED', {
+                                sessionId: sessionInfo.sessionId,
+                                context: 'new_task',
+                                error: String(err),
+                            });
                         }
                     }
                     const newSessionId = randomUUID();
@@ -865,6 +559,42 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                         projectPath: sessionInfo.projectPath,
                     });
                     logger.info({ msg: 'Created new task session', sessionId: newSessionId.substring(0, 8) });
+                    // TASK LOG: New task created
+                    taskLog('ORCHESTRATION_NEW_TASK', {
+                        sessionId: newSessionId,
+                        goal: intentData.goal,
+                        scopeCount: intentData.expected_scope.length,
+                        keywordsCount: intentData.keywords.length,
+                    });
+                    // Q&A AUTO-SAVE: If this is an information request with a substantive answer
+                    // AND no tool calls, save immediately since pure Q&A completes in a single turn.
+                    // If there ARE tool calls (e.g., Read for "Analyze X"), wait for them to complete
+                    // so steps get captured properly before saving.
+                    if (taskAnalysis.task_type === 'information' && textContent.length > 100 && actions.length === 0) {
+                        logger.info({ msg: 'Q&A detected (pure text) - saving immediately', sessionId: newSessionId.substring(0, 8) });
+                        taskLog('QA_AUTO_SAVE', {
+                            sessionId: newSessionId,
+                            goal: intentData.goal,
+                            responseLength: textContent.length,
+                            toolCalls: 0,
+                        });
+                        // Store the response for reasoning extraction
+                        updateSessionState(newSessionId, {
+                            final_response: textContent.substring(0, 10000),
+                        });
+                        // Save to team memory and mark complete
+                        await saveToTeamMemory(newSessionId, 'complete');
+                        markSessionCompleted(newSessionId);
+                    }
+                    else if (taskAnalysis.task_type === 'information' && actions.length > 0) {
+                        // Q&A with tool calls - don't auto-save, let it continue until task_complete
+                        logger.info({ msg: 'Q&A with tool calls - waiting for completion', sessionId: newSessionId.substring(0, 8), toolCalls: actions.length });
+                        taskLog('QA_DEFERRED', {
+                            sessionId: newSessionId,
+                            goal: intentData.goal,
+                            toolCalls: actions.length,
+                        });
+                    }
                     break;
                 }
                 case 'subtask': {
@@ -878,8 +608,17 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                     if (isIntentExtractionAvailable() && latestUserMessage.length > 10) {
                         try {
                             intentData = await extractIntent(latestUserMessage);
+                            taskLog('INTENT_EXTRACTION', {
+                                sessionId: sessionInfo.sessionId,
+                                context: 'subtask',
+                                goal: intentData.goal,
+                                scope: intentData.expected_scope.join(', '),
+                                keywords: intentData.keywords.join(', '),
+                            });
+                        }
+                        catch (err) {
+                            taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'subtask', error: String(err) });
                         }
-                        catch { /* use fallback */ }
                     }
                     const parentId = sessionInfo.currentSession?.session_id || taskAnalysis.parent_task_id;
                     const subtaskId = randomUUID();
@@ -900,6 +639,12 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                         projectPath: sessionInfo.projectPath,
                     });
                     logger.info({ msg: 'Created subtask session', sessionId: subtaskId.substring(0, 8), parent: parentId?.substring(0, 8) });
+                    // TASK LOG: Subtask created
+                    taskLog('ORCHESTRATION_SUBTASK', {
+                        sessionId: subtaskId,
+                        parentId: parentId || 'none',
+                        goal: intentData.goal,
+                    });
                     break;
                 }
                 case 'parallel_task': {
@@ -913,8 +658,17 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                     if (isIntentExtractionAvailable() && latestUserMessage.length > 10) {
                         try {
                             intentData = await extractIntent(latestUserMessage);
+                            taskLog('INTENT_EXTRACTION', {
+                                sessionId: sessionInfo.sessionId,
+                                context: 'parallel_task',
+                                goal: intentData.goal,
+                                scope: intentData.expected_scope.join(', '),
+                                keywords: intentData.keywords.join(', '),
+                            });
+                        }
+                        catch (err) {
+                            taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'parallel_task', error: String(err) });
                         }
-                        catch { /* use fallback */ }
                     }
                     const parentId = sessionInfo.currentSession?.session_id || taskAnalysis.parent_task_id;
                     const parallelId = randomUUID();
@@ -935,22 +689,89 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                         projectPath: sessionInfo.projectPath,
                     });
                     logger.info({ msg: 'Created parallel task session', sessionId: parallelId.substring(0, 8), parent: parentId?.substring(0, 8) });
+                    // TASK LOG: Parallel task created
+                    taskLog('ORCHESTRATION_PARALLEL', {
+                        sessionId: parallelId,
+                        parentId: parentId || 'none',
+                        goal: intentData.goal,
+                    });
                     break;
                 }
                 case 'task_complete': {
                     // Save to team memory and mark as completed (don't delete yet - keep for new_task detection)
                     if (sessionInfo.currentSession) {
                         try {
+                            // Set final_response BEFORE saving so reasoning extraction has the data
+                            updateSessionState(sessionInfo.currentSession.session_id, {
+                                final_response: textContent.substring(0, 10000),
+                            });
                             await saveToTeamMemory(sessionInfo.currentSession.session_id, 'complete');
                             markSessionCompleted(sessionInfo.currentSession.session_id);
                             activeSessions.delete(sessionInfo.currentSession.session_id);
                             lastDriftResults.delete(sessionInfo.currentSession.session_id);
+                            // TASK LOG: Task completed
+                            taskLog('ORCHESTRATION_TASK_COMPLETE', {
+                                sessionId: sessionInfo.currentSession.session_id,
+                                goal: sessionInfo.currentSession.original_goal,
+                            });
+                            // PLANNING COMPLETE: Trigger CLEAR-like reset for implementation phase
+                            // This ensures next request starts fresh with planning context from team memory
+                            if (taskAnalysis.task_type === 'planning' && isSummaryAvailable()) {
+                                try {
+                                    const allSteps = getValidatedSteps(sessionInfo.currentSession.session_id);
+                                    const planSummary = await generateSessionSummary(sessionInfo.currentSession, allSteps, 2000);
+                                    // Store for next request to trigger CLEAR
+                                    setPendingPlanClear({
+                                        projectPath: sessionInfo.projectPath,
+                                        summary: planSummary,
+                                    });
+                                    // Cache invalidation happens in response-processor.ts after syncTask completes
+                                    logger.info({
+                                        msg: 'PLANNING_CLEAR triggered',
+                                        sessionId: sessionInfo.currentSession.session_id.substring(0, 8),
+                                        summaryLen: planSummary.length,
+                                    });
+                                }
+                                catch {
+                                    // Silent fail - planning CLEAR is optional enhancement
+                                }
+                            }
                             logger.info({ msg: 'Task complete - saved to team memory, marked completed' });
                         }
                         catch (err) {
                             logger.info({ msg: 'Failed to save completed task', error: String(err) });
                         }
                     }
+                    else if (textContent.length > 100) {
+                        // NEW: Handle "instant complete" - task that's new AND immediately complete
+                        // This happens for simple Q&A when Haiku says task_complete without existing session
+                        // Example: user asks clarification question, answer is provided in single turn
+                        try {
+                            const newSessionId = randomUUID();
+                            const instantSession = createSessionState({
+                                session_id: newSessionId,
+                                project_path: sessionInfo.projectPath,
+                                original_goal: taskAnalysis.current_goal || latestUserMessage.substring(0, 500),
+                                task_type: 'main',
+                            });
+                            // Set final_response for reasoning extraction
+                            updateSessionState(newSessionId, {
+                                final_response: textContent.substring(0, 10000),
+                            });
+                            await saveToTeamMemory(newSessionId, 'complete');
+                            markSessionCompleted(newSessionId);
+                            logger.info({ msg: 'Instant complete - new task saved immediately', sessionId: newSessionId.substring(0, 8) });
+                            // TASK LOG: Instant complete (new task that finished in one turn)
+                            taskLog('ORCHESTRATION_TASK_COMPLETE', {
+                                sessionId: newSessionId,
+                                goal: taskAnalysis.current_goal || latestUserMessage.substring(0, 80),
+                                source: 'instant_complete',
+                            });
+                        }
+                        catch (err) {
+                            logger.info({ msg: 'Failed to save instant complete task', error: String(err) });
+                        }
+                    }
                     return; // Done, no more processing needed
                 }
                 case 'subtask_complete': {
@@ -969,6 +790,12 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                                     activeSessionId = parentId;
                                     activeSession = parentSession;
                                     logger.info({ msg: 'Subtask complete - returning to parent', parent: parentId.substring(0, 8) });
+                                    // TASK LOG: Subtask completed
+                                    taskLog('ORCHESTRATION_SUBTASK_COMPLETE', {
+                                        sessionId: sessionInfo.currentSession.session_id,
+                                        parentId: parentId,
+                                        goal: sessionInfo.currentSession.original_goal,
+                                    });
                                 }
                             }
                         }
@@ -993,8 +820,16 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                 if (isIntentExtractionAvailable() && latestUserMessage.length > 10) {
                     try {
                         intentData = await extractIntent(latestUserMessage);
+                        taskLog('INTENT_EXTRACTION', {
+                            sessionId: sessionInfo.sessionId,
+                            context: 'fallback_analysis_failed',
+                            goal: intentData.goal,
+                            scope: intentData.expected_scope.join(', '),
+                        });
+                    }
+                    catch (err) {
+                        taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'fallback_analysis_failed', error: String(err) });
                     }
-                    catch { /* use fallback */ }
                 }
                 const newSessionId = randomUUID();
                 activeSession = createSessionState({
@@ -1012,6 +847,11 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
     }
     else {
         // No task analysis available - fallback with intent extraction
+        taskLog('TASK_ANALYSIS_UNAVAILABLE', {
+            sessionId: sessionInfo.sessionId,
+            hasCurrentSession: !!sessionInfo.currentSession,
+            userMessage: latestUserMessage.substring(0, 80),
+        });
         if (!sessionInfo.currentSession) {
             let intentData = {
                 goal: latestUserMessage.substring(0, 500),
@@ -1023,8 +863,16 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
                 try {
                     intentData = await extractIntent(latestUserMessage);
                     logger.info({ msg: 'Intent extracted (fallback)', scopeCount: intentData.expected_scope.length });
+                    taskLog('INTENT_EXTRACTION', {
+                        sessionId: sessionInfo.sessionId,
+                        context: 'no_analysis_available',
+                        goal: intentData.goal,
+                        scope: intentData.expected_scope.join(', '),
+                    });
+                }
+                catch (err) {
+                    taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'no_analysis_available', error: String(err) });
                 }
-                catch { /* use fallback */ }
             }
             const newSessionId = randomUUID();
             activeSession = createSessionState({
@@ -1043,19 +891,12 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
             activeSessionId = sessionInfo.currentSession.session_id;
         }
     }
-    // AUTO-SAVE on every end_turn (for all task types: new_task, continue, subtask, parallel)
-    // task_complete and subtask_complete already save and return early, so they won't reach here
-    if (isEndTurn && activeSession && activeSessionId) {
-        try {
-            await saveToTeamMemory(activeSessionId, 'complete');
-            markSessionCompleted(activeSessionId);
-            activeSessions.delete(activeSessionId);
-            logger.info({ msg: 'Auto-saved task on end_turn', sessionId: activeSessionId.substring(0, 8) });
-        }
-        catch (err) {
-            logger.info({ msg: 'Auto-save failed', error: String(err) });
-        }
-    }
+    // NOTE: Auto-save on every end_turn was REMOVED
+    // Task saving is now controlled by Haiku's task analysis:
+    // - task_complete: Haiku detected task is done (Q&A answered, implementation verified, planning confirmed)
+    // - subtask_complete: Haiku detected subtask is done
+    // This ensures we only save when work is actually complete, not on every Claude response.
+    // See analyzeTaskContext() in llm-extractor.ts for the decision logic.
     // Extract token usage
     const usage = extractTokenUsage(response);
     // Use cache metrics as actual context size (cacheCreation + cacheRead)
@@ -1106,18 +947,8 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
         });
     }
     if (actions.length === 0) {
-        // Pure Q&A (no tool calls) - auto-save as task
-        if (isEndTurn && activeSessionId && activeSession) {
-            try {
-                await saveToTeamMemory(activeSessionId, 'complete');
-                markSessionCompleted(activeSessionId);
-                activeSessions.delete(activeSessionId);
-                logger.info({ msg: 'Task saved on final answer', sessionId: activeSessionId.substring(0, 8) });
-            }
-            catch (err) {
-                logger.info({ msg: 'Task save failed', error: String(err) });
-            }
-        }
+        // Final response (no tool calls)
+        // NOTE: Task saving is controlled by Haiku's task analysis (see switch case 'task_complete' above)
         return;
     }
     logger.info({
@@ -1242,20 +1073,36 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
         }
     }
     // Save each action as a step (with reasoning from Claude's text)
+    // When multiple actions come from the same Claude response, they share identical reasoning.
+    // We store reasoning only on the first action and set NULL for subsequent ones to avoid duplication.
+    // At query time, we group steps by reasoning (non-NULL starts a group, NULLs continue it)
+    // and reconstruct the full context: reasoning + all associated files/actions.
+    let previousReasoning = null;
+    logger.info({ msg: 'DEDUP_DEBUG', actionsCount: actions.length, textContentLen: textContent.length });
     for (const action of actions) {
+        const currentReasoning = textContent.substring(0, 1000);
+        const isDuplicate = currentReasoning === previousReasoning;
+        logger.info({
+            msg: 'DEDUP_STEP',
+            actionType: action.actionType,
+            isDuplicate,
+            prevLen: previousReasoning?.length || 0,
+            currLen: currentReasoning.length
+        });
         // Detect key decisions based on action type and reasoning content
-        const isKeyDecision = detectKeyDecision(action, textContent);
+        const isKeyDecision = !isDuplicate && detectKeyDecision(action, textContent);
         createStep({
             session_id: activeSessionId,
             action_type: action.actionType,
             files: action.files,
             folders: action.folders,
             command: action.command,
-            reasoning: textContent.substring(0, 1000), // Claude's explanation (truncated)
+            reasoning: isDuplicate ? undefined : currentReasoning,
             drift_score: driftScore,
             is_validated: !skipSteps,
             is_key_decision: isKeyDecision,
         });
+        previousReasoning = currentReasoning;
         if (isKeyDecision) {
             logger.info({
                 msg: 'Key decision detected',
@@ -1265,138 +1112,6 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
         }
     }
 }
-/**
- * Detect if an action represents a key decision worth injecting later
- * Key decisions are:
- * - Edit/write actions (code modifications)
- * - Actions with decision-related keywords in reasoning
- * - Actions with substantial reasoning content
- */
-function detectKeyDecision(action, reasoning) {
-    // Code modifications are always key decisions
-    if (action.actionType === 'edit' || action.actionType === 'write') {
-        return true;
-    }
-    // Check for decision-related keywords in reasoning
-    const decisionKeywords = [
-        'decision', 'decided', 'chose', 'chosen', 'selected', 'picked',
-        'approach', 'strategy', 'solution', 'implementation',
-        'because', 'reason', 'rationale', 'trade-off', 'tradeoff',
-        'instead of', 'rather than', 'prefer', 'opted',
-        'conclusion', 'determined', 'resolved'
-    ];
-    const reasoningLower = reasoning.toLowerCase();
-    const hasDecisionKeyword = decisionKeywords.some(kw => reasoningLower.includes(kw));
-    // Substantial reasoning (>200 chars) with decision keyword = key decision
-    if (hasDecisionKeyword && reasoning.length > 200) {
-        return true;
-    }
-    return false;
-}
-/**
- * Extract text content from response for analysis
- */
-function extractTextContent(response) {
-    return response.content
-        .filter((block) => block.type === 'text')
-        .map(block => block.text)
-        .join('\n');
-}
-/**
- * Detect task completion from response text
- * Returns trigger type or null
- */
-function detectTaskCompletion(text) {
-    const lowerText = text.toLowerCase();
-    // Strong completion indicators
-    const completionPhrases = [
-        'task is complete',
-        'task complete',
-        'implementation is complete',
-        'implementation complete',
-        'successfully implemented',
-        'all changes have been made',
-        'finished implementing',
-        'completed the implementation',
-        'done with the implementation',
-        'completed all the',
-        'all tests pass',
-        'build succeeds',
-    ];
-    for (const phrase of completionPhrases) {
-        if (lowerText.includes(phrase)) {
-            return 'complete';
-        }
-    }
-    // Subtask completion indicators
-    const subtaskPhrases = [
-        'step complete',
-        'phase complete',
-        'finished this step',
-        'moving on to',
-        'now let\'s',
-        'next step',
-    ];
-    for (const phrase of subtaskPhrases) {
-        if (lowerText.includes(phrase)) {
-            return 'subtask';
-        }
-    }
-    return null;
-}
-/**
- * Extract project path from request body
- */
-function extractProjectPath(body) {
-    // Try to extract from system prompt or messages
-    // Handle both string and array format for system prompt
-    let systemPrompt = '';
-    if (typeof body.system === 'string') {
-        systemPrompt = body.system;
-    }
-    else if (Array.isArray(body.system)) {
-        // New API format: system is array of {type: 'text', text: '...'}
-        systemPrompt = body.system
-            .filter((block) => block && typeof block === 'object' && block.type === 'text' && typeof block.text === 'string')
-            .map(block => block.text)
-            .join('\n');
-    }
-    const cwdMatch = systemPrompt.match(/Working directory:\s*([^\n]+)/);
-    if (cwdMatch) {
-        return cwdMatch[1].trim();
-    }
-    return null;
-}
-/**
- * Extract goal from FIRST user message with text content
- * Skips tool_result blocks, filters out system-reminder tags
- */
-function extractGoalFromMessages(messages) {
-    const userMessages = messages?.filter(m => m.role === 'user') || [];
-    for (const userMsg of userMessages) {
-        let rawContent = '';
-        // Handle string content
-        if (typeof userMsg.content === 'string') {
-            rawContent = userMsg.content;
-        }
-        // Handle array content - look for text blocks (skip tool_result)
-        if (Array.isArray(userMsg.content)) {
-            const textBlocks = userMsg.content
-                .filter((block) => block && typeof block === 'object' && block.type === 'text' && typeof block.text === 'string')
-                .map(block => block.text);
-            rawContent = textBlocks.join('\n');
-        }
-        // Remove <system-reminder>...</system-reminder> tags
-        const cleanContent = rawContent
-            .replace(/<system-reminder>[\s\S]*?<\/system-reminder>/g, '')
-            .trim();
-        // If we found valid text content, return it
-        if (cleanContent && cleanContent.length >= 5) {
-            return cleanContent.substring(0, 500);
-        }
-    }
-    return undefined;
-}
 /**
  * Filter response headers for forwarding to client
  */
@@ -1446,13 +1161,74 @@ export async function startServer(options = {}) {
         console.log('[DEBUG] Logging to grov-proxy.log');
     }
     const server = createServer();
+    // Set server logger for background tasks
+    serverLog = server.log;
     // Cleanup old completed sessions (older than 24 hours)
     cleanupOldCompletedSessions();
+    // Cleanup stale active sessions (no activity for 1 hour)
+    // Prevents old sessions from being reused in fresh Claude sessions
+    const staleCount = cleanupStaleActiveSessions();
+    if (staleCount > 0) {
+        log(`Cleaned up ${staleCount} stale active session(s)`);
+    }
+    // Handle for the extended cache keep-alive timer (stays null until the timer is started further down, when enabled)
+    let extendedCacheTimer = null;
+    // Track active connections for graceful shutdown
+    const activeConnections = new Set();
+    let isShuttingDown = false;
+    // Graceful shutdown handler (works with or without extended cache)
+    const gracefulShutdown = () => {
+        if (isShuttingDown)
+            return;
+        isShuttingDown = true;
+        log('Shutdown initiated...');
+        // 1. Stop extended cache timer if running
+        if (extendedCacheTimer) {
+            clearInterval(extendedCacheTimer);
+            extendedCacheTimer = null;
+            log('Extended cache: timer stopped');
+        }
+        // 2. Clear sensitive cache data
+        if (extendedCache.size > 0) {
+            log(`Extended cache: clearing ${extendedCache.size} entries`);
+            for (const entry of extendedCache.values()) {
+                for (const key of Object.keys(entry.headers)) {
+                    entry.headers[key] = '';
+                }
+                entry.rawBody = Buffer.alloc(0);
+            }
+            extendedCache.clear();
+        }
+        // 3. Stop accepting new connections
+        server.close();
+        // 4. Grace period (500ms) then force close remaining connections
+        setTimeout(() => {
+            if (activeConnections.size > 0) {
+                log(`Force closing ${activeConnections.size} connection(s)`);
+                for (const socket of activeConnections) {
+                    socket.destroy();
+                }
+            }
+            log('Goodbye!');
+            process.exit(0);
+        }, 500);
+    };
+    process.on('SIGTERM', gracefulShutdown);
+    process.on('SIGINT', gracefulShutdown);
+    if (config.EXTENDED_CACHE_ENABLED) {
+        extendedCacheTimer = setInterval(checkExtendedCache, 60_000);
+        log('Extended cache: enabled (keep-alive timer started)');
+    }
     try {
         await server.listen({
             host: config.HOST,
             port: config.PORT,
         });
+        // Track connections for graceful shutdown
+        server.server.on('connection', (socket) => {
+            activeConnections.add(socket);
+            socket.on('close', () => activeConnections.delete(socket));
+        });
         console.log(`Grov Proxy: http://${config.HOST}:${config.PORT} -> ${config.ANTHROPIC_BASE_URL}`);
         return server;
     }