grov 0.2.3 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +44 -5
  2. package/dist/cli.js +40 -2
  3. package/dist/commands/login.d.ts +1 -0
  4. package/dist/commands/login.js +115 -0
  5. package/dist/commands/logout.d.ts +1 -0
  6. package/dist/commands/logout.js +13 -0
  7. package/dist/commands/sync.d.ts +8 -0
  8. package/dist/commands/sync.js +127 -0
  9. package/dist/lib/api-client.d.ts +57 -0
  10. package/dist/lib/api-client.js +174 -0
  11. package/dist/lib/cloud-sync.d.ts +33 -0
  12. package/dist/lib/cloud-sync.js +176 -0
  13. package/dist/lib/credentials.d.ts +53 -0
  14. package/dist/lib/credentials.js +201 -0
  15. package/dist/lib/llm-extractor.d.ts +15 -39
  16. package/dist/lib/llm-extractor.js +400 -418
  17. package/dist/lib/store/convenience.d.ts +40 -0
  18. package/dist/lib/store/convenience.js +104 -0
  19. package/dist/lib/store/database.d.ts +22 -0
  20. package/dist/lib/store/database.js +375 -0
  21. package/dist/lib/store/drift.d.ts +9 -0
  22. package/dist/lib/store/drift.js +89 -0
  23. package/dist/lib/store/index.d.ts +7 -0
  24. package/dist/lib/store/index.js +13 -0
  25. package/dist/lib/store/sessions.d.ts +32 -0
  26. package/dist/lib/store/sessions.js +240 -0
  27. package/dist/lib/store/steps.d.ts +40 -0
  28. package/dist/lib/store/steps.js +161 -0
  29. package/dist/lib/store/tasks.d.ts +33 -0
  30. package/dist/lib/store/tasks.js +133 -0
  31. package/dist/lib/store/types.d.ts +167 -0
  32. package/dist/lib/store/types.js +2 -0
  33. package/dist/lib/store.d.ts +1 -406
  34. package/dist/lib/store.js +2 -1356
  35. package/dist/lib/utils.d.ts +5 -0
  36. package/dist/lib/utils.js +45 -0
  37. package/dist/proxy/action-parser.d.ts +10 -2
  38. package/dist/proxy/action-parser.js +4 -2
  39. package/dist/proxy/cache.d.ts +36 -0
  40. package/dist/proxy/cache.js +51 -0
  41. package/dist/proxy/config.d.ts +1 -0
  42. package/dist/proxy/config.js +2 -0
  43. package/dist/proxy/extended-cache.d.ts +10 -0
  44. package/dist/proxy/extended-cache.js +155 -0
  45. package/dist/proxy/forwarder.d.ts +7 -1
  46. package/dist/proxy/forwarder.js +157 -7
  47. package/dist/proxy/handlers/preprocess.d.ts +20 -0
  48. package/dist/proxy/handlers/preprocess.js +169 -0
  49. package/dist/proxy/injection/delta-tracking.d.ts +11 -0
  50. package/dist/proxy/injection/delta-tracking.js +93 -0
  51. package/dist/proxy/injection/injectors.d.ts +7 -0
  52. package/dist/proxy/injection/injectors.js +139 -0
  53. package/dist/proxy/request-processor.d.ts +18 -3
  54. package/dist/proxy/request-processor.js +151 -28
  55. package/dist/proxy/response-processor.js +116 -47
  56. package/dist/proxy/server.d.ts +4 -1
  57. package/dist/proxy/server.js +592 -253
  58. package/dist/proxy/types.d.ts +13 -0
  59. package/dist/proxy/types.js +2 -0
  60. package/dist/proxy/utils/extractors.d.ts +18 -0
  61. package/dist/proxy/utils/extractors.js +109 -0
  62. package/dist/proxy/utils/logging.d.ts +18 -0
  63. package/dist/proxy/utils/logging.js +42 -0
  64. package/package.json +22 -4
@@ -1,49 +1,26 @@
1
1
  // Grov Proxy Server - Fastify + undici
2
2
  // Intercepts Claude Code <-> Anthropic API traffic for drift detection and context injection
3
3
  import Fastify from 'fastify';
4
- import { config } from './config.js';
4
+ import { config, buildSafeHeaders } from './config.js';
5
5
  import { forwardToAnthropic, isForwardError } from './forwarder.js';
6
+ import { extendedCache, evictOldestCacheEntry, checkExtendedCache, log } from './extended-cache.js';
7
+ import { setDebugMode, getNextRequestId, taskLog, proxyLog, logTokenUsage } from './utils/logging.js';
8
+ import { detectKeyDecision, extractTextContent, extractProjectPath, extractGoalFromMessages, extractConversationHistory } from './utils/extractors.js';
9
+ import { appendToLastUserMessage, injectIntoRawBody } from './injection/injectors.js';
10
+ import { preProcessRequest, setPendingPlanClear } from './handlers/preprocess.js';
6
11
  import { parseToolUseBlocks, extractTokenUsage } from './action-parser.js';
7
- import { createSessionState, getSessionState, updateSessionState, createStep, updateTokenCount, logDriftEvent, getRecentSteps, getValidatedSteps, updateSessionMode, markWaitingForRecovery, incrementEscalation, updateLastChecked, markCleared, getActiveSessionForUser, deleteSessionState, deleteStepsForSession, updateRecentStepsReasoning, markSessionCompleted, getCompletedSessionForProject, cleanupOldCompletedSessions, } from '../lib/store.js';
12
+ import { createSessionState, getSessionState, updateSessionState, createStep, updateTokenCount, logDriftEvent, getRecentSteps, getValidatedSteps, updateSessionMode, markWaitingForRecovery, incrementEscalation, updateLastChecked, getActiveSessionForUser, deleteSessionState, deleteStepsForSession, updateRecentStepsReasoning, markSessionCompleted, getCompletedSessionForProject, cleanupOldCompletedSessions, cleanupStaleActiveSessions, } from '../lib/store.js';
8
13
  import { checkDrift, scoreToCorrectionLevel, shouldSkipSteps, isDriftCheckAvailable, checkRecoveryAlignment, generateForcedRecovery, } from '../lib/drift-checker-proxy.js';
9
14
  import { buildCorrection, formatCorrectionForInjection } from '../lib/correction-builder-proxy.js';
10
15
  import { generateSessionSummary, isSummaryAvailable, extractIntent, isIntentExtractionAvailable, analyzeTaskContext, isTaskAnalysisAvailable, } from '../lib/llm-extractor.js';
11
- import { buildTeamMemoryContext, extractFilesFromMessages } from './request-processor.js';
12
16
  import { saveToTeamMemory } from './response-processor.js';
13
17
  import { randomUUID } from 'crypto';
14
18
  // Store last drift result for recovery alignment check
15
19
  const lastDriftResults = new Map();
16
- /**
17
- * Helper to append text to system prompt (handles string or array format)
18
- */
19
- function appendToSystemPrompt(body, textToAppend) {
20
- if (typeof body.system === 'string') {
21
- body.system = body.system + textToAppend;
22
- }
23
- else if (Array.isArray(body.system)) {
24
- // Append as new text block
25
- body.system.push({ type: 'text', text: textToAppend });
26
- }
27
- else {
28
- // No system prompt yet, create as string
29
- body.system = textToAppend;
30
- }
31
- }
32
- /**
33
- * Get system prompt as string (for reading)
34
- */
35
- function getSystemPromptText(body) {
36
- if (typeof body.system === 'string') {
37
- return body.system;
38
- }
39
- else if (Array.isArray(body.system)) {
40
- return body.system
41
- .filter(block => block.type === 'text')
42
- .map(block => block.text)
43
- .join('\n');
44
- }
45
- return '';
46
- }
20
+ // Server logger reference (set in startServer)
21
+ let serverLog = null;
22
+ // Track last messageCount per session to detect retries vs new turns
23
+ const lastMessageCount = new Map();
47
24
  // Session tracking (in-memory for active sessions)
48
25
  const activeSessions = new Map();
49
26
  /**
@@ -54,16 +31,24 @@ export function createServer() {
54
31
  logger: false, // Disabled - all debug goes to ~/.grov/debug.log
55
32
  bodyLimit: config.BODY_LIMIT,
56
33
  });
34
+ // Custom JSON parser that preserves raw bytes for cache preservation
35
+ fastify.addContentTypeParser('application/json', { parseAs: 'buffer' }, (req, body, done) => {
36
+ // Store raw bytes on request for later use
37
+ req.rawBody = body;
38
+ try {
39
+ const json = JSON.parse(body.toString('utf-8'));
40
+ done(null, json);
41
+ }
42
+ catch (err) {
43
+ done(err, undefined);
44
+ }
45
+ });
57
46
  // Health check endpoint
58
47
  fastify.get('/health', async () => {
59
48
  return { status: 'ok', timestamp: new Date().toISOString() };
60
49
  });
61
50
  // Main messages endpoint
62
- fastify.post('/v1/messages', {
63
- config: {
64
- rawBody: true,
65
- },
66
- }, handleMessages);
51
+ fastify.post('/v1/messages', handleMessages);
67
52
  // Catch-all for other Anthropic endpoints (pass through)
68
53
  fastify.all('/*', async (request, reply) => {
69
54
  fastify.log.warn(`Unhandled endpoint: ${request.method} ${request.url}`);
@@ -78,14 +63,12 @@ async function handleMessages(request, reply) {
78
63
  const logger = request.log;
79
64
  const startTime = Date.now();
80
65
  const model = request.body.model;
81
- // Skip Haiku subagents - forward directly without any tracking
82
- // Haiku requests are Task tool spawns for exploration, they don't make decisions
83
- // All reasoning and decisions happen in the main model (Opus/Sonnet)
84
66
  if (model.includes('haiku')) {
85
67
  logger.info({ msg: 'Skipping Haiku subagent', model });
86
68
  try {
87
- const result = await forwardToAnthropic(request.body, request.headers, logger);
88
- const latency = Date.now() - startTime;
69
+ // Force non-streaming for Haiku too
70
+ const haikusBody = { ...request.body, stream: false };
71
+ const result = await forwardToAnthropic(haikusBody, request.headers, logger);
89
72
  return reply
90
73
  .status(result.statusCode)
91
74
  .header('content-type', 'application/json')
@@ -110,6 +93,7 @@ async function handleMessages(request, reply) {
110
93
  promptCount: sessionInfo.promptCount,
111
94
  projectPath: sessionInfo.projectPath,
112
95
  });
96
+ const currentRequestId = getNextRequestId();
113
97
  logger.info({
114
98
  msg: 'Incoming request',
115
99
  sessionId: sessionInfo.sessionId.substring(0, 8),
@@ -117,27 +101,136 @@ async function handleMessages(request, reply) {
117
101
  model: request.body.model,
118
102
  messageCount: request.body.messages?.length || 0,
119
103
  });
120
- // === PRE-HANDLER: Modify request if needed ===
121
- const modifiedBody = await preProcessRequest(request.body, sessionInfo, logger);
122
- // === FORWARD TO ANTHROPIC ===
104
+ // Log REQUEST to file
105
+ const rawBodySize = request.rawBody?.length || 0;
106
+ proxyLog({
107
+ requestId: currentRequestId,
108
+ type: 'REQUEST',
109
+ sessionId: sessionInfo.sessionId.substring(0, 8),
110
+ data: {
111
+ model: request.body.model,
112
+ messageCount: request.body.messages?.length || 0,
113
+ promptCount: sessionInfo.promptCount,
114
+ rawBodySize,
115
+ },
116
+ });
117
+ // Process request to get injection text
118
+ // __grovInjection = team memory (system prompt, cached)
119
+ // __grovUserMsgInjection = dynamic content (user message, delta only)
120
+ const processedBody = await preProcessRequest(request.body, sessionInfo, logger, detectRequestType);
121
+ const systemInjection = processedBody.__grovInjection;
122
+ const userMsgInjection = processedBody.__grovUserMsgInjection;
123
+ // Get raw body bytes
124
+ const rawBody = request.rawBody;
125
+ let rawBodyStr = rawBody?.toString('utf-8') || '';
126
+ // Track injection sizes for logging
127
+ let systemInjectionSize = 0;
128
+ let userMsgInjectionSize = 0;
129
+ let systemSuccess = false;
130
+ let userMsgSuccess = false;
131
+ // 1. Inject team memory into SYSTEM prompt (cached, constant)
132
+ if (systemInjection && rawBodyStr) {
133
+ const result = injectIntoRawBody(rawBodyStr, '\n\n' + systemInjection);
134
+ rawBodyStr = result.modified;
135
+ systemInjectionSize = systemInjection.length;
136
+ systemSuccess = result.success;
137
+ }
138
+ // 2. Inject dynamic content into LAST USER MESSAGE (delta only)
139
+ if (userMsgInjection && rawBodyStr) {
140
+ rawBodyStr = appendToLastUserMessage(rawBodyStr, userMsgInjection);
141
+ userMsgInjectionSize = userMsgInjection.length;
142
+ userMsgSuccess = true; // appendToLastUserMessage doesn't return success flag
143
+ }
144
+ // Determine final body to send
145
+ let finalBodyToSend;
146
+ if (systemInjection || userMsgInjection) {
147
+ finalBodyToSend = rawBodyStr;
148
+ // Log INJECTION to file with full details
149
+ const wasCached = processedBody.__grovInjectionCached;
150
+ proxyLog({
151
+ requestId: currentRequestId,
152
+ type: 'INJECTION',
153
+ sessionId: sessionInfo.sessionId.substring(0, 8),
154
+ data: {
155
+ systemInjectionSize,
156
+ userMsgInjectionSize,
157
+ totalInjectionSize: systemInjectionSize + userMsgInjectionSize,
158
+ originalSize: rawBody?.length || 0,
159
+ finalSize: rawBodyStr.length,
160
+ systemSuccess,
161
+ userMsgSuccess,
162
+ teamMemoryCached: wasCached,
163
+ // Include actual content for debugging (truncated for log readability)
164
+ systemInjectionPreview: systemInjection ? systemInjection.substring(0, 200) + (systemInjection.length > 200 ? '...' : '') : null,
165
+ userMsgInjectionContent: userMsgInjection || null, // Full content since it's small
166
+ },
167
+ });
168
+ }
169
+ else if (rawBody) {
170
+ // No injection, use original raw bytes
171
+ finalBodyToSend = rawBody;
172
+ }
173
+ else {
174
+ // Fallback to re-serialization (shouldn't happen normally)
175
+ finalBodyToSend = JSON.stringify(processedBody);
176
+ }
177
+ const forwardStart = Date.now();
123
178
  try {
124
- const result = await forwardToAnthropic(modifiedBody, request.headers, logger);
125
- // === POST-HANDLER: Process response with task orchestration ===
179
+ // Forward: raw bytes (with injection inserted) or original raw bytes
180
+ const result = await forwardToAnthropic(processedBody, request.headers, logger, typeof finalBodyToSend === 'string' ? Buffer.from(finalBodyToSend, 'utf-8') : finalBodyToSend);
181
+ const forwardLatency = Date.now() - forwardStart;
182
+ // FIRE-AND-FORGET: Don't block response to Claude Code
183
+ // This prevents retry loops caused by Haiku calls adding latency
126
184
  if (result.statusCode === 200 && isAnthropicResponse(result.body)) {
127
- await postProcessResponse(result.body, sessionInfo, request.body, logger);
185
+ // Prepare extended cache data (only if enabled)
186
+ const extendedCacheData = config.EXTENDED_CACHE_ENABLED ? {
187
+ headers: buildSafeHeaders(request.headers),
188
+ rawBody: typeof finalBodyToSend === 'string' ? Buffer.from(finalBodyToSend, 'utf-8') : finalBodyToSend,
189
+ } : undefined;
190
+ postProcessResponse(result.body, sessionInfo, request.body, logger, extendedCacheData)
191
+ .catch(err => console.error('[GROV] postProcess error:', err));
128
192
  }
129
- // Return response to Claude Code (unmodified)
130
193
  const latency = Date.now() - startTime;
194
+ const filteredHeaders = filterResponseHeaders(result.headers);
195
+ // Log token usage (always to console, file only in debug mode)
196
+ if (isAnthropicResponse(result.body)) {
197
+ const usage = extractTokenUsage(result.body);
198
+ // Console: compact token summary (always shown)
199
+ logTokenUsage(currentRequestId, usage, latency);
200
+ // File: detailed response log (debug mode only)
201
+ proxyLog({
202
+ requestId: currentRequestId,
203
+ type: 'RESPONSE',
204
+ sessionId: sessionInfo.sessionId.substring(0, 8),
205
+ data: {
206
+ statusCode: result.statusCode,
207
+ latencyMs: latency,
208
+ forwardLatencyMs: forwardLatency,
209
+ inputTokens: usage.inputTokens,
210
+ outputTokens: usage.outputTokens,
211
+ cacheCreation: usage.cacheCreation,
212
+ cacheRead: usage.cacheRead,
213
+ cacheHitRatio: usage.cacheRead > 0 ? (usage.cacheRead / (usage.cacheRead + usage.cacheCreation)).toFixed(2) : '0.00',
214
+ wasSSE: result.wasSSE,
215
+ },
216
+ });
217
+ }
218
+ // If response was SSE, forward raw SSE to Claude Code (it expects streaming)
219
+ // Otherwise, send JSON
220
+ const isSSEResponse = result.wasSSE;
221
+ const responseContentType = isSSEResponse ? 'text/event-stream; charset=utf-8' : 'application/json';
222
+ const responseBody = isSSEResponse ? result.rawBody : JSON.stringify(result.body);
131
223
  logger.info({
132
224
  msg: 'Request complete',
133
225
  statusCode: result.statusCode,
134
226
  latencyMs: latency,
227
+ wasSSE: isSSEResponse,
135
228
  });
136
229
  return reply
137
230
  .status(result.statusCode)
138
- .header('content-type', 'application/json')
139
- .headers(filterResponseHeaders(result.headers))
140
- .send(JSON.stringify(result.body));
231
+ .header('content-type', responseContentType)
232
+ .headers(filteredHeaders)
233
+ .send(responseBody);
141
234
  }
142
235
  catch (error) {
143
236
  if (isForwardError(error)) {
@@ -215,97 +308,38 @@ async function getOrCreateSession(request, logger) {
215
308
  projectPath,
216
309
  };
217
310
  activeSessions.set(tempSessionId, sessionInfo);
311
+ // Note: team memory is now GLOBAL (not per session), no propagation needed
218
312
  logger.info({ msg: 'No existing session, will create after task analysis' });
219
313
  return { ...sessionInfo, isNew: true, currentSession: null, completedSession };
220
314
  }
221
315
  /**
222
- * Pre-process request before forwarding
223
- * - Context injection
224
- * - CLEAR operation
316
+ * Detect request type: 'first', 'continuation', or 'retry'
317
+ * - first: new user message (messageCount changed, last msg is user without tool_result)
318
+ * - continuation: tool result (messageCount changed, last msg has tool_result)
319
+ * - retry: same messageCount as before
225
320
  */
226
- async function preProcessRequest(body, sessionInfo, logger) {
227
- const modified = { ...body };
228
- // FIRST: Always inject team memory context (doesn't require sessionState)
229
- const mentionedFiles = extractFilesFromMessages(modified.messages || []);
230
- const teamContext = buildTeamMemoryContext(sessionInfo.projectPath, mentionedFiles);
231
- if (teamContext) {
232
- appendToSystemPrompt(modified, '\n\n' + teamContext);
233
- }
234
- // THEN: Session-specific operations
235
- const sessionState = getSessionState(sessionInfo.sessionId);
236
- if (!sessionState) {
237
- return modified; // Injection already happened above!
238
- }
239
- // Extract latest user message for drift checking
240
- const latestUserMessage = extractGoalFromMessages(body.messages) || '';
241
- // CLEAR operation if token threshold exceeded
242
- if ((sessionState.token_count || 0) > config.TOKEN_CLEAR_THRESHOLD) {
243
- logger.info({
244
- msg: 'Token threshold exceeded, initiating CLEAR',
245
- tokenCount: sessionState.token_count,
246
- threshold: config.TOKEN_CLEAR_THRESHOLD,
247
- });
248
- // Generate summary from session state + steps
249
- let summary;
250
- if (isSummaryAvailable()) {
251
- const steps = getValidatedSteps(sessionInfo.sessionId);
252
- summary = await generateSessionSummary(sessionState, steps);
253
- }
254
- else {
255
- const files = getValidatedSteps(sessionInfo.sessionId).flatMap(s => s.files);
256
- summary = `PREVIOUS SESSION CONTEXT:
257
- Goal: ${sessionState.original_goal || 'Not specified'}
258
- Files worked on: ${[...new Set(files)].slice(0, 10).join(', ') || 'None'}
259
- Please continue from where you left off.`;
260
- }
261
- // Clear messages and inject summary
262
- modified.messages = [];
263
- appendToSystemPrompt(modified, '\n\n' + summary);
264
- // Update session state
265
- markCleared(sessionInfo.sessionId);
266
- logger.info({
267
- msg: 'CLEAR completed',
268
- summaryLength: summary.length,
269
- });
321
+ function detectRequestType(messages, projectPath) {
322
+ const currentCount = messages?.length || 0;
323
+ const lastCount = lastMessageCount.get(projectPath);
324
+ lastMessageCount.set(projectPath, currentCount);
325
+ // Same messageCount = retry
326
+ if (lastCount !== undefined && currentCount === lastCount) {
327
+ return 'retry';
270
328
  }
271
- // Check if session is in drifted or forced mode
272
- if (sessionState.session_mode === 'drifted' || sessionState.session_mode === 'forced') {
273
- const recentSteps = getRecentSteps(sessionInfo.sessionId, 5);
274
- // FORCED MODE: escalation >= 3 -> Haiku generates recovery prompt
275
- if (sessionState.escalation_count >= 3 || sessionState.session_mode === 'forced') {
276
- // Update mode to forced if not already
277
- if (sessionState.session_mode !== 'forced') {
278
- updateSessionMode(sessionInfo.sessionId, 'forced');
279
- }
280
- const lastDrift = lastDriftResults.get(sessionInfo.sessionId);
281
- const driftResult = lastDrift || await checkDrift({ sessionState, recentSteps, latestUserMessage });
282
- const forcedRecovery = await generateForcedRecovery(sessionState, recentSteps.map(s => ({ actionType: s.action_type, files: s.files })), driftResult);
283
- appendToSystemPrompt(modified, forcedRecovery.injectionText);
284
- logger.info({
285
- msg: 'FORCED MODE - Injected Haiku recovery prompt',
286
- escalation: sessionState.escalation_count,
287
- mandatoryAction: forcedRecovery.mandatoryAction.substring(0, 50),
288
- });
289
- }
290
- else {
291
- // DRIFTED MODE: normal correction injection
292
- const driftResult = await checkDrift({ sessionState, recentSteps, latestUserMessage });
293
- const correctionLevel = scoreToCorrectionLevel(driftResult.score);
294
- if (correctionLevel) {
295
- const correction = buildCorrection(driftResult, sessionState, correctionLevel);
296
- const correctionText = formatCorrectionForInjection(correction);
297
- appendToSystemPrompt(modified, correctionText);
298
- logger.info({
299
- msg: 'Injected correction',
300
- level: correctionLevel,
301
- score: driftResult.score,
302
- });
303
- }
329
+ // No messages or no last message = first
330
+ if (!messages || messages.length === 0)
331
+ return 'first';
332
+ const lastMessage = messages[messages.length - 1];
333
+ // Check if last message is tool_result (continuation)
334
+ if (lastMessage.role === 'user') {
335
+ const content = lastMessage.content;
336
+ if (Array.isArray(content)) {
337
+ const hasToolResult = content.some((block) => typeof block === 'object' && block !== null && block.type === 'tool_result');
338
+ if (hasToolResult)
339
+ return 'continuation';
304
340
  }
305
341
  }
306
- // Note: Team memory context injection is now at the TOP of preProcessRequest()
307
- // so it runs even when sessionState is null (new sessions)
308
- return modified;
342
+ return 'first';
309
343
  }
310
344
  /**
311
345
  * Post-process response after receiving from Anthropic
@@ -317,7 +351,7 @@ Please continue from where you left off.`;
317
351
  * - Recovery alignment check (Section 4.4)
318
352
  * - Team memory triggers (Section 4.6)
319
353
  */
320
- async function postProcessResponse(response, sessionInfo, requestBody, logger) {
354
+ async function postProcessResponse(response, sessionInfo, requestBody, logger, extendedCacheData) {
321
355
  // Parse tool_use blocks
322
356
  const actions = parseToolUseBlocks(response);
323
357
  // Extract text content for analysis
@@ -339,6 +373,29 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
339
373
  if (isWarmup) {
340
374
  return;
341
375
  }
376
+ // === EXTENDED CACHE: Capture for keep-alive ===
377
+ // Only capture on end_turn (user idle starts now, not during tool_use loops)
378
+ if (isEndTurn && extendedCacheData) {
379
+ const rawStr = extendedCacheData.rawBody.toString('utf-8');
380
+ const hasSystem = rawStr.includes('"system"');
381
+ const hasTools = rawStr.includes('"tools"');
382
+ const hasCacheCtrl = rawStr.includes('"cache_control"');
383
+ const msgMatch = rawStr.match(/"messages"\s*:\s*\[/);
384
+ const msgPos = msgMatch?.index ?? -1;
385
+ // Use projectPath as key (one entry per conversation, not per task)
386
+ const cacheKey = sessionInfo.projectPath;
387
+ // Evict oldest if at capacity (only for NEW entries, not updates)
388
+ if (!extendedCache.has(cacheKey)) {
389
+ evictOldestCacheEntry();
390
+ }
391
+ extendedCache.set(cacheKey, {
392
+ headers: extendedCacheData.headers,
393
+ rawBody: extendedCacheData.rawBody,
394
+ timestamp: Date.now(),
395
+ keepAliveCount: 0,
396
+ });
397
+ log(`Extended cache: CAPTURE project=${cacheKey.split('/').pop()} size=${rawStr.length} sys=${hasSystem} tools=${hasTools} cache_ctrl=${hasCacheCtrl} msg_pos=${msgPos}`);
398
+ }
342
399
  // If not end_turn (tool_use in progress), skip task orchestration but keep session
343
400
  if (!isEndTurn) {
344
401
  // Use existing session or create minimal one without LLM calls
@@ -361,23 +418,44 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
361
418
  promptCount: 1,
362
419
  projectPath: sessionInfo.projectPath,
363
420
  });
421
+ // Note: team memory is now GLOBAL (not per session), no propagation needed
364
422
  }
365
423
  }
366
424
  else if (isTaskAnalysisAvailable()) {
367
425
  // Use completed session for comparison if no active session
368
426
  const sessionForComparison = sessionInfo.currentSession || sessionInfo.completedSession;
427
+ // Extract conversation history for context-aware task analysis
428
+ const conversationHistory = extractConversationHistory(requestBody.messages || []);
369
429
  try {
370
- const taskAnalysis = await analyzeTaskContext(sessionForComparison, latestUserMessage, recentSteps, textContent);
430
+ const taskAnalysis = await analyzeTaskContext(sessionForComparison, latestUserMessage, recentSteps, textContent, conversationHistory);
371
431
  logger.info({
372
432
  msg: 'Task analysis',
373
433
  action: taskAnalysis.action,
374
- topic_match: taskAnalysis.topic_match,
434
+ task_type: taskAnalysis.task_type,
375
435
  goal: taskAnalysis.current_goal?.substring(0, 50),
376
436
  reasoning: taskAnalysis.reasoning,
377
437
  });
438
+ // TASK LOG: Analysis result
439
+ taskLog('TASK_ANALYSIS', {
440
+ sessionId: sessionInfo.sessionId,
441
+ action: taskAnalysis.action,
442
+ task_type: taskAnalysis.task_type,
443
+ goal: taskAnalysis.current_goal || '',
444
+ reasoning: taskAnalysis.reasoning || '',
445
+ userMessage: latestUserMessage.substring(0, 80),
446
+ hasCurrentSession: !!sessionInfo.currentSession,
447
+ hasCompletedSession: !!sessionInfo.completedSession,
448
+ });
378
449
  // Update recent steps with reasoning (backfill from end_turn response)
379
450
  if (taskAnalysis.step_reasoning && activeSessionId) {
380
451
  const updatedCount = updateRecentStepsReasoning(activeSessionId, taskAnalysis.step_reasoning);
452
+ // TASK LOG: Step reasoning update
453
+ taskLog('STEP_REASONING', {
454
+ sessionId: activeSessionId,
455
+ stepsUpdated: updatedCount,
456
+ reasoningEntries: Object.keys(taskAnalysis.step_reasoning).length,
457
+ stepIds: Object.keys(taskAnalysis.step_reasoning).join(','),
458
+ });
381
459
  }
382
460
  // Handle task orchestration based on analysis
383
461
  switch (taskAnalysis.action) {
@@ -396,6 +474,13 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
396
474
  });
397
475
  activeSession.original_goal = taskAnalysis.current_goal;
398
476
  }
477
+ // TASK LOG: Continue existing session
478
+ taskLog('ORCHESTRATION_CONTINUE', {
479
+ sessionId: activeSessionId,
480
+ source: 'current_session',
481
+ goal: activeSession.original_goal,
482
+ goalUpdated: taskAnalysis.current_goal !== activeSession.original_goal,
483
+ });
399
484
  }
400
485
  else if (sessionInfo.completedSession) {
401
486
  // Reactivate completed session (user wants to continue/add to it)
@@ -411,6 +496,13 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
411
496
  promptCount: 1,
412
497
  projectPath: sessionInfo.projectPath,
413
498
  });
499
+ // Note: team memory is now GLOBAL (not per session), no propagation needed
500
+ // TASK LOG: Reactivate completed session
501
+ taskLog('ORCHESTRATION_CONTINUE', {
502
+ sessionId: activeSessionId,
503
+ source: 'reactivated_completed',
504
+ goal: activeSession.original_goal,
505
+ });
414
506
  }
415
507
  break;
416
508
  case 'new_task': {
@@ -430,9 +522,24 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
430
522
  try {
431
523
  intentData = await extractIntent(latestUserMessage);
432
524
  logger.info({ msg: 'Intent extracted for new task', scopeCount: intentData.expected_scope.length });
525
+ // TASK LOG: Intent extraction for new_task
526
+ taskLog('INTENT_EXTRACTION', {
527
+ sessionId: sessionInfo.sessionId,
528
+ context: 'new_task',
529
+ goal: intentData.goal,
530
+ scopeCount: intentData.expected_scope.length,
531
+ scope: intentData.expected_scope.join(', '),
532
+ constraints: intentData.constraints.join(', '),
533
+ keywords: intentData.keywords.join(', '),
534
+ });
433
535
  }
434
536
  catch (err) {
435
537
  logger.info({ msg: 'Intent extraction failed, using basic goal', error: String(err) });
538
+ taskLog('INTENT_EXTRACTION_FAILED', {
539
+ sessionId: sessionInfo.sessionId,
540
+ context: 'new_task',
541
+ error: String(err),
542
+ });
436
543
  }
437
544
  }
438
545
  const newSessionId = randomUUID();
@@ -452,6 +559,42 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
452
559
  projectPath: sessionInfo.projectPath,
453
560
  });
454
561
  logger.info({ msg: 'Created new task session', sessionId: newSessionId.substring(0, 8) });
562
+ // TASK LOG: New task created
563
+ taskLog('ORCHESTRATION_NEW_TASK', {
564
+ sessionId: newSessionId,
565
+ goal: intentData.goal,
566
+ scopeCount: intentData.expected_scope.length,
567
+ keywordsCount: intentData.keywords.length,
568
+ });
569
+ // Q&A AUTO-SAVE: If this is an information request with a substantive answer
570
+ // AND no tool calls, save immediately since pure Q&A completes in a single turn.
571
+ // If there ARE tool calls (e.g., Read for "Analyze X"), wait for them to complete
572
+ // so steps get captured properly before saving.
573
+ if (taskAnalysis.task_type === 'information' && textContent.length > 100 && actions.length === 0) {
574
+ logger.info({ msg: 'Q&A detected (pure text) - saving immediately', sessionId: newSessionId.substring(0, 8) });
575
+ taskLog('QA_AUTO_SAVE', {
576
+ sessionId: newSessionId,
577
+ goal: intentData.goal,
578
+ responseLength: textContent.length,
579
+ toolCalls: 0,
580
+ });
581
+ // Store the response for reasoning extraction
582
+ updateSessionState(newSessionId, {
583
+ final_response: textContent.substring(0, 10000),
584
+ });
585
+ // Save to team memory and mark complete
586
+ await saveToTeamMemory(newSessionId, 'complete');
587
+ markSessionCompleted(newSessionId);
588
+ }
589
+ else if (taskAnalysis.task_type === 'information' && actions.length > 0) {
590
+ // Q&A with tool calls - don't auto-save, let it continue until task_complete
591
+ logger.info({ msg: 'Q&A with tool calls - waiting for completion', sessionId: newSessionId.substring(0, 8), toolCalls: actions.length });
592
+ taskLog('QA_DEFERRED', {
593
+ sessionId: newSessionId,
594
+ goal: intentData.goal,
595
+ toolCalls: actions.length,
596
+ });
597
+ }
455
598
  break;
456
599
  }
457
600
  case 'subtask': {
@@ -465,8 +608,17 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
465
608
  if (isIntentExtractionAvailable() && latestUserMessage.length > 10) {
466
609
  try {
467
610
  intentData = await extractIntent(latestUserMessage);
611
+ taskLog('INTENT_EXTRACTION', {
612
+ sessionId: sessionInfo.sessionId,
613
+ context: 'subtask',
614
+ goal: intentData.goal,
615
+ scope: intentData.expected_scope.join(', '),
616
+ keywords: intentData.keywords.join(', '),
617
+ });
618
+ }
619
+ catch (err) {
620
+ taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'subtask', error: String(err) });
468
621
  }
469
- catch { /* use fallback */ }
470
622
  }
471
623
  const parentId = sessionInfo.currentSession?.session_id || taskAnalysis.parent_task_id;
472
624
  const subtaskId = randomUUID();
@@ -487,6 +639,12 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
487
639
  projectPath: sessionInfo.projectPath,
488
640
  });
489
641
  logger.info({ msg: 'Created subtask session', sessionId: subtaskId.substring(0, 8), parent: parentId?.substring(0, 8) });
642
+ // TASK LOG: Subtask created
643
+ taskLog('ORCHESTRATION_SUBTASK', {
644
+ sessionId: subtaskId,
645
+ parentId: parentId || 'none',
646
+ goal: intentData.goal,
647
+ });
490
648
  break;
491
649
  }
492
650
  case 'parallel_task': {
@@ -500,8 +658,17 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
500
658
  if (isIntentExtractionAvailable() && latestUserMessage.length > 10) {
501
659
  try {
502
660
  intentData = await extractIntent(latestUserMessage);
661
+ taskLog('INTENT_EXTRACTION', {
662
+ sessionId: sessionInfo.sessionId,
663
+ context: 'parallel_task',
664
+ goal: intentData.goal,
665
+ scope: intentData.expected_scope.join(', '),
666
+ keywords: intentData.keywords.join(', '),
667
+ });
668
+ }
669
+ catch (err) {
670
+ taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'parallel_task', error: String(err) });
503
671
  }
504
- catch { /* use fallback */ }
505
672
  }
506
673
  const parentId = sessionInfo.currentSession?.session_id || taskAnalysis.parent_task_id;
507
674
  const parallelId = randomUUID();
@@ -522,22 +689,89 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
522
689
  projectPath: sessionInfo.projectPath,
523
690
  });
524
691
  logger.info({ msg: 'Created parallel task session', sessionId: parallelId.substring(0, 8), parent: parentId?.substring(0, 8) });
692
+ // TASK LOG: Parallel task created
693
+ taskLog('ORCHESTRATION_PARALLEL', {
694
+ sessionId: parallelId,
695
+ parentId: parentId || 'none',
696
+ goal: intentData.goal,
697
+ });
525
698
  break;
526
699
  }
527
700
  case 'task_complete': {
528
701
  // Save to team memory and mark as completed (don't delete yet - keep for new_task detection)
529
702
  if (sessionInfo.currentSession) {
530
703
  try {
704
+ // Set final_response BEFORE saving so reasoning extraction has the data
705
+ updateSessionState(sessionInfo.currentSession.session_id, {
706
+ final_response: textContent.substring(0, 10000),
707
+ });
531
708
  await saveToTeamMemory(sessionInfo.currentSession.session_id, 'complete');
532
709
  markSessionCompleted(sessionInfo.currentSession.session_id);
533
710
  activeSessions.delete(sessionInfo.currentSession.session_id);
534
711
  lastDriftResults.delete(sessionInfo.currentSession.session_id);
712
+ // TASK LOG: Task completed
713
+ taskLog('ORCHESTRATION_TASK_COMPLETE', {
714
+ sessionId: sessionInfo.currentSession.session_id,
715
+ goal: sessionInfo.currentSession.original_goal,
716
+ });
717
+ // PLANNING COMPLETE: Trigger CLEAR-like reset for implementation phase
718
+ // This ensures next request starts fresh with planning context from team memory
719
+ if (taskAnalysis.task_type === 'planning' && isSummaryAvailable()) {
720
+ try {
721
+ const allSteps = getValidatedSteps(sessionInfo.currentSession.session_id);
722
+ const planSummary = await generateSessionSummary(sessionInfo.currentSession, allSteps, 2000);
723
+ // Store for next request to trigger CLEAR
724
+ setPendingPlanClear({
725
+ projectPath: sessionInfo.projectPath,
726
+ summary: planSummary,
727
+ });
728
+ // Cache invalidation happens in response-processor.ts after syncTask completes
729
+ logger.info({
730
+ msg: 'PLANNING_CLEAR triggered',
731
+ sessionId: sessionInfo.currentSession.session_id.substring(0, 8),
732
+ summaryLen: planSummary.length,
733
+ });
734
+ }
735
+ catch {
736
+ // Silent fail - planning CLEAR is optional enhancement
737
+ }
738
+ }
535
739
  logger.info({ msg: 'Task complete - saved to team memory, marked completed' });
536
740
  }
537
741
  catch (err) {
538
742
  logger.info({ msg: 'Failed to save completed task', error: String(err) });
539
743
  }
540
744
  }
745
+ else if (textContent.length > 100) {
746
+ // NEW: Handle "instant complete" - task that's new AND immediately complete
747
+ // This happens for simple Q&A when Haiku says task_complete without existing session
748
+ // Example: user asks clarification question, answer is provided in single turn
749
+ try {
750
+ const newSessionId = randomUUID();
751
+ const instantSession = createSessionState({
752
+ session_id: newSessionId,
753
+ project_path: sessionInfo.projectPath,
754
+ original_goal: taskAnalysis.current_goal || latestUserMessage.substring(0, 500),
755
+ task_type: 'main',
756
+ });
757
+ // Set final_response for reasoning extraction
758
+ updateSessionState(newSessionId, {
759
+ final_response: textContent.substring(0, 10000),
760
+ });
761
+ await saveToTeamMemory(newSessionId, 'complete');
762
+ markSessionCompleted(newSessionId);
763
+ logger.info({ msg: 'Instant complete - new task saved immediately', sessionId: newSessionId.substring(0, 8) });
764
+ // TASK LOG: Instant complete (new task that finished in one turn)
765
+ taskLog('ORCHESTRATION_TASK_COMPLETE', {
766
+ sessionId: newSessionId,
767
+ goal: taskAnalysis.current_goal || latestUserMessage.substring(0, 80),
768
+ source: 'instant_complete',
769
+ });
770
+ }
771
+ catch (err) {
772
+ logger.info({ msg: 'Failed to save instant complete task', error: String(err) });
773
+ }
774
+ }
541
775
  return; // Done, no more processing needed
542
776
  }
543
777
  case 'subtask_complete': {
@@ -556,6 +790,12 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
556
790
  activeSessionId = parentId;
557
791
  activeSession = parentSession;
558
792
  logger.info({ msg: 'Subtask complete - returning to parent', parent: parentId.substring(0, 8) });
793
+ // TASK LOG: Subtask completed
794
+ taskLog('ORCHESTRATION_SUBTASK_COMPLETE', {
795
+ sessionId: sessionInfo.currentSession.session_id,
796
+ parentId: parentId,
797
+ goal: sessionInfo.currentSession.original_goal,
798
+ });
559
799
  }
560
800
  }
561
801
  }
@@ -580,8 +820,16 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
580
820
  if (isIntentExtractionAvailable() && latestUserMessage.length > 10) {
581
821
  try {
582
822
  intentData = await extractIntent(latestUserMessage);
823
+ taskLog('INTENT_EXTRACTION', {
824
+ sessionId: sessionInfo.sessionId,
825
+ context: 'fallback_analysis_failed',
826
+ goal: intentData.goal,
827
+ scope: intentData.expected_scope.join(', '),
828
+ });
829
+ }
830
+ catch (err) {
831
+ taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'fallback_analysis_failed', error: String(err) });
583
832
  }
584
- catch { /* use fallback */ }
585
833
  }
586
834
  const newSessionId = randomUUID();
587
835
  activeSession = createSessionState({
@@ -599,6 +847,11 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
599
847
  }
600
848
  else {
601
849
  // No task analysis available - fallback with intent extraction
850
+ taskLog('TASK_ANALYSIS_UNAVAILABLE', {
851
+ sessionId: sessionInfo.sessionId,
852
+ hasCurrentSession: !!sessionInfo.currentSession,
853
+ userMessage: latestUserMessage.substring(0, 80),
854
+ });
602
855
  if (!sessionInfo.currentSession) {
603
856
  let intentData = {
604
857
  goal: latestUserMessage.substring(0, 500),
@@ -610,8 +863,16 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
610
863
  try {
611
864
  intentData = await extractIntent(latestUserMessage);
612
865
  logger.info({ msg: 'Intent extracted (fallback)', scopeCount: intentData.expected_scope.length });
866
+ taskLog('INTENT_EXTRACTION', {
867
+ sessionId: sessionInfo.sessionId,
868
+ context: 'no_analysis_available',
869
+ goal: intentData.goal,
870
+ scope: intentData.expected_scope.join(', '),
871
+ });
872
+ }
873
+ catch (err) {
874
+ taskLog('INTENT_EXTRACTION_FAILED', { sessionId: sessionInfo.sessionId, context: 'no_analysis_available', error: String(err) });
613
875
  }
614
- catch { /* use fallback */ }
615
876
  }
616
877
  const newSessionId = randomUUID();
617
878
  activeSession = createSessionState({
@@ -630,19 +891,64 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
630
891
  activeSessionId = sessionInfo.currentSession.session_id;
631
892
  }
632
893
  }
894
+ // NOTE: Auto-save on every end_turn was REMOVED
895
+ // Task saving is now controlled by Haiku's task analysis:
896
+ // - task_complete: Haiku detected task is done (Q&A answered, implementation verified, planning confirmed)
897
+ // - subtask_complete: Haiku detected subtask is done
898
+ // This ensures we only save when work is actually complete, not on every Claude response.
899
+ // See analyzeTaskContext() in llm-extractor.ts for the decision logic.
633
900
  // Extract token usage
634
901
  const usage = extractTokenUsage(response);
902
+ // Use cache metrics as actual context size (cacheCreation + cacheRead)
903
+ // This is what Anthropic bills for and what determines CLEAR threshold
904
+ const actualContextSize = usage.cacheCreation + usage.cacheRead;
635
905
  if (activeSession) {
636
- updateTokenCount(activeSessionId, usage.totalTokens);
906
+ // Set to actual context size (not cumulative - context size IS the total)
907
+ updateTokenCount(activeSessionId, actualContextSize);
637
908
  }
638
909
  logger.info({
639
910
  msg: 'Token usage',
640
911
  input: usage.inputTokens,
641
912
  output: usage.outputTokens,
642
913
  total: usage.totalTokens,
914
+ cacheCreation: usage.cacheCreation,
915
+ cacheRead: usage.cacheRead,
916
+ actualContextSize,
643
917
  activeSession: activeSessionId.substring(0, 8),
644
918
  });
919
+ // === CLEAR MODE PRE-COMPUTE (85% threshold) ===
920
+ // Pre-compute summary before hitting 100% threshold to avoid blocking Haiku call
921
+ const preComputeThreshold = Math.floor(config.TOKEN_CLEAR_THRESHOLD * 0.85);
922
+ // Use actualContextSize (cacheCreation + cacheRead) as the real context size
923
+ if (activeSession &&
924
+ actualContextSize > preComputeThreshold &&
925
+ !activeSession.pending_clear_summary &&
926
+ isSummaryAvailable()) {
927
+ // Get all validated steps for comprehensive summary
928
+ const allSteps = getValidatedSteps(activeSessionId);
929
+ // Generate summary asynchronously (fire-and-forget)
930
+ generateSessionSummary(activeSession, allSteps, 15000).then(summary => {
931
+ updateSessionState(activeSessionId, { pending_clear_summary: summary });
932
+ logger.info({
933
+ msg: 'CLEAR summary pre-computed',
934
+ actualContextSize,
935
+ threshold: preComputeThreshold,
936
+ summaryLength: summary.length,
937
+ });
938
+ }).catch(err => {
939
+ logger.info({ msg: 'CLEAR summary generation failed', error: String(err) });
940
+ });
941
+ }
942
+ // Capture final_response for ALL end_turn responses (not just Q&A)
943
+ // This preserves Claude's analysis even when tools were used
944
+ if (isEndTurn && textContent.length > 100 && activeSessionId) {
945
+ updateSessionState(activeSessionId, {
946
+ final_response: textContent.substring(0, 10000),
947
+ });
948
+ }
645
949
  if (actions.length === 0) {
950
+ // Final response (no tool calls)
951
+ // NOTE: Task saving is controlled by Haiku's task analysis (see switch case 'task_complete' above)
646
952
  return;
647
953
  }
648
954
  logger.info({
@@ -700,11 +1006,51 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
700
1006
  updateSessionMode(activeSessionId, 'drifted');
701
1007
  markWaitingForRecovery(activeSessionId, true);
702
1008
  incrementEscalation(activeSessionId);
1009
+ // Pre-compute correction for next request (fire-and-forget pattern)
1010
+ // This avoids blocking Haiku calls in preProcessRequest
1011
+ const correction = buildCorrection(driftResult, activeSession, correctionLevel);
1012
+ const correctionText = formatCorrectionForInjection(correction);
1013
+ updateSessionState(activeSessionId, { pending_correction: correctionText });
1014
+ logger.info({
1015
+ msg: 'Pre-computed correction saved',
1016
+ level: correctionLevel,
1017
+ correctionLength: correctionText.length,
1018
+ });
1019
+ }
1020
+ else if (correctionLevel) {
1021
+ // Nudge or correct level - still save correction but don't change mode
1022
+ const correction = buildCorrection(driftResult, activeSession, correctionLevel);
1023
+ const correctionText = formatCorrectionForInjection(correction);
1024
+ updateSessionState(activeSessionId, { pending_correction: correctionText });
1025
+ logger.info({
1026
+ msg: 'Pre-computed mild correction saved',
1027
+ level: correctionLevel,
1028
+ });
703
1029
  }
704
1030
  else if (driftScore >= 8) {
705
1031
  updateSessionMode(activeSessionId, 'normal');
706
1032
  markWaitingForRecovery(activeSessionId, false);
707
1033
  lastDriftResults.delete(activeSessionId);
1034
+ // Clear any pending correction since drift is resolved
1035
+ updateSessionState(activeSessionId, { pending_correction: undefined });
1036
+ }
1037
+ // FORCED MODE: escalation >= 3 triggers Haiku-generated recovery
1038
+ const currentEscalation = activeSession.escalation_count || 0;
1039
+ if (currentEscalation >= 3 && driftScore < 8) {
1040
+ updateSessionMode(activeSessionId, 'forced');
1041
+ // Generate forced recovery asynchronously (fire-and-forget within fire-and-forget)
1042
+ generateForcedRecovery(activeSession, recentSteps.map(s => ({ actionType: s.action_type, files: s.files })), driftResult).then(forcedRecovery => {
1043
+ updateSessionState(activeSessionId, {
1044
+ pending_forced_recovery: forcedRecovery.injectionText,
1045
+ });
1046
+ logger.info({
1047
+ msg: 'Pre-computed forced recovery saved',
1048
+ escalation: currentEscalation,
1049
+ mandatoryAction: forcedRecovery.mandatoryAction?.substring(0, 50),
1050
+ });
1051
+ }).catch(err => {
1052
+ logger.info({ msg: 'Forced recovery generation failed', error: String(err) });
1053
+ });
708
1054
  }
709
1055
  updateLastChecked(activeSessionId, Date.now());
710
1056
  if (skipSteps) {
@@ -727,122 +1073,44 @@ async function postProcessResponse(response, sessionInfo, requestBody, logger) {
727
1073
  }
728
1074
  }
729
1075
  // Save each action as a step (with reasoning from Claude's text)
1076
+ // When multiple actions come from the same Claude response, they share identical reasoning.
1077
+ // We store reasoning only on the first action and set NULL for subsequent ones to avoid duplication.
1078
+ // At query time, we group steps by reasoning (non-NULL starts a group, NULLs continue it)
1079
+ // and reconstruct the full context: reasoning + all associated files/actions.
1080
+ let previousReasoning = null;
1081
+ logger.info({ msg: 'DEDUP_DEBUG', actionsCount: actions.length, textContentLen: textContent.length });
730
1082
  for (const action of actions) {
1083
+ const currentReasoning = textContent.substring(0, 1000);
1084
+ const isDuplicate = currentReasoning === previousReasoning;
1085
+ logger.info({
1086
+ msg: 'DEDUP_STEP',
1087
+ actionType: action.actionType,
1088
+ isDuplicate,
1089
+ prevLen: previousReasoning?.length || 0,
1090
+ currLen: currentReasoning.length
1091
+ });
1092
+ // Detect key decisions based on action type and reasoning content
1093
+ const isKeyDecision = !isDuplicate && detectKeyDecision(action, textContent);
731
1094
  createStep({
732
1095
  session_id: activeSessionId,
733
1096
  action_type: action.actionType,
734
1097
  files: action.files,
735
1098
  folders: action.folders,
736
1099
  command: action.command,
737
- reasoning: textContent.substring(0, 1000), // Claude's explanation (truncated)
1100
+ reasoning: isDuplicate ? undefined : currentReasoning,
738
1101
  drift_score: driftScore,
739
1102
  is_validated: !skipSteps,
1103
+ is_key_decision: isKeyDecision,
740
1104
  });
741
- }
742
- }
743
/**
 * Collect every plain-text block from an Anthropic response body.
 * @param {object} response - Parsed API response whose `content` is an array of blocks.
 * @returns {string} The text of all `type: 'text'` blocks, joined with newlines.
 */
function extractTextContent(response) {
    const textParts = [];
    for (const block of response.content) {
        if (block.type === 'text') {
            textParts.push(block.text);
        }
    }
    return textParts.join('\n');
}
752
/**
 * Heuristically detect task completion from Claude's response text.
 * Matching is case-insensitive; completion phrases are checked before
 * subtask phrases, so a text containing both reports 'complete'.
 * @param {string} text - Response text to scan.
 * @returns {'complete'|'subtask'|null} Trigger type, or null when nothing matches.
 */
function detectTaskCompletion(text) {
    const haystack = text.toLowerCase();
    // Strong completion indicators
    const completionPhrases = [
        'task is complete',
        'task complete',
        'implementation is complete',
        'implementation complete',
        'successfully implemented',
        'all changes have been made',
        'finished implementing',
        'completed the implementation',
        'done with the implementation',
        'completed all the',
        'all tests pass',
        'build succeeds',
    ];
    if (completionPhrases.some((phrase) => haystack.includes(phrase))) {
        return 'complete';
    }
    // Subtask completion indicators
    const subtaskPhrases = [
        'step complete',
        'phase complete',
        'finished this step',
        'moving on to',
        'now let\'s',
        'next step',
    ];
    if (subtaskPhrases.some((phrase) => haystack.includes(phrase))) {
        return 'subtask';
    }
    return null;
}
794
/**
 * Pull the working-directory path out of a request body's system prompt.
 * Handles both the legacy string form of `system` and the newer API format
 * where `system` is an array of { type: 'text', text: '...' } blocks.
 * @param {object} body - Proxied request body.
 * @returns {string|null} Trimmed path following "Working directory:", or null when absent.
 */
function extractProjectPath(body) {
    const { system } = body;
    let systemPrompt = '';
    if (typeof system === 'string') {
        systemPrompt = system;
    }
    else if (Array.isArray(system)) {
        // New API format: system is an array of {type: 'text', text: '...'}
        const textBlocks = system.filter((block) => block && typeof block === 'object' && block.type === 'text' && typeof block.text === 'string');
        systemPrompt = textBlocks.map((block) => block.text).join('\n');
    }
    const cwdMatch = /Working directory:\s*([^\n]+)/.exec(systemPrompt);
    return cwdMatch ? cwdMatch[1].trim() : null;
}
817
- /**
818
- * Extract goal from FIRST user message with text content
819
- * Skips tool_result blocks, filters out system-reminder tags
820
- */
821
- function extractGoalFromMessages(messages) {
822
- const userMessages = messages?.filter(m => m.role === 'user') || [];
823
- for (const userMsg of userMessages) {
824
- let rawContent = '';
825
- // Handle string content
826
- if (typeof userMsg.content === 'string') {
827
- rawContent = userMsg.content;
828
- }
829
- // Handle array content - look for text blocks (skip tool_result)
830
- if (Array.isArray(userMsg.content)) {
831
- const textBlocks = userMsg.content
832
- .filter((block) => block && typeof block === 'object' && block.type === 'text' && typeof block.text === 'string')
833
- .map(block => block.text);
834
- rawContent = textBlocks.join('\n');
835
- }
836
- // Remove <system-reminder>...</system-reminder> tags
837
- const cleanContent = rawContent
838
- .replace(/<system-reminder>[\s\S]*?<\/system-reminder>/g, '')
839
- .trim();
840
- // If we found valid text content, return it
841
- if (cleanContent && cleanContent.length >= 5) {
842
- return cleanContent.substring(0, 500);
1105
+ previousReasoning = currentReasoning;
1106
+ if (isKeyDecision) {
1107
+ logger.info({
1108
+ msg: 'Key decision detected',
1109
+ actionType: action.actionType,
1110
+ files: action.files.slice(0, 3),
1111
+ });
843
1112
  }
844
1113
  }
845
- return undefined;
846
1114
  }
847
1115
  /**
848
1116
  * Filter response headers for forwarding to client
@@ -852,10 +1120,16 @@ function filterResponseHeaders(headers) {
852
1120
  const allowedHeaders = [
853
1121
  'content-type',
854
1122
  'x-request-id',
1123
+ 'request-id',
1124
+ 'x-should-retry',
1125
+ 'retry-after',
1126
+ 'retry-after-ms',
855
1127
  'anthropic-ratelimit-requests-limit',
856
1128
  'anthropic-ratelimit-requests-remaining',
1129
+ 'anthropic-ratelimit-requests-reset',
857
1130
  'anthropic-ratelimit-tokens-limit',
858
1131
  'anthropic-ratelimit-tokens-remaining',
1132
+ 'anthropic-ratelimit-tokens-reset',
859
1133
  ];
860
1134
  for (const header of allowedHeaders) {
861
1135
  const value = headers[header];
@@ -878,19 +1152,84 @@ function isAnthropicResponse(body) {
878
1152
  }
879
1153
  /**
880
1154
  * Start the proxy server
1155
+ * @param options.debug - Enable debug logging to grov-proxy.log
881
1156
  */
882
- export async function startServer() {
1157
+ export async function startServer(options = {}) {
1158
+ // Set debug mode based on flag
1159
+ if (options.debug) {
1160
+ setDebugMode(true);
1161
+ console.log('[DEBUG] Logging to grov-proxy.log');
1162
+ }
883
1163
  const server = createServer();
1164
+ // Set server logger for background tasks
1165
+ serverLog = server.log;
884
1166
  // Cleanup old completed sessions (older than 24 hours)
885
- const cleanedUp = cleanupOldCompletedSessions();
886
- if (cleanedUp > 0) {
1167
+ cleanupOldCompletedSessions();
1168
+ // Cleanup stale active sessions (no activity for 1 hour)
1169
+ // Prevents old sessions from being reused in fresh Claude sessions
1170
+ const staleCount = cleanupStaleActiveSessions();
1171
+ if (staleCount > 0) {
1172
+ log(`Cleaned up ${staleCount} stale active session(s)`);
1173
+ }
1174
+ // Start extended cache timer if enabled
1175
+ let extendedCacheTimer = null;
1176
+ // Track active connections for graceful shutdown
1177
+ const activeConnections = new Set();
1178
+ let isShuttingDown = false;
1179
+ // Graceful shutdown handler (works with or without extended cache)
1180
+ const gracefulShutdown = () => {
1181
+ if (isShuttingDown)
1182
+ return;
1183
+ isShuttingDown = true;
1184
+ log('Shutdown initiated...');
1185
+ // 1. Stop extended cache timer if running
1186
+ if (extendedCacheTimer) {
1187
+ clearInterval(extendedCacheTimer);
1188
+ extendedCacheTimer = null;
1189
+ log('Extended cache: timer stopped');
1190
+ }
1191
+ // 2. Clear sensitive cache data
1192
+ if (extendedCache.size > 0) {
1193
+ log(`Extended cache: clearing ${extendedCache.size} entries`);
1194
+ for (const entry of extendedCache.values()) {
1195
+ for (const key of Object.keys(entry.headers)) {
1196
+ entry.headers[key] = '';
1197
+ }
1198
+ entry.rawBody = Buffer.alloc(0);
1199
+ }
1200
+ extendedCache.clear();
1201
+ }
1202
+ // 3. Stop accepting new connections
1203
+ server.close();
1204
+ // 4. Grace period (500ms) then force close remaining connections
1205
+ setTimeout(() => {
1206
+ if (activeConnections.size > 0) {
1207
+ log(`Force closing ${activeConnections.size} connection(s)`);
1208
+ for (const socket of activeConnections) {
1209
+ socket.destroy();
1210
+ }
1211
+ }
1212
+ log('Goodbye!');
1213
+ process.exit(0);
1214
+ }, 500);
1215
+ };
1216
+ process.on('SIGTERM', gracefulShutdown);
1217
+ process.on('SIGINT', gracefulShutdown);
1218
+ if (config.EXTENDED_CACHE_ENABLED) {
1219
+ extendedCacheTimer = setInterval(checkExtendedCache, 60_000);
1220
+ log('Extended cache: enabled (keep-alive timer started)');
887
1221
  }
888
1222
  try {
889
1223
  await server.listen({
890
1224
  host: config.HOST,
891
1225
  port: config.PORT,
892
1226
  });
893
- console.log(`✓ Grov Proxy: http://${config.HOST}:${config.PORT} ${config.ANTHROPIC_BASE_URL}`);
1227
+ // Track connections for graceful shutdown
1228
+ server.server.on('connection', (socket) => {
1229
+ activeConnections.add(socket);
1230
+ socket.on('close', () => activeConnections.delete(socket));
1231
+ });
1232
+ console.log(`Grov Proxy: http://${config.HOST}:${config.PORT} -> ${config.ANTHROPIC_BASE_URL}`);
894
1233
  return server;
895
1234
  }
896
1235
  catch (err) {