npm - grov - Versions diffs - 0.5.2 → 0.5.4 - Mend

grov 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

package/README.md +34 -4
package/dist/cli.js +8 -0
package/dist/lib/api-client.d.ts +18 -1
package/dist/lib/api-client.js +57 -0
package/dist/lib/llm-extractor.d.ts +14 -39
package/dist/lib/llm-extractor.js +379 -407
package/dist/lib/store/convenience.d.ts +40 -0
package/dist/lib/store/convenience.js +104 -0
package/dist/lib/store/database.d.ts +22 -0
package/dist/lib/store/database.js +375 -0
package/dist/lib/store/drift.d.ts +9 -0
package/dist/lib/store/drift.js +89 -0
package/dist/lib/store/index.d.ts +7 -0
package/dist/lib/store/index.js +13 -0
package/dist/lib/store/sessions.d.ts +32 -0
package/dist/lib/store/sessions.js +240 -0
package/dist/lib/store/steps.d.ts +40 -0
package/dist/lib/store/steps.js +161 -0
package/dist/lib/store/tasks.d.ts +33 -0
package/dist/lib/store/tasks.js +133 -0
package/dist/lib/store/types.d.ts +167 -0
package/dist/lib/store/types.js +2 -0
package/dist/lib/store.d.ts +1 -436
package/dist/lib/store.js +2 -1478
package/dist/proxy/cache.d.ts +36 -0
package/dist/proxy/cache.js +51 -0
package/dist/proxy/config.d.ts +1 -0
package/dist/proxy/config.js +2 -0
package/dist/proxy/extended-cache.d.ts +10 -0
package/dist/proxy/extended-cache.js +155 -0
package/dist/proxy/handlers/preprocess.d.ts +20 -0
package/dist/proxy/handlers/preprocess.js +169 -0
package/dist/proxy/injection/delta-tracking.d.ts +11 -0
package/dist/proxy/injection/delta-tracking.js +93 -0
package/dist/proxy/injection/injectors.d.ts +7 -0
package/dist/proxy/injection/injectors.js +139 -0
package/dist/proxy/request-processor.d.ts +18 -4
package/dist/proxy/request-processor.js +151 -30
package/dist/proxy/response-processor.js +93 -45
package/dist/proxy/server.d.ts +0 -1
package/dist/proxy/server.js +366 -582
package/dist/proxy/types.d.ts +13 -0
package/dist/proxy/types.js +2 -0
package/dist/proxy/utils/extractors.d.ts +18 -0
package/dist/proxy/utils/extractors.js +109 -0
package/dist/proxy/utils/logging.d.ts +18 -0
package/dist/proxy/utils/logging.js +42 -0
package/package.json +7 -2
package/postinstall.js +19 -0

package/dist/lib/llm-extractor.js CHANGED Viewed

@@ -1,35 +1,17 @@
-// LLM-based extraction using OpenAI GPT-3.5-turbo for reasoning summaries
-// and Anthropic Claude Haiku for drift detection
-import OpenAI from 'openai';
+// LLM-based extraction using Anthropic Claude Haiku for drift detection
 import Anthropic from '@anthropic-ai/sdk';
 import { config } from 'dotenv';
 import { join } from 'path';
 import { homedir } from 'os';
 import { existsSync } from 'fs';
 import { debugLLM } from './debug.js';
-import { truncate } from './utils.js';
 // Load ~/.grov/.env as fallback for API key
 // This allows users to store their API key in a safe location outside any repo
 const grovEnvPath = join(homedir(), '.grov', '.env');
 if (existsSync(grovEnvPath)) {
     config({ path: grovEnvPath });
 }
-let client = null;
 let anthropicClient = null;
-/**
- * Initialize the OpenAI client
- */
-function getClient() {
-    if (!client) {
-        const apiKey = process.env.OPENAI_API_KEY;
-        if (!apiKey) {
-            // SECURITY: Generic error to avoid confirming API key mechanism exists
-            throw new Error('LLM extraction unavailable');
-        }
-        client = new OpenAI({ apiKey });
-    }
-    return client;
-}
 /**
  * Initialize the Anthropic client
  */
@@ -43,12 +25,6 @@ function getAnthropicClient() {
     }
     return anthropicClient;
 }
-/**
- * Check if LLM extraction is available (OpenAI API key set)
- */
-export function isLLMAvailable() {
-    return !!process.env.OPENAI_API_KEY;
-}
 /**
  * Extract intent from first user prompt using Haiku
  * Called once at session start to populate session_states
@@ -176,272 +152,6 @@ function createFallbackIntent(prompt) {
 export function isIntentExtractionAvailable() {
     return !!(process.env.ANTHROPIC_API_KEY || process.env.GROV_API_KEY);
 }
-/**
- * Check if Anthropic API is available (for drift detection)
- */
-export function isAnthropicAvailable() {
-    return !!process.env.ANTHROPIC_API_KEY;
-}
-/**
- * Get the drift model to use (from env or default)
- */
-export function getDriftModel() {
-    return process.env.GROV_DRIFT_MODEL || 'claude-haiku-4-5';
-}
-/**
- * Extract structured reasoning from a parsed session using GPT-3.5-turbo
- */
-export async function extractReasoning(session) {
-    const openai = getClient();
-    // Build session summary for the prompt
-    const sessionSummary = buildSessionSummary(session);
-    const response = await openai.chat.completions.create({
-        model: 'gpt-3.5-turbo',
-        max_tokens: 1024,
-        messages: [
-            {
-                role: 'system',
-                content: 'You are a helpful assistant that extracts structured information from coding sessions. Always respond with valid JSON only, no explanation.'
-            },
-            {
-                role: 'user',
-                content: `Analyze this Claude Code session and extract a structured reasoning summary.
-SESSION DATA:
-${sessionSummary}
-Extract the following as JSON:
-{
-  "task": "Brief description (1 sentence)",
-  "goal": "The underlying problem being solved",
-  "reasoning_trace": [
-    "Be SPECIFIC: include file names, function names, line numbers when relevant",
-    "Format: '[Action] [target] to/for [purpose]'",
-    "Example: 'Read auth.ts:47 to understand token refresh logic'",
-    "Example: 'Fixed null check in validateToken() - was causing silent failures'",
-    "NOT: 'Investigated auth' or 'Fixed bug'"
-  ],
-  "decisions": [{"choice": "What was decided", "reason": "Why this over alternatives"}],
-  "constraints": ["Discovered limitations, rate limits, incompatibilities"],
-  "status": "complete|partial|question|abandoned",
-  "tags": ["relevant", "domain", "tags"]
-}
-IMPORTANT for reasoning_trace:
-- Each entry should be ACTIONABLE information for future developers
-- Include specific file:line references when possible
-- Explain WHY not just WHAT (e.g., "Chose JWT over sessions because stateless scales better")
-- Bad: "Fixed the bug" / Good: "Fixed race condition in UserService.save() - was missing await"
-Status definitions:
-- "complete": Task was finished, implementation done
-- "partial": Work started but not finished
-- "question": Claude asked a question and is waiting for user response
-- "abandoned": User interrupted or moved to different topic
-RESPONSE RULES:
-- English only (translate if input is in other language)
-- No emojis
-- Valid JSON only`
-            }
-        ]
-    });
-    // Parse the response
-    const content = response.choices[0]?.message?.content;
-    if (!content) {
-        throw new Error('No response from OpenAI');
-    }
-    try {
-        // SECURITY: Parse to plain object first, then sanitize prototype pollution
-        const rawParsed = JSON.parse(content);
-        // SECURITY: Prevent prototype pollution from LLM-generated JSON
-        // An attacker could manipulate LLM to return {"__proto__": {"isAdmin": true}}
-        const pollutionKeys = ['__proto__', 'constructor', 'prototype'];
-        for (const key of pollutionKeys) {
-            if (key in rawParsed) {
-                delete rawParsed[key];
-            }
-        }
-        const extracted = rawParsed;
-        // SECURITY: Validate types to prevent LLM injection attacks
-        const safeTask = typeof extracted.task === 'string' ? extracted.task : '';
-        const safeGoal = typeof extracted.goal === 'string' ? extracted.goal : '';
-        const safeTrace = Array.isArray(extracted.reasoning_trace)
-            ? extracted.reasoning_trace.filter((t) => typeof t === 'string')
-            : [];
-        const safeDecisions = Array.isArray(extracted.decisions)
-            ? extracted.decisions.filter((d) => d && typeof d === 'object' && typeof d.choice === 'string' && typeof d.reason === 'string')
-            : [];
-        const safeConstraints = Array.isArray(extracted.constraints)
-            ? extracted.constraints.filter((c) => typeof c === 'string')
-            : [];
-        const safeTags = Array.isArray(extracted.tags)
-            ? extracted.tags.filter((t) => typeof t === 'string')
-            : [];
-        // Fill defaults with validated values
-        return {
-            task: safeTask || session.userMessages[0]?.substring(0, 100) || 'Unknown task',
-            goal: safeGoal || safeTask || 'Unknown goal',
-            reasoning_trace: safeTrace,
-            files_touched: session.filesRead.concat(session.filesWritten),
-            decisions: safeDecisions,
-            constraints: safeConstraints,
-            status: validateStatus(extracted.status),
-            tags: safeTags
-        };
-    }
-    catch (parseError) {
-        // If JSON parsing fails, return basic extraction
-        debugLLM('Failed to parse LLM response, using fallback');
-        return createFallbackExtraction(session);
-    }
-}
-/**
- * Classify just the task status (lighter weight than full extraction)
- */
-export async function classifyTaskStatus(session) {
-    const openai = getClient();
-    // Get last few exchanges for classification
-    const lastMessages = session.userMessages.slice(-2).join('\n---\n');
-    const lastAssistant = session.assistantMessages.slice(-1)[0] || '';
-    const response = await openai.chat.completions.create({
-        model: 'gpt-3.5-turbo',
-        max_tokens: 50,
-        messages: [
-            {
-                role: 'system',
-                content: 'Classify conversation state. Return ONLY one word: complete, partial, question, or abandoned.'
-            },
-            {
-                role: 'user',
-                content: `Last user message(s):
-${lastMessages}
-Last assistant response (truncated):
-${lastAssistant.substring(0, 500)}
-Files written: ${session.filesWritten.length}
-Files read: ${session.filesRead.length}
-Classification:`
-            }
-        ]
-    });
-    const content = response.choices[0]?.message?.content;
-    if (!content) {
-        return 'partial';
-    }
-    return validateStatus(content.trim().toLowerCase());
-}
-/**
- * Build a summary of the session for the LLM prompt
- */
-function buildSessionSummary(session) {
-    const lines = [];
-    // User messages
-    lines.push('USER MESSAGES:');
-    session.userMessages.forEach((msg, i) => {
-        lines.push(`[${i + 1}] ${truncate(msg, 300)}`);
-    });
-    lines.push('');
-    // Files touched
-    lines.push('FILES READ:');
-    session.filesRead.slice(0, 10).forEach(f => lines.push(`  - ${f}`));
-    if (session.filesRead.length > 10) {
-        lines.push(`  ... and ${session.filesRead.length - 10} more`);
-    }
-    lines.push('');
-    lines.push('FILES WRITTEN/EDITED:');
-    session.filesWritten.forEach(f => lines.push(`  - ${f}`));
-    lines.push('');
-    // Tool usage summary
-    lines.push('TOOL USAGE:');
-    const toolCounts = session.toolCalls.reduce((acc, t) => {
-        acc[t.name] = (acc[t.name] || 0) + 1;
-        return acc;
-    }, {});
-    Object.entries(toolCounts).forEach(([name, count]) => {
-        lines.push(`  - ${name}: ${count}x`);
-    });
-    lines.push('');
-    // Last assistant message (often contains summary/conclusion)
-    const lastAssistant = session.assistantMessages[session.assistantMessages.length - 1];
-    if (lastAssistant) {
-        lines.push('LAST ASSISTANT MESSAGE:');
-        lines.push(truncate(lastAssistant, 500));
-    }
-    return lines.join('\n');
-}
-/**
- * Create fallback extraction when LLM fails
- */
-function createFallbackExtraction(session) {
-    const filesTouched = [...new Set([...session.filesRead, ...session.filesWritten])];
-    return {
-        task: session.userMessages[0]?.substring(0, 100) || 'Unknown task',
-        goal: session.userMessages[0]?.substring(0, 100) || 'Unknown goal',
-        reasoning_trace: generateBasicTrace(session),
-        files_touched: filesTouched,
-        decisions: [],
-        constraints: [],
-        status: session.filesWritten.length > 0 ? 'complete' : 'partial',
-        tags: generateTagsFromFiles(filesTouched)
-    };
-}
-/**
- * Generate basic reasoning trace from tool usage
- */
-function generateBasicTrace(session) {
-    const trace = [];
-    const toolCounts = session.toolCalls.reduce((acc, t) => {
-        acc[t.name] = (acc[t.name] || 0) + 1;
-        return acc;
-    }, {});
-    if (toolCounts['Read'])
-        trace.push(`Read ${toolCounts['Read']} files`);
-    if (toolCounts['Write'])
-        trace.push(`Wrote ${toolCounts['Write']} files`);
-    if (toolCounts['Edit'])
-        trace.push(`Edited ${toolCounts['Edit']} files`);
-    if (toolCounts['Grep'] || toolCounts['Glob'])
-        trace.push('Searched codebase');
-    if (toolCounts['Bash'])
-        trace.push(`Ran ${toolCounts['Bash']} commands`);
-    return trace;
-}
-/**
- * Generate tags from file paths
- */
-function generateTagsFromFiles(files) {
-    const tags = new Set();
-    for (const file of files) {
-        const parts = file.split('/');
-        for (const part of parts) {
-            if (part && !part.includes('.') && part !== 'src' && part !== 'lib') {
-                tags.add(part.toLowerCase());
-            }
-        }
-        // Common patterns
-        if (file.includes('auth'))
-            tags.add('auth');
-        if (file.includes('api'))
-            tags.add('api');
-        if (file.includes('test'))
-            tags.add('test');
-    }
-    return [...tags].slice(0, 10);
-}
-/**
- * Validate and normalize status
- */
-function validateStatus(status) {
-    const normalized = status?.toLowerCase().trim();
-    if (normalized === 'complete' || normalized === 'partial' ||
-        normalized === 'question' || normalized === 'abandoned') {
-        return normalized;
-    }
-    return 'partial'; // Default
-}
 // ============================================
 // SESSION SUMMARY FOR CLEAR OPERATION
 // Reference: plan_proxy_local.md Section 2.3, 4.5
@@ -536,89 +246,222 @@ export function isTaskAnalysisAvailable() {
     return !!(process.env.ANTHROPIC_API_KEY || process.env.GROV_API_KEY);
 }
 /**
- * Analyze task context to determine task status
- * Called after each main model response to orchestrate sessions
- * Also compresses reasoning for steps if assistantResponse > 1000 chars
+ * Format conversation messages for prompt
  */
-export async function analyzeTaskContext(currentSession, latestUserMessage, recentSteps, assistantResponse) {
-    const client = getAnthropicClient();
-    const stepsText = recentSteps.slice(0, 5).map(s => {
+function formatConversationHistory(messages) {
+    if (!messages || messages.length === 0)
+        return 'No conversation history available.';
+    return messages.slice(-10).map(m => {
+        const role = m.role === 'user' ? 'User' : 'Assistant';
+        const content = m.content.substring(0, 800);
+        const truncated = m.content.length > 800 ? '...' : '';
+        return `${role}: ${content}${truncated}`;
+    }).join('\n\n');
+}
+/**
+ * Format tool calls for prompt
+ */
+function formatToolCalls(steps) {
+    if (!steps || steps.length === 0)
+        return 'No tools used yet.';
+    return steps.slice(0, 10).map(s => {
         let desc = `- ${s.action_type}`;
         if (s.files.length > 0) {
             desc += `: ${s.files.slice(0, 3).join(', ')}`;
         }
+        if (s.command) {
+            desc += ` (${s.command.substring(0, 50)})`;
+        }
         return desc;
-    }).join('\n') || 'None';
+    }).join('\n');
+}
+/**
+ * Analyze task context to determine task status
+ * Called after each main model response to orchestrate sessions
+ * Also compresses reasoning for steps if assistantResponse > 1000 chars
+ */
+export async function analyzeTaskContext(currentSession, latestUserMessage, recentSteps, assistantResponse, conversationHistory) {
+    const client = getAnthropicClient();
     // Check if we need to compress reasoning
     const needsCompression = assistantResponse.length > 1000;
     const compressionInstruction = needsCompression
-        ? `\n  "step_reasoning": "Extract CONCLUSIONS and SPECIFIC RECOMMENDATIONS only. Include: exact file paths (e.g., src/lib/utils.ts), function/component names, architectural patterns discovered, and WHY decisions were made. DO NOT write process descriptions like 'explored' or 'analyzed'. Max 800 chars."`
+        ? `,
+  "step_reasoning": "Extract CONCLUSIONS only: specific file paths, function names, patterns discovered, and WHY decisions were made. Max 800 chars. Do not write process descriptions."`
         : '';
-    const compressionRule = needsCompression
-        ? '\n- step_reasoning: Extract CONCLUSIONS (specific files, patterns, decisions) NOT process descriptions. Example GOOD: "Utilities belong in src/lib/utils.ts alongside cn(), formatDate()". Example BAD: "Explored codebase structure".'
-        : '';
-    // Extract topic keywords from goal for comparison
-    const currentGoalKeywords = currentSession?.original_goal
-        ? currentSession.original_goal.toLowerCase().match(/\b\w{4,}\b/g)?.slice(0, 10).join(', ') || ''
-        : '';
-    const prompt = `You are a task orchestrator. Your PRIMARY job is to detect when the user starts a NEW, DIFFERENT task.
+    // Format conversation history
+    const historyText = formatConversationHistory(conversationHistory || []);
+    const toolCallsText = formatToolCalls(recentSteps);
+    const prompt = `You are a task status analyzer. Your job is to examine a conversation between a user and an AI assistant, then determine whether the current task is complete, still in progress, or if a new task has started.
-CURRENT SESSION:
-- Current Goal: "${currentSession?.original_goal || 'No active task'}"
-- Goal Keywords: [${currentGoalKeywords}]
+<input>
+original_goal: ${currentSession?.original_goal || 'No active task - this may be the first message'}
-LATEST USER MESSAGE:
-"${latestUserMessage.substring(0, 500)}"
+messages:
+${historyText}
-RECENT ACTIONS (last 5):
-${stepsText}
+current_assistant_response:
+${assistantResponse ? assistantResponse.substring(0, 2000) : 'No response yet - assistant is still thinking.'}
-ASSISTANT RESPONSE (truncated):
-"${assistantResponse.substring(0, 1500)}${assistantResponse.length > 1500 ? '...' : ''}"
+tool_calls:
+${toolCallsText}
+</input>
-═══════════════════════════════════════════════════════════════
-CRITICAL: Compare the TOPIC of "Current Goal" vs "Latest User Message"
-═══════════════════════════════════════════════════════════════
+<output>
+Return a JSON object with these fields:
+- task_type: one of "information", "planning", or "implementation"
+- action: one of "continue", "task_complete", "new_task", or "subtask_complete"
+- task_id: existing session_id "${currentSession?.session_id || 'NEW'}" or "NEW" for new task
+- reasoning: brief explanation of why you made this decision${compressionInstruction}
+</output>
-Ask yourself:
-1. Is the user message about the SAME subject/feature/file as the current goal?
-2. Or is it about something COMPLETELY DIFFERENT?
+<step_1_identify_task_type>
+First, analyze the original_goal to understand what kind of task this is. Do not rely on specific keywords. Instead, understand the user's intent from the full context of their message.
-EXAMPLES of NEW_TASK (different topic):
-- Goal: "implement authentication" → User: "fix the database migration" → NEW_TASK
-- Goal: "analyze security layer" → User: "create hello.ts script" → NEW_TASK
-- Goal: "refactor user service" → User: "add dark mode to UI" → NEW_TASK
-- Goal: "fix login bug" → User: "write unit tests for payments" → NEW_TASK
+TYPE A - Information Request
+The user wants to learn or understand something. They are seeking knowledge, not asking for any changes or decisions to be made. The answer itself is what they need.
-EXAMPLES of CONTINUE (same topic):
-- Goal: "implement authentication" → User: "now add the logout button" → CONTINUE
-- Goal: "fix login bug" → User: "also check the session timeout" → CONTINUE
-- Goal: "analyze security" → User: "what about rate limiting?" → CONTINUE
+Think about whether the user is curious about how something works, wants an explanation of a concept, or is asking for clarification about existing behavior.
-Return JSON:
-{
-  "action": "continue|new_task|subtask|parallel_task|task_complete|subtask_complete",
-  "topic_match": "YES if same topic, NO if different topic",
-  "task_id": "existing session_id or 'NEW' for new task",
-  "current_goal": "the goal based on LATEST user message",
-  "reasoning": "1 sentence explaining topic comparison"${compressionInstruction}
-}
+Examples of information requests in different phrasings:
+- "How does the authentication system work?"
+- "Explica-mi cum functioneaza cache-ul"
+- "What is the difference between Redis and Memcached?"
+- "Can you walk me through the payment flow?"
+- "I don't understand why this function returns null"
+- "Ce face acest cod?"
+TYPE B - Planning or Decision Request
+The user wants to figure out the best approach before taking action. They need to make a decision or create a plan. The conversation may involve exploring options, discussing tradeoffs, or clarifying requirements.
+Think about whether the user is trying to decide between approaches, wants recommendations for how to build something, or is working toward a plan they will implement later.
+Examples of planning requests in different phrasings:
+- "How should we implement user authentication?"
+- "What's the best way to handle caching for this API?"
+- "Cum ar trebui sa structuram baza de date?"
+- "I'm thinking about using Redis vs Memcached, what do you recommend?"
+- "Let's figure out the architecture before we start coding"
+- "We need to decide on the approach for handling errors"
+TYPE C - Implementation Request
+The user wants actual changes made. They want code written, files edited, commands run, or something built. The task involves using tools to modify the codebase.
+Think about whether the user is asking for something to be created, fixed, changed, or built.
+Examples of implementation requests in different phrasings:
+- "Fix the bug in the login function"
+- "Add caching to the API endpoints"
+- "Fa un refactor la modulul de plati"
+- "Create a new component for the dashboard"
+- "Update the tests to cover edge cases"
+- "Remove the deprecated authentication code"
+</step_1_identify_task_type>
+<step_2_determine_status>
+Now that you know the task type, determine whether it is complete, continuing, or if a new task has begun.
+For TYPE A - Information Request:
+The task is complete when the assistant has provided a clear and complete answer to the user's question. Check the current_assistant_response field - if it contains a substantive answer to the question, the task is complete.
+Each question the user asks is treated as its own separate task. If the user asks a follow-up question, even on the same topic, that is a new task.
+The reason for this is that each answer is valuable on its own and should be saved independently. We do not want to wait for a multi-turn conversation to end before saving useful information.
+When analyzing: Look at current_assistant_response. If it contains an explanation, answer, or clarification that addresses the user's question, return task_complete.
+Example situation: User asks "How does auth work?", assistant explains it fully.
+Decision: task_complete
+Reason: The information request was answered completely.
+Example situation: User asks "How does auth work?", assistant explains, then user asks "What about JWT specifically?"
+Decision for second message: new_task
+Reason: This is a new question requiring a new answer.
+For TYPE B - Planning or Decision Request:
+The task continues while the user and assistant are still exploring options, discussing tradeoffs, or clarifying requirements. The task is complete only when a final decision or plan has been reached and the user has confirmed it.
+Look for signals that indicate the user has made up their mind. These signals come from the overall tone and direction of the conversation, not from specific keywords. The user might express agreement, ask to proceed with implementation, or summarize the chosen approach.
-DECISION RULES:
-1. NO current session → "new_task"
-2. topic_match=NO (different subject) → "new_task"
-3. topic_match=YES + user following up → "continue"
-4. Claude said "done/complete/finished" → "task_complete"
-5. Prerequisite work identified → "subtask"${compressionRule}
+When analyzing, ask yourself: Has the user confirmed a final direction? Are they still weighing options? Have they asked to move forward with a specific approach?
+Example situation: User asks "Should we use JWT or sessions?", assistant explains both, user says "I'm still not sure about refresh tokens"
+Decision: continue
+Reason: The user is still clarifying and has not made a final decision.
+Example situation: User and assistant discussed auth options, user says "OK, JWT with refresh tokens makes sense, let's go with that"
+Decision: task_complete
+Reason: The user confirmed the decision. Planning is complete.
+Example situation: User says "That sounds good, now implement it"
+Decision: task_complete for planning, and a new implementation task will begin
+Reason: Planning concluded with a decision. User is now requesting implementation.
+For TYPE C - Implementation Request:
+The task continues while the assistant is actively making changes using tools like file edits, bash commands, or file writes. The task is complete when the changes are done and verified.
+Look for signals that the work is finished in current_assistant_response: successful test runs, the assistant stating the work is done, or a commit being made. If tests are failing or the assistant indicates more work is needed, the task continues.
+When analyzing: Check current_assistant_response for completion signals. Is the assistant still making changes? Have the changes been verified? Did the assistant confirm completion?
+Example situation: Assistant edited three files and is now running tests.
+Decision: continue
+Reason: Implementation is in progress, verification not yet complete.
+Example situation: Assistant ran tests, they passed, assistant says "Done, the auth bug is fixed"
+Decision: task_complete
+Reason: Changes are complete and verified.
+Example situation: Tests failed after the changes.
+Decision: continue
+Reason: The implementation needs more work to pass verification.
+</step_2_determine_status>
+<step_3_detect_new_task>
+Sometimes the user changes direction entirely. A new task has started when:
+The user asks about something completely unrelated to the original goal.
+The conversation topic shifts to a different part of the codebase or a different feature.
+The previous task was completed and the user is now requesting something new.
+To detect this, compare the current user message to the original_goal. If they are about the same thing, the task is either continuing or complete. If they are about different things, a new task has started.
+Be careful not to confuse follow-up questions with new tasks. A follow-up question on the same topic in an information request is a new task because each answer stands alone. But a follow-up clarification during planning is part of the same planning task.
+Example situation: Original goal was "fix the auth bug", user now asks "also, can you update the README?"
+Decision: new_task
+Reason: Updating README is unrelated to fixing the auth bug.
+Example situation: Original goal was "implement caching", user asks "should we use Redis or Memcached for this?"
+Decision: continue (this is planning within the implementation task)
+Reason: The question is about how to implement the original request.
+Example situation: Original goal was "explain how auth works", user asks "and how does the session storage work?"
+Decision: new_task
+Reason: This is a new information request, separate from the first.
+</step_3_detect_new_task>
+<important_notes>
+Do not rely on specific keywords in any language. The same intent can be expressed many different ways across languages and phrasings. Always understand the intent from the full context.
+The conversation history and tool usage are your most important signals. What has the assistant been doing? What is the user trying to accomplish? Has that goal been achieved?
+CRITICAL - Q&A DURING PLANNING:
+If the current task_type is "planning" and the user asks a clarifying question (e.g., "how does X work?", "what about Y?", "clarify Z"), this is NOT a new information task. It is a CONTINUATION of the planning task. The user is gathering information to make a planning decision, not requesting standalone information.
+- If original task_type was planning → keep it as planning, action=continue
+- Only mark task_complete for planning when user explicitly confirms a final decision or asks to proceed with implementation
+- Asking to "write to file" or "document the plan" is NOT task_complete - it's still part of planning documentation
+When in doubt between continue and task_complete, ask yourself: Would it be valuable to save what we have so far? For information requests, yes, save each answer. For planning, only save when a decision is made. For implementation, only save when work is verified complete.
 RESPONSE RULES:
-- English only (translate if input is in other language)
-- No emojis
-- Valid JSON only`;
+- Return valid JSON only
+- English only in the response (translate reasoning if input is in other language)
+- No markdown formatting, no emojis
+</important_notes>`;
     debugLLM('analyzeTaskContext', `Calling Haiku for task analysis (needsCompression=${needsCompression})`);
     const response = await client.messages.create({
         model: 'claude-haiku-4-5-20251001',
-        max_tokens: needsCompression ? 600 : 300,
+        max_tokens: needsCompression ? 800 : 400,
         messages: [{ role: 'user', content: prompt }],
     });
     const text = response.content[0].type === 'text' ? response.content[0].text : '';
@@ -629,20 +472,24 @@ RESPONSE RULES:
             throw new Error('No JSON found in response');
         }
         const analysis = JSON.parse(jsonMatch[0]);
+        // Ensure task_type has a default value
+        if (!analysis.task_type) {
+            analysis.task_type = 'implementation';
+        }
         // If we didn't need compression but have short response, use it directly
         if (!needsCompression && assistantResponse.length > 0) {
             analysis.step_reasoning = assistantResponse.substring(0, 1000);
         }
-        debugLLM('analyzeTaskContext', `Result: action=${analysis.action}, topic_match=${analysis.topic_match}, goal=${analysis.current_goal.substring(0, 50)}`);
+        debugLLM('analyzeTaskContext', `Result: task_type=${analysis.task_type}, action=${analysis.action}, reasoning="${analysis.reasoning?.substring(0, 150) || 'none'}"`);
         return analysis;
     }
     catch (parseError) {
         debugLLM('analyzeTaskContext', `Parse error: ${String(parseError)}, using fallback`);
         // Fallback: continue existing session or create new
         return {
+            task_type: 'implementation',
             action: currentSession ? 'continue' : 'new_task',
             task_id: currentSession?.session_id || 'NEW',
-            current_goal: latestUserMessage.substring(0, 200),
             reasoning: 'Fallback due to parse error',
             step_reasoning: assistantResponse.substring(0, 1000),
         };
@@ -657,76 +504,151 @@ export function isReasoningExtractionAvailable() {
 /**
  * Extract reasoning trace and decisions from steps
  * Called at task_complete to populate team memory with rich context
+ *
+ * @param formattedSteps - Pre-formatted XML string with grouped steps and actions
+ * @param originalGoal - The original task goal
  */
-export async function extractReasoningAndDecisions(stepsReasoning, originalGoal) {
+export async function extractReasoningAndDecisions(formattedSteps, originalGoal) {
     const client = getAnthropicClient();
-    // Combine all steps reasoning into one text
-    const combinedReasoning = stepsReasoning
-        .filter(r => r && r.length > 10)
-        .join('\n\n---\n\n')
-        .substring(0, 8000);
-    if (combinedReasoning.length < 50) {
+    if (formattedSteps.length < 50) {
         return { reasoning_trace: [], decisions: [] };
     }
-    const prompt = `Extract CONCLUSIONS and KNOWLEDGE from Claude's work - NOT process descriptions.
+    const prompt = `<role>
+You are a Knowledge Engineer specialized in extracting reusable team knowledge from coding sessions.
-ORIGINAL GOAL:
-${originalGoal || 'Not specified'}
+Your output will be stored permanently in team memory and used to help developers in future sessions. Poor extractions waste storage and confuse future assistants. Excellent extractions save hours of repeated investigation.
+</role>
-CLAUDE'S RESPONSE:
-${combinedReasoning}
+<context>
+PROJECT GOAL: ${originalGoal || 'Not specified'}
-═══════════════════════════════════════════════════════════════
-EXTRACT ACTIONABLE CONCLUSIONS - NOT PROCESS
-═══════════════════════════════════════════════════════════════
+This extraction serves two purposes:
+1. Help future developers understand WHAT was discovered in this codebase
+2. Help future developers understand WHY certain decisions were made
+</context>
+<session_data>
+${formattedSteps.substring(0, 8000)}
+</session_data>
+<instructions>
+We need TWO types of knowledge extracted:
+TYPE A: CONCLUSIONS (Factual findings from the session)
+What this means:
+These are FACTS discovered during the session. Things that were explicitly found, read, or confirmed in the code. A new developer reading these should immediately know WHERE to find things and WHAT values/patterns exist.
+Must include:
+- Specific file paths (not just "auth files" but "src/lib/jwt.ts")
+- Specific values (not just "short expiry" but "1 hour access, 7 day refresh")
+- Specific patterns (not just "uses JWT" but "JWT with sub, email, type, teams payload")
+- Specific functions/classes (not just "middleware" but "requireAuth, optionalAuth preHandlers")
+Format: Start with "CONCLUSION: " prefix
+Good examples:
+- "CONCLUSION: JWT tokens stored in ~/.grov/credentials.json with 1hr access/7d refresh expiry"
+- "CONCLUSION: Auth middleware in src/routes/auth.ts exports requireAuth and optionalAuth preHandlers"
+- "CONCLUSION: Device flow polling interval is 5 seconds, endpoint /auth/device/poll"
+Bad examples:
+- "CONCLUSION: Found authentication files" (too vague, no paths)
+- "CONCLUSION: JWT is used for auth" (too generic, no specifics)
+- "CONCLUSION: Explored the codebase" (process description, not finding)
+TYPE B: INSIGHTS (Your analysis and inferences)
+What this means:
+These are YOUR observations that go BEYOND what was explicitly stated. Connections between different parts, patterns you identified, implications for future work. This is where YOU add value beyond just summarizing.
+Types of insights we value:
+1. CONNECTIONS - How do different files/modules relate?
+Example: "jwt.ts handles token creation, credentials.ts handles storage - separation of crypto operations from I/O"
+2. INFERENCES - What decisions were made implicitly?
+Example: "File storage in ~/.grov/ instead of env vars - implies single-user CLI design, not multi-tenant"
+3. PATTERNS - What architectural patterns emerge?
+Example: "All config files use 0600 permissions - security-conscious design for sensitive data"
+4. IMPLICATIONS - What does this mean for future development?
+Example: "1hr token expiry requires background refresh mechanism for long operations to avoid mid-task auth failures"
+Format: Start with "INSIGHT: " prefix
+Good examples:
+- "INSIGHT: Dual-file pattern (jwt.ts + credentials.ts) separates crypto from I/O, reducing attack surface"
+- "INSIGHT: Device Authorization Flow chosen over password flow - enables OAuth providers without storing secrets in CLI"
+- "INSIGHT: Teams array cached in JWT payload - avoids DB query per request but requires token refresh on team changes"
+Bad examples:
+- "INSIGHT: The code is well organized" (subjective, not actionable)
+- "INSIGHT: Authentication is important" (obvious, no value)
+- "INSIGHT: Files were read" (process description, not insight)
+</instructions>
+<output_format>
+Return a JSON object with this structure:
-GOOD examples (specific, reusable knowledge):
-- "Utility functions belong in frontend/lib/utils.ts - existing utils: cn(), formatDate(), debounce()"
-- "Auth tokens stored in localStorage with 15min expiry for long form sessions"
-- "API routes follow REST pattern in /api/v1/ with Zod validation"
-- "Database migrations go in prisma/migrations/ using prisma migrate"
-BAD examples (process descriptions - DO NOT EXTRACT THESE):
-- "Explored the codebase structure"
-- "Analyzed several approaches"
-- "Searched for utility directories"
-- "Looked at the file organization"
-1. REASONING TRACE (conclusions and recommendations):
-   - WHAT was discovered or decided (specific file paths, patterns)
-   - WHY this is the right approach
-   - WHERE this applies in the codebase
-   - Max 10 entries, prioritize specific file/function recommendations
-2. DECISIONS (architectural choices):
-   - Only significant choices that affect future work
-   - What was chosen and why
-   - Max 5 decisions
-Return JSON:
 {
-  "reasoning_trace": [
-    "Utility functions belong in frontend/lib/utils.ts alongside cn(), formatDate(), debounce(), generateId()",
-    "Backend utilities go in backend/app/utils/ with domain-specific files like validation.py",
-    "The @/lib/utils import alias is configured for frontend utility access"
+  "knowledge_pairs": [
+    {
+      "conclusion": "CONCLUSION: [specific factual finding with file paths and values]",
+      "insight": "INSIGHT: [inference or implication RELATED to this conclusion]"
+    },
+    {
+      "conclusion": "CONCLUSION: [another specific finding]",
+      "insight": "INSIGHT: [what this means for future development]"
+    }
   ],
   "decisions": [
-    {"choice": "Add to existing utils.ts rather than new file", "reason": "Maintains established pattern, easier discoverability"},
-    {"choice": "Use frontend/lib/ over src/utils/", "reason": "Follows Next.js conventions used throughout project"}
+    {
+      "choice": "[What was chosen - be specific]",
+      "reason": "[Why - include whether this is factual or inferred]"
+    }
   ]
 }
-RESPONSE RULES:
-- English only
-- No emojis
-- Valid JSON only
-- Extract WHAT and WHERE, not just WHAT was done
-- If no specific conclusions found, return empty arrays`;
-    debugLLM('extractReasoningAndDecisions', `Analyzing ${stepsReasoning.length} steps, ${combinedReasoning.length} chars`);
+IMPORTANT: Generate knowledge as PAIRS where each INSIGHT is directly related to its CONCLUSION.
+Example pair:
+{
+  "conclusion": "CONCLUSION: MemoryCache uses lazy expiration - entries checked/deleted on get(), not via timers",
+  "insight": "INSIGHT: Lazy expiration avoids timer overhead that would accumulate with large caches - trades CPU on read for memory efficiency"
+}
+Rules:
+1. Each pair MUST have a conclusion AND a related insight
+2. The insight MUST add value beyond the conclusion (inference, implication, pattern)
+3. Max 5 pairs (10 entries total) - prioritize most valuable
+4. Max 5 decisions - only significant architectural choices
+5. If you cannot find a meaningful insight for a conclusion, still include the conclusion with insight: null
+6. NEVER include process descriptions ("explored", "searched", "looked at")
+7. English only, no emojis
+8. Use prefixes "CONCLUSION: " and "INSIGHT: " in the strings
+</output_format>
+<validation>
+Before responding, verify:
+- Does each CONCLUSION contain a specific file path or value?
+- Is each INSIGHT directly related to its paired CONCLUSION?
+- Does each INSIGHT add something NOT explicitly in the input?
+- Would a new developer find the pairs useful without seeing the original session?
+- Did I avoid process descriptions?
+- Are the decisions about significant architectural choices?
+</validation>
+Return ONLY valid JSON, no markdown code blocks, no explanation.`;
+    debugLLM('extractReasoningAndDecisions', `Analyzing formatted steps, ${formattedSteps.length} chars`);
     try {
         const response = await client.messages.create({
             model: 'claude-haiku-4-5-20251001',
-            max_tokens: 800,
+            max_tokens: 1500,
             messages: [{ role: 'user', content: prompt }],
         });
         const text = response.content[0].type === 'text' ? response.content[0].text : '';
@@ -735,10 +657,60 @@ RESPONSE RULES:
             debugLLM('extractReasoningAndDecisions', 'No JSON found in response');
             return { reasoning_trace: [], decisions: [] };
         }
-        const result = JSON.parse(jsonMatch[0]);
-        debugLLM('extractReasoningAndDecisions', `Extracted ${result.reasoning_trace?.length || 0} traces, ${result.decisions?.length || 0} decisions`);
+        // Try to parse JSON, with repair attempts for common Haiku formatting issues
+        let result;
+        try {
+            result = JSON.parse(jsonMatch[0]);
+        }
+        catch (parseError) {
+            // Common fixes: trailing commas, unescaped newlines in strings
+            let repaired = jsonMatch[0]
+                .replace(/,\s*}/g, '}') // trailing comma before }
+                .replace(/,\s*]/g, ']') // trailing comma before ]
+                .replace(/\n/g, '\\n') // unescaped newlines
+                .replace(/\r/g, '\\r') // unescaped carriage returns
+                .replace(/\t/g, '\\t'); // unescaped tabs
+            try {
+                result = JSON.parse(repaired);
+            }
+            catch {
+                // Last resort: try to extract just knowledge_pairs array
+                const pairsMatch = jsonMatch[0].match(/"knowledge_pairs"\s*:\s*\[([\s\S]*?)\]/);
+                if (pairsMatch) {
+                    try {
+                        const pairs = JSON.parse(`[${pairsMatch[1].replace(/,\s*$/, '')}]`);
+                        result = { knowledge_pairs: pairs, decisions: [] };
+                    }
+                    catch {
+                        throw parseError; // Re-throw original error
+                    }
+                }
+                else {
+                    throw parseError;
+                }
+            }
+        }
+        // Flatten knowledge_pairs into reasoning_trace (interleaved: conclusion, insight, conclusion, insight...)
+        let reasoningTrace = [];
+        if (result.knowledge_pairs && result.knowledge_pairs.length > 0) {
+            // New format: flatten pairs into interleaved array
+            for (const pair of result.knowledge_pairs) {
+                if (pair.conclusion) {
+                    reasoningTrace.push(pair.conclusion);
+                }
+                if (pair.insight) {
+                    reasoningTrace.push(pair.insight);
+                }
+            }
+            debugLLM('extractReasoningAndDecisions', `Extracted ${result.knowledge_pairs.length} pairs (${reasoningTrace.length} entries), ${result.decisions?.length || 0} decisions`);
+        }
+        else if (result.reasoning_trace) {
+            // Backwards compatibility: old format with flat array
+            reasoningTrace = result.reasoning_trace;
+            debugLLM('extractReasoningAndDecisions', `Extracted ${reasoningTrace.length} traces (old format), ${result.decisions?.length || 0} decisions`);
+        }
         return {
-            reasoning_trace: result.reasoning_trace || [],
+            reasoning_trace: reasoningTrace,
             decisions: result.decisions || [],
         };
     }