grov 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +34 -4
  2. package/dist/cli.js +8 -0
  3. package/dist/lib/api-client.d.ts +18 -1
  4. package/dist/lib/api-client.js +57 -0
  5. package/dist/lib/llm-extractor.d.ts +14 -39
  6. package/dist/lib/llm-extractor.js +379 -407
  7. package/dist/lib/store/convenience.d.ts +40 -0
  8. package/dist/lib/store/convenience.js +104 -0
  9. package/dist/lib/store/database.d.ts +22 -0
  10. package/dist/lib/store/database.js +375 -0
  11. package/dist/lib/store/drift.d.ts +9 -0
  12. package/dist/lib/store/drift.js +89 -0
  13. package/dist/lib/store/index.d.ts +7 -0
  14. package/dist/lib/store/index.js +13 -0
  15. package/dist/lib/store/sessions.d.ts +32 -0
  16. package/dist/lib/store/sessions.js +240 -0
  17. package/dist/lib/store/steps.d.ts +40 -0
  18. package/dist/lib/store/steps.js +161 -0
  19. package/dist/lib/store/tasks.d.ts +33 -0
  20. package/dist/lib/store/tasks.js +133 -0
  21. package/dist/lib/store/types.d.ts +167 -0
  22. package/dist/lib/store/types.js +2 -0
  23. package/dist/lib/store.d.ts +1 -436
  24. package/dist/lib/store.js +2 -1478
  25. package/dist/proxy/cache.d.ts +36 -0
  26. package/dist/proxy/cache.js +51 -0
  27. package/dist/proxy/config.d.ts +1 -0
  28. package/dist/proxy/config.js +2 -0
  29. package/dist/proxy/extended-cache.d.ts +10 -0
  30. package/dist/proxy/extended-cache.js +155 -0
  31. package/dist/proxy/handlers/preprocess.d.ts +20 -0
  32. package/dist/proxy/handlers/preprocess.js +169 -0
  33. package/dist/proxy/injection/delta-tracking.d.ts +11 -0
  34. package/dist/proxy/injection/delta-tracking.js +93 -0
  35. package/dist/proxy/injection/injectors.d.ts +7 -0
  36. package/dist/proxy/injection/injectors.js +139 -0
  37. package/dist/proxy/request-processor.d.ts +18 -4
  38. package/dist/proxy/request-processor.js +151 -30
  39. package/dist/proxy/response-processor.js +93 -45
  40. package/dist/proxy/server.d.ts +0 -1
  41. package/dist/proxy/server.js +366 -582
  42. package/dist/proxy/types.d.ts +13 -0
  43. package/dist/proxy/types.js +2 -0
  44. package/dist/proxy/utils/extractors.d.ts +18 -0
  45. package/dist/proxy/utils/extractors.js +109 -0
  46. package/dist/proxy/utils/logging.d.ts +18 -0
  47. package/dist/proxy/utils/logging.js +42 -0
  48. package/package.json +7 -2
  49. package/postinstall.js +19 -0
@@ -1,35 +1,17 @@
1
- // LLM-based extraction using OpenAI GPT-3.5-turbo for reasoning summaries
2
- // and Anthropic Claude Haiku for drift detection
3
- import OpenAI from 'openai';
1
+ // LLM-based extraction using Anthropic Claude Haiku for drift detection
4
2
  import Anthropic from '@anthropic-ai/sdk';
5
3
  import { config } from 'dotenv';
6
4
  import { join } from 'path';
7
5
  import { homedir } from 'os';
8
6
  import { existsSync } from 'fs';
9
7
  import { debugLLM } from './debug.js';
10
- import { truncate } from './utils.js';
11
8
  // Load ~/.grov/.env as fallback for API key
12
9
  // This allows users to store their API key in a safe location outside any repo
13
10
  const grovEnvPath = join(homedir(), '.grov', '.env');
14
11
  if (existsSync(grovEnvPath)) {
15
12
  config({ path: grovEnvPath });
16
13
  }
17
- let client = null;
18
14
  let anthropicClient = null;
19
- /**
20
- * Initialize the OpenAI client
21
- */
22
- function getClient() {
23
- if (!client) {
24
- const apiKey = process.env.OPENAI_API_KEY;
25
- if (!apiKey) {
26
- // SECURITY: Generic error to avoid confirming API key mechanism exists
27
- throw new Error('LLM extraction unavailable');
28
- }
29
- client = new OpenAI({ apiKey });
30
- }
31
- return client;
32
- }
33
15
  /**
34
16
  * Initialize the Anthropic client
35
17
  */
@@ -43,12 +25,6 @@ function getAnthropicClient() {
43
25
  }
44
26
  return anthropicClient;
45
27
  }
46
- /**
47
- * Check if LLM extraction is available (OpenAI API key set)
48
- */
49
- export function isLLMAvailable() {
50
- return !!process.env.OPENAI_API_KEY;
51
- }
52
28
  /**
53
29
  * Extract intent from first user prompt using Haiku
54
30
  * Called once at session start to populate session_states
@@ -176,272 +152,6 @@ function createFallbackIntent(prompt) {
176
152
  export function isIntentExtractionAvailable() {
177
153
  return !!(process.env.ANTHROPIC_API_KEY || process.env.GROV_API_KEY);
178
154
  }
179
- /**
180
- * Check if Anthropic API is available (for drift detection)
181
- */
182
- export function isAnthropicAvailable() {
183
- return !!process.env.ANTHROPIC_API_KEY;
184
- }
185
- /**
186
- * Get the drift model to use (from env or default)
187
- */
188
- export function getDriftModel() {
189
- return process.env.GROV_DRIFT_MODEL || 'claude-haiku-4-5';
190
- }
191
- /**
192
- * Extract structured reasoning from a parsed session using GPT-3.5-turbo
193
- */
194
- export async function extractReasoning(session) {
195
- const openai = getClient();
196
- // Build session summary for the prompt
197
- const sessionSummary = buildSessionSummary(session);
198
- const response = await openai.chat.completions.create({
199
- model: 'gpt-3.5-turbo',
200
- max_tokens: 1024,
201
- messages: [
202
- {
203
- role: 'system',
204
- content: 'You are a helpful assistant that extracts structured information from coding sessions. Always respond with valid JSON only, no explanation.'
205
- },
206
- {
207
- role: 'user',
208
- content: `Analyze this Claude Code session and extract a structured reasoning summary.
209
-
210
- SESSION DATA:
211
- ${sessionSummary}
212
-
213
- Extract the following as JSON:
214
- {
215
- "task": "Brief description (1 sentence)",
216
- "goal": "The underlying problem being solved",
217
- "reasoning_trace": [
218
- "Be SPECIFIC: include file names, function names, line numbers when relevant",
219
- "Format: '[Action] [target] to/for [purpose]'",
220
- "Example: 'Read auth.ts:47 to understand token refresh logic'",
221
- "Example: 'Fixed null check in validateToken() - was causing silent failures'",
222
- "NOT: 'Investigated auth' or 'Fixed bug'"
223
- ],
224
- "decisions": [{"choice": "What was decided", "reason": "Why this over alternatives"}],
225
- "constraints": ["Discovered limitations, rate limits, incompatibilities"],
226
- "status": "complete|partial|question|abandoned",
227
- "tags": ["relevant", "domain", "tags"]
228
- }
229
-
230
- IMPORTANT for reasoning_trace:
231
- - Each entry should be ACTIONABLE information for future developers
232
- - Include specific file:line references when possible
233
- - Explain WHY not just WHAT (e.g., "Chose JWT over sessions because stateless scales better")
234
- - Bad: "Fixed the bug" / Good: "Fixed race condition in UserService.save() - was missing await"
235
-
236
- Status definitions:
237
- - "complete": Task was finished, implementation done
238
- - "partial": Work started but not finished
239
- - "question": Claude asked a question and is waiting for user response
240
- - "abandoned": User interrupted or moved to different topic
241
-
242
- RESPONSE RULES:
243
- - English only (translate if input is in other language)
244
- - No emojis
245
- - Valid JSON only`
246
- }
247
- ]
248
- });
249
- // Parse the response
250
- const content = response.choices[0]?.message?.content;
251
- if (!content) {
252
- throw new Error('No response from OpenAI');
253
- }
254
- try {
255
- // SECURITY: Parse to plain object first, then sanitize prototype pollution
256
- const rawParsed = JSON.parse(content);
257
- // SECURITY: Prevent prototype pollution from LLM-generated JSON
258
- // An attacker could manipulate LLM to return {"__proto__": {"isAdmin": true}}
259
- const pollutionKeys = ['__proto__', 'constructor', 'prototype'];
260
- for (const key of pollutionKeys) {
261
- if (key in rawParsed) {
262
- delete rawParsed[key];
263
- }
264
- }
265
- const extracted = rawParsed;
266
- // SECURITY: Validate types to prevent LLM injection attacks
267
- const safeTask = typeof extracted.task === 'string' ? extracted.task : '';
268
- const safeGoal = typeof extracted.goal === 'string' ? extracted.goal : '';
269
- const safeTrace = Array.isArray(extracted.reasoning_trace)
270
- ? extracted.reasoning_trace.filter((t) => typeof t === 'string')
271
- : [];
272
- const safeDecisions = Array.isArray(extracted.decisions)
273
- ? extracted.decisions.filter((d) => d && typeof d === 'object' && typeof d.choice === 'string' && typeof d.reason === 'string')
274
- : [];
275
- const safeConstraints = Array.isArray(extracted.constraints)
276
- ? extracted.constraints.filter((c) => typeof c === 'string')
277
- : [];
278
- const safeTags = Array.isArray(extracted.tags)
279
- ? extracted.tags.filter((t) => typeof t === 'string')
280
- : [];
281
- // Fill defaults with validated values
282
- return {
283
- task: safeTask || session.userMessages[0]?.substring(0, 100) || 'Unknown task',
284
- goal: safeGoal || safeTask || 'Unknown goal',
285
- reasoning_trace: safeTrace,
286
- files_touched: session.filesRead.concat(session.filesWritten),
287
- decisions: safeDecisions,
288
- constraints: safeConstraints,
289
- status: validateStatus(extracted.status),
290
- tags: safeTags
291
- };
292
- }
293
- catch (parseError) {
294
- // If JSON parsing fails, return basic extraction
295
- debugLLM('Failed to parse LLM response, using fallback');
296
- return createFallbackExtraction(session);
297
- }
298
- }
299
- /**
300
- * Classify just the task status (lighter weight than full extraction)
301
- */
302
- export async function classifyTaskStatus(session) {
303
- const openai = getClient();
304
- // Get last few exchanges for classification
305
- const lastMessages = session.userMessages.slice(-2).join('\n---\n');
306
- const lastAssistant = session.assistantMessages.slice(-1)[0] || '';
307
- const response = await openai.chat.completions.create({
308
- model: 'gpt-3.5-turbo',
309
- max_tokens: 50,
310
- messages: [
311
- {
312
- role: 'system',
313
- content: 'Classify conversation state. Return ONLY one word: complete, partial, question, or abandoned.'
314
- },
315
- {
316
- role: 'user',
317
- content: `Last user message(s):
318
- ${lastMessages}
319
-
320
- Last assistant response (truncated):
321
- ${lastAssistant.substring(0, 500)}
322
-
323
- Files written: ${session.filesWritten.length}
324
- Files read: ${session.filesRead.length}
325
-
326
- Classification:`
327
- }
328
- ]
329
- });
330
- const content = response.choices[0]?.message?.content;
331
- if (!content) {
332
- return 'partial';
333
- }
334
- return validateStatus(content.trim().toLowerCase());
335
- }
336
- /**
337
- * Build a summary of the session for the LLM prompt
338
- */
339
- function buildSessionSummary(session) {
340
- const lines = [];
341
- // User messages
342
- lines.push('USER MESSAGES:');
343
- session.userMessages.forEach((msg, i) => {
344
- lines.push(`[${i + 1}] ${truncate(msg, 300)}`);
345
- });
346
- lines.push('');
347
- // Files touched
348
- lines.push('FILES READ:');
349
- session.filesRead.slice(0, 10).forEach(f => lines.push(` - ${f}`));
350
- if (session.filesRead.length > 10) {
351
- lines.push(` ... and ${session.filesRead.length - 10} more`);
352
- }
353
- lines.push('');
354
- lines.push('FILES WRITTEN/EDITED:');
355
- session.filesWritten.forEach(f => lines.push(` - ${f}`));
356
- lines.push('');
357
- // Tool usage summary
358
- lines.push('TOOL USAGE:');
359
- const toolCounts = session.toolCalls.reduce((acc, t) => {
360
- acc[t.name] = (acc[t.name] || 0) + 1;
361
- return acc;
362
- }, {});
363
- Object.entries(toolCounts).forEach(([name, count]) => {
364
- lines.push(` - ${name}: ${count}x`);
365
- });
366
- lines.push('');
367
- // Last assistant message (often contains summary/conclusion)
368
- const lastAssistant = session.assistantMessages[session.assistantMessages.length - 1];
369
- if (lastAssistant) {
370
- lines.push('LAST ASSISTANT MESSAGE:');
371
- lines.push(truncate(lastAssistant, 500));
372
- }
373
- return lines.join('\n');
374
- }
375
- /**
376
- * Create fallback extraction when LLM fails
377
- */
378
- function createFallbackExtraction(session) {
379
- const filesTouched = [...new Set([...session.filesRead, ...session.filesWritten])];
380
- return {
381
- task: session.userMessages[0]?.substring(0, 100) || 'Unknown task',
382
- goal: session.userMessages[0]?.substring(0, 100) || 'Unknown goal',
383
- reasoning_trace: generateBasicTrace(session),
384
- files_touched: filesTouched,
385
- decisions: [],
386
- constraints: [],
387
- status: session.filesWritten.length > 0 ? 'complete' : 'partial',
388
- tags: generateTagsFromFiles(filesTouched)
389
- };
390
- }
391
- /**
392
- * Generate basic reasoning trace from tool usage
393
- */
394
- function generateBasicTrace(session) {
395
- const trace = [];
396
- const toolCounts = session.toolCalls.reduce((acc, t) => {
397
- acc[t.name] = (acc[t.name] || 0) + 1;
398
- return acc;
399
- }, {});
400
- if (toolCounts['Read'])
401
- trace.push(`Read ${toolCounts['Read']} files`);
402
- if (toolCounts['Write'])
403
- trace.push(`Wrote ${toolCounts['Write']} files`);
404
- if (toolCounts['Edit'])
405
- trace.push(`Edited ${toolCounts['Edit']} files`);
406
- if (toolCounts['Grep'] || toolCounts['Glob'])
407
- trace.push('Searched codebase');
408
- if (toolCounts['Bash'])
409
- trace.push(`Ran ${toolCounts['Bash']} commands`);
410
- return trace;
411
- }
412
- /**
413
- * Generate tags from file paths
414
- */
415
- function generateTagsFromFiles(files) {
416
- const tags = new Set();
417
- for (const file of files) {
418
- const parts = file.split('/');
419
- for (const part of parts) {
420
- if (part && !part.includes('.') && part !== 'src' && part !== 'lib') {
421
- tags.add(part.toLowerCase());
422
- }
423
- }
424
- // Common patterns
425
- if (file.includes('auth'))
426
- tags.add('auth');
427
- if (file.includes('api'))
428
- tags.add('api');
429
- if (file.includes('test'))
430
- tags.add('test');
431
- }
432
- return [...tags].slice(0, 10);
433
- }
434
- /**
435
- * Validate and normalize status
436
- */
437
- function validateStatus(status) {
438
- const normalized = status?.toLowerCase().trim();
439
- if (normalized === 'complete' || normalized === 'partial' ||
440
- normalized === 'question' || normalized === 'abandoned') {
441
- return normalized;
442
- }
443
- return 'partial'; // Default
444
- }
445
155
  // ============================================
446
156
  // SESSION SUMMARY FOR CLEAR OPERATION
447
157
  // Reference: plan_proxy_local.md Section 2.3, 4.5
@@ -536,89 +246,222 @@ export function isTaskAnalysisAvailable() {
536
246
  return !!(process.env.ANTHROPIC_API_KEY || process.env.GROV_API_KEY);
537
247
  }
538
248
  /**
539
- * Analyze task context to determine task status
540
- * Called after each main model response to orchestrate sessions
541
- * Also compresses reasoning for steps if assistantResponse > 1000 chars
249
+ * Format conversation messages for prompt
542
250
  */
543
- export async function analyzeTaskContext(currentSession, latestUserMessage, recentSteps, assistantResponse) {
544
- const client = getAnthropicClient();
545
- const stepsText = recentSteps.slice(0, 5).map(s => {
251
+ function formatConversationHistory(messages) {
252
+ if (!messages || messages.length === 0)
253
+ return 'No conversation history available.';
254
+ return messages.slice(-10).map(m => {
255
+ const role = m.role === 'user' ? 'User' : 'Assistant';
256
+ const content = m.content.substring(0, 800);
257
+ const truncated = m.content.length > 800 ? '...' : '';
258
+ return `${role}: ${content}${truncated}`;
259
+ }).join('\n\n');
260
+ }
261
+ /**
262
+ * Format tool calls for prompt
263
+ */
264
+ function formatToolCalls(steps) {
265
+ if (!steps || steps.length === 0)
266
+ return 'No tools used yet.';
267
+ return steps.slice(0, 10).map(s => {
546
268
  let desc = `- ${s.action_type}`;
547
269
  if (s.files.length > 0) {
548
270
  desc += `: ${s.files.slice(0, 3).join(', ')}`;
549
271
  }
272
+ if (s.command) {
273
+ desc += ` (${s.command.substring(0, 50)})`;
274
+ }
550
275
  return desc;
551
- }).join('\n') || 'None';
276
+ }).join('\n');
277
+ }
278
+ /**
279
+ * Analyze task context to determine task status
280
+ * Called after each main model response to orchestrate sessions
281
+ * Also compresses reasoning for steps if assistantResponse > 1000 chars
282
+ */
283
+ export async function analyzeTaskContext(currentSession, latestUserMessage, recentSteps, assistantResponse, conversationHistory) {
284
+ const client = getAnthropicClient();
552
285
  // Check if we need to compress reasoning
553
286
  const needsCompression = assistantResponse.length > 1000;
554
287
  const compressionInstruction = needsCompression
555
- ? `\n "step_reasoning": "Extract CONCLUSIONS and SPECIFIC RECOMMENDATIONS only. Include: exact file paths (e.g., src/lib/utils.ts), function/component names, architectural patterns discovered, and WHY decisions were made. DO NOT write process descriptions like 'explored' or 'analyzed'. Max 800 chars."`
288
+ ? `,
289
+ "step_reasoning": "Extract CONCLUSIONS only: specific file paths, function names, patterns discovered, and WHY decisions were made. Max 800 chars. Do not write process descriptions."`
556
290
  : '';
557
- const compressionRule = needsCompression
558
- ? '\n- step_reasoning: Extract CONCLUSIONS (specific files, patterns, decisions) NOT process descriptions. Example GOOD: "Utilities belong in src/lib/utils.ts alongside cn(), formatDate()". Example BAD: "Explored codebase structure".'
559
- : '';
560
- // Extract topic keywords from goal for comparison
561
- const currentGoalKeywords = currentSession?.original_goal
562
- ? currentSession.original_goal.toLowerCase().match(/\b\w{4,}\b/g)?.slice(0, 10).join(', ') || ''
563
- : '';
564
- const prompt = `You are a task orchestrator. Your PRIMARY job is to detect when the user starts a NEW, DIFFERENT task.
291
+ // Format conversation history
292
+ const historyText = formatConversationHistory(conversationHistory || []);
293
+ const toolCallsText = formatToolCalls(recentSteps);
294
+ const prompt = `You are a task status analyzer. Your job is to examine a conversation between a user and an AI assistant, then determine whether the current task is complete, still in progress, or if a new task has started.
565
295
 
566
- CURRENT SESSION:
567
- - Current Goal: "${currentSession?.original_goal || 'No active task'}"
568
- - Goal Keywords: [${currentGoalKeywords}]
296
+ <input>
297
+ original_goal: ${currentSession?.original_goal || 'No active task - this may be the first message'}
569
298
 
570
- LATEST USER MESSAGE:
571
- "${latestUserMessage.substring(0, 500)}"
299
+ messages:
300
+ ${historyText}
572
301
 
573
- RECENT ACTIONS (last 5):
574
- ${stepsText}
302
+ current_assistant_response:
303
+ ${assistantResponse ? assistantResponse.substring(0, 2000) : 'No response yet - assistant is still thinking.'}
575
304
 
576
- ASSISTANT RESPONSE (truncated):
577
- "${assistantResponse.substring(0, 1500)}${assistantResponse.length > 1500 ? '...' : ''}"
305
+ tool_calls:
306
+ ${toolCallsText}
307
+ </input>
578
308
 
579
- ═══════════════════════════════════════════════════════════════
580
- CRITICAL: Compare the TOPIC of "Current Goal" vs "Latest User Message"
581
- ═══════════════════════════════════════════════════════════════
309
+ <output>
310
+ Return a JSON object with these fields:
311
+ - task_type: one of "information", "planning", or "implementation"
312
+ - action: one of "continue", "task_complete", "new_task", or "subtask_complete"
313
+ - task_id: existing session_id "${currentSession?.session_id || 'NEW'}" or "NEW" for new task
314
+ - reasoning: brief explanation of why you made this decision${compressionInstruction}
315
+ </output>
582
316
 
583
- Ask yourself:
584
- 1. Is the user message about the SAME subject/feature/file as the current goal?
585
- 2. Or is it about something COMPLETELY DIFFERENT?
317
+ <step_1_identify_task_type>
318
+ First, analyze the original_goal to understand what kind of task this is. Do not rely on specific keywords. Instead, understand the user's intent from the full context of their message.
586
319
 
587
- EXAMPLES of NEW_TASK (different topic):
588
- - Goal: "implement authentication" → User: "fix the database migration" → NEW_TASK
589
- - Goal: "analyze security layer" → User: "create hello.ts script" → NEW_TASK
590
- - Goal: "refactor user service" → User: "add dark mode to UI" → NEW_TASK
591
- - Goal: "fix login bug" → User: "write unit tests for payments" → NEW_TASK
320
+ TYPE A - Information Request
321
+ The user wants to learn or understand something. They are seeking knowledge, not asking for any changes or decisions to be made. The answer itself is what they need.
592
322
 
593
- EXAMPLES of CONTINUE (same topic):
594
- - Goal: "implement authentication" → User: "now add the logout button" → CONTINUE
595
- - Goal: "fix login bug" → User: "also check the session timeout" → CONTINUE
596
- - Goal: "analyze security" → User: "what about rate limiting?" → CONTINUE
323
+ Think about whether the user is curious about how something works, wants an explanation of a concept, or is asking for clarification about existing behavior.
597
324
 
598
- Return JSON:
599
- {
600
- "action": "continue|new_task|subtask|parallel_task|task_complete|subtask_complete",
601
- "topic_match": "YES if same topic, NO if different topic",
602
- "task_id": "existing session_id or 'NEW' for new task",
603
- "current_goal": "the goal based on LATEST user message",
604
- "reasoning": "1 sentence explaining topic comparison"${compressionInstruction}
605
- }
325
+ Examples of information requests in different phrasings:
326
+ - "How does the authentication system work?"
327
+ - "Explica-mi cum functioneaza cache-ul"
328
+ - "What is the difference between Redis and Memcached?"
329
+ - "Can you walk me through the payment flow?"
330
+ - "I don't understand why this function returns null"
331
+ - "Ce face acest cod?"
332
+
333
+ TYPE B - Planning or Decision Request
334
+ The user wants to figure out the best approach before taking action. They need to make a decision or create a plan. The conversation may involve exploring options, discussing tradeoffs, or clarifying requirements.
335
+
336
+ Think about whether the user is trying to decide between approaches, wants recommendations for how to build something, or is working toward a plan they will implement later.
337
+
338
+ Examples of planning requests in different phrasings:
339
+ - "How should we implement user authentication?"
340
+ - "What's the best way to handle caching for this API?"
341
+ - "Cum ar trebui sa structuram baza de date?"
342
+ - "I'm thinking about using Redis vs Memcached, what do you recommend?"
343
+ - "Let's figure out the architecture before we start coding"
344
+ - "We need to decide on the approach for handling errors"
345
+
346
+ TYPE C - Implementation Request
347
+ The user wants actual changes made. They want code written, files edited, commands run, or something built. The task involves using tools to modify the codebase.
348
+
349
+ Think about whether the user is asking for something to be created, fixed, changed, or built.
350
+
351
+ Examples of implementation requests in different phrasings:
352
+ - "Fix the bug in the login function"
353
+ - "Add caching to the API endpoints"
354
+ - "Fa un refactor la modulul de plati"
355
+ - "Create a new component for the dashboard"
356
+ - "Update the tests to cover edge cases"
357
+ - "Remove the deprecated authentication code"
358
+ </step_1_identify_task_type>
359
+
360
+ <step_2_determine_status>
361
+ Now that you know the task type, determine whether it is complete, continuing, or if a new task has begun.
362
+
363
+ For TYPE A - Information Request:
364
+ The task is complete when the assistant has provided a clear and complete answer to the user's question. Check the current_assistant_response field - if it contains a substantive answer to the question, the task is complete.
365
+
366
+ Each question the user asks is treated as its own separate task. If the user asks a follow-up question, even on the same topic, that is a new task.
367
+
368
+ The reason for this is that each answer is valuable on its own and should be saved independently. We do not want to wait for a multi-turn conversation to end before saving useful information.
369
+
370
+ When analyzing: Look at current_assistant_response. If it contains an explanation, answer, or clarification that addresses the user's question, return task_complete.
371
+
372
+ Example situation: User asks "How does auth work?", assistant explains it fully.
373
+ Decision: task_complete
374
+ Reason: The information request was answered completely.
375
+
376
+ Example situation: User asks "How does auth work?", assistant explains, then user asks "What about JWT specifically?"
377
+ Decision for second message: new_task
378
+ Reason: This is a new question requiring a new answer.
379
+
380
+ For TYPE B - Planning or Decision Request:
381
+ The task continues while the user and assistant are still exploring options, discussing tradeoffs, or clarifying requirements. The task is complete only when a final decision or plan has been reached and the user has confirmed it.
382
+
383
+ Look for signals that indicate the user has made up their mind. These signals come from the overall tone and direction of the conversation, not from specific keywords. The user might express agreement, ask to proceed with implementation, or summarize the chosen approach.
606
384
 
607
- DECISION RULES:
608
- 1. NO current session → "new_task"
609
- 2. topic_match=NO (different subject) → "new_task"
610
- 3. topic_match=YES + user following up → "continue"
611
- 4. Claude said "done/complete/finished" → "task_complete"
612
- 5. Prerequisite work identified → "subtask"${compressionRule}
385
+ When analyzing, ask yourself: Has the user confirmed a final direction? Are they still weighing options? Have they asked to move forward with a specific approach?
386
+
387
+ Example situation: User asks "Should we use JWT or sessions?", assistant explains both, user says "I'm still not sure about refresh tokens"
388
+ Decision: continue
389
+ Reason: The user is still clarifying and has not made a final decision.
390
+
391
+ Example situation: User and assistant discussed auth options, user says "OK, JWT with refresh tokens makes sense, let's go with that"
392
+ Decision: task_complete
393
+ Reason: The user confirmed the decision. Planning is complete.
394
+
395
+ Example situation: User says "That sounds good, now implement it"
396
+ Decision: task_complete for planning, and a new implementation task will begin
397
+ Reason: Planning concluded with a decision. User is now requesting implementation.
398
+
399
+ For TYPE C - Implementation Request:
400
+ The task continues while the assistant is actively making changes using tools like file edits, bash commands, or file writes. The task is complete when the changes are done and verified.
401
+
402
+ Look for signals that the work is finished in current_assistant_response: successful test runs, the assistant stating the work is done, or a commit being made. If tests are failing or the assistant indicates more work is needed, the task continues.
403
+
404
+ When analyzing: Check current_assistant_response for completion signals. Is the assistant still making changes? Have the changes been verified? Did the assistant confirm completion?
405
+
406
+ Example situation: Assistant edited three files and is now running tests.
407
+ Decision: continue
408
+ Reason: Implementation is in progress, verification not yet complete.
409
+
410
+ Example situation: Assistant ran tests, they passed, assistant says "Done, the auth bug is fixed"
411
+ Decision: task_complete
412
+ Reason: Changes are complete and verified.
413
+
414
+ Example situation: Tests failed after the changes.
415
+ Decision: continue
416
+ Reason: The implementation needs more work to pass verification.
417
+ </step_2_determine_status>
418
+
419
+ <step_3_detect_new_task>
420
+ Sometimes the user changes direction entirely. A new task has started when:
421
+
422
+ The user asks about something completely unrelated to the original goal.
423
+ The conversation topic shifts to a different part of the codebase or a different feature.
424
+ The previous task was completed and the user is now requesting something new.
425
+
426
+ To detect this, compare the current user message to the original_goal. If they are about the same thing, the task is either continuing or complete. If they are about different things, a new task has started.
427
+
428
+ Be careful not to confuse follow-up questions with new tasks. A follow-up question on the same topic in an information request is a new task because each answer stands alone. But a follow-up clarification during planning is part of the same planning task.
429
+
430
+ Example situation: Original goal was "fix the auth bug", user now asks "also, can you update the README?"
431
+ Decision: new_task
432
+ Reason: Updating README is unrelated to fixing the auth bug.
433
+
434
+ Example situation: Original goal was "implement caching", user asks "should we use Redis or Memcached for this?"
435
+ Decision: continue (this is planning within the implementation task)
436
+ Reason: The question is about how to implement the original request.
437
+
438
+ Example situation: Original goal was "explain how auth works", user asks "and how does the session storage work?"
439
+ Decision: new_task
440
+ Reason: This is a new information request, separate from the first.
441
+ </step_3_detect_new_task>
442
+
443
+ <important_notes>
444
+ Do not rely on specific keywords in any language. The same intent can be expressed many different ways across languages and phrasings. Always understand the intent from the full context.
445
+
446
+ The conversation history and tool usage are your most important signals. What has the assistant been doing? What is the user trying to accomplish? Has that goal been achieved?
447
+
448
+ CRITICAL - Q&A DURING PLANNING:
449
+ If the current task_type is "planning" and the user asks a clarifying question (e.g., "how does X work?", "what about Y?", "clarify Z"), this is NOT a new information task. It is a CONTINUATION of the planning task. The user is gathering information to make a planning decision, not requesting standalone information.
450
+ - If original task_type was planning → keep it as planning, action=continue
451
+ - Only mark task_complete for planning when user explicitly confirms a final decision or asks to proceed with implementation
452
+ - Asking to "write to file" or "document the plan" is NOT task_complete - it's still part of planning documentation
453
+
454
+ When in doubt between continue and task_complete, ask yourself: Would it be valuable to save what we have so far? For information requests, yes, save each answer. For planning, only save when a decision is made. For implementation, only save when work is verified complete.
613
455
 
614
456
  RESPONSE RULES:
615
- - English only (translate if input is in other language)
616
- - No emojis
617
- - Valid JSON only`;
457
+ - Return valid JSON only
458
+ - English only in the response (translate reasoning if input is in other language)
459
+ - No markdown formatting, no emojis
460
+ </important_notes>`;
618
461
  debugLLM('analyzeTaskContext', `Calling Haiku for task analysis (needsCompression=${needsCompression})`);
619
462
  const response = await client.messages.create({
620
463
  model: 'claude-haiku-4-5-20251001',
621
- max_tokens: needsCompression ? 600 : 300,
464
+ max_tokens: needsCompression ? 800 : 400,
622
465
  messages: [{ role: 'user', content: prompt }],
623
466
  });
624
467
  const text = response.content[0].type === 'text' ? response.content[0].text : '';
@@ -629,20 +472,24 @@ RESPONSE RULES:
629
472
  throw new Error('No JSON found in response');
630
473
  }
631
474
  const analysis = JSON.parse(jsonMatch[0]);
475
+ // Ensure task_type has a default value
476
+ if (!analysis.task_type) {
477
+ analysis.task_type = 'implementation';
478
+ }
632
479
  // If we didn't need compression but have short response, use it directly
633
480
  if (!needsCompression && assistantResponse.length > 0) {
634
481
  analysis.step_reasoning = assistantResponse.substring(0, 1000);
635
482
  }
636
- debugLLM('analyzeTaskContext', `Result: action=${analysis.action}, topic_match=${analysis.topic_match}, goal=${analysis.current_goal.substring(0, 50)}`);
483
+ debugLLM('analyzeTaskContext', `Result: task_type=${analysis.task_type}, action=${analysis.action}, reasoning="${analysis.reasoning?.substring(0, 150) || 'none'}"`);
637
484
  return analysis;
638
485
  }
639
486
  catch (parseError) {
640
487
  debugLLM('analyzeTaskContext', `Parse error: ${String(parseError)}, using fallback`);
641
488
  // Fallback: continue existing session or create new
642
489
  return {
490
+ task_type: 'implementation',
643
491
  action: currentSession ? 'continue' : 'new_task',
644
492
  task_id: currentSession?.session_id || 'NEW',
645
- current_goal: latestUserMessage.substring(0, 200),
646
493
  reasoning: 'Fallback due to parse error',
647
494
  step_reasoning: assistantResponse.substring(0, 1000),
648
495
  };
@@ -657,76 +504,151 @@ export function isReasoningExtractionAvailable() {
657
504
  /**
658
505
  * Extract reasoning trace and decisions from steps
659
506
  * Called at task_complete to populate team memory with rich context
507
+ *
508
+ * @param formattedSteps - Pre-formatted XML string with grouped steps and actions
509
+ * @param originalGoal - The original task goal
660
510
  */
661
- export async function extractReasoningAndDecisions(stepsReasoning, originalGoal) {
511
+ export async function extractReasoningAndDecisions(formattedSteps, originalGoal) {
662
512
  const client = getAnthropicClient();
663
- // Combine all steps reasoning into one text
664
- const combinedReasoning = stepsReasoning
665
- .filter(r => r && r.length > 10)
666
- .join('\n\n---\n\n')
667
- .substring(0, 8000);
668
- if (combinedReasoning.length < 50) {
513
+ if (formattedSteps.length < 50) {
669
514
  return { reasoning_trace: [], decisions: [] };
670
515
  }
671
- const prompt = `Extract CONCLUSIONS and KNOWLEDGE from Claude's work - NOT process descriptions.
516
+ const prompt = `<role>
517
+ You are a Knowledge Engineer specialized in extracting reusable team knowledge from coding sessions.
672
518
 
673
- ORIGINAL GOAL:
674
- ${originalGoal || 'Not specified'}
519
+ Your output will be stored permanently in team memory and used to help developers in future sessions. Poor extractions waste storage and confuse future assistants. Excellent extractions save hours of repeated investigation.
520
+ </role>
675
521
 
676
- CLAUDE'S RESPONSE:
677
- ${combinedReasoning}
522
+ <context>
523
+ PROJECT GOAL: ${originalGoal || 'Not specified'}
678
524
 
679
- ═══════════════════════════════════════════════════════════════
680
- EXTRACT ACTIONABLE CONCLUSIONS - NOT PROCESS
681
- ═══════════════════════════════════════════════════════════════
525
+ This extraction serves two purposes:
526
+ 1. Help future developers understand WHAT was discovered in this codebase
527
+ 2. Help future developers understand WHY certain decisions were made
528
+ </context>
529
+
530
+ <session_data>
531
+ ${formattedSteps.substring(0, 8000)}
532
+ </session_data>
533
+
534
+ <instructions>
535
+
536
+ We need TWO types of knowledge extracted:
537
+
538
+ TYPE A: CONCLUSIONS (Factual findings from the session)
539
+
540
+ What this means:
541
+ These are FACTS discovered during the session. Things that were explicitly found, read, or confirmed in the code. A new developer reading these should immediately know WHERE to find things and WHAT values/patterns exist.
542
+
543
+ Must include:
544
+ - Specific file paths (not just "auth files" but "src/lib/jwt.ts")
545
+ - Specific values (not just "short expiry" but "1 hour access, 7 day refresh")
546
+ - Specific patterns (not just "uses JWT" but "JWT with sub, email, type, teams payload")
547
+ - Specific functions/classes (not just "middleware" but "requireAuth, optionalAuth preHandlers")
548
+
549
+ Format: Start with "CONCLUSION: " prefix
550
+
551
+ Good examples:
552
+ - "CONCLUSION: JWT tokens stored in ~/.grov/credentials.json with 1hr access/7d refresh expiry"
553
+ - "CONCLUSION: Auth middleware in src/routes/auth.ts exports requireAuth and optionalAuth preHandlers"
554
+ - "CONCLUSION: Device flow polling interval is 5 seconds, endpoint /auth/device/poll"
555
+
556
+ Bad examples:
557
+ - "CONCLUSION: Found authentication files" (too vague, no paths)
558
+ - "CONCLUSION: JWT is used for auth" (too generic, no specifics)
559
+ - "CONCLUSION: Explored the codebase" (process description, not finding)
560
+
561
+
562
+ TYPE B: INSIGHTS (Your analysis and inferences)
563
+
564
+ What this means:
565
+ These are YOUR observations that go BEYOND what was explicitly stated. Connections between different parts, patterns you identified, implications for future work. This is where YOU add value beyond just summarizing.
566
+
567
+ Types of insights we value:
568
+
569
+ 1. CONNECTIONS - How do different files/modules relate?
570
+ Example: "jwt.ts handles token creation, credentials.ts handles storage - separation of crypto operations from I/O"
571
+
572
+ 2. INFERENCES - What decisions were made implicitly?
573
+ Example: "File storage in ~/.grov/ instead of env vars - implies single-user CLI design, not multi-tenant"
574
+
575
+ 3. PATTERNS - What architectural patterns emerge?
576
+ Example: "All config files use 0600 permissions - security-conscious design for sensitive data"
577
+
578
+ 4. IMPLICATIONS - What does this mean for future development?
579
+ Example: "1hr token expiry requires background refresh mechanism for long operations to avoid mid-task auth failures"
580
+
581
+ Format: Start with "INSIGHT: " prefix
582
+
583
+ Good examples:
584
+ - "INSIGHT: Dual-file pattern (jwt.ts + credentials.ts) separates crypto from I/O, reducing attack surface"
585
+ - "INSIGHT: Device Authorization Flow chosen over password flow - enables OAuth providers without storing secrets in CLI"
586
+ - "INSIGHT: Teams array cached in JWT payload - avoids DB query per request but requires token refresh on team changes"
587
+
588
+ Bad examples:
589
+ - "INSIGHT: The code is well organized" (subjective, not actionable)
590
+ - "INSIGHT: Authentication is important" (obvious, no value)
591
+ - "INSIGHT: Files were read" (process description, not insight)
592
+
593
+ </instructions>
594
+
595
+ <output_format>
596
+ Return a JSON object with this structure:
682
597
 
683
- GOOD examples (specific, reusable knowledge):
684
- - "Utility functions belong in frontend/lib/utils.ts - existing utils: cn(), formatDate(), debounce()"
685
- - "Auth tokens stored in localStorage with 15min expiry for long form sessions"
686
- - "API routes follow REST pattern in /api/v1/ with Zod validation"
687
- - "Database migrations go in prisma/migrations/ using prisma migrate"
688
-
689
- BAD examples (process descriptions - DO NOT EXTRACT THESE):
690
- - "Explored the codebase structure"
691
- - "Analyzed several approaches"
692
- - "Searched for utility directories"
693
- - "Looked at the file organization"
694
-
695
- 1. REASONING TRACE (conclusions and recommendations):
696
- - WHAT was discovered or decided (specific file paths, patterns)
697
- - WHY this is the right approach
698
- - WHERE this applies in the codebase
699
- - Max 10 entries, prioritize specific file/function recommendations
700
-
701
- 2. DECISIONS (architectural choices):
702
- - Only significant choices that affect future work
703
- - What was chosen and why
704
- - Max 5 decisions
705
-
706
- Return JSON:
707
598
  {
708
- "reasoning_trace": [
709
- "Utility functions belong in frontend/lib/utils.ts alongside cn(), formatDate(), debounce(), generateId()",
710
- "Backend utilities go in backend/app/utils/ with domain-specific files like validation.py",
711
- "The @/lib/utils import alias is configured for frontend utility access"
599
+ "knowledge_pairs": [
600
+ {
601
+ "conclusion": "CONCLUSION: [specific factual finding with file paths and values]",
602
+ "insight": "INSIGHT: [inference or implication RELATED to this conclusion]"
603
+ },
604
+ {
605
+ "conclusion": "CONCLUSION: [another specific finding]",
606
+ "insight": "INSIGHT: [what this means for future development]"
607
+ }
712
608
  ],
713
609
  "decisions": [
714
- {"choice": "Add to existing utils.ts rather than new file", "reason": "Maintains established pattern, easier discoverability"},
715
- {"choice": "Use frontend/lib/ over src/utils/", "reason": "Follows Next.js conventions used throughout project"}
610
+ {
611
+ "choice": "[What was chosen - be specific]",
612
+ "reason": "[Why - include whether this is factual or inferred]"
613
+ }
716
614
  ]
717
615
  }
718
616
 
719
- RESPONSE RULES:
720
- - English only
721
- - No emojis
722
- - Valid JSON only
723
- - Extract WHAT and WHERE, not just WHAT was done
724
- - If no specific conclusions found, return empty arrays`;
725
- debugLLM('extractReasoningAndDecisions', `Analyzing ${stepsReasoning.length} steps, ${combinedReasoning.length} chars`);
617
+ IMPORTANT: Generate knowledge as PAIRS where each INSIGHT is directly related to its CONCLUSION.
618
+
619
+ Example pair:
620
+ {
621
+ "conclusion": "CONCLUSION: MemoryCache uses lazy expiration - entries checked/deleted on get(), not via timers",
622
+ "insight": "INSIGHT: Lazy expiration avoids timer overhead that would accumulate with large caches - trades CPU on read for memory efficiency"
623
+ }
624
+
625
+ Rules:
626
+ 1. Each pair MUST have a conclusion AND a related insight
627
+ 2. The insight MUST add value beyond the conclusion (inference, implication, pattern)
628
+ 3. Max 5 pairs (10 entries total) - prioritize most valuable
629
+ 4. Max 5 decisions - only significant architectural choices
630
+ 5. If you cannot find a meaningful insight for a conclusion, still include the conclusion with insight: null
631
+ 6. NEVER include process descriptions ("explored", "searched", "looked at")
632
+ 7. English only, no emojis
633
+ 8. Use prefixes "CONCLUSION: " and "INSIGHT: " in the strings
634
+ </output_format>
635
+
636
+ <validation>
637
+ Before responding, verify:
638
+ - Does each CONCLUSION contain a specific file path or value?
639
+ - Is each INSIGHT directly related to its paired CONCLUSION?
640
+ - Does each INSIGHT add something NOT explicitly in the input?
641
+ - Would a new developer find the pairs useful without seeing the original session?
642
+ - Did I avoid process descriptions?
643
+ - Are the decisions about significant architectural choices?
644
+ </validation>
645
+
646
+ Return ONLY valid JSON, no markdown code blocks, no explanation.`;
647
+ debugLLM('extractReasoningAndDecisions', `Analyzing formatted steps, ${formattedSteps.length} chars`);
726
648
  try {
727
649
  const response = await client.messages.create({
728
650
  model: 'claude-haiku-4-5-20251001',
729
- max_tokens: 800,
651
+ max_tokens: 1500,
730
652
  messages: [{ role: 'user', content: prompt }],
731
653
  });
732
654
  const text = response.content[0].type === 'text' ? response.content[0].text : '';
@@ -735,10 +657,60 @@ RESPONSE RULES:
735
657
  debugLLM('extractReasoningAndDecisions', 'No JSON found in response');
736
658
  return { reasoning_trace: [], decisions: [] };
737
659
  }
738
- const result = JSON.parse(jsonMatch[0]);
739
- debugLLM('extractReasoningAndDecisions', `Extracted ${result.reasoning_trace?.length || 0} traces, ${result.decisions?.length || 0} decisions`);
660
+ // Try to parse JSON, with repair attempts for common Haiku formatting issues
661
+ let result;
662
+ try {
663
+ result = JSON.parse(jsonMatch[0]);
664
+ }
665
+ catch (parseError) {
666
+ // Common fixes: trailing commas, unescaped newlines in strings
667
+ let repaired = jsonMatch[0]
668
+ .replace(/,\s*}/g, '}') // trailing comma before }
669
+ .replace(/,\s*]/g, ']') // trailing comma before ]
670
+ .replace(/\n/g, '\\n') // unescaped newlines
671
+ .replace(/\r/g, '\\r') // unescaped carriage returns
672
+ .replace(/\t/g, '\\t'); // unescaped tabs
673
+ try {
674
+ result = JSON.parse(repaired);
675
+ }
676
+ catch {
677
+ // Last resort: try to extract just knowledge_pairs array
678
+ const pairsMatch = jsonMatch[0].match(/"knowledge_pairs"\s*:\s*\[([\s\S]*?)\]/);
679
+ if (pairsMatch) {
680
+ try {
681
+ const pairs = JSON.parse(`[${pairsMatch[1].replace(/,\s*$/, '')}]`);
682
+ result = { knowledge_pairs: pairs, decisions: [] };
683
+ }
684
+ catch {
685
+ throw parseError; // Re-throw original error
686
+ }
687
+ }
688
+ else {
689
+ throw parseError;
690
+ }
691
+ }
692
+ }
693
+ // Flatten knowledge_pairs into reasoning_trace (interleaved: conclusion, insight, conclusion, insight...)
694
+ let reasoningTrace = [];
695
+ if (result.knowledge_pairs && result.knowledge_pairs.length > 0) {
696
+ // New format: flatten pairs into interleaved array
697
+ for (const pair of result.knowledge_pairs) {
698
+ if (pair.conclusion) {
699
+ reasoningTrace.push(pair.conclusion);
700
+ }
701
+ if (pair.insight) {
702
+ reasoningTrace.push(pair.insight);
703
+ }
704
+ }
705
+ debugLLM('extractReasoningAndDecisions', `Extracted ${result.knowledge_pairs.length} pairs (${reasoningTrace.length} entries), ${result.decisions?.length || 0} decisions`);
706
+ }
707
+ else if (result.reasoning_trace) {
708
+ // Backwards compatibility: old format with flat array
709
+ reasoningTrace = result.reasoning_trace;
710
+ debugLLM('extractReasoningAndDecisions', `Extracted ${reasoningTrace.length} traces (old format), ${result.decisions?.length || 0} decisions`);
711
+ }
740
712
  return {
741
- reasoning_trace: result.reasoning_trace || [],
713
+ reasoning_trace: reasoningTrace,
742
714
  decisions: result.decisions || [],
743
715
  };
744
716
  }