grov 0.2.3 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +44 -5
  2. package/dist/cli.js +40 -2
  3. package/dist/commands/login.d.ts +1 -0
  4. package/dist/commands/login.js +115 -0
  5. package/dist/commands/logout.d.ts +1 -0
  6. package/dist/commands/logout.js +13 -0
  7. package/dist/commands/sync.d.ts +8 -0
  8. package/dist/commands/sync.js +127 -0
  9. package/dist/lib/api-client.d.ts +57 -0
  10. package/dist/lib/api-client.js +174 -0
  11. package/dist/lib/cloud-sync.d.ts +33 -0
  12. package/dist/lib/cloud-sync.js +176 -0
  13. package/dist/lib/credentials.d.ts +53 -0
  14. package/dist/lib/credentials.js +201 -0
  15. package/dist/lib/llm-extractor.d.ts +15 -39
  16. package/dist/lib/llm-extractor.js +400 -418
  17. package/dist/lib/store/convenience.d.ts +40 -0
  18. package/dist/lib/store/convenience.js +104 -0
  19. package/dist/lib/store/database.d.ts +22 -0
  20. package/dist/lib/store/database.js +375 -0
  21. package/dist/lib/store/drift.d.ts +9 -0
  22. package/dist/lib/store/drift.js +89 -0
  23. package/dist/lib/store/index.d.ts +7 -0
  24. package/dist/lib/store/index.js +13 -0
  25. package/dist/lib/store/sessions.d.ts +32 -0
  26. package/dist/lib/store/sessions.js +240 -0
  27. package/dist/lib/store/steps.d.ts +40 -0
  28. package/dist/lib/store/steps.js +161 -0
  29. package/dist/lib/store/tasks.d.ts +33 -0
  30. package/dist/lib/store/tasks.js +133 -0
  31. package/dist/lib/store/types.d.ts +167 -0
  32. package/dist/lib/store/types.js +2 -0
  33. package/dist/lib/store.d.ts +1 -406
  34. package/dist/lib/store.js +2 -1356
  35. package/dist/lib/utils.d.ts +5 -0
  36. package/dist/lib/utils.js +45 -0
  37. package/dist/proxy/action-parser.d.ts +10 -2
  38. package/dist/proxy/action-parser.js +4 -2
  39. package/dist/proxy/cache.d.ts +36 -0
  40. package/dist/proxy/cache.js +51 -0
  41. package/dist/proxy/config.d.ts +1 -0
  42. package/dist/proxy/config.js +2 -0
  43. package/dist/proxy/extended-cache.d.ts +10 -0
  44. package/dist/proxy/extended-cache.js +155 -0
  45. package/dist/proxy/forwarder.d.ts +7 -1
  46. package/dist/proxy/forwarder.js +157 -7
  47. package/dist/proxy/handlers/preprocess.d.ts +20 -0
  48. package/dist/proxy/handlers/preprocess.js +169 -0
  49. package/dist/proxy/injection/delta-tracking.d.ts +11 -0
  50. package/dist/proxy/injection/delta-tracking.js +93 -0
  51. package/dist/proxy/injection/injectors.d.ts +7 -0
  52. package/dist/proxy/injection/injectors.js +139 -0
  53. package/dist/proxy/request-processor.d.ts +18 -3
  54. package/dist/proxy/request-processor.js +151 -28
  55. package/dist/proxy/response-processor.js +116 -47
  56. package/dist/proxy/server.d.ts +4 -1
  57. package/dist/proxy/server.js +592 -253
  58. package/dist/proxy/types.d.ts +13 -0
  59. package/dist/proxy/types.js +2 -0
  60. package/dist/proxy/utils/extractors.d.ts +18 -0
  61. package/dist/proxy/utils/extractors.js +109 -0
  62. package/dist/proxy/utils/logging.d.ts +18 -0
  63. package/dist/proxy/utils/logging.js +42 -0
  64. package/package.json +22 -4
@@ -1,35 +1,17 @@
1
- // LLM-based extraction using OpenAI GPT-3.5-turbo for reasoning summaries
2
- // and Anthropic Claude Haiku for drift detection
3
- import OpenAI from 'openai';
1
+ // LLM-based extraction using Anthropic Claude Haiku for drift detection
4
2
  import Anthropic from '@anthropic-ai/sdk';
5
3
  import { config } from 'dotenv';
6
4
  import { join } from 'path';
7
5
  import { homedir } from 'os';
8
6
  import { existsSync } from 'fs';
9
7
  import { debugLLM } from './debug.js';
10
- import { truncate } from './utils.js';
11
8
  // Load ~/.grov/.env as fallback for API key
12
9
  // This allows users to store their API key in a safe location outside any repo
13
10
  const grovEnvPath = join(homedir(), '.grov', '.env');
14
11
  if (existsSync(grovEnvPath)) {
15
12
  config({ path: grovEnvPath });
16
13
  }
17
- let client = null;
18
14
  let anthropicClient = null;
19
- /**
20
- * Initialize the OpenAI client
21
- */
22
- function getClient() {
23
- if (!client) {
24
- const apiKey = process.env.OPENAI_API_KEY;
25
- if (!apiKey) {
26
- // SECURITY: Generic error to avoid confirming API key mechanism exists
27
- throw new Error('LLM extraction unavailable');
28
- }
29
- client = new OpenAI({ apiKey });
30
- }
31
- return client;
32
- }
33
15
  /**
34
16
  * Initialize the Anthropic client
35
17
  */
@@ -43,12 +25,6 @@ function getAnthropicClient() {
43
25
  }
44
26
  return anthropicClient;
45
27
  }
46
- /**
47
- * Check if LLM extraction is available (OpenAI API key set)
48
- */
49
- export function isLLMAvailable() {
50
- return !!process.env.OPENAI_API_KEY;
51
- }
52
28
  /**
53
29
  * Extract intent from first user prompt using Haiku
54
30
  * Called once at session start to populate session_states
@@ -176,272 +152,6 @@ function createFallbackIntent(prompt) {
176
152
  export function isIntentExtractionAvailable() {
177
153
  return !!(process.env.ANTHROPIC_API_KEY || process.env.GROV_API_KEY);
178
154
  }
179
- /**
180
- * Check if Anthropic API is available (for drift detection)
181
- */
182
- export function isAnthropicAvailable() {
183
- return !!process.env.ANTHROPIC_API_KEY;
184
- }
185
- /**
186
- * Get the drift model to use (from env or default)
187
- */
188
- export function getDriftModel() {
189
- return process.env.GROV_DRIFT_MODEL || 'claude-haiku-4-5';
190
- }
191
- /**
192
- * Extract structured reasoning from a parsed session using GPT-3.5-turbo
193
- */
194
- export async function extractReasoning(session) {
195
- const openai = getClient();
196
- // Build session summary for the prompt
197
- const sessionSummary = buildSessionSummary(session);
198
- const response = await openai.chat.completions.create({
199
- model: 'gpt-3.5-turbo',
200
- max_tokens: 1024,
201
- messages: [
202
- {
203
- role: 'system',
204
- content: 'You are a helpful assistant that extracts structured information from coding sessions. Always respond with valid JSON only, no explanation.'
205
- },
206
- {
207
- role: 'user',
208
- content: `Analyze this Claude Code session and extract a structured reasoning summary.
209
-
210
- SESSION DATA:
211
- ${sessionSummary}
212
-
213
- Extract the following as JSON:
214
- {
215
- "task": "Brief description (1 sentence)",
216
- "goal": "The underlying problem being solved",
217
- "reasoning_trace": [
218
- "Be SPECIFIC: include file names, function names, line numbers when relevant",
219
- "Format: '[Action] [target] to/for [purpose]'",
220
- "Example: 'Read auth.ts:47 to understand token refresh logic'",
221
- "Example: 'Fixed null check in validateToken() - was causing silent failures'",
222
- "NOT: 'Investigated auth' or 'Fixed bug'"
223
- ],
224
- "decisions": [{"choice": "What was decided", "reason": "Why this over alternatives"}],
225
- "constraints": ["Discovered limitations, rate limits, incompatibilities"],
226
- "status": "complete|partial|question|abandoned",
227
- "tags": ["relevant", "domain", "tags"]
228
- }
229
-
230
- IMPORTANT for reasoning_trace:
231
- - Each entry should be ACTIONABLE information for future developers
232
- - Include specific file:line references when possible
233
- - Explain WHY not just WHAT (e.g., "Chose JWT over sessions because stateless scales better")
234
- - Bad: "Fixed the bug" / Good: "Fixed race condition in UserService.save() - was missing await"
235
-
236
- Status definitions:
237
- - "complete": Task was finished, implementation done
238
- - "partial": Work started but not finished
239
- - "question": Claude asked a question and is waiting for user response
240
- - "abandoned": User interrupted or moved to different topic
241
-
242
- RESPONSE RULES:
243
- - English only (translate if input is in other language)
244
- - No emojis
245
- - Valid JSON only`
246
- }
247
- ]
248
- });
249
- // Parse the response
250
- const content = response.choices[0]?.message?.content;
251
- if (!content) {
252
- throw new Error('No response from OpenAI');
253
- }
254
- try {
255
- // SECURITY: Parse to plain object first, then sanitize prototype pollution
256
- const rawParsed = JSON.parse(content);
257
- // SECURITY: Prevent prototype pollution from LLM-generated JSON
258
- // An attacker could manipulate LLM to return {"__proto__": {"isAdmin": true}}
259
- const pollutionKeys = ['__proto__', 'constructor', 'prototype'];
260
- for (const key of pollutionKeys) {
261
- if (key in rawParsed) {
262
- delete rawParsed[key];
263
- }
264
- }
265
- const extracted = rawParsed;
266
- // SECURITY: Validate types to prevent LLM injection attacks
267
- const safeTask = typeof extracted.task === 'string' ? extracted.task : '';
268
- const safeGoal = typeof extracted.goal === 'string' ? extracted.goal : '';
269
- const safeTrace = Array.isArray(extracted.reasoning_trace)
270
- ? extracted.reasoning_trace.filter((t) => typeof t === 'string')
271
- : [];
272
- const safeDecisions = Array.isArray(extracted.decisions)
273
- ? extracted.decisions.filter((d) => d && typeof d === 'object' && typeof d.choice === 'string' && typeof d.reason === 'string')
274
- : [];
275
- const safeConstraints = Array.isArray(extracted.constraints)
276
- ? extracted.constraints.filter((c) => typeof c === 'string')
277
- : [];
278
- const safeTags = Array.isArray(extracted.tags)
279
- ? extracted.tags.filter((t) => typeof t === 'string')
280
- : [];
281
- // Fill defaults with validated values
282
- return {
283
- task: safeTask || session.userMessages[0]?.substring(0, 100) || 'Unknown task',
284
- goal: safeGoal || safeTask || 'Unknown goal',
285
- reasoning_trace: safeTrace,
286
- files_touched: session.filesRead.concat(session.filesWritten),
287
- decisions: safeDecisions,
288
- constraints: safeConstraints,
289
- status: validateStatus(extracted.status),
290
- tags: safeTags
291
- };
292
- }
293
- catch (parseError) {
294
- // If JSON parsing fails, return basic extraction
295
- debugLLM('Failed to parse LLM response, using fallback');
296
- return createFallbackExtraction(session);
297
- }
298
- }
299
- /**
300
- * Classify just the task status (lighter weight than full extraction)
301
- */
302
- export async function classifyTaskStatus(session) {
303
- const openai = getClient();
304
- // Get last few exchanges for classification
305
- const lastMessages = session.userMessages.slice(-2).join('\n---\n');
306
- const lastAssistant = session.assistantMessages.slice(-1)[0] || '';
307
- const response = await openai.chat.completions.create({
308
- model: 'gpt-3.5-turbo',
309
- max_tokens: 50,
310
- messages: [
311
- {
312
- role: 'system',
313
- content: 'Classify conversation state. Return ONLY one word: complete, partial, question, or abandoned.'
314
- },
315
- {
316
- role: 'user',
317
- content: `Last user message(s):
318
- ${lastMessages}
319
-
320
- Last assistant response (truncated):
321
- ${lastAssistant.substring(0, 500)}
322
-
323
- Files written: ${session.filesWritten.length}
324
- Files read: ${session.filesRead.length}
325
-
326
- Classification:`
327
- }
328
- ]
329
- });
330
- const content = response.choices[0]?.message?.content;
331
- if (!content) {
332
- return 'partial';
333
- }
334
- return validateStatus(content.trim().toLowerCase());
335
- }
336
- /**
337
- * Build a summary of the session for the LLM prompt
338
- */
339
- function buildSessionSummary(session) {
340
- const lines = [];
341
- // User messages
342
- lines.push('USER MESSAGES:');
343
- session.userMessages.forEach((msg, i) => {
344
- lines.push(`[${i + 1}] ${truncate(msg, 300)}`);
345
- });
346
- lines.push('');
347
- // Files touched
348
- lines.push('FILES READ:');
349
- session.filesRead.slice(0, 10).forEach(f => lines.push(` - ${f}`));
350
- if (session.filesRead.length > 10) {
351
- lines.push(` ... and ${session.filesRead.length - 10} more`);
352
- }
353
- lines.push('');
354
- lines.push('FILES WRITTEN/EDITED:');
355
- session.filesWritten.forEach(f => lines.push(` - ${f}`));
356
- lines.push('');
357
- // Tool usage summary
358
- lines.push('TOOL USAGE:');
359
- const toolCounts = session.toolCalls.reduce((acc, t) => {
360
- acc[t.name] = (acc[t.name] || 0) + 1;
361
- return acc;
362
- }, {});
363
- Object.entries(toolCounts).forEach(([name, count]) => {
364
- lines.push(` - ${name}: ${count}x`);
365
- });
366
- lines.push('');
367
- // Last assistant message (often contains summary/conclusion)
368
- const lastAssistant = session.assistantMessages[session.assistantMessages.length - 1];
369
- if (lastAssistant) {
370
- lines.push('LAST ASSISTANT MESSAGE:');
371
- lines.push(truncate(lastAssistant, 500));
372
- }
373
- return lines.join('\n');
374
- }
375
- /**
376
- * Create fallback extraction when LLM fails
377
- */
378
- function createFallbackExtraction(session) {
379
- const filesTouched = [...new Set([...session.filesRead, ...session.filesWritten])];
380
- return {
381
- task: session.userMessages[0]?.substring(0, 100) || 'Unknown task',
382
- goal: session.userMessages[0]?.substring(0, 100) || 'Unknown goal',
383
- reasoning_trace: generateBasicTrace(session),
384
- files_touched: filesTouched,
385
- decisions: [],
386
- constraints: [],
387
- status: session.filesWritten.length > 0 ? 'complete' : 'partial',
388
- tags: generateTagsFromFiles(filesTouched)
389
- };
390
- }
391
- /**
392
- * Generate basic reasoning trace from tool usage
393
- */
394
- function generateBasicTrace(session) {
395
- const trace = [];
396
- const toolCounts = session.toolCalls.reduce((acc, t) => {
397
- acc[t.name] = (acc[t.name] || 0) + 1;
398
- return acc;
399
- }, {});
400
- if (toolCounts['Read'])
401
- trace.push(`Read ${toolCounts['Read']} files`);
402
- if (toolCounts['Write'])
403
- trace.push(`Wrote ${toolCounts['Write']} files`);
404
- if (toolCounts['Edit'])
405
- trace.push(`Edited ${toolCounts['Edit']} files`);
406
- if (toolCounts['Grep'] || toolCounts['Glob'])
407
- trace.push('Searched codebase');
408
- if (toolCounts['Bash'])
409
- trace.push(`Ran ${toolCounts['Bash']} commands`);
410
- return trace;
411
- }
412
- /**
413
- * Generate tags from file paths
414
- */
415
- function generateTagsFromFiles(files) {
416
- const tags = new Set();
417
- for (const file of files) {
418
- const parts = file.split('/');
419
- for (const part of parts) {
420
- if (part && !part.includes('.') && part !== 'src' && part !== 'lib') {
421
- tags.add(part.toLowerCase());
422
- }
423
- }
424
- // Common patterns
425
- if (file.includes('auth'))
426
- tags.add('auth');
427
- if (file.includes('api'))
428
- tags.add('api');
429
- if (file.includes('test'))
430
- tags.add('test');
431
- }
432
- return [...tags].slice(0, 10);
433
- }
434
- /**
435
- * Validate and normalize status
436
- */
437
- function validateStatus(status) {
438
- const normalized = status?.toLowerCase().trim();
439
- if (normalized === 'complete' || normalized === 'partial' ||
440
- normalized === 'question' || normalized === 'abandoned') {
441
- return normalized;
442
- }
443
- return 'partial'; // Default
444
- }
445
155
  // ============================================
446
156
  // SESSION SUMMARY FOR CLEAR OPERATION
447
157
  // Reference: plan_proxy_local.md Section 2.3, 4.5
@@ -456,23 +166,30 @@ export function isSummaryAvailable() {
456
166
  * Generate session summary for CLEAR operation
457
167
  * Reference: plan_proxy_local.md Section 2.3, 4.5
458
168
  */
459
- export async function generateSessionSummary(sessionState, steps) {
169
+ export async function generateSessionSummary(sessionState, steps, maxTokens = 800 // Default 800, CLEAR mode uses 15000
170
+ ) {
460
171
  const client = getAnthropicClient();
172
+ // For larger summaries, include more steps
173
+ const stepLimit = maxTokens > 5000 ? 50 : 20;
174
+ const wordLimit = Math.min(Math.floor(maxTokens / 2), 10000); // ~2 tokens per word
461
175
  const stepsText = steps
462
176
  .filter(s => s.is_validated)
463
- .slice(-20)
177
+ .slice(-stepLimit)
464
178
  .map(step => {
465
179
  let desc = `- ${step.action_type}`;
466
180
  if (step.files.length > 0) {
467
181
  desc += `: ${step.files.join(', ')}`;
468
182
  }
469
183
  if (step.command) {
470
- desc += ` (${step.command.substring(0, 50)})`;
184
+ desc += ` (${step.command.substring(0, 100)})`;
185
+ }
186
+ if (step.reasoning && maxTokens > 5000) {
187
+ desc += `\n Reasoning: ${step.reasoning.substring(0, 200)}`;
471
188
  }
472
189
  return desc;
473
190
  })
474
191
  .join('\n');
475
- const prompt = `Create a concise summary of this coding session for context continuation.
192
+ const prompt = `Create a ${maxTokens > 5000 ? 'comprehensive' : 'concise'} summary of this coding session for context continuation.
476
193
 
477
194
  ORIGINAL GOAL: ${sessionState.original_goal || 'Not specified'}
478
195
 
@@ -483,18 +200,19 @@ CONSTRAINTS: ${sessionState.constraints.join(', ') || 'None'}
483
200
  ACTIONS TAKEN:
484
201
  ${stepsText || 'No actions recorded'}
485
202
 
486
- Create a summary with these sections (keep total under 500 words):
487
- 1. ORIGINAL GOAL: (1 sentence)
488
- 2. PROGRESS: (2-3 bullet points of what was accomplished)
489
- 3. KEY DECISIONS: (any important choices made)
490
- 4. FILES MODIFIED: (list of files)
491
- 5. CURRENT STATE: (where the work left off)
492
- 6. NEXT STEPS: (recommended next actions)
203
+ Create a summary with these sections (keep total under ${wordLimit} words):
204
+ 1. ORIGINAL GOAL: (1-2 sentences)
205
+ 2. PROGRESS: (${maxTokens > 5000 ? '5-10' : '2-3'} bullet points of what was accomplished)
206
+ 3. KEY DECISIONS: (important architectural/design choices made, with reasoning)
207
+ 4. FILES MODIFIED: (list of files with brief description of changes)
208
+ 5. CURRENT STATE: (detailed status of where the work left off)
209
+ 6. NEXT STEPS: (recommended next actions to continue)
210
+ ${maxTokens > 5000 ? '7. IMPORTANT CONTEXT: (any critical information that must not be lost)' : ''}
493
211
 
494
212
  Format as plain text, not JSON.`;
495
213
  const response = await client.messages.create({
496
214
  model: 'claude-haiku-4-5-20251001',
497
- max_tokens: 800,
215
+ max_tokens: maxTokens,
498
216
  messages: [{ role: 'user', content: prompt }],
499
217
  });
500
218
  const content = response.content?.[0];
@@ -528,89 +246,223 @@ export function isTaskAnalysisAvailable() {
528
246
  return !!(process.env.ANTHROPIC_API_KEY || process.env.GROV_API_KEY);
529
247
  }
530
248
  /**
531
- * Analyze task context to determine task status
532
- * Called after each main model response to orchestrate sessions
533
- * Also compresses reasoning for steps if assistantResponse > 1000 chars
249
+ * Format conversation messages for prompt
534
250
  */
535
- export async function analyzeTaskContext(currentSession, latestUserMessage, recentSteps, assistantResponse) {
536
- const client = getAnthropicClient();
537
- const stepsText = recentSteps.slice(0, 5).map(s => {
251
+ function formatConversationHistory(messages) {
252
+ if (!messages || messages.length === 0)
253
+ return 'No conversation history available.';
254
+ return messages.slice(-10).map(m => {
255
+ const role = m.role === 'user' ? 'User' : 'Assistant';
256
+ const content = m.content.substring(0, 800);
257
+ const truncated = m.content.length > 800 ? '...' : '';
258
+ return `${role}: ${content}${truncated}`;
259
+ }).join('\n\n');
260
+ }
261
+ /**
262
+ * Format tool calls for prompt
263
+ */
264
+ function formatToolCalls(steps) {
265
+ if (!steps || steps.length === 0)
266
+ return 'No tools used yet.';
267
+ return steps.slice(0, 10).map(s => {
538
268
  let desc = `- ${s.action_type}`;
539
269
  if (s.files.length > 0) {
540
270
  desc += `: ${s.files.slice(0, 3).join(', ')}`;
541
271
  }
272
+ if (s.command) {
273
+ desc += ` (${s.command.substring(0, 50)})`;
274
+ }
542
275
  return desc;
543
- }).join('\n') || 'None';
276
+ }).join('\n');
277
+ }
278
+ /**
279
+ * Analyze task context to determine task status
280
+ * Called after each main model response to orchestrate sessions
281
+ * Also compresses reasoning for steps if assistantResponse > 1000 chars
282
+ */
283
+ export async function analyzeTaskContext(currentSession, latestUserMessage, recentSteps, assistantResponse, conversationHistory) {
284
+ const client = getAnthropicClient();
544
285
  // Check if we need to compress reasoning
545
286
  const needsCompression = assistantResponse.length > 1000;
546
287
  const compressionInstruction = needsCompression
547
- ? `\n "step_reasoning": "Extract CONCLUSIONS and SPECIFIC RECOMMENDATIONS only. Include: exact file paths (e.g., src/lib/utils.ts), function/component names, architectural patterns discovered, and WHY decisions were made. DO NOT write process descriptions like 'explored' or 'analyzed'. Max 800 chars."`
288
+ ? `,
289
+ "step_reasoning": "Extract CONCLUSIONS only: specific file paths, function names, patterns discovered, and WHY decisions were made. Max 800 chars. Do not write process descriptions."`
548
290
  : '';
549
- const compressionRule = needsCompression
550
- ? '\n- step_reasoning: Extract CONCLUSIONS (specific files, patterns, decisions) NOT process descriptions. Example GOOD: "Utilities belong in src/lib/utils.ts alongside cn(), formatDate()". Example BAD: "Explored codebase structure".'
551
- : '';
552
- // Extract topic keywords from goal for comparison
553
- const currentGoalKeywords = currentSession?.original_goal
554
- ? currentSession.original_goal.toLowerCase().match(/\b\w{4,}\b/g)?.slice(0, 10).join(', ') || ''
555
- : '';
556
- const prompt = `You are a task orchestrator. Your PRIMARY job is to detect when the user starts a NEW, DIFFERENT task.
291
+ // Format conversation history
292
+ const historyText = formatConversationHistory(conversationHistory || []);
293
+ const toolCallsText = formatToolCalls(recentSteps);
294
+ const prompt = `You are a task status analyzer. Your job is to examine a conversation between a user and an AI assistant, then determine whether the current task is complete, still in progress, or if a new task has started.
557
295
 
558
- CURRENT SESSION:
559
- - Current Goal: "${currentSession?.original_goal || 'No active task'}"
560
- - Goal Keywords: [${currentGoalKeywords}]
296
+ <input>
297
+ original_goal: ${currentSession?.original_goal || 'No active task - this may be the first message'}
561
298
 
562
- LATEST USER MESSAGE:
563
- "${latestUserMessage.substring(0, 500)}"
299
+ messages:
300
+ ${historyText}
564
301
 
565
- RECENT ACTIONS (last 5):
566
- ${stepsText}
302
+ current_assistant_response:
303
+ ${assistantResponse ? assistantResponse.substring(0, 2000) : 'No response yet - assistant is still thinking.'}
567
304
 
568
- ASSISTANT RESPONSE (truncated):
569
- "${assistantResponse.substring(0, 1500)}${assistantResponse.length > 1500 ? '...' : ''}"
305
+ tool_calls:
306
+ ${toolCallsText}
307
+ </input>
570
308
 
571
- ═══════════════════════════════════════════════════════════════
572
- CRITICAL: Compare the TOPIC of "Current Goal" vs "Latest User Message"
573
- ═══════════════════════════════════════════════════════════════
309
+ <output>
310
+ Return a JSON object with these fields:
311
+ - task_type: one of "information", "planning", or "implementation"
312
+ - action: one of "continue", "task_complete", "new_task", or "subtask_complete"
313
+ - task_id: existing session_id "${currentSession?.session_id || 'NEW'}" or "NEW" for new task
314
+ - current_goal: the goal based on the latest user message
315
+ - reasoning: brief explanation of why you made this decision${compressionInstruction}
316
+ </output>
574
317
 
575
- Ask yourself:
576
- 1. Is the user message about the SAME subject/feature/file as the current goal?
577
- 2. Or is it about something COMPLETELY DIFFERENT?
318
+ <step_1_identify_task_type>
319
+ First, analyze the original_goal to understand what kind of task this is. Do not rely on specific keywords. Instead, understand the user's intent from the full context of their message.
578
320
 
579
- EXAMPLES of NEW_TASK (different topic):
580
- - Goal: "implement authentication" User: "fix the database migration" NEW_TASK
581
- - Goal: "analyze security layer" → User: "create hello.ts script" → NEW_TASK
582
- - Goal: "refactor user service" → User: "add dark mode to UI" → NEW_TASK
583
- - Goal: "fix login bug" → User: "write unit tests for payments" → NEW_TASK
321
+ TYPE A - Information Request
322
+ The user wants to learn or understand something. They are seeking knowledge, not asking for any changes or decisions to be made. The answer itself is what they need.
584
323
 
585
- EXAMPLES of CONTINUE (same topic):
586
- - Goal: "implement authentication" → User: "now add the logout button" → CONTINUE
587
- - Goal: "fix login bug" → User: "also check the session timeout" → CONTINUE
588
- - Goal: "analyze security" → User: "what about rate limiting?" → CONTINUE
324
+ Think about whether the user is curious about how something works, wants an explanation of a concept, or is asking for clarification about existing behavior.
589
325
 
590
- Return JSON:
591
- {
592
- "action": "continue|new_task|subtask|parallel_task|task_complete|subtask_complete",
593
- "topic_match": "YES if same topic, NO if different topic",
594
- "task_id": "existing session_id or 'NEW' for new task",
595
- "current_goal": "the goal based on LATEST user message",
596
- "reasoning": "1 sentence explaining topic comparison"${compressionInstruction}
597
- }
326
+ Examples of information requests in different phrasings:
327
+ - "How does the authentication system work?"
328
+ - "Explica-mi cum functioneaza cache-ul"
329
+ - "What is the difference between Redis and Memcached?"
330
+ - "Can you walk me through the payment flow?"
331
+ - "I don't understand why this function returns null"
332
+ - "Ce face acest cod?"
333
+
334
+ TYPE B - Planning or Decision Request
335
+ The user wants to figure out the best approach before taking action. They need to make a decision or create a plan. The conversation may involve exploring options, discussing tradeoffs, or clarifying requirements.
336
+
337
+ Think about whether the user is trying to decide between approaches, wants recommendations for how to build something, or is working toward a plan they will implement later.
338
+
339
+ Examples of planning requests in different phrasings:
340
+ - "How should we implement user authentication?"
341
+ - "What's the best way to handle caching for this API?"
342
+ - "Cum ar trebui sa structuram baza de date?"
343
+ - "I'm thinking about using Redis vs Memcached, what do you recommend?"
344
+ - "Let's figure out the architecture before we start coding"
345
+ - "We need to decide on the approach for handling errors"
346
+
347
+ TYPE C - Implementation Request
348
+ The user wants actual changes made. They want code written, files edited, commands run, or something built. The task involves using tools to modify the codebase.
349
+
350
+ Think about whether the user is asking for something to be created, fixed, changed, or built.
351
+
352
+ Examples of implementation requests in different phrasings:
353
+ - "Fix the bug in the login function"
354
+ - "Add caching to the API endpoints"
355
+ - "Fa un refactor la modulul de plati"
356
+ - "Create a new component for the dashboard"
357
+ - "Update the tests to cover edge cases"
358
+ - "Remove the deprecated authentication code"
359
+ </step_1_identify_task_type>
360
+
361
+ <step_2_determine_status>
362
+ Now that you know the task type, determine whether it is complete, continuing, or if a new task has begun.
363
+
364
+ For TYPE A - Information Request:
365
+ The task is complete when the assistant has provided a clear and complete answer to the user's question. Check the current_assistant_response field - if it contains a substantive answer to the question, the task is complete.
366
+
367
+ Each question the user asks is treated as its own separate task. If the user asks a follow-up question, even on the same topic, that is a new task.
368
+
369
+ The reason for this is that each answer is valuable on its own and should be saved independently. We do not want to wait for a multi-turn conversation to end before saving useful information.
370
+
371
+ When analyzing: Look at current_assistant_response. If it contains an explanation, answer, or clarification that addresses the user's question, return task_complete.
372
+
373
+ Example situation: User asks "How does auth work?", assistant explains it fully.
374
+ Decision: task_complete
375
+ Reason: The information request was answered completely.
376
+
377
+ Example situation: User asks "How does auth work?", assistant explains, then user asks "What about JWT specifically?"
378
+ Decision for second message: new_task
379
+ Reason: This is a new question requiring a new answer.
380
+
381
+ For TYPE B - Planning or Decision Request:
382
+ The task continues while the user and assistant are still exploring options, discussing tradeoffs, or clarifying requirements. The task is complete only when a final decision or plan has been reached and the user has confirmed it.
383
+
384
+ Look for signals that indicate the user has made up their mind. These signals come from the overall tone and direction of the conversation, not from specific keywords. The user might express agreement, ask to proceed with implementation, or summarize the chosen approach.
598
385
 
599
- DECISION RULES:
600
- 1. NO current session → "new_task"
601
- 2. topic_match=NO (different subject) "new_task"
602
- 3. topic_match=YES + user following up → "continue"
603
- 4. Claude said "done/complete/finished" "task_complete"
604
- 5. Prerequisite work identified → "subtask"${compressionRule}
386
+ When analyzing, ask yourself: Has the user confirmed a final direction? Are they still weighing options? Have they asked to move forward with a specific approach?
387
+
388
+ Example situation: User asks "Should we use JWT or sessions?", assistant explains both, user says "I'm still not sure about refresh tokens"
389
+ Decision: continue
390
+ Reason: The user is still clarifying and has not made a final decision.
391
+
392
+ Example situation: User and assistant discussed auth options, user says "OK, JWT with refresh tokens makes sense, let's go with that"
393
+ Decision: task_complete
394
+ Reason: The user confirmed the decision. Planning is complete.
395
+
396
+ Example situation: User says "That sounds good, now implement it"
397
+ Decision: task_complete for planning, and a new implementation task will begin
398
+ Reason: Planning concluded with a decision. User is now requesting implementation.
399
+
400
+ For TYPE C - Implementation Request:
401
+ The task continues while the assistant is actively making changes using tools like file edits, bash commands, or file writes. The task is complete when the changes are done and verified.
402
+
403
+ Look for signals that the work is finished in current_assistant_response: successful test runs, the assistant stating the work is done, or a commit being made. If tests are failing or the assistant indicates more work is needed, the task continues.
404
+
405
+ When analyzing: Check current_assistant_response for completion signals. Is the assistant still making changes? Have the changes been verified? Did the assistant confirm completion?
406
+
407
+ Example situation: Assistant edited three files and is now running tests.
408
+ Decision: continue
409
+ Reason: Implementation is in progress, verification not yet complete.
410
+
411
+ Example situation: Assistant ran tests, they passed, assistant says "Done, the auth bug is fixed"
412
+ Decision: task_complete
413
+ Reason: Changes are complete and verified.
414
+
415
+ Example situation: Tests failed after the changes.
416
+ Decision: continue
417
+ Reason: The implementation needs more work to pass verification.
418
+ </step_2_determine_status>
419
+
420
+ <step_3_detect_new_task>
421
+ Sometimes the user changes direction entirely. A new task has started when:
422
+
423
+ The user asks about something completely unrelated to the original goal.
424
+ The conversation topic shifts to a different part of the codebase or a different feature.
425
+ The previous task was completed and the user is now requesting something new.
426
+
427
+ To detect this, compare the current user message to the original_goal. If they are about the same thing, the task is either continuing or complete. If they are about different things, a new task has started.
428
+
429
+ Be careful not to confuse follow-up questions with new tasks. A follow-up question on the same topic in an information request is a new task because each answer stands alone. But a follow-up clarification during planning is part of the same planning task.
430
+
431
+ Example situation: Original goal was "fix the auth bug", user now asks "also, can you update the README?"
432
+ Decision: new_task
433
+ Reason: Updating README is unrelated to fixing the auth bug.
434
+
435
+ Example situation: Original goal was "implement caching", user asks "should we use Redis or Memcached for this?"
436
+ Decision: continue (this is planning within the implementation task)
437
+ Reason: The question is about how to implement the original request.
438
+
439
+ Example situation: Original goal was "explain how auth works", user asks "and how does the session storage work?"
440
+ Decision: new_task
441
+ Reason: This is a new information request, separate from the first.
442
+ </step_3_detect_new_task>
443
+
444
+ <important_notes>
445
+ Do not rely on specific keywords in any language. The same intent can be expressed many different ways across languages and phrasings. Always understand the intent from the full context.
446
+
447
+ The conversation history and tool usage are your most important signals. What has the assistant been doing? What is the user trying to accomplish? Has that goal been achieved?
448
+
449
+ CRITICAL - Q&A DURING PLANNING:
450
+ If the current task_type is "planning" and the user asks a clarifying question (e.g., "how does X work?", "what about Y?", "clarify Z"), this is NOT a new information task. It is a CONTINUATION of the planning task. The user is gathering information to make a planning decision, not requesting standalone information.
451
+ - If original task_type was planning → keep it as planning, action=continue
452
+ - Only mark task_complete for planning when user explicitly confirms a final decision or asks to proceed with implementation
453
+ - Asking to "write to file" or "document the plan" is NOT task_complete - it's still part of planning documentation
454
+
455
+ When in doubt between continue and task_complete, ask yourself: Would it be valuable to save what we have so far? For information requests, yes, save each answer. For planning, only save when a decision is made. For implementation, only save when work is verified complete.
605
456
 
606
457
  RESPONSE RULES:
607
- - English only (translate if input is in other language)
608
- - No emojis
609
- - Valid JSON only`;
458
+ - Return valid JSON only
459
+ - English only in the response (translate reasoning if input is in other language)
460
+ - No markdown formatting, no emojis
461
+ </important_notes>`;
610
462
  debugLLM('analyzeTaskContext', `Calling Haiku for task analysis (needsCompression=${needsCompression})`);
611
463
  const response = await client.messages.create({
612
464
  model: 'claude-haiku-4-5-20251001',
613
- max_tokens: needsCompression ? 600 : 300,
465
+ max_tokens: needsCompression ? 800 : 400,
614
466
  messages: [{ role: 'user', content: prompt }],
615
467
  });
616
468
  const text = response.content[0].type === 'text' ? response.content[0].text : '';
@@ -621,17 +473,22 @@ RESPONSE RULES:
621
473
  throw new Error('No JSON found in response');
622
474
  }
623
475
  const analysis = JSON.parse(jsonMatch[0]);
476
+ // Ensure task_type has a default value
477
+ if (!analysis.task_type) {
478
+ analysis.task_type = 'implementation';
479
+ }
624
480
  // If we didn't need compression but have short response, use it directly
625
481
  if (!needsCompression && assistantResponse.length > 0) {
626
482
  analysis.step_reasoning = assistantResponse.substring(0, 1000);
627
483
  }
628
- debugLLM('analyzeTaskContext', `Result: action=${analysis.action}, topic_match=${analysis.topic_match}, goal=${analysis.current_goal.substring(0, 50)}`);
484
+ debugLLM('analyzeTaskContext', `Result: task_type=${analysis.task_type}, action=${analysis.action}, goal="${analysis.current_goal?.substring(0, 50) || 'N/A'}" reasoning="${analysis.reasoning?.substring(0, 150) || 'none'}"`);
629
485
  return analysis;
630
486
  }
631
487
  catch (parseError) {
632
488
  debugLLM('analyzeTaskContext', `Parse error: ${String(parseError)}, using fallback`);
633
489
  // Fallback: continue existing session or create new
634
490
  return {
491
+ task_type: 'implementation',
635
492
  action: currentSession ? 'continue' : 'new_task',
636
493
  task_id: currentSession?.session_id || 'NEW',
637
494
  current_goal: latestUserMessage.substring(0, 200),
@@ -649,76 +506,151 @@ export function isReasoningExtractionAvailable() {
649
506
  /**
650
507
  * Extract reasoning trace and decisions from steps
651
508
  * Called at task_complete to populate team memory with rich context
509
+ *
510
+ * @param formattedSteps - Pre-formatted XML string with grouped steps and actions
511
+ * @param originalGoal - The original task goal
652
512
  */
653
- export async function extractReasoningAndDecisions(stepsReasoning, originalGoal) {
513
+ export async function extractReasoningAndDecisions(formattedSteps, originalGoal) {
654
514
  const client = getAnthropicClient();
655
- // Combine all steps reasoning into one text
656
- const combinedReasoning = stepsReasoning
657
- .filter(r => r && r.length > 10)
658
- .join('\n\n---\n\n')
659
- .substring(0, 8000);
660
- if (combinedReasoning.length < 50) {
515
+ if (formattedSteps.length < 50) {
661
516
  return { reasoning_trace: [], decisions: [] };
662
517
  }
663
- const prompt = `Extract CONCLUSIONS and KNOWLEDGE from Claude's work - NOT process descriptions.
518
+ const prompt = `<role>
519
+ You are a Knowledge Engineer specialized in extracting reusable team knowledge from coding sessions.
664
520
 
665
- ORIGINAL GOAL:
666
- ${originalGoal || 'Not specified'}
521
+ Your output will be stored permanently in team memory and used to help developers in future sessions. Poor extractions waste storage and confuse future assistants. Excellent extractions save hours of repeated investigation.
522
+ </role>
667
523
 
668
- CLAUDE'S RESPONSE:
669
- ${combinedReasoning}
524
+ <context>
525
+ PROJECT GOAL: ${originalGoal || 'Not specified'}
670
526
 
671
- ═══════════════════════════════════════════════════════════════
672
- EXTRACT ACTIONABLE CONCLUSIONS - NOT PROCESS
673
- ═══════════════════════════════════════════════════════════════
527
+ This extraction serves two purposes:
528
+ 1. Help future developers understand WHAT was discovered in this codebase
529
+ 2. Help future developers understand WHY certain decisions were made
530
+ </context>
531
+
532
+ <session_data>
533
+ ${formattedSteps.substring(0, 8000)}
534
+ </session_data>
535
+
536
+ <instructions>
537
+
538
+ We need TWO types of knowledge extracted:
539
+
540
+ TYPE A: CONCLUSIONS (Factual findings from the session)
541
+
542
+ What this means:
543
+ These are FACTS discovered during the session. Things that were explicitly found, read, or confirmed in the code. A new developer reading these should immediately know WHERE to find things and WHAT values/patterns exist.
544
+
545
+ Must include:
546
+ - Specific file paths (not just "auth files" but "src/lib/jwt.ts")
547
+ - Specific values (not just "short expiry" but "1 hour access, 7 day refresh")
548
+ - Specific patterns (not just "uses JWT" but "JWT with sub, email, type, teams payload")
549
+ - Specific functions/classes (not just "middleware" but "requireAuth, optionalAuth preHandlers")
550
+
551
+ Format: Start with "CONCLUSION: " prefix
552
+
553
+ Good examples:
554
+ - "CONCLUSION: JWT tokens stored in ~/.grov/credentials.json with 1hr access/7d refresh expiry"
555
+ - "CONCLUSION: Auth middleware in src/routes/auth.ts exports requireAuth and optionalAuth preHandlers"
556
+ - "CONCLUSION: Device flow polling interval is 5 seconds, endpoint /auth/device/poll"
557
+
558
+ Bad examples:
559
+ - "CONCLUSION: Found authentication files" (too vague, no paths)
560
+ - "CONCLUSION: JWT is used for auth" (too generic, no specifics)
561
+ - "CONCLUSION: Explored the codebase" (process description, not finding)
562
+
563
+
564
+ TYPE B: INSIGHTS (Your analysis and inferences)
565
+
566
+ What this means:
567
+ These are YOUR observations that go BEYOND what was explicitly stated. Connections between different parts, patterns you identified, implications for future work. This is where YOU add value beyond just summarizing.
568
+
569
+ Types of insights we value:
570
+
571
+ 1. CONNECTIONS - How do different files/modules relate?
572
+ Example: "jwt.ts handles token creation, credentials.ts handles storage - separation of crypto operations from I/O"
573
+
574
+ 2. INFERENCES - What decisions were made implicitly?
575
+ Example: "File storage in ~/.grov/ instead of env vars - implies single-user CLI design, not multi-tenant"
576
+
577
+ 3. PATTERNS - What architectural patterns emerge?
578
+ Example: "All config files use 0600 permissions - security-conscious design for sensitive data"
579
+
580
+ 4. IMPLICATIONS - What does this mean for future development?
581
+ Example: "1hr token expiry requires background refresh mechanism for long operations to avoid mid-task auth failures"
582
+
583
+ Format: Start with "INSIGHT: " prefix
584
+
585
+ Good examples:
586
+ - "INSIGHT: Dual-file pattern (jwt.ts + credentials.ts) separates crypto from I/O, reducing attack surface"
587
+ - "INSIGHT: Device Authorization Flow chosen over password flow - enables OAuth providers without storing secrets in CLI"
588
+ - "INSIGHT: Teams array cached in JWT payload - avoids DB query per request but requires token refresh on team changes"
589
+
590
+ Bad examples:
591
+ - "INSIGHT: The code is well organized" (subjective, not actionable)
592
+ - "INSIGHT: Authentication is important" (obvious, no value)
593
+ - "INSIGHT: Files were read" (process description, not insight)
594
+
595
+ </instructions>
596
+
597
+ <output_format>
598
+ Return a JSON object with this structure:
674
599
 
675
- GOOD examples (specific, reusable knowledge):
676
- - "Utility functions belong in frontend/lib/utils.ts - existing utils: cn(), formatDate(), debounce()"
677
- - "Auth tokens stored in localStorage with 15min expiry for long form sessions"
678
- - "API routes follow REST pattern in /api/v1/ with Zod validation"
679
- - "Database migrations go in prisma/migrations/ using prisma migrate"
680
-
681
- BAD examples (process descriptions - DO NOT EXTRACT THESE):
682
- - "Explored the codebase structure"
683
- - "Analyzed several approaches"
684
- - "Searched for utility directories"
685
- - "Looked at the file organization"
686
-
687
- 1. REASONING TRACE (conclusions and recommendations):
688
- - WHAT was discovered or decided (specific file paths, patterns)
689
- - WHY this is the right approach
690
- - WHERE this applies in the codebase
691
- - Max 10 entries, prioritize specific file/function recommendations
692
-
693
- 2. DECISIONS (architectural choices):
694
- - Only significant choices that affect future work
695
- - What was chosen and why
696
- - Max 5 decisions
697
-
698
- Return JSON:
699
600
  {
700
- "reasoning_trace": [
701
- "Utility functions belong in frontend/lib/utils.ts alongside cn(), formatDate(), debounce(), generateId()",
702
- "Backend utilities go in backend/app/utils/ with domain-specific files like validation.py",
703
- "The @/lib/utils import alias is configured for frontend utility access"
601
+ "knowledge_pairs": [
602
+ {
603
+ "conclusion": "CONCLUSION: [specific factual finding with file paths and values]",
604
+ "insight": "INSIGHT: [inference or implication RELATED to this conclusion]"
605
+ },
606
+ {
607
+ "conclusion": "CONCLUSION: [another specific finding]",
608
+ "insight": "INSIGHT: [what this means for future development]"
609
+ }
704
610
  ],
705
611
  "decisions": [
706
- {"choice": "Add to existing utils.ts rather than new file", "reason": "Maintains established pattern, easier discoverability"},
707
- {"choice": "Use frontend/lib/ over src/utils/", "reason": "Follows Next.js conventions used throughout project"}
612
+ {
613
+ "choice": "[What was chosen - be specific]",
614
+ "reason": "[Why - include whether this is factual or inferred]"
615
+ }
708
616
  ]
709
617
  }
710
618
 
711
- RESPONSE RULES:
712
- - English only
713
- - No emojis
714
- - Valid JSON only
715
- - Extract WHAT and WHERE, not just WHAT was done
716
- - If no specific conclusions found, return empty arrays`;
717
- debugLLM('extractReasoningAndDecisions', `Analyzing ${stepsReasoning.length} steps, ${combinedReasoning.length} chars`);
619
+ IMPORTANT: Generate knowledge as PAIRS where each INSIGHT is directly related to its CONCLUSION.
620
+
621
+ Example pair:
622
+ {
623
+ "conclusion": "CONCLUSION: MemoryCache uses lazy expiration - entries checked/deleted on get(), not via timers",
624
+ "insight": "INSIGHT: Lazy expiration avoids timer overhead that would accumulate with large caches - trades CPU on read for memory efficiency"
625
+ }
626
+
627
+ Rules:
628
+ 1. Each pair MUST have a conclusion AND a related insight
629
+ 2. The insight MUST add value beyond the conclusion (inference, implication, pattern)
630
+ 3. Max 5 pairs (10 entries total) - prioritize most valuable
631
+ 4. Max 5 decisions - only significant architectural choices
632
+ 5. If you cannot find a meaningful insight for a conclusion, still include the conclusion with insight: null
633
+ 6. NEVER include process descriptions ("explored", "searched", "looked at")
634
+ 7. English only, no emojis
635
+ 8. Use prefixes "CONCLUSION: " and "INSIGHT: " in the strings
636
+ </output_format>
637
+
638
+ <validation>
639
+ Before responding, verify:
640
+ - Does each CONCLUSION contain a specific file path or value?
641
+ - Is each INSIGHT directly related to its paired CONCLUSION?
642
+ - Does each INSIGHT add something NOT explicitly in the input?
643
+ - Would a new developer find the pairs useful without seeing the original session?
644
+ - Did I avoid process descriptions?
645
+ - Are the decisions about significant architectural choices?
646
+ </validation>
647
+
648
+ Return ONLY valid JSON, no markdown code blocks, no explanation.`;
649
+ debugLLM('extractReasoningAndDecisions', `Analyzing formatted steps, ${formattedSteps.length} chars`);
718
650
  try {
719
651
  const response = await client.messages.create({
720
652
  model: 'claude-haiku-4-5-20251001',
721
- max_tokens: 800,
653
+ max_tokens: 1500,
722
654
  messages: [{ role: 'user', content: prompt }],
723
655
  });
724
656
  const text = response.content[0].type === 'text' ? response.content[0].text : '';
@@ -727,10 +659,60 @@ RESPONSE RULES:
727
659
  debugLLM('extractReasoningAndDecisions', 'No JSON found in response');
728
660
  return { reasoning_trace: [], decisions: [] };
729
661
  }
730
- const result = JSON.parse(jsonMatch[0]);
731
- debugLLM('extractReasoningAndDecisions', `Extracted ${result.reasoning_trace?.length || 0} traces, ${result.decisions?.length || 0} decisions`);
662
+ // Try to parse JSON, with repair attempts for common Haiku formatting issues
663
+ let result;
664
+ try {
665
+ result = JSON.parse(jsonMatch[0]);
666
+ }
667
+ catch (parseError) {
668
+ // Common fixes: trailing commas, unescaped newlines in strings
669
+ let repaired = jsonMatch[0]
670
+ .replace(/,\s*}/g, '}') // trailing comma before }
671
+ .replace(/,\s*]/g, ']') // trailing comma before ]
672
+ .replace(/\n/g, '\\n') // unescaped newlines
673
+ .replace(/\r/g, '\\r') // unescaped carriage returns
674
+ .replace(/\t/g, '\\t'); // unescaped tabs
675
+ try {
676
+ result = JSON.parse(repaired);
677
+ }
678
+ catch {
679
+ // Last resort: try to extract just knowledge_pairs array
680
+ const pairsMatch = jsonMatch[0].match(/"knowledge_pairs"\s*:\s*\[([\s\S]*?)\]/);
681
+ if (pairsMatch) {
682
+ try {
683
+ const pairs = JSON.parse(`[${pairsMatch[1].replace(/,\s*$/, '')}]`);
684
+ result = { knowledge_pairs: pairs, decisions: [] };
685
+ }
686
+ catch {
687
+ throw parseError; // Re-throw original error
688
+ }
689
+ }
690
+ else {
691
+ throw parseError;
692
+ }
693
+ }
694
+ }
695
+ // Flatten knowledge_pairs into reasoning_trace (interleaved: conclusion, insight, conclusion, insight...)
696
+ let reasoningTrace = [];
697
+ if (result.knowledge_pairs && result.knowledge_pairs.length > 0) {
698
+ // New format: flatten pairs into interleaved array
699
+ for (const pair of result.knowledge_pairs) {
700
+ if (pair.conclusion) {
701
+ reasoningTrace.push(pair.conclusion);
702
+ }
703
+ if (pair.insight) {
704
+ reasoningTrace.push(pair.insight);
705
+ }
706
+ }
707
+ debugLLM('extractReasoningAndDecisions', `Extracted ${result.knowledge_pairs.length} pairs (${reasoningTrace.length} entries), ${result.decisions?.length || 0} decisions`);
708
+ }
709
+ else if (result.reasoning_trace) {
710
+ // Backwards compatibility: old format with flat array
711
+ reasoningTrace = result.reasoning_trace;
712
+ debugLLM('extractReasoningAndDecisions', `Extracted ${reasoningTrace.length} traces (old format), ${result.decisions?.length || 0} decisions`);
713
+ }
732
714
  return {
733
- reasoning_trace: result.reasoning_trace || [],
715
+ reasoning_trace: reasoningTrace,
734
716
  decisions: result.decisions || [],
735
717
  };
736
718
  }