grov 0.2.3 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -5
- package/dist/cli.js +40 -2
- package/dist/commands/login.d.ts +1 -0
- package/dist/commands/login.js +115 -0
- package/dist/commands/logout.d.ts +1 -0
- package/dist/commands/logout.js +13 -0
- package/dist/commands/sync.d.ts +8 -0
- package/dist/commands/sync.js +127 -0
- package/dist/lib/api-client.d.ts +57 -0
- package/dist/lib/api-client.js +174 -0
- package/dist/lib/cloud-sync.d.ts +33 -0
- package/dist/lib/cloud-sync.js +176 -0
- package/dist/lib/credentials.d.ts +53 -0
- package/dist/lib/credentials.js +201 -0
- package/dist/lib/llm-extractor.d.ts +15 -39
- package/dist/lib/llm-extractor.js +400 -418
- package/dist/lib/store/convenience.d.ts +40 -0
- package/dist/lib/store/convenience.js +104 -0
- package/dist/lib/store/database.d.ts +22 -0
- package/dist/lib/store/database.js +375 -0
- package/dist/lib/store/drift.d.ts +9 -0
- package/dist/lib/store/drift.js +89 -0
- package/dist/lib/store/index.d.ts +7 -0
- package/dist/lib/store/index.js +13 -0
- package/dist/lib/store/sessions.d.ts +32 -0
- package/dist/lib/store/sessions.js +240 -0
- package/dist/lib/store/steps.d.ts +40 -0
- package/dist/lib/store/steps.js +161 -0
- package/dist/lib/store/tasks.d.ts +33 -0
- package/dist/lib/store/tasks.js +133 -0
- package/dist/lib/store/types.d.ts +167 -0
- package/dist/lib/store/types.js +2 -0
- package/dist/lib/store.d.ts +1 -406
- package/dist/lib/store.js +2 -1356
- package/dist/lib/utils.d.ts +5 -0
- package/dist/lib/utils.js +45 -0
- package/dist/proxy/action-parser.d.ts +10 -2
- package/dist/proxy/action-parser.js +4 -2
- package/dist/proxy/cache.d.ts +36 -0
- package/dist/proxy/cache.js +51 -0
- package/dist/proxy/config.d.ts +1 -0
- package/dist/proxy/config.js +2 -0
- package/dist/proxy/extended-cache.d.ts +10 -0
- package/dist/proxy/extended-cache.js +155 -0
- package/dist/proxy/forwarder.d.ts +7 -1
- package/dist/proxy/forwarder.js +157 -7
- package/dist/proxy/handlers/preprocess.d.ts +20 -0
- package/dist/proxy/handlers/preprocess.js +169 -0
- package/dist/proxy/injection/delta-tracking.d.ts +11 -0
- package/dist/proxy/injection/delta-tracking.js +93 -0
- package/dist/proxy/injection/injectors.d.ts +7 -0
- package/dist/proxy/injection/injectors.js +139 -0
- package/dist/proxy/request-processor.d.ts +18 -3
- package/dist/proxy/request-processor.js +151 -28
- package/dist/proxy/response-processor.js +116 -47
- package/dist/proxy/server.d.ts +4 -1
- package/dist/proxy/server.js +592 -253
- package/dist/proxy/types.d.ts +13 -0
- package/dist/proxy/types.js +2 -0
- package/dist/proxy/utils/extractors.d.ts +18 -0
- package/dist/proxy/utils/extractors.js +109 -0
- package/dist/proxy/utils/logging.d.ts +18 -0
- package/dist/proxy/utils/logging.js +42 -0
- package/package.json +22 -4
|
@@ -1,35 +1,17 @@
|
|
|
1
|
-
// LLM-based extraction using
|
|
2
|
-
// and Anthropic Claude Haiku for drift detection
|
|
3
|
-
import OpenAI from 'openai';
|
|
1
|
+
// LLM-based extraction using Anthropic Claude Haiku for drift detection
|
|
4
2
|
import Anthropic from '@anthropic-ai/sdk';
|
|
5
3
|
import { config } from 'dotenv';
|
|
6
4
|
import { join } from 'path';
|
|
7
5
|
import { homedir } from 'os';
|
|
8
6
|
import { existsSync } from 'fs';
|
|
9
7
|
import { debugLLM } from './debug.js';
|
|
10
|
-
import { truncate } from './utils.js';
|
|
11
8
|
// Load ~/.grov/.env as fallback for API key
|
|
12
9
|
// This allows users to store their API key in a safe location outside any repo
|
|
13
10
|
const grovEnvPath = join(homedir(), '.grov', '.env');
|
|
14
11
|
if (existsSync(grovEnvPath)) {
|
|
15
12
|
config({ path: grovEnvPath });
|
|
16
13
|
}
|
|
17
|
-
let client = null;
|
|
18
14
|
let anthropicClient = null;
|
|
19
|
-
/**
|
|
20
|
-
* Initialize the OpenAI client
|
|
21
|
-
*/
|
|
22
|
-
function getClient() {
|
|
23
|
-
if (!client) {
|
|
24
|
-
const apiKey = process.env.OPENAI_API_KEY;
|
|
25
|
-
if (!apiKey) {
|
|
26
|
-
// SECURITY: Generic error to avoid confirming API key mechanism exists
|
|
27
|
-
throw new Error('LLM extraction unavailable');
|
|
28
|
-
}
|
|
29
|
-
client = new OpenAI({ apiKey });
|
|
30
|
-
}
|
|
31
|
-
return client;
|
|
32
|
-
}
|
|
33
15
|
/**
|
|
34
16
|
* Initialize the Anthropic client
|
|
35
17
|
*/
|
|
@@ -43,12 +25,6 @@ function getAnthropicClient() {
|
|
|
43
25
|
}
|
|
44
26
|
return anthropicClient;
|
|
45
27
|
}
|
|
46
|
-
/**
|
|
47
|
-
* Check if LLM extraction is available (OpenAI API key set)
|
|
48
|
-
*/
|
|
49
|
-
export function isLLMAvailable() {
|
|
50
|
-
return !!process.env.OPENAI_API_KEY;
|
|
51
|
-
}
|
|
52
28
|
/**
|
|
53
29
|
* Extract intent from first user prompt using Haiku
|
|
54
30
|
* Called once at session start to populate session_states
|
|
@@ -176,272 +152,6 @@ function createFallbackIntent(prompt) {
|
|
|
176
152
|
export function isIntentExtractionAvailable() {
|
|
177
153
|
return !!(process.env.ANTHROPIC_API_KEY || process.env.GROV_API_KEY);
|
|
178
154
|
}
|
|
179
|
-
/**
|
|
180
|
-
* Check if Anthropic API is available (for drift detection)
|
|
181
|
-
*/
|
|
182
|
-
export function isAnthropicAvailable() {
|
|
183
|
-
return !!process.env.ANTHROPIC_API_KEY;
|
|
184
|
-
}
|
|
185
|
-
/**
|
|
186
|
-
* Get the drift model to use (from env or default)
|
|
187
|
-
*/
|
|
188
|
-
export function getDriftModel() {
|
|
189
|
-
return process.env.GROV_DRIFT_MODEL || 'claude-haiku-4-5';
|
|
190
|
-
}
|
|
191
|
-
/**
|
|
192
|
-
* Extract structured reasoning from a parsed session using GPT-3.5-turbo
|
|
193
|
-
*/
|
|
194
|
-
export async function extractReasoning(session) {
|
|
195
|
-
const openai = getClient();
|
|
196
|
-
// Build session summary for the prompt
|
|
197
|
-
const sessionSummary = buildSessionSummary(session);
|
|
198
|
-
const response = await openai.chat.completions.create({
|
|
199
|
-
model: 'gpt-3.5-turbo',
|
|
200
|
-
max_tokens: 1024,
|
|
201
|
-
messages: [
|
|
202
|
-
{
|
|
203
|
-
role: 'system',
|
|
204
|
-
content: 'You are a helpful assistant that extracts structured information from coding sessions. Always respond with valid JSON only, no explanation.'
|
|
205
|
-
},
|
|
206
|
-
{
|
|
207
|
-
role: 'user',
|
|
208
|
-
content: `Analyze this Claude Code session and extract a structured reasoning summary.
|
|
209
|
-
|
|
210
|
-
SESSION DATA:
|
|
211
|
-
${sessionSummary}
|
|
212
|
-
|
|
213
|
-
Extract the following as JSON:
|
|
214
|
-
{
|
|
215
|
-
"task": "Brief description (1 sentence)",
|
|
216
|
-
"goal": "The underlying problem being solved",
|
|
217
|
-
"reasoning_trace": [
|
|
218
|
-
"Be SPECIFIC: include file names, function names, line numbers when relevant",
|
|
219
|
-
"Format: '[Action] [target] to/for [purpose]'",
|
|
220
|
-
"Example: 'Read auth.ts:47 to understand token refresh logic'",
|
|
221
|
-
"Example: 'Fixed null check in validateToken() - was causing silent failures'",
|
|
222
|
-
"NOT: 'Investigated auth' or 'Fixed bug'"
|
|
223
|
-
],
|
|
224
|
-
"decisions": [{"choice": "What was decided", "reason": "Why this over alternatives"}],
|
|
225
|
-
"constraints": ["Discovered limitations, rate limits, incompatibilities"],
|
|
226
|
-
"status": "complete|partial|question|abandoned",
|
|
227
|
-
"tags": ["relevant", "domain", "tags"]
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
IMPORTANT for reasoning_trace:
|
|
231
|
-
- Each entry should be ACTIONABLE information for future developers
|
|
232
|
-
- Include specific file:line references when possible
|
|
233
|
-
- Explain WHY not just WHAT (e.g., "Chose JWT over sessions because stateless scales better")
|
|
234
|
-
- Bad: "Fixed the bug" / Good: "Fixed race condition in UserService.save() - was missing await"
|
|
235
|
-
|
|
236
|
-
Status definitions:
|
|
237
|
-
- "complete": Task was finished, implementation done
|
|
238
|
-
- "partial": Work started but not finished
|
|
239
|
-
- "question": Claude asked a question and is waiting for user response
|
|
240
|
-
- "abandoned": User interrupted or moved to different topic
|
|
241
|
-
|
|
242
|
-
RESPONSE RULES:
|
|
243
|
-
- English only (translate if input is in other language)
|
|
244
|
-
- No emojis
|
|
245
|
-
- Valid JSON only`
|
|
246
|
-
}
|
|
247
|
-
]
|
|
248
|
-
});
|
|
249
|
-
// Parse the response
|
|
250
|
-
const content = response.choices[0]?.message?.content;
|
|
251
|
-
if (!content) {
|
|
252
|
-
throw new Error('No response from OpenAI');
|
|
253
|
-
}
|
|
254
|
-
try {
|
|
255
|
-
// SECURITY: Parse to plain object first, then sanitize prototype pollution
|
|
256
|
-
const rawParsed = JSON.parse(content);
|
|
257
|
-
// SECURITY: Prevent prototype pollution from LLM-generated JSON
|
|
258
|
-
// An attacker could manipulate LLM to return {"__proto__": {"isAdmin": true}}
|
|
259
|
-
const pollutionKeys = ['__proto__', 'constructor', 'prototype'];
|
|
260
|
-
for (const key of pollutionKeys) {
|
|
261
|
-
if (key in rawParsed) {
|
|
262
|
-
delete rawParsed[key];
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
const extracted = rawParsed;
|
|
266
|
-
// SECURITY: Validate types to prevent LLM injection attacks
|
|
267
|
-
const safeTask = typeof extracted.task === 'string' ? extracted.task : '';
|
|
268
|
-
const safeGoal = typeof extracted.goal === 'string' ? extracted.goal : '';
|
|
269
|
-
const safeTrace = Array.isArray(extracted.reasoning_trace)
|
|
270
|
-
? extracted.reasoning_trace.filter((t) => typeof t === 'string')
|
|
271
|
-
: [];
|
|
272
|
-
const safeDecisions = Array.isArray(extracted.decisions)
|
|
273
|
-
? extracted.decisions.filter((d) => d && typeof d === 'object' && typeof d.choice === 'string' && typeof d.reason === 'string')
|
|
274
|
-
: [];
|
|
275
|
-
const safeConstraints = Array.isArray(extracted.constraints)
|
|
276
|
-
? extracted.constraints.filter((c) => typeof c === 'string')
|
|
277
|
-
: [];
|
|
278
|
-
const safeTags = Array.isArray(extracted.tags)
|
|
279
|
-
? extracted.tags.filter((t) => typeof t === 'string')
|
|
280
|
-
: [];
|
|
281
|
-
// Fill defaults with validated values
|
|
282
|
-
return {
|
|
283
|
-
task: safeTask || session.userMessages[0]?.substring(0, 100) || 'Unknown task',
|
|
284
|
-
goal: safeGoal || safeTask || 'Unknown goal',
|
|
285
|
-
reasoning_trace: safeTrace,
|
|
286
|
-
files_touched: session.filesRead.concat(session.filesWritten),
|
|
287
|
-
decisions: safeDecisions,
|
|
288
|
-
constraints: safeConstraints,
|
|
289
|
-
status: validateStatus(extracted.status),
|
|
290
|
-
tags: safeTags
|
|
291
|
-
};
|
|
292
|
-
}
|
|
293
|
-
catch (parseError) {
|
|
294
|
-
// If JSON parsing fails, return basic extraction
|
|
295
|
-
debugLLM('Failed to parse LLM response, using fallback');
|
|
296
|
-
return createFallbackExtraction(session);
|
|
297
|
-
}
|
|
298
|
-
}
|
|
299
|
-
/**
|
|
300
|
-
* Classify just the task status (lighter weight than full extraction)
|
|
301
|
-
*/
|
|
302
|
-
export async function classifyTaskStatus(session) {
|
|
303
|
-
const openai = getClient();
|
|
304
|
-
// Get last few exchanges for classification
|
|
305
|
-
const lastMessages = session.userMessages.slice(-2).join('\n---\n');
|
|
306
|
-
const lastAssistant = session.assistantMessages.slice(-1)[0] || '';
|
|
307
|
-
const response = await openai.chat.completions.create({
|
|
308
|
-
model: 'gpt-3.5-turbo',
|
|
309
|
-
max_tokens: 50,
|
|
310
|
-
messages: [
|
|
311
|
-
{
|
|
312
|
-
role: 'system',
|
|
313
|
-
content: 'Classify conversation state. Return ONLY one word: complete, partial, question, or abandoned.'
|
|
314
|
-
},
|
|
315
|
-
{
|
|
316
|
-
role: 'user',
|
|
317
|
-
content: `Last user message(s):
|
|
318
|
-
${lastMessages}
|
|
319
|
-
|
|
320
|
-
Last assistant response (truncated):
|
|
321
|
-
${lastAssistant.substring(0, 500)}
|
|
322
|
-
|
|
323
|
-
Files written: ${session.filesWritten.length}
|
|
324
|
-
Files read: ${session.filesRead.length}
|
|
325
|
-
|
|
326
|
-
Classification:`
|
|
327
|
-
}
|
|
328
|
-
]
|
|
329
|
-
});
|
|
330
|
-
const content = response.choices[0]?.message?.content;
|
|
331
|
-
if (!content) {
|
|
332
|
-
return 'partial';
|
|
333
|
-
}
|
|
334
|
-
return validateStatus(content.trim().toLowerCase());
|
|
335
|
-
}
|
|
336
|
-
/**
|
|
337
|
-
* Build a summary of the session for the LLM prompt
|
|
338
|
-
*/
|
|
339
|
-
function buildSessionSummary(session) {
|
|
340
|
-
const lines = [];
|
|
341
|
-
// User messages
|
|
342
|
-
lines.push('USER MESSAGES:');
|
|
343
|
-
session.userMessages.forEach((msg, i) => {
|
|
344
|
-
lines.push(`[${i + 1}] ${truncate(msg, 300)}`);
|
|
345
|
-
});
|
|
346
|
-
lines.push('');
|
|
347
|
-
// Files touched
|
|
348
|
-
lines.push('FILES READ:');
|
|
349
|
-
session.filesRead.slice(0, 10).forEach(f => lines.push(` - ${f}`));
|
|
350
|
-
if (session.filesRead.length > 10) {
|
|
351
|
-
lines.push(` ... and ${session.filesRead.length - 10} more`);
|
|
352
|
-
}
|
|
353
|
-
lines.push('');
|
|
354
|
-
lines.push('FILES WRITTEN/EDITED:');
|
|
355
|
-
session.filesWritten.forEach(f => lines.push(` - ${f}`));
|
|
356
|
-
lines.push('');
|
|
357
|
-
// Tool usage summary
|
|
358
|
-
lines.push('TOOL USAGE:');
|
|
359
|
-
const toolCounts = session.toolCalls.reduce((acc, t) => {
|
|
360
|
-
acc[t.name] = (acc[t.name] || 0) + 1;
|
|
361
|
-
return acc;
|
|
362
|
-
}, {});
|
|
363
|
-
Object.entries(toolCounts).forEach(([name, count]) => {
|
|
364
|
-
lines.push(` - ${name}: ${count}x`);
|
|
365
|
-
});
|
|
366
|
-
lines.push('');
|
|
367
|
-
// Last assistant message (often contains summary/conclusion)
|
|
368
|
-
const lastAssistant = session.assistantMessages[session.assistantMessages.length - 1];
|
|
369
|
-
if (lastAssistant) {
|
|
370
|
-
lines.push('LAST ASSISTANT MESSAGE:');
|
|
371
|
-
lines.push(truncate(lastAssistant, 500));
|
|
372
|
-
}
|
|
373
|
-
return lines.join('\n');
|
|
374
|
-
}
|
|
375
|
-
/**
|
|
376
|
-
* Create fallback extraction when LLM fails
|
|
377
|
-
*/
|
|
378
|
-
function createFallbackExtraction(session) {
|
|
379
|
-
const filesTouched = [...new Set([...session.filesRead, ...session.filesWritten])];
|
|
380
|
-
return {
|
|
381
|
-
task: session.userMessages[0]?.substring(0, 100) || 'Unknown task',
|
|
382
|
-
goal: session.userMessages[0]?.substring(0, 100) || 'Unknown goal',
|
|
383
|
-
reasoning_trace: generateBasicTrace(session),
|
|
384
|
-
files_touched: filesTouched,
|
|
385
|
-
decisions: [],
|
|
386
|
-
constraints: [],
|
|
387
|
-
status: session.filesWritten.length > 0 ? 'complete' : 'partial',
|
|
388
|
-
tags: generateTagsFromFiles(filesTouched)
|
|
389
|
-
};
|
|
390
|
-
}
|
|
391
|
-
/**
|
|
392
|
-
* Generate basic reasoning trace from tool usage
|
|
393
|
-
*/
|
|
394
|
-
function generateBasicTrace(session) {
|
|
395
|
-
const trace = [];
|
|
396
|
-
const toolCounts = session.toolCalls.reduce((acc, t) => {
|
|
397
|
-
acc[t.name] = (acc[t.name] || 0) + 1;
|
|
398
|
-
return acc;
|
|
399
|
-
}, {});
|
|
400
|
-
if (toolCounts['Read'])
|
|
401
|
-
trace.push(`Read ${toolCounts['Read']} files`);
|
|
402
|
-
if (toolCounts['Write'])
|
|
403
|
-
trace.push(`Wrote ${toolCounts['Write']} files`);
|
|
404
|
-
if (toolCounts['Edit'])
|
|
405
|
-
trace.push(`Edited ${toolCounts['Edit']} files`);
|
|
406
|
-
if (toolCounts['Grep'] || toolCounts['Glob'])
|
|
407
|
-
trace.push('Searched codebase');
|
|
408
|
-
if (toolCounts['Bash'])
|
|
409
|
-
trace.push(`Ran ${toolCounts['Bash']} commands`);
|
|
410
|
-
return trace;
|
|
411
|
-
}
|
|
412
|
-
/**
|
|
413
|
-
* Generate tags from file paths
|
|
414
|
-
*/
|
|
415
|
-
function generateTagsFromFiles(files) {
|
|
416
|
-
const tags = new Set();
|
|
417
|
-
for (const file of files) {
|
|
418
|
-
const parts = file.split('/');
|
|
419
|
-
for (const part of parts) {
|
|
420
|
-
if (part && !part.includes('.') && part !== 'src' && part !== 'lib') {
|
|
421
|
-
tags.add(part.toLowerCase());
|
|
422
|
-
}
|
|
423
|
-
}
|
|
424
|
-
// Common patterns
|
|
425
|
-
if (file.includes('auth'))
|
|
426
|
-
tags.add('auth');
|
|
427
|
-
if (file.includes('api'))
|
|
428
|
-
tags.add('api');
|
|
429
|
-
if (file.includes('test'))
|
|
430
|
-
tags.add('test');
|
|
431
|
-
}
|
|
432
|
-
return [...tags].slice(0, 10);
|
|
433
|
-
}
|
|
434
|
-
/**
|
|
435
|
-
* Validate and normalize status
|
|
436
|
-
*/
|
|
437
|
-
function validateStatus(status) {
|
|
438
|
-
const normalized = status?.toLowerCase().trim();
|
|
439
|
-
if (normalized === 'complete' || normalized === 'partial' ||
|
|
440
|
-
normalized === 'question' || normalized === 'abandoned') {
|
|
441
|
-
return normalized;
|
|
442
|
-
}
|
|
443
|
-
return 'partial'; // Default
|
|
444
|
-
}
|
|
445
155
|
// ============================================
|
|
446
156
|
// SESSION SUMMARY FOR CLEAR OPERATION
|
|
447
157
|
// Reference: plan_proxy_local.md Section 2.3, 4.5
|
|
@@ -456,23 +166,30 @@ export function isSummaryAvailable() {
|
|
|
456
166
|
* Generate session summary for CLEAR operation
|
|
457
167
|
* Reference: plan_proxy_local.md Section 2.3, 4.5
|
|
458
168
|
*/
|
|
459
|
-
export async function generateSessionSummary(sessionState, steps
|
|
169
|
+
export async function generateSessionSummary(sessionState, steps, maxTokens = 800 // Default 800, CLEAR mode uses 15000
|
|
170
|
+
) {
|
|
460
171
|
const client = getAnthropicClient();
|
|
172
|
+
// For larger summaries, include more steps
|
|
173
|
+
const stepLimit = maxTokens > 5000 ? 50 : 20;
|
|
174
|
+
const wordLimit = Math.min(Math.floor(maxTokens / 2), 10000); // ~2 tokens per word
|
|
461
175
|
const stepsText = steps
|
|
462
176
|
.filter(s => s.is_validated)
|
|
463
|
-
.slice(-
|
|
177
|
+
.slice(-stepLimit)
|
|
464
178
|
.map(step => {
|
|
465
179
|
let desc = `- ${step.action_type}`;
|
|
466
180
|
if (step.files.length > 0) {
|
|
467
181
|
desc += `: ${step.files.join(', ')}`;
|
|
468
182
|
}
|
|
469
183
|
if (step.command) {
|
|
470
|
-
desc += ` (${step.command.substring(0,
|
|
184
|
+
desc += ` (${step.command.substring(0, 100)})`;
|
|
185
|
+
}
|
|
186
|
+
if (step.reasoning && maxTokens > 5000) {
|
|
187
|
+
desc += `\n Reasoning: ${step.reasoning.substring(0, 200)}`;
|
|
471
188
|
}
|
|
472
189
|
return desc;
|
|
473
190
|
})
|
|
474
191
|
.join('\n');
|
|
475
|
-
const prompt = `Create a concise summary of this coding session for context continuation.
|
|
192
|
+
const prompt = `Create a ${maxTokens > 5000 ? 'comprehensive' : 'concise'} summary of this coding session for context continuation.
|
|
476
193
|
|
|
477
194
|
ORIGINAL GOAL: ${sessionState.original_goal || 'Not specified'}
|
|
478
195
|
|
|
@@ -483,18 +200,19 @@ CONSTRAINTS: ${sessionState.constraints.join(', ') || 'None'}
|
|
|
483
200
|
ACTIONS TAKEN:
|
|
484
201
|
${stepsText || 'No actions recorded'}
|
|
485
202
|
|
|
486
|
-
Create a summary with these sections (keep total under
|
|
487
|
-
1. ORIGINAL GOAL: (1
|
|
488
|
-
2. PROGRESS: (2-3 bullet points of what was accomplished)
|
|
489
|
-
3. KEY DECISIONS: (
|
|
490
|
-
4. FILES MODIFIED: (list of files)
|
|
491
|
-
5. CURRENT STATE: (where the work left off)
|
|
492
|
-
6. NEXT STEPS: (recommended next actions)
|
|
203
|
+
Create a summary with these sections (keep total under ${wordLimit} words):
|
|
204
|
+
1. ORIGINAL GOAL: (1-2 sentences)
|
|
205
|
+
2. PROGRESS: (${maxTokens > 5000 ? '5-10' : '2-3'} bullet points of what was accomplished)
|
|
206
|
+
3. KEY DECISIONS: (important architectural/design choices made, with reasoning)
|
|
207
|
+
4. FILES MODIFIED: (list of files with brief description of changes)
|
|
208
|
+
5. CURRENT STATE: (detailed status of where the work left off)
|
|
209
|
+
6. NEXT STEPS: (recommended next actions to continue)
|
|
210
|
+
${maxTokens > 5000 ? '7. IMPORTANT CONTEXT: (any critical information that must not be lost)' : ''}
|
|
493
211
|
|
|
494
212
|
Format as plain text, not JSON.`;
|
|
495
213
|
const response = await client.messages.create({
|
|
496
214
|
model: 'claude-haiku-4-5-20251001',
|
|
497
|
-
max_tokens:
|
|
215
|
+
max_tokens: maxTokens,
|
|
498
216
|
messages: [{ role: 'user', content: prompt }],
|
|
499
217
|
});
|
|
500
218
|
const content = response.content?.[0];
|
|
@@ -528,89 +246,223 @@ export function isTaskAnalysisAvailable() {
|
|
|
528
246
|
return !!(process.env.ANTHROPIC_API_KEY || process.env.GROV_API_KEY);
|
|
529
247
|
}
|
|
530
248
|
/**
|
|
531
|
-
*
|
|
532
|
-
* Called after each main model response to orchestrate sessions
|
|
533
|
-
* Also compresses reasoning for steps if assistantResponse > 1000 chars
|
|
249
|
+
* Format conversation messages for prompt
|
|
534
250
|
*/
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
251
|
+
function formatConversationHistory(messages) {
|
|
252
|
+
if (!messages || messages.length === 0)
|
|
253
|
+
return 'No conversation history available.';
|
|
254
|
+
return messages.slice(-10).map(m => {
|
|
255
|
+
const role = m.role === 'user' ? 'User' : 'Assistant';
|
|
256
|
+
const content = m.content.substring(0, 800);
|
|
257
|
+
const truncated = m.content.length > 800 ? '...' : '';
|
|
258
|
+
return `${role}: ${content}${truncated}`;
|
|
259
|
+
}).join('\n\n');
|
|
260
|
+
}
|
|
261
|
+
/**
|
|
262
|
+
* Format tool calls for prompt
|
|
263
|
+
*/
|
|
264
|
+
function formatToolCalls(steps) {
|
|
265
|
+
if (!steps || steps.length === 0)
|
|
266
|
+
return 'No tools used yet.';
|
|
267
|
+
return steps.slice(0, 10).map(s => {
|
|
538
268
|
let desc = `- ${s.action_type}`;
|
|
539
269
|
if (s.files.length > 0) {
|
|
540
270
|
desc += `: ${s.files.slice(0, 3).join(', ')}`;
|
|
541
271
|
}
|
|
272
|
+
if (s.command) {
|
|
273
|
+
desc += ` (${s.command.substring(0, 50)})`;
|
|
274
|
+
}
|
|
542
275
|
return desc;
|
|
543
|
-
}).join('\n')
|
|
276
|
+
}).join('\n');
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Analyze task context to determine task status
|
|
280
|
+
* Called after each main model response to orchestrate sessions
|
|
281
|
+
* Also compresses reasoning for steps if assistantResponse > 1000 chars
|
|
282
|
+
*/
|
|
283
|
+
export async function analyzeTaskContext(currentSession, latestUserMessage, recentSteps, assistantResponse, conversationHistory) {
|
|
284
|
+
const client = getAnthropicClient();
|
|
544
285
|
// Check if we need to compress reasoning
|
|
545
286
|
const needsCompression = assistantResponse.length > 1000;
|
|
546
287
|
const compressionInstruction = needsCompression
|
|
547
|
-
?
|
|
288
|
+
? `,
|
|
289
|
+
"step_reasoning": "Extract CONCLUSIONS only: specific file paths, function names, patterns discovered, and WHY decisions were made. Max 800 chars. Do not write process descriptions."`
|
|
548
290
|
: '';
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
const currentGoalKeywords = currentSession?.original_goal
|
|
554
|
-
? currentSession.original_goal.toLowerCase().match(/\b\w{4,}\b/g)?.slice(0, 10).join(', ') || ''
|
|
555
|
-
: '';
|
|
556
|
-
const prompt = `You are a task orchestrator. Your PRIMARY job is to detect when the user starts a NEW, DIFFERENT task.
|
|
291
|
+
// Format conversation history
|
|
292
|
+
const historyText = formatConversationHistory(conversationHistory || []);
|
|
293
|
+
const toolCallsText = formatToolCalls(recentSteps);
|
|
294
|
+
const prompt = `You are a task status analyzer. Your job is to examine a conversation between a user and an AI assistant, then determine whether the current task is complete, still in progress, or if a new task has started.
|
|
557
295
|
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
- Goal Keywords: [${currentGoalKeywords}]
|
|
296
|
+
<input>
|
|
297
|
+
original_goal: ${currentSession?.original_goal || 'No active task - this may be the first message'}
|
|
561
298
|
|
|
562
|
-
|
|
563
|
-
|
|
299
|
+
messages:
|
|
300
|
+
${historyText}
|
|
564
301
|
|
|
565
|
-
|
|
566
|
-
${
|
|
302
|
+
current_assistant_response:
|
|
303
|
+
${assistantResponse ? assistantResponse.substring(0, 2000) : 'No response yet - assistant is still thinking.'}
|
|
567
304
|
|
|
568
|
-
|
|
569
|
-
|
|
305
|
+
tool_calls:
|
|
306
|
+
${toolCallsText}
|
|
307
|
+
</input>
|
|
570
308
|
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
309
|
+
<output>
|
|
310
|
+
Return a JSON object with these fields:
|
|
311
|
+
- task_type: one of "information", "planning", or "implementation"
|
|
312
|
+
- action: one of "continue", "task_complete", "new_task", or "subtask_complete"
|
|
313
|
+
- task_id: existing session_id "${currentSession?.session_id || 'NEW'}" or "NEW" for new task
|
|
314
|
+
- current_goal: the goal based on the latest user message
|
|
315
|
+
- reasoning: brief explanation of why you made this decision${compressionInstruction}
|
|
316
|
+
</output>
|
|
574
317
|
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
2. Or is it about something COMPLETELY DIFFERENT?
|
|
318
|
+
<step_1_identify_task_type>
|
|
319
|
+
First, analyze the original_goal to understand what kind of task this is. Do not rely on specific keywords. Instead, understand the user's intent from the full context of their message.
|
|
578
320
|
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
- Goal: "analyze security layer" → User: "create hello.ts script" → NEW_TASK
|
|
582
|
-
- Goal: "refactor user service" → User: "add dark mode to UI" → NEW_TASK
|
|
583
|
-
- Goal: "fix login bug" → User: "write unit tests for payments" → NEW_TASK
|
|
321
|
+
TYPE A - Information Request
|
|
322
|
+
The user wants to learn or understand something. They are seeking knowledge, not asking for any changes or decisions to be made. The answer itself is what they need.
|
|
584
323
|
|
|
585
|
-
|
|
586
|
-
- Goal: "implement authentication" → User: "now add the logout button" → CONTINUE
|
|
587
|
-
- Goal: "fix login bug" → User: "also check the session timeout" → CONTINUE
|
|
588
|
-
- Goal: "analyze security" → User: "what about rate limiting?" → CONTINUE
|
|
324
|
+
Think about whether the user is curious about how something works, wants an explanation of a concept, or is asking for clarification about existing behavior.
|
|
589
325
|
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
326
|
+
Examples of information requests in different phrasings:
|
|
327
|
+
- "How does the authentication system work?"
|
|
328
|
+
- "Explica-mi cum functioneaza cache-ul"
|
|
329
|
+
- "What is the difference between Redis and Memcached?"
|
|
330
|
+
- "Can you walk me through the payment flow?"
|
|
331
|
+
- "I don't understand why this function returns null"
|
|
332
|
+
- "Ce face acest cod?"
|
|
333
|
+
|
|
334
|
+
TYPE B - Planning or Decision Request
|
|
335
|
+
The user wants to figure out the best approach before taking action. They need to make a decision or create a plan. The conversation may involve exploring options, discussing tradeoffs, or clarifying requirements.
|
|
336
|
+
|
|
337
|
+
Think about whether the user is trying to decide between approaches, wants recommendations for how to build something, or is working toward a plan they will implement later.
|
|
338
|
+
|
|
339
|
+
Examples of planning requests in different phrasings:
|
|
340
|
+
- "How should we implement user authentication?"
|
|
341
|
+
- "What's the best way to handle caching for this API?"
|
|
342
|
+
- "Cum ar trebui sa structuram baza de date?"
|
|
343
|
+
- "I'm thinking about using Redis vs Memcached, what do you recommend?"
|
|
344
|
+
- "Let's figure out the architecture before we start coding"
|
|
345
|
+
- "We need to decide on the approach for handling errors"
|
|
346
|
+
|
|
347
|
+
TYPE C - Implementation Request
|
|
348
|
+
The user wants actual changes made. They want code written, files edited, commands run, or something built. The task involves using tools to modify the codebase.
|
|
349
|
+
|
|
350
|
+
Think about whether the user is asking for something to be created, fixed, changed, or built.
|
|
351
|
+
|
|
352
|
+
Examples of implementation requests in different phrasings:
|
|
353
|
+
- "Fix the bug in the login function"
|
|
354
|
+
- "Add caching to the API endpoints"
|
|
355
|
+
- "Fa un refactor la modulul de plati"
|
|
356
|
+
- "Create a new component for the dashboard"
|
|
357
|
+
- "Update the tests to cover edge cases"
|
|
358
|
+
- "Remove the deprecated authentication code"
|
|
359
|
+
</step_1_identify_task_type>
|
|
360
|
+
|
|
361
|
+
<step_2_determine_status>
|
|
362
|
+
Now that you know the task type, determine whether it is complete, continuing, or if a new task has begun.
|
|
363
|
+
|
|
364
|
+
For TYPE A - Information Request:
|
|
365
|
+
The task is complete when the assistant has provided a clear and complete answer to the user's question. Check the current_assistant_response field - if it contains a substantive answer to the question, the task is complete.
|
|
366
|
+
|
|
367
|
+
Each question the user asks is treated as its own separate task. If the user asks a follow-up question, even on the same topic, that is a new task.
|
|
368
|
+
|
|
369
|
+
The reason for this is that each answer is valuable on its own and should be saved independently. We do not want to wait for a multi-turn conversation to end before saving useful information.
|
|
370
|
+
|
|
371
|
+
When analyzing: Look at current_assistant_response. If it contains an explanation, answer, or clarification that addresses the user's question, return task_complete.
|
|
372
|
+
|
|
373
|
+
Example situation: User asks "How does auth work?", assistant explains it fully.
|
|
374
|
+
Decision: task_complete
|
|
375
|
+
Reason: The information request was answered completely.
|
|
376
|
+
|
|
377
|
+
Example situation: User asks "How does auth work?", assistant explains, then user asks "What about JWT specifically?"
|
|
378
|
+
Decision for second message: new_task
|
|
379
|
+
Reason: This is a new question requiring a new answer.
|
|
380
|
+
|
|
381
|
+
For TYPE B - Planning or Decision Request:
|
|
382
|
+
The task continues while the user and assistant are still exploring options, discussing tradeoffs, or clarifying requirements. The task is complete only when a final decision or plan has been reached and the user has confirmed it.
|
|
383
|
+
|
|
384
|
+
Look for signals that indicate the user has made up their mind. These signals come from the overall tone and direction of the conversation, not from specific keywords. The user might express agreement, ask to proceed with implementation, or summarize the chosen approach.
|
|
598
385
|
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
386
|
+
When analyzing, ask yourself: Has the user confirmed a final direction? Are they still weighing options? Have they asked to move forward with a specific approach?
|
|
387
|
+
|
|
388
|
+
Example situation: User asks "Should we use JWT or sessions?", assistant explains both, user says "I'm still not sure about refresh tokens"
|
|
389
|
+
Decision: continue
|
|
390
|
+
Reason: The user is still clarifying and has not made a final decision.
|
|
391
|
+
|
|
392
|
+
Example situation: User and assistant discussed auth options, user says "OK, JWT with refresh tokens makes sense, let's go with that"
|
|
393
|
+
Decision: task_complete
|
|
394
|
+
Reason: The user confirmed the decision. Planning is complete.
|
|
395
|
+
|
|
396
|
+
Example situation: User says "That sounds good, now implement it"
|
|
397
|
+
Decision: task_complete for planning, and a new implementation task will begin
|
|
398
|
+
Reason: Planning concluded with a decision. User is now requesting implementation.
|
|
399
|
+
|
|
400
|
+
For TYPE C - Implementation Request:
|
|
401
|
+
The task continues while the assistant is actively making changes using tools like file edits, bash commands, or file writes. The task is complete when the changes are done and verified.
|
|
402
|
+
|
|
403
|
+
Look for signals that the work is finished in current_assistant_response: successful test runs, the assistant stating the work is done, or a commit being made. If tests are failing or the assistant indicates more work is needed, the task continues.
|
|
404
|
+
|
|
405
|
+
When analyzing: Check current_assistant_response for completion signals. Is the assistant still making changes? Have the changes been verified? Did the assistant confirm completion?
|
|
406
|
+
|
|
407
|
+
Example situation: Assistant edited three files and is now running tests.
|
|
408
|
+
Decision: continue
|
|
409
|
+
Reason: Implementation is in progress, verification not yet complete.
|
|
410
|
+
|
|
411
|
+
Example situation: Assistant ran tests, they passed, assistant says "Done, the auth bug is fixed"
|
|
412
|
+
Decision: task_complete
|
|
413
|
+
Reason: Changes are complete and verified.
|
|
414
|
+
|
|
415
|
+
Example situation: Tests failed after the changes.
|
|
416
|
+
Decision: continue
|
|
417
|
+
Reason: The implementation needs more work to pass verification.
|
|
418
|
+
</step_2_determine_status>
|
|
419
|
+
|
|
420
|
+
<step_3_detect_new_task>
|
|
421
|
+
Sometimes the user changes direction entirely. A new task has started when:
|
|
422
|
+
|
|
423
|
+
The user asks about something completely unrelated to the original goal.
|
|
424
|
+
The conversation topic shifts to a different part of the codebase or a different feature.
|
|
425
|
+
The previous task was completed and the user is now requesting something new.
|
|
426
|
+
|
|
427
|
+
To detect this, compare the current user message to the original_goal. If they are about the same thing, the task is either continuing or complete. If they are about different things, a new task has started.
|
|
428
|
+
|
|
429
|
+
Be careful not to confuse follow-up questions with new tasks. A follow-up question on the same topic in an information request is a new task because each answer stands alone. But a follow-up clarification during planning is part of the same planning task.
|
|
430
|
+
|
|
431
|
+
Example situation: Original goal was "fix the auth bug", user now asks "also, can you update the README?"
|
|
432
|
+
Decision: new_task
|
|
433
|
+
Reason: Updating README is unrelated to fixing the auth bug.
|
|
434
|
+
|
|
435
|
+
Example situation: Original goal was "implement caching", user asks "should we use Redis or Memcached for this?"
|
|
436
|
+
Decision: continue (this is planning within the implementation task)
|
|
437
|
+
Reason: The question is about how to implement the original request.
|
|
438
|
+
|
|
439
|
+
Example situation: Original goal was "explain how auth works", user asks "and how does the session storage work?"
|
|
440
|
+
Decision: new_task
|
|
441
|
+
Reason: This is a new information request, separate from the first.
|
|
442
|
+
</step_3_detect_new_task>
|
|
443
|
+
|
|
444
|
+
<important_notes>
|
|
445
|
+
Do not rely on specific keywords in any language. The same intent can be expressed many different ways across languages and phrasings. Always understand the intent from the full context.
|
|
446
|
+
|
|
447
|
+
The conversation history and tool usage are your most important signals. What has the assistant been doing? What is the user trying to accomplish? Has that goal been achieved?
|
|
448
|
+
|
|
449
|
+
CRITICAL - Q&A DURING PLANNING:
|
|
450
|
+
If the current task_type is "planning" and the user asks a clarifying question (e.g., "how does X work?", "what about Y?", "clarify Z"), this is NOT a new information task. It is a CONTINUATION of the planning task. The user is gathering information to make a planning decision, not requesting standalone information.
|
|
451
|
+
- If original task_type was planning → keep it as planning, action=continue
|
|
452
|
+
- Only mark task_complete for planning when user explicitly confirms a final decision or asks to proceed with implementation
|
|
453
|
+
- Asking to "write to file" or "document the plan" is NOT task_complete - it's still part of planning documentation
|
|
454
|
+
|
|
455
|
+
When in doubt between continue and task_complete, ask yourself: Would it be valuable to save what we have so far? For information requests, yes, save each answer. For planning, only save when a decision is made. For implementation, only save when work is verified complete.
|
|
605
456
|
|
|
606
457
|
RESPONSE RULES:
|
|
607
|
-
-
|
|
608
|
-
-
|
|
609
|
-
-
|
|
458
|
+
- Return valid JSON only
|
|
459
|
+
- English only in the response (translate reasoning if input is in other language)
|
|
460
|
+
- No markdown formatting, no emojis
|
|
461
|
+
</important_notes>`;
|
|
610
462
|
debugLLM('analyzeTaskContext', `Calling Haiku for task analysis (needsCompression=${needsCompression})`);
|
|
611
463
|
const response = await client.messages.create({
|
|
612
464
|
model: 'claude-haiku-4-5-20251001',
|
|
613
|
-
max_tokens: needsCompression ?
|
|
465
|
+
max_tokens: needsCompression ? 800 : 400,
|
|
614
466
|
messages: [{ role: 'user', content: prompt }],
|
|
615
467
|
});
|
|
616
468
|
const text = response.content[0].type === 'text' ? response.content[0].text : '';
|
|
@@ -621,17 +473,22 @@ RESPONSE RULES:
|
|
|
621
473
|
throw new Error('No JSON found in response');
|
|
622
474
|
}
|
|
623
475
|
const analysis = JSON.parse(jsonMatch[0]);
|
|
476
|
+
// Ensure task_type has a default value
|
|
477
|
+
if (!analysis.task_type) {
|
|
478
|
+
analysis.task_type = 'implementation';
|
|
479
|
+
}
|
|
624
480
|
// If compression was not needed and the assistant response is non-empty, use it directly as step_reasoning (truncated to 1000 characters)
|
|
625
481
|
if (!needsCompression && assistantResponse.length > 0) {
|
|
626
482
|
analysis.step_reasoning = assistantResponse.substring(0, 1000);
|
|
627
483
|
}
|
|
628
|
-
debugLLM('analyzeTaskContext', `Result:
|
|
484
|
+
debugLLM('analyzeTaskContext', `Result: task_type=${analysis.task_type}, action=${analysis.action}, goal="${analysis.current_goal?.substring(0, 50) || 'N/A'}" reasoning="${analysis.reasoning?.substring(0, 150) || 'none'}"`);
|
|
629
485
|
return analysis;
|
|
630
486
|
}
|
|
631
487
|
catch (parseError) {
|
|
632
488
|
debugLLM('analyzeTaskContext', `Parse error: ${String(parseError)}, using fallback`);
|
|
633
489
|
// Fallback: continue the current session if one exists, otherwise start a new task
|
|
634
490
|
return {
|
|
491
|
+
task_type: 'implementation',
|
|
635
492
|
action: currentSession ? 'continue' : 'new_task',
|
|
636
493
|
task_id: currentSession?.session_id || 'NEW',
|
|
637
494
|
current_goal: latestUserMessage.substring(0, 200),
|
|
@@ -649,76 +506,151 @@ export function isReasoningExtractionAvailable() {
|
|
|
649
506
|
/**
|
|
650
507
|
* Extract reasoning trace and decisions from steps
|
|
651
508
|
* Called at task_complete to populate team memory with rich context
|
|
509
|
+
*
|
|
510
|
+
* @param formattedSteps - Pre-formatted XML string with grouped steps and actions
|
|
511
|
+
* @param originalGoal - The original task goal
|
|
652
512
|
*/
|
|
653
|
-
export async function extractReasoningAndDecisions(
|
|
513
|
+
export async function extractReasoningAndDecisions(formattedSteps, originalGoal) {
|
|
654
514
|
const client = getAnthropicClient();
|
|
655
|
-
|
|
656
|
-
const combinedReasoning = stepsReasoning
|
|
657
|
-
.filter(r => r && r.length > 10)
|
|
658
|
-
.join('\n\n---\n\n')
|
|
659
|
-
.substring(0, 8000);
|
|
660
|
-
if (combinedReasoning.length < 50) {
|
|
515
|
+
if (formattedSteps.length < 50) {
|
|
661
516
|
return { reasoning_trace: [], decisions: [] };
|
|
662
517
|
}
|
|
663
|
-
const prompt =
|
|
518
|
+
const prompt = `<role>
|
|
519
|
+
You are a Knowledge Engineer specialized in extracting reusable team knowledge from coding sessions.
|
|
664
520
|
|
|
665
|
-
|
|
666
|
-
|
|
521
|
+
Your output will be stored permanently in team memory and used to help developers in future sessions. Poor extractions waste storage and confuse future assistants. Excellent extractions save hours of repeated investigation.
|
|
522
|
+
</role>
|
|
667
523
|
|
|
668
|
-
|
|
669
|
-
${
|
|
524
|
+
<context>
|
|
525
|
+
PROJECT GOAL: ${originalGoal || 'Not specified'}
|
|
670
526
|
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
527
|
+
This extraction serves two purposes:
|
|
528
|
+
1. Help future developers understand WHAT was discovered in this codebase
|
|
529
|
+
2. Help future developers understand WHY certain decisions were made
|
|
530
|
+
</context>
|
|
531
|
+
|
|
532
|
+
<session_data>
|
|
533
|
+
${formattedSteps.substring(0, 8000)}
|
|
534
|
+
</session_data>
|
|
535
|
+
|
|
536
|
+
<instructions>
|
|
537
|
+
|
|
538
|
+
We need TWO types of knowledge extracted:
|
|
539
|
+
|
|
540
|
+
TYPE A: CONCLUSIONS (Factual findings from the session)
|
|
541
|
+
|
|
542
|
+
What this means:
|
|
543
|
+
These are FACTS discovered during the session. Things that were explicitly found, read, or confirmed in the code. A new developer reading these should immediately know WHERE to find things and WHAT values/patterns exist.
|
|
544
|
+
|
|
545
|
+
Must include:
|
|
546
|
+
- Specific file paths (not just "auth files" but "src/lib/jwt.ts")
|
|
547
|
+
- Specific values (not just "short expiry" but "1 hour access, 7 day refresh")
|
|
548
|
+
- Specific patterns (not just "uses JWT" but "JWT with sub, email, type, teams payload")
|
|
549
|
+
- Specific functions/classes (not just "middleware" but "requireAuth, optionalAuth preHandlers")
|
|
550
|
+
|
|
551
|
+
Format: Start with "CONCLUSION: " prefix
|
|
552
|
+
|
|
553
|
+
Good examples:
|
|
554
|
+
- "CONCLUSION: JWT tokens stored in ~/.grov/credentials.json with 1hr access/7d refresh expiry"
|
|
555
|
+
- "CONCLUSION: Auth middleware in src/routes/auth.ts exports requireAuth and optionalAuth preHandlers"
|
|
556
|
+
- "CONCLUSION: Device flow polling interval is 5 seconds, endpoint /auth/device/poll"
|
|
557
|
+
|
|
558
|
+
Bad examples:
|
|
559
|
+
- "CONCLUSION: Found authentication files" (too vague, no paths)
|
|
560
|
+
- "CONCLUSION: JWT is used for auth" (too generic, no specifics)
|
|
561
|
+
- "CONCLUSION: Explored the codebase" (process description, not finding)
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
TYPE B: INSIGHTS (Your analysis and inferences)
|
|
565
|
+
|
|
566
|
+
What this means:
|
|
567
|
+
These are YOUR observations that go BEYOND what was explicitly stated. Connections between different parts, patterns you identified, implications for future work. This is where YOU add value beyond just summarizing.
|
|
568
|
+
|
|
569
|
+
Types of insights we value:
|
|
570
|
+
|
|
571
|
+
1. CONNECTIONS - How do different files/modules relate?
|
|
572
|
+
Example: "jwt.ts handles token creation, credentials.ts handles storage - separation of crypto operations from I/O"
|
|
573
|
+
|
|
574
|
+
2. INFERENCES - What decisions were made implicitly?
|
|
575
|
+
Example: "File storage in ~/.grov/ instead of env vars - implies single-user CLI design, not multi-tenant"
|
|
576
|
+
|
|
577
|
+
3. PATTERNS - What architectural patterns emerge?
|
|
578
|
+
Example: "All config files use 0600 permissions - security-conscious design for sensitive data"
|
|
579
|
+
|
|
580
|
+
4. IMPLICATIONS - What does this mean for future development?
|
|
581
|
+
Example: "1hr token expiry requires background refresh mechanism for long operations to avoid mid-task auth failures"
|
|
582
|
+
|
|
583
|
+
Format: Start with "INSIGHT: " prefix
|
|
584
|
+
|
|
585
|
+
Good examples:
|
|
586
|
+
- "INSIGHT: Dual-file pattern (jwt.ts + credentials.ts) separates crypto from I/O, reducing attack surface"
|
|
587
|
+
- "INSIGHT: Device Authorization Flow chosen over password flow - enables OAuth providers without storing secrets in CLI"
|
|
588
|
+
- "INSIGHT: Teams array cached in JWT payload - avoids DB query per request but requires token refresh on team changes"
|
|
589
|
+
|
|
590
|
+
Bad examples:
|
|
591
|
+
- "INSIGHT: The code is well organized" (subjective, not actionable)
|
|
592
|
+
- "INSIGHT: Authentication is important" (obvious, no value)
|
|
593
|
+
- "INSIGHT: Files were read" (process description, not insight)
|
|
594
|
+
|
|
595
|
+
</instructions>
|
|
596
|
+
|
|
597
|
+
<output_format>
|
|
598
|
+
Return a JSON object with this structure:
|
|
674
599
|
|
|
675
|
-
GOOD examples (specific, reusable knowledge):
|
|
676
|
-
- "Utility functions belong in frontend/lib/utils.ts - existing utils: cn(), formatDate(), debounce()"
|
|
677
|
-
- "Auth tokens stored in localStorage with 15min expiry for long form sessions"
|
|
678
|
-
- "API routes follow REST pattern in /api/v1/ with Zod validation"
|
|
679
|
-
- "Database migrations go in prisma/migrations/ using prisma migrate"
|
|
680
|
-
|
|
681
|
-
BAD examples (process descriptions - DO NOT EXTRACT THESE):
|
|
682
|
-
- "Explored the codebase structure"
|
|
683
|
-
- "Analyzed several approaches"
|
|
684
|
-
- "Searched for utility directories"
|
|
685
|
-
- "Looked at the file organization"
|
|
686
|
-
|
|
687
|
-
1. REASONING TRACE (conclusions and recommendations):
|
|
688
|
-
- WHAT was discovered or decided (specific file paths, patterns)
|
|
689
|
-
- WHY this is the right approach
|
|
690
|
-
- WHERE this applies in the codebase
|
|
691
|
-
- Max 10 entries, prioritize specific file/function recommendations
|
|
692
|
-
|
|
693
|
-
2. DECISIONS (architectural choices):
|
|
694
|
-
- Only significant choices that affect future work
|
|
695
|
-
- What was chosen and why
|
|
696
|
-
- Max 5 decisions
|
|
697
|
-
|
|
698
|
-
Return JSON:
|
|
699
600
|
{
|
|
700
|
-
"
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
601
|
+
"knowledge_pairs": [
|
|
602
|
+
{
|
|
603
|
+
"conclusion": "CONCLUSION: [specific factual finding with file paths and values]",
|
|
604
|
+
"insight": "INSIGHT: [inference or implication RELATED to this conclusion]"
|
|
605
|
+
},
|
|
606
|
+
{
|
|
607
|
+
"conclusion": "CONCLUSION: [another specific finding]",
|
|
608
|
+
"insight": "INSIGHT: [what this means for future development]"
|
|
609
|
+
}
|
|
704
610
|
],
|
|
705
611
|
"decisions": [
|
|
706
|
-
{
|
|
707
|
-
|
|
612
|
+
{
|
|
613
|
+
"choice": "[What was chosen - be specific]",
|
|
614
|
+
"reason": "[Why - include whether this is factual or inferred]"
|
|
615
|
+
}
|
|
708
616
|
]
|
|
709
617
|
}
|
|
710
618
|
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
-
|
|
716
|
-
-
|
|
717
|
-
|
|
619
|
+
IMPORTANT: Generate knowledge as PAIRS where each INSIGHT is directly related to its CONCLUSION.
|
|
620
|
+
|
|
621
|
+
Example pair:
|
|
622
|
+
{
|
|
623
|
+
"conclusion": "CONCLUSION: MemoryCache uses lazy expiration - entries checked/deleted on get(), not via timers",
|
|
624
|
+
"insight": "INSIGHT: Lazy expiration avoids timer overhead that would accumulate with large caches - trades CPU on read for memory efficiency"
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
Rules:
|
|
628
|
+
1. Each pair MUST have a conclusion AND a related insight
|
|
629
|
+
2. The insight MUST add value beyond the conclusion (inference, implication, pattern)
|
|
630
|
+
3. Max 5 pairs (10 entries total) - prioritize most valuable
|
|
631
|
+
4. Max 5 decisions - only significant architectural choices
|
|
632
|
+
5. If you cannot find a meaningful insight for a conclusion, still include the conclusion with insight: null
|
|
633
|
+
6. NEVER include process descriptions ("explored", "searched", "looked at")
|
|
634
|
+
7. English only, no emojis
|
|
635
|
+
8. Use prefixes "CONCLUSION: " and "INSIGHT: " in the strings
|
|
636
|
+
</output_format>
|
|
637
|
+
|
|
638
|
+
<validation>
|
|
639
|
+
Before responding, verify:
|
|
640
|
+
- Does each CONCLUSION contain a specific file path or value?
|
|
641
|
+
- Is each INSIGHT directly related to its paired CONCLUSION?
|
|
642
|
+
- Does each INSIGHT add something NOT explicitly in the input?
|
|
643
|
+
- Would a new developer find the pairs useful without seeing the original session?
|
|
644
|
+
- Did I avoid process descriptions?
|
|
645
|
+
- Are the decisions about significant architectural choices?
|
|
646
|
+
</validation>
|
|
647
|
+
|
|
648
|
+
Return ONLY valid JSON, no markdown code blocks, no explanation.`;
|
|
649
|
+
debugLLM('extractReasoningAndDecisions', `Analyzing formatted steps, ${formattedSteps.length} chars`);
|
|
718
650
|
try {
|
|
719
651
|
const response = await client.messages.create({
|
|
720
652
|
model: 'claude-haiku-4-5-20251001',
|
|
721
|
-
max_tokens:
|
|
653
|
+
max_tokens: 1500,
|
|
722
654
|
messages: [{ role: 'user', content: prompt }],
|
|
723
655
|
});
|
|
724
656
|
const text = response.content[0].type === 'text' ? response.content[0].text : '';
|
|
@@ -727,10 +659,60 @@ RESPONSE RULES:
|
|
|
727
659
|
debugLLM('extractReasoningAndDecisions', 'No JSON found in response');
|
|
728
660
|
return { reasoning_trace: [], decisions: [] };
|
|
729
661
|
}
|
|
730
|
-
|
|
731
|
-
|
|
662
|
+
// Try to parse JSON, with repair attempts for common Haiku formatting issues
|
|
663
|
+
let result;
|
|
664
|
+
try {
|
|
665
|
+
result = JSON.parse(jsonMatch[0]);
|
|
666
|
+
}
|
|
667
|
+
catch (parseError) {
|
|
668
|
+
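// Common fixes: strip trailing commas and escape raw newlines, carriage returns, and tabs. NOTE: the escapes are applied to the whole matched text, not only inside string literals, so JSON containing newlines or tabs between tokens will still fail to parse here and fall through to the last-resort extraction.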
|
|
669
|
+
let repaired = jsonMatch[0]
|
|
670
|
+
.replace(/,\s*}/g, '}') // trailing comma before }
|
|
671
|
+
.replace(/,\s*]/g, ']') // trailing comma before ]
|
|
672
|
+
.replace(/\n/g, '\\n') // unescaped newlines
|
|
673
|
+
.replace(/\r/g, '\\r') // unescaped carriage returns
|
|
674
|
+
.replace(/\t/g, '\\t'); // unescaped tabs
|
|
675
|
+
try {
|
|
676
|
+
result = JSON.parse(repaired);
|
|
677
|
+
}
|
|
678
|
+
catch {
|
|
679
|
+
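// Last resort: try to extract just the knowledge_pairs array (non-greedy match ends at the first ']', so a ']' inside a string truncates it); decisions are discarded on this path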
|
|
680
|
+
const pairsMatch = jsonMatch[0].match(/"knowledge_pairs"\s*:\s*\[([\s\S]*?)\]/);
|
|
681
|
+
if (pairsMatch) {
|
|
682
|
+
try {
|
|
683
|
+
const pairs = JSON.parse(`[${pairsMatch[1].replace(/,\s*$/, '')}]`);
|
|
684
|
+
result = { knowledge_pairs: pairs, decisions: [] };
|
|
685
|
+
}
|
|
686
|
+
catch {
|
|
687
|
+
throw parseError; // Re-throw original error
|
|
688
|
+
}
|
|
689
|
+
}
|
|
690
|
+
else {
|
|
691
|
+
throw parseError;
|
|
692
|
+
}
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
// Flatten knowledge_pairs into reasoning_trace in pair order (conclusion, then insight); a null or missing member is skipped, so entries do not always alternate strictly
|
|
696
|
+
let reasoningTrace = [];
|
|
697
|
+
if (result.knowledge_pairs && result.knowledge_pairs.length > 0) {
|
|
698
|
+
// New format: flatten pairs into interleaved array
|
|
699
|
+
for (const pair of result.knowledge_pairs) {
|
|
700
|
+
if (pair.conclusion) {
|
|
701
|
+
reasoningTrace.push(pair.conclusion);
|
|
702
|
+
}
|
|
703
|
+
if (pair.insight) {
|
|
704
|
+
reasoningTrace.push(pair.insight);
|
|
705
|
+
}
|
|
706
|
+
}
|
|
707
|
+
debugLLM('extractReasoningAndDecisions', `Extracted ${result.knowledge_pairs.length} pairs (${reasoningTrace.length} entries), ${result.decisions?.length || 0} decisions`);
|
|
708
|
+
}
|
|
709
|
+
else if (result.reasoning_trace) {
|
|
710
|
+
// Backwards compatibility: old format with flat array
|
|
711
|
+
reasoningTrace = result.reasoning_trace;
|
|
712
|
+
debugLLM('extractReasoningAndDecisions', `Extracted ${reasoningTrace.length} traces (old format), ${result.decisions?.length || 0} decisions`);
|
|
713
|
+
}
|
|
732
714
|
return {
|
|
733
|
-
reasoning_trace:
|
|
715
|
+
reasoning_trace: reasoningTrace,
|
|
734
716
|
decisions: result.decisions || [],
|
|
735
717
|
};
|
|
736
718
|
}
|