grov 0.5.11 → 0.6.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/agents/registry.d.ts +17 -0
- package/dist/cli/agents/registry.js +132 -0
- package/dist/cli/commands/agents.d.ts +1 -0
- package/dist/cli/commands/agents.js +48 -0
- package/dist/cli/commands/disable.d.ts +1 -0
- package/dist/cli/commands/disable.js +179 -0
- package/dist/cli/commands/doctor.d.ts +1 -0
- package/dist/cli/commands/doctor.js +157 -0
- package/dist/{commands → cli/commands}/drift-test.js +39 -26
- package/dist/cli/commands/init.d.ts +1 -0
- package/dist/cli/commands/init.js +90 -0
- package/dist/{commands → cli/commands}/login.js +19 -18
- package/dist/{commands → cli/commands}/logout.js +1 -1
- package/dist/{commands → cli/commands}/proxy-status.js +1 -1
- package/dist/cli/commands/setup.d.ts +6 -0
- package/dist/cli/commands/setup.js +309 -0
- package/dist/{commands → cli/commands}/status.js +1 -1
- package/dist/{commands → cli/commands}/sync.d.ts +1 -0
- package/dist/{commands → cli/commands}/sync.js +59 -4
- package/dist/{commands → cli/commands}/uninstall.js +2 -2
- package/dist/cli/index.js +270 -0
- package/dist/{lib → core/cloud}/cloud-sync.d.ts +3 -3
- package/dist/{lib → core/cloud}/cloud-sync.js +10 -10
- package/dist/{lib → core/extraction}/correction-builder-proxy.d.ts +1 -1
- package/dist/{lib → core/extraction}/correction-builder-proxy.js +0 -4
- package/dist/{lib → core/extraction}/drift-checker-proxy.d.ts +13 -9
- package/dist/core/extraction/drift-checker-proxy.js +510 -0
- package/dist/{lib → core/extraction}/llm-extractor.d.ts +8 -38
- package/dist/{lib → core/extraction}/llm-extractor.js +132 -220
- package/dist/{lib → core}/store/sessions.js +3 -19
- package/dist/core/store/store.d.ts +1 -0
- package/dist/{lib → core/store}/store.js +1 -1
- package/dist/{lib → core}/store/types.d.ts +0 -4
- package/dist/integrations/mcp/cache.d.ts +27 -0
- package/dist/integrations/mcp/cache.js +106 -0
- package/dist/integrations/mcp/capture/antigravity-parser.d.ts +26 -0
- package/dist/integrations/mcp/capture/antigravity-parser.js +272 -0
- package/dist/integrations/mcp/capture/antigravity-scanner.d.ts +24 -0
- package/dist/integrations/mcp/capture/antigravity-scanner.js +153 -0
- package/dist/integrations/mcp/capture/antigravity-sync-tracker.d.ts +29 -0
- package/dist/integrations/mcp/capture/antigravity-sync-tracker.js +115 -0
- package/dist/integrations/mcp/capture/cli-extractor.d.ts +18 -0
- package/dist/integrations/mcp/capture/cli-extractor.js +258 -0
- package/dist/integrations/mcp/capture/cli-synced.d.ts +4 -0
- package/dist/integrations/mcp/capture/cli-synced.js +62 -0
- package/dist/integrations/mcp/capture/cli-transform.d.ts +30 -0
- package/dist/integrations/mcp/capture/cli-transform.js +62 -0
- package/dist/integrations/mcp/capture/cli-watcher.d.ts +31 -0
- package/dist/integrations/mcp/capture/cli-watcher.js +106 -0
- package/dist/integrations/mcp/capture/hook-handler.d.ts +2 -0
- package/dist/integrations/mcp/capture/hook-handler.js +157 -0
- package/dist/integrations/mcp/capture/sqlite-reader.d.ts +35 -0
- package/dist/integrations/mcp/capture/sqlite-reader.js +388 -0
- package/dist/integrations/mcp/capture/sync-tracker.d.ts +16 -0
- package/dist/integrations/mcp/capture/sync-tracker.js +102 -0
- package/dist/integrations/mcp/clients/cursor/rules-installer.d.ts +19 -0
- package/dist/integrations/mcp/clients/cursor/rules-installer.js +123 -0
- package/dist/integrations/mcp/index.d.ts +1 -0
- package/dist/integrations/mcp/index.js +94 -0
- package/dist/integrations/mcp/logger.d.ts +8 -0
- package/dist/integrations/mcp/logger.js +50 -0
- package/dist/integrations/mcp/server.d.ts +5 -0
- package/dist/integrations/mcp/server.js +58 -0
- package/dist/integrations/mcp/tools/expand.d.ts +1 -0
- package/dist/integrations/mcp/tools/expand.js +53 -0
- package/dist/integrations/mcp/tools/preview.d.ts +1 -0
- package/dist/integrations/mcp/tools/preview.js +64 -0
- package/dist/integrations/proxy/agents/base.d.ts +43 -0
- package/dist/integrations/proxy/agents/base.js +13 -0
- package/dist/{proxy/utils → integrations/proxy/agents/claude}/extractors.d.ts +4 -8
- package/dist/{proxy/utils → integrations/proxy/agents/claude}/extractors.js +4 -33
- package/dist/{proxy → integrations/proxy/agents/claude}/forwarder.d.ts +1 -1
- package/dist/{proxy → integrations/proxy/agents/claude}/forwarder.js +22 -6
- package/dist/integrations/proxy/agents/claude/index.d.ts +43 -0
- package/dist/integrations/proxy/agents/claude/index.js +386 -0
- package/dist/{proxy/action-parser.d.ts → integrations/proxy/agents/claude/parser.d.ts} +1 -1
- package/dist/integrations/proxy/agents/codex/extractors.d.ts +6 -0
- package/dist/integrations/proxy/agents/codex/extractors.js +49 -0
- package/dist/integrations/proxy/agents/codex/forwarder.d.ts +9 -0
- package/dist/integrations/proxy/agents/codex/forwarder.js +125 -0
- package/dist/integrations/proxy/agents/codex/index.d.ts +44 -0
- package/dist/integrations/proxy/agents/codex/index.js +371 -0
- package/dist/integrations/proxy/agents/codex/parser.d.ts +11 -0
- package/dist/integrations/proxy/agents/codex/parser.js +104 -0
- package/dist/integrations/proxy/agents/codex/patch.d.ts +12 -0
- package/dist/integrations/proxy/agents/codex/patch.js +40 -0
- package/dist/integrations/proxy/agents/codex/settings.d.ts +18 -0
- package/dist/integrations/proxy/agents/codex/settings.js +73 -0
- package/dist/integrations/proxy/agents/codex/types.d.ts +59 -0
- package/dist/integrations/proxy/agents/codex/types.js +2 -0
- package/dist/integrations/proxy/agents/index.d.ts +11 -0
- package/dist/integrations/proxy/agents/index.js +25 -0
- package/dist/integrations/proxy/agents/types.d.ts +77 -0
- package/dist/integrations/proxy/agents/types.js +2 -0
- package/dist/{proxy → integrations/proxy/cache}/extended-cache.js +2 -6
- package/dist/{proxy → integrations/proxy}/config.js +1 -1
- package/dist/{proxy → integrations/proxy}/handlers/preprocess.d.ts +3 -3
- package/dist/integrations/proxy/handlers/preprocess.js +194 -0
- package/dist/integrations/proxy/index.js +20 -0
- package/dist/integrations/proxy/injection/memory-injection.d.ts +56 -0
- package/dist/integrations/proxy/injection/memory-injection.js +252 -0
- package/dist/integrations/proxy/orchestrator.d.ts +30 -0
- package/dist/integrations/proxy/orchestrator.js +954 -0
- package/dist/integrations/proxy/request-processor.d.ts +14 -0
- package/dist/integrations/proxy/request-processor.js +68 -0
- package/dist/{proxy → integrations/proxy}/response-processor.d.ts +4 -3
- package/dist/{proxy → integrations/proxy}/response-processor.js +51 -43
- package/dist/{proxy → integrations/proxy}/server.d.ts +0 -1
- package/dist/integrations/proxy/server.js +146 -0
- package/dist/{proxy → integrations/proxy}/types.d.ts +4 -0
- package/dist/{proxy → integrations/proxy}/utils/logging.d.ts +1 -0
- package/dist/{proxy → integrations/proxy}/utils/logging.js +5 -0
- package/package.json +31 -10
- package/postinstall.js +62 -6
- package/dist/cli.js +0 -149
- package/dist/commands/capture.d.ts +0 -6
- package/dist/commands/capture.js +0 -324
- package/dist/commands/disable.d.ts +0 -1
- package/dist/commands/disable.js +0 -14
- package/dist/commands/doctor.d.ts +0 -1
- package/dist/commands/doctor.js +0 -89
- package/dist/commands/init.d.ts +0 -1
- package/dist/commands/init.js +0 -52
- package/dist/commands/inject.d.ts +0 -5
- package/dist/commands/inject.js +0 -88
- package/dist/commands/prompt-inject.d.ts +0 -4
- package/dist/commands/prompt-inject.js +0 -451
- package/dist/commands/unregister.d.ts +0 -1
- package/dist/commands/unregister.js +0 -28
- package/dist/lib/anchor-extractor.d.ts +0 -30
- package/dist/lib/anchor-extractor.js +0 -296
- package/dist/lib/correction-builder.d.ts +0 -10
- package/dist/lib/correction-builder.js +0 -226
- package/dist/lib/drift-checker-proxy.js +0 -373
- package/dist/lib/drift-checker.d.ts +0 -66
- package/dist/lib/drift-checker.js +0 -341
- package/dist/lib/hooks.d.ts +0 -38
- package/dist/lib/hooks.js +0 -291
- package/dist/lib/jsonl-parser.d.ts +0 -87
- package/dist/lib/jsonl-parser.js +0 -281
- package/dist/lib/session-parser.d.ts +0 -44
- package/dist/lib/session-parser.js +0 -256
- package/dist/lib/store.d.ts +0 -1
- package/dist/proxy/cache.d.ts +0 -32
- package/dist/proxy/cache.js +0 -47
- package/dist/proxy/handlers/preprocess.js +0 -186
- package/dist/proxy/index.js +0 -30
- package/dist/proxy/injection/delta-tracking.d.ts +0 -11
- package/dist/proxy/injection/delta-tracking.js +0 -94
- package/dist/proxy/injection/injectors.d.ts +0 -7
- package/dist/proxy/injection/injectors.js +0 -139
- package/dist/proxy/request-processor.d.ts +0 -27
- package/dist/proxy/request-processor.js +0 -233
- package/dist/proxy/server.js +0 -1289
- /package/dist/{commands → cli/commands}/drift-test.d.ts +0 -0
- /package/dist/{commands → cli/commands}/login.d.ts +0 -0
- /package/dist/{commands → cli/commands}/logout.d.ts +0 -0
- /package/dist/{commands → cli/commands}/proxy-status.d.ts +0 -0
- /package/dist/{commands → cli/commands}/status.d.ts +0 -0
- /package/dist/{commands → cli/commands}/uninstall.d.ts +0 -0
- /package/dist/{cli.d.ts → cli/index.d.ts} +0 -0
- /package/dist/{lib → core/cloud}/api-client.d.ts +0 -0
- /package/dist/{lib → core/cloud}/api-client.js +0 -0
- /package/dist/{lib → core/cloud}/credentials.d.ts +0 -0
- /package/dist/{lib → core/cloud}/credentials.js +0 -0
- /package/dist/{lib → core}/store/convenience.d.ts +0 -0
- /package/dist/{lib → core}/store/convenience.js +0 -0
- /package/dist/{lib → core}/store/database.d.ts +0 -0
- /package/dist/{lib → core}/store/database.js +0 -0
- /package/dist/{lib → core}/store/drift.d.ts +0 -0
- /package/dist/{lib → core}/store/drift.js +0 -0
- /package/dist/{lib → core}/store/index.d.ts +0 -0
- /package/dist/{lib → core}/store/index.js +0 -0
- /package/dist/{lib → core}/store/sessions.d.ts +0 -0
- /package/dist/{lib → core}/store/steps.d.ts +0 -0
- /package/dist/{lib → core}/store/steps.js +0 -0
- /package/dist/{lib → core}/store/tasks.d.ts +0 -0
- /package/dist/{lib → core}/store/tasks.js +0 -0
- /package/dist/{lib → core}/store/types.js +0 -0
- /package/dist/{proxy/action-parser.js → integrations/proxy/agents/claude/parser.js} +0 -0
- /package/dist/{lib → integrations/proxy/agents/claude}/settings.d.ts +0 -0
- /package/dist/{lib → integrations/proxy/agents/claude}/settings.js +0 -0
- /package/dist/{proxy → integrations/proxy/cache}/extended-cache.d.ts +0 -0
- /package/dist/{proxy → integrations/proxy}/config.d.ts +0 -0
- /package/dist/{proxy → integrations/proxy}/index.d.ts +0 -0
- /package/dist/{proxy → integrations/proxy}/types.js +0 -0
- /package/dist/{lib → utils}/debug.d.ts +0 -0
- /package/dist/{lib → utils}/debug.js +0 -0
- /package/dist/{lib → utils}/utils.d.ts +0 -0
- /package/dist/{lib → utils}/utils.js +0 -0
|
@@ -0,0 +1,510 @@
|
|
|
1
|
+
// Drift checker for proxy - scores Claude's actions vs original goal
|
|
2
|
+
// Reference: plan_proxy_local.md Section 4.2, 4.3
|
|
3
|
+
import { forwardToAnthropic } from '../../integrations/proxy/agents/claude/forwarder.js';
|
|
4
|
+
import { buildSafeHeaders } from '../../integrations/proxy/config.js';
|
|
5
|
+
import { isDebugMode } from '../../integrations/proxy/utils/logging.js';
|
|
6
|
+
// Model used for every drift-checker side call.
// Model list: https://docs.anthropic.com/en/docs/about-claude/models
const HAIKU_MODEL = 'claude-haiku-4-5-20251001';
/**
 * Send a single-turn prompt to Haiku through the proxy forwarder.
 *
 * Never throws: a rejected forward call, an HTTP status >= 400, or a response
 * whose body cannot be read are all logged with console.error and reported as
 * `{ text: '', success: false }`.
 *
 * @param maxTokens - value sent as `max_tokens`
 * @param prompt - content of the single user message
 * @param headers - incoming request headers; passed through buildSafeHeaders before forwarding
 * @param context - label used in the `[HAIKU]` log lines
 * @returns `{ text, success }`. On success `text` is the text of the first
 *          content block when that block has type 'text', otherwise ''.
 */
async function callHaikuDrift(maxTokens, prompt, headers, context = 'unknown') {
    if (isDebugMode())
        console.log(`[HAIKU] ${context} started`);
    // Use same header filtering as proxy forward
    const safeHeaders = buildSafeHeaders(headers);
    try {
        const result = await forwardToAnthropic({
            model: HAIKU_MODEL,
            max_tokens: maxTokens,
            messages: [{ role: 'user', content: prompt }],
        }, safeHeaders);
        if (result.statusCode >= 400) {
            // The body can be missing or not JSON-shaped on an error response;
            // optional chaining keeps the HTTP status as the reported reason
            // instead of a TypeError from dereferencing it.
            throw new Error(result.body?.error?.message || `HTTP ${result.statusCode}`);
        }
        const body = result.body;
        const text = body.content?.[0]?.type === 'text' ? body.content[0].text || '' : '';
        if (isDebugMode())
            console.log(`[HAIKU] ${context} success`);
        return { text, success: true };
    }
    catch (err) {
        // Always log errors; a non-Error throwable has no `.message`.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`[HAIKU] ${context} error:`, reason);
        return { text: '', success: false };
    }
}
|
|
38
|
+
/**
 * Entry point for drift scoring.
 *
 * Delegates to the LLM-based check; if that rejects for any reason the
 * result of the no-LLM basic check is returned instead.
 *
 * @param input - drift-check input forwarded unchanged to both checks
 * @param headers - request headers forwarded to the LLM-based check
 * @returns the drift result object produced by whichever check ran
 */
export async function checkDrift(input, headers) {
    let outcome;
    try {
        outcome = await checkDriftWithLLM(input, headers);
    }
    catch {
        // The LLM path failed - use the basic check
        outcome = checkDriftBasic(input);
    }
    return outcome;
}
|
|
50
|
+
/**
 * Summarise files that were modified repeatedly.
 *
 * Each 'edit' or 'write' step adds one to the count of every file it lists;
 * steps of any other action type are ignored. Steps that carry no file list
 * contribute nothing rather than throwing.
 *
 * @param steps - recorded steps, each with `action_type` and (usually) `files`
 * @param minEdits - smallest count that is reported; defaults to 5. Callers
 *                   that override it must word their own text accordingly
 *                   (the drift prompt in this file says "5+ times").
 * @returns entries of the form `path (Nx)` joined by ', ' in first-seen
 *          order, or '' when no file reaches the threshold
 */
function buildRepetitionContext(steps, minEdits = 5) {
    const editCounts = new Map();
    for (const step of steps ?? []) {
        if (step.action_type !== 'edit' && step.action_type !== 'write')
            continue;
        for (const file of step.files ?? []) {
            editCounts.set(file, (editCounts.get(file) || 0) + 1);
        }
    }
    // Joining an empty list already yields '', so no separate empty case is needed.
    return [...editCounts.entries()]
        .filter(([, count]) => count >= minEdits)
        .map(([file, count]) => `${file} (${count}x)`)
        .join(', ');
}
|
|
67
|
+
/**
 * Score drift by asking Haiku to compare recent actions with the user's instructions.
 * Reference: plan_proxy_local.md Section 3.1
 *
 * The prompt lists only modifying steps (edit, write, bash) found among the
 * last 10 recent steps, the latest user message (first 1500 characters, and
 * only when longer than 20 characters), the session's original goal and
 * constraints, and a notice about repeatedly edited files when there are any.
 *
 * @param input - object providing `recentSteps`, `latestUserMessage` and `sessionState`
 * @param headers - request headers forwarded to the Haiku call
 * @returns the parsed drift result, or a default result with score 8 when the Haiku call fails
 */
async function checkDriftWithLLM(input, headers) {
    // Only actions that can change state are evaluated. Reading and exploring
    // are always acceptable, so these action types are left out:
    //   read  - Read tool, cat, head, tail, less, more, type (Windows)
    //   glob  - Glob tool, find, ls, dir (Windows)
    //   grep  - Grep tool, rg, ack, findstr (Windows)
    //   task  - subagent (usually research/exploration)
    //   other - various non-modification tools
    // bash may be read-like (cat) or modifying (rm, npm install); it is kept
    // because modifications made through it are significant.
    const scoredActionTypes = new Set(['edit', 'write', 'bash']);
    // One bullet per step: the command (first 100 chars) for bash, else the file list.
    const describeStep = (step) => {
        if (step.action_type === 'bash' && step.command) {
            return `- ${step.action_type}: ${step.command.substring(0, 100)}`;
        }
        if (step.files.length > 0) {
            return `- ${step.action_type}: ${step.files.join(', ')}`;
        }
        return `- ${step.action_type}`;
    };
    const bullets = [];
    for (const step of input.recentSteps.slice(-10)) {
        if (scoredActionTypes.has(step.action_type)) {
            bullets.push(describeStep(step));
        }
    }
    const actionsText = bullets.join('\n');
    // The latest user message is the CURRENT instruction; 1500 chars keeps
    // complex instructions from being cut off mid-thought.
    const currentInstruction = input.latestUserMessage?.substring(0, 1500) || '';
    const hasCurrentInstruction = currentInstruction.length > 20;
    const repetitionContext = buildRepetitionContext(input.recentSteps);
    const prompt = `<purpose>
You are a LENIENT drift detection system for a coding assistant.
Your job: Check if Claude is following the user's instructions.

CRITICAL MINDSET:
- Default assumption: Claude is doing fine (score 8)
- Only lower score when you see CLEAR, OBVIOUS problems
- Scores 5-10 are all acceptable - no action needed for these scores
</purpose>

<context>
<current_instruction priority="PRIMARY" weight="90%">
This is what the user JUST asked for. Compare actions against THIS.
"${hasCurrentInstruction ? currentInstruction : 'Not specified'}"
</current_instruction>

<original_goal priority="SECONDARY" weight="10%">
This was the initial goal. User may have changed direction - that's OK.
"${input.sessionState.original_goal || 'Not specified'}"
</original_goal>

<constraints>
${input.sessionState.constraints.length > 0 ? input.sessionState.constraints.join(', ') : 'None'}
</constraints>

<recent_actions context_only="true">
These are Claude's recent actions. Use for understanding, NOT for automatic penalties.
${actionsText || 'No actions yet'}
</recent_actions>
${repetitionContext ? `
<repetition_notice>
Files edited 5+ times: ${repetitionContext}

This is NOT automatically bad. Check:
- Is Claude making incremental progress?
- Or is Claude stuck repeating the exact same fix?
</repetition_notice>
` : ''}
</context>

<scoring_guide>
<score_9_10 meaning="ON TRACK - Excellent">
Claude is doing EXACTLY what user asked, or very close.

GIVE 9-10 WHEN:
- Actions directly match current_instruction
- New files created that user requested (even if not explicitly named)
- Working on files related to the task
- Making clear progress toward goal

EXAMPLES:
- User: "add login feature" → Claude creates src/auth/login.ts → Score: 10
- User: "fix the bug in payments" → Claude edits src/payments/checkout.ts → Score: 10
- User: "research how X works" → Claude creates docs/research-X.md → Score: 10
- User: "refactor the API" → Claude edits multiple API files → Score: 9
</score_9_10>

<score_5_8 meaning="ACCEPTABLE - Fine, no issues">
Claude is doing related work, not perfect but acceptable.

GIVE 5-8 WHEN:
- Actions are RELATED to the task but not exact match
- Claude is exploring/investigating before implementing
- Some actions seem tangential but could be necessary

EXAMPLES:
- User: "fix auth bug" → Claude reads config files first → Score: 8 (investigating)
- User: "add feature X" → Claude refactors nearby code first → Score: 7 (prep work)
- User: "update the UI" → Claude also updates related tests → Score: 8 (good practice)
</score_5_8>

<score_4 meaning="MILD CONCERN - Yellow flag">
Something seems off but not critically wrong.

GIVE 4 WHEN:
- Actions feel disconnected from instruction
- Claude might be going in circles
- Possible misunderstanding of task

EXAMPLES:
- User: "fix login" → Claude spends time on logout → Score: 4 (related but not asked)
- Claude edits same file 5+ times with SAME reasoning repeated → Score: 4
- User: "quick fix" → Claude starts major refactor → Score: 4 (scope mismatch)
</score_4>

<score_1_3 meaning="REAL DRIFT - Red flag">
Claude is clearly doing something WRONG or STUCK.

GIVE 1-3 ONLY WHEN:
- Actions are COMPLETELY unrelated to instruction
- Claude explicitly violates user's constraints
- Clear evidence of being stuck in loop (same error, same fix, no progress)
- Doing the OPPOSITE of what user asked

EXAMPLES:
- User: "fix auth bug" → Claude refactors CSS styling → Score: 2 (completely unrelated)
- User: "don't modify config" → Claude modifies config → Score: 1 (violated constraint)
- User: "just analyze, don't change" → Claude rewrites code → Score: 2 (opposite)
- User: "work on backend" → Claude only touches frontend → Score: 2 (wrong area)
</score_1_3>
</scoring_guide>

<detailed_rules>
<rule name="NEW_FILES">
WHEN IT'S OK (score 9-10):
- User asked for something that requires new files
- File is in a logical location for the task

WHEN IT'S BAD (score 1-4):
- File has nothing to do with current instruction
- User explicitly said "don't create new files"

EXAMPLES:
- User: "create a plan" → Claude creates docs/plan.md → Score: 10
- User: "fix typo in README" → Claude creates src/new-module.ts → Score: 2
</rule>

<rule name="MULTIPLE_EDITS">
WHEN IT'S OK (score 8-10):
- Each edit has DIFFERENT purpose
- Claude is iterating: add feature → add tests → fix edge case

WHEN IT'S BAD (score 3-4):
- ALL edits have SAME reasoning: "fixing error" → "fixing error" → "fixing error"
- No progress visible between edits

EXAMPLES:
- Edit 1: "added login" → Edit 2: "added validation" → Edit 3: "added tests" → Score: 9
- Edit 1: "fix bug" → Edit 2: "fix bug" → Edit 3: "fix bug" → Edit 4: "fix bug" → Score: 3
</rule>

<rule name="WRONG_DIRECTION">
HOW TO IDENTIFY:
- Ask: "Does this action help achieve current_instruction?"
- If answer is "no" or "I can't see how" → wrong direction (score 1-3)
- If answer is "maybe" or "indirectly" → probably OK (score 6-8)

EXAMPLES OF WRONG DIRECTION:
- User wants backend fix → Claude only touches CSS (score 2)
- User wants bug fix → Claude adds new features instead (score 3)
- User wants analysis → Claude starts rewriting without being asked (score 2)

NOT WRONG DIRECTION:
- User wants feature A → Claude reads related code first (score 9)
- User wants fix X → Claude also updates tests for X (score 9)
</rule>

<rule name="USER_CHANGED_DIRECTION">
If current_instruction differs from original_goal:
- ALWAYS prioritize current_instruction (90% weight)
- If Claude follows current_instruction but not original_goal → Score 9-10

EXAMPLE:
- Original goal: "analyze the codebase"
- Current instruction: "now create the implementation"
- Claude creates files → Score: 10 (following CURRENT instruction)
</rule>
</detailed_rules>

<anti_bias_rules>
DO NOT:
- Default to middle scores (5-7) without specific reason
- Penalize for new files automatically
- Penalize for multiple edits automatically

DO:
- Start with assumption of score 8
- Only lower if you find SPECIFIC evidence
- Give 9-10 generously when Claude is on track
</anti_bias_rules>

<response_format>
Return ONLY valid JSON:
{
"score": <number 1-10>,
"diagnostic": "<1-2 sentences explaining the score>",
"evidence": "<specific action or pattern that led to this score>"
}
</response_format>`;
    const haikuResult = await callHaikuDrift(300, prompt, headers, 'checkDrift');
    return haikuResult.success
        ? parseLLMResponse(haikuResult.text)
        : createDefaultResult(8, 'Haiku call failed');
}
|
|
294
|
+
/**
 * Drift check used when no LLM verdict is available.
 *
 * Nothing is analysed: the input is ignored and a fixed "on track" result
 * is returned so that no penalty is applied without LLM analysis.
 *
 * @param _input - unused
 * @returns `{ score: 8, driftType: 'none', diagnostic }`
 */
export function checkDriftBasic(_input) {
    const score = 8;
    const driftType = 'none';
    const diagnostic = 'Basic check - assuming on track (LLM not available)';
    return { score, driftType, diagnostic };
}
|
|
307
|
+
/**
 * Turn the raw text of a drift-check reply into a drift result.
 *
 * The span from the first '{' to the last '}' is parsed as JSON. A numeric
 * `score` is clamped to the range 1..10; anything else becomes 8. Fields of
 * the wrong type are replaced ('Unknown' for the diagnostic, undefined for
 * the optional ones). When no JSON is found or parsing throws, a default
 * result with score 8 is returned.
 *
 * @param text - reply text from the model
 * @returns `{ score, driftType, diagnostic, suggestedAction, recoverySteps }`
 *          or a default result
 */
function parseLLMResponse(text) {
    try {
        const jsonMatch = text.match(/\{[\s\S]*\}/);
        if (jsonMatch) {
            const payload = JSON.parse(jsonMatch[0]);
            let score = 8;
            if (typeof payload.score === 'number') {
                score = Math.min(10, Math.max(1, payload.score));
            }
            let diagnostic = 'Unknown';
            if (typeof payload.diagnostic === 'string') {
                diagnostic = payload.diagnostic;
            }
            let suggestedAction;
            if (typeof payload.suggestedAction === 'string') {
                suggestedAction = payload.suggestedAction;
            }
            let recoverySteps;
            if (Array.isArray(payload.recoverySteps)) {
                // keep only the string entries
                recoverySteps = payload.recoverySteps.filter((entry) => typeof entry === 'string');
            }
            return {
                score,
                driftType: scoreToDriftType(score),
                diagnostic,
                suggestedAction,
                recoverySteps,
            };
        }
        return createDefaultResult(8, 'No JSON in response');
    }
    catch {
        return createDefaultResult(8, 'Failed to parse response');
    }
}
|
|
334
|
+
/**
 * Map a drift score to its drift-type label.
 *
 * 8 and above is 'none', 5 and above is 'minor', 3 and above is 'major',
 * and every other value is 'critical'.
 *
 * @param score - drift score
 * @returns 'none' | 'minor' | 'major' | 'critical'
 */
function scoreToDriftType(score) {
    // Lower bounds in descending order; the first one reached wins.
    const bands = [
        [8, 'none'],
        [5, 'minor'],
        [3, 'major'],
    ];
    for (const [lowerBound, label] of bands) {
        if (score >= lowerBound) {
            return label;
        }
    }
    return 'critical';
}
|
|
346
|
+
/**
 * Convert score to correction level
 * Reference: plan_proxy_local.md Section 4.3
 *
 * Thresholds (lenient). They are expressed as ranges so that a fractional
 * score falls into the band it lies in rather than dropping through to the
 * strongest level:
 * - 5 and above:  null (OK, no correction needed)
 * - 4 up to 5:    'nudge' (mild reminder)
 * - 3 up to 4:    'correct' (full correction)
 * - below 3:      'intervene' (strong intervention)
 *
 * A value that compares false against every threshold (e.g. NaN) yields
 * 'intervene'.
 *
 * @param score - drift score, nominally 1-10
 * @returns null | 'nudge' | 'correct' | 'intervene'
 */
export function scoreToCorrectionLevel(score) {
    if (score >= 5)
        return null;
    if (score >= 4)
        return 'nudge';
    if (score >= 3)
        return 'correct';
    return 'intervene';
}
|
|
365
|
+
/**
 * Tell whether a score is low enough that the steps table should be skipped.
 * Reference: plan_proxy_local.md Section 4.2
 *
 * @param score - drift score
 * @returns true when the score is below 5
 */
export function shouldSkipSteps(score) {
    const lowestAcceptedScore = 5;
    return score < lowestAcceptedScore;
}
|
|
372
|
+
/**
 * Build a drift result from a score and a diagnostic message.
 *
 * @param score - drift score to report
 * @param diagnostic - explanation stored on the result
 * @returns `{ score, driftType, diagnostic }` with driftType derived from the score
 */
function createDefaultResult(score, diagnostic) {
    const driftType = scoreToDriftType(score);
    return { score, driftType, diagnostic };
}
|
|
382
|
+
// ============================================
|
|
383
|
+
// RECOVERY ALIGNMENT CHECK
|
|
384
|
+
// Reference: plan_proxy_local.md Section 4.4
|
|
385
|
+
// ============================================
|
|
386
|
+
/**
 * Check if a proposed action aligns with the first step of the recovery plan.
 *
 * The first step is lower-cased and split on whitespace. Each word has its
 * leading and trailing punctuation removed (so a quoted or comma-terminated
 * path still matches the file it names), words of three characters or fewer
 * are dropped, and duplicates are counted once. A keyword matches when it
 * occurs in "<actionType> <files...>" (lower-cased).
 *
 * The action is aligned when at least two keywords match, or when at least
 * one matches and the action touches at least one file. With no plan, or a
 * plan without steps, every action is aligned.
 *
 * @param proposedAction - object with `actionType` and, optionally, `files`
 * @param recoveryPlan - object with a `steps` array of strings, or a nullish value
 * @param sessionState - not used by this check
 * @returns `{ aligned, reason }`
 */
export function checkRecoveryAlignment(proposedAction, recoveryPlan, sessionState) {
    const planSteps = recoveryPlan?.steps ?? [];
    if (planSteps.length === 0) {
        // No recovery plan - allow action (scope checking removed)
        return { aligned: true, reason: 'No recovery plan defined' };
    }
    const files = proposedAction.files ?? [];
    const firstStep = planSteps[0].toLowerCase();
    const actionDesc = `${proposedAction.actionType} ${files.join(' ')}`.toLowerCase();
    // Strip punctuation around each word before the length filter; the Set
    // makes a repeated word count as a single keyword.
    const keywords = new Set(firstStep
        .split(/\s+/)
        .map(word => word.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ''))
        .filter(word => word.length > 3));
    // actionDesc already contains every lower-cased file path, so a single
    // substring test covers both the action type and the files.
    let matchCount = 0;
    for (const keyword of keywords) {
        if (actionDesc.includes(keyword)) {
            matchCount += 1;
        }
    }
    if (matchCount >= 2 || (matchCount >= 1 && files.length > 0)) {
        return { aligned: true, reason: `Action matches recovery step: ${firstStep}` };
    }
    return { aligned: false, reason: `Expected: ${firstStep}, Got: ${actionDesc}` };
}
|
|
405
|
+
/**
 * Generate forced recovery prompt using Haiku
 * Called when escalation_count >= 3 (forced mode)
 * This STOPS Claude and injects a specific recovery message
 *
 * Haiku is asked for a JSON object with `recoveryPrompt` and
 * `mandatoryAction`. A field that is missing, not a string, or blank is
 * replaced by a generic sentence built from the original goal. When the goal
 * itself is unset that sentence says "the original goal" instead of
 * interpolating `undefined`. If the Haiku call fails, or its reply contains
 * no parsable JSON, the non-LLM fallback recovery is returned.
 *
 * @param sessionState - provides `original_goal`, `constraints` and `escalation_count`
 * @param recentActions - recorded actions; the last 5 are listed in the prompt
 * @param lastDriftResult - most recent drift result; its `diagnostic` is quoted in the prompt
 * @param headers - request headers forwarded to the Haiku call
 * @returns `{ recoveryPrompt, mandatoryAction, injectionText }`
 */
export async function generateForcedRecovery(sessionState, recentActions, lastDriftResult, headers) {
    const actionsText = recentActions
        .slice(-5)
        .map(a => `- ${a.actionType}: ${a.files.join(', ')}`)
        .join('\n');
    const prompt = `You are helping recover a coding assistant that has COMPLETELY DRIFTED from its goal.

ORIGINAL GOAL: ${sessionState.original_goal || 'Not specified'}

CONSTRAINTS: ${sessionState.constraints.join(', ') || 'None'}

RECENT ACTIONS (all off-track):
${actionsText || 'None recorded'}

DRIFT DIAGNOSTIC: ${lastDriftResult.diagnostic}

ESCALATION COUNT: ${sessionState.escalation_count} (MAX REACHED)

Generate a STRICT recovery message that will:
1. STOP the assistant immediately
2. FORCE it to acknowledge the drift
3. Give ONE SPECIFIC, SIMPLE action to get back on track

RESPONSE RULES:
- English only
- No emojis
- Return JSON:
{
"recoveryPrompt": "The full message to inject (be firm but constructive, ~200 words)",
"mandatoryAction": "ONE specific action (e.g., 'Read src/auth/login.ts to refocus on authentication')"
}`;
    const haikuResult = await callHaikuDrift(600, prompt, headers, 'generateForcedRecovery');
    if (!haikuResult.success) {
        return createFallbackForcedRecovery(sessionState);
    }
    try {
        const jsonMatch = haikuResult.text.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
            return createFallbackForcedRecovery(sessionState);
        }
        const parsed = JSON.parse(jsonMatch[0]);
        // Used only in the generic sentences below; never renders as "undefined".
        const goalText = sessionState.original_goal || 'the original goal';
        // A blank string would inject an empty instruction, so treat it as missing.
        const isUsable = (value) => typeof value === 'string' && value.trim().length > 0;
        const recoveryPrompt = isUsable(parsed.recoveryPrompt)
            ? parsed.recoveryPrompt
            : `STOP. Return to: ${goalText}`;
        const mandatoryAction = isUsable(parsed.mandatoryAction)
            ? parsed.mandatoryAction
            : `Focus on ${goalText}`;
        return {
            recoveryPrompt,
            mandatoryAction,
            injectionText: formatForcedRecoveryInjection(recoveryPrompt, mandatoryAction, sessionState),
        };
    }
    catch {
        // Malformed JSON (or a formatting failure) - use the non-LLM recovery text
        return createFallbackForcedRecovery(sessionState);
    }
}
|
|
467
|
+
/**
 * Render the forced-recovery banner that gets injected into the system prompt.
 *
 * The result is wrapped in <grov_forced_recovery> tags, starts with two
 * newlines and ends with two newlines.
 *
 * @param recoveryPrompt - Message body placed under the banner header.
 * @param mandatoryAction - Single action shown in the "MANDATORY FIRST ACTION"
 *   section and repeated in the required acknowledgement.
 * @param sessionState - Read for `original_goal` (falls back to 'See above'
 *   when falsy) and `escalation_count`.
 * @returns The formatted injection text.
 */
function formatForcedRecoveryInjection(recoveryPrompt, mandatoryAction, sessionState) {
    const heavyRule = '════════════════════════════════════════════════════════════';
    const lightRule = '────────────────────────────────────────────────────────────';
    // Interpolated values stay inside template literals: Array#join would turn
    // undefined/null into '' whereas a template prints them literally.
    const rows = [
        '',
        '',
        '<grov_forced_recovery>',
        heavyRule,
        '*** CRITICAL: FORCED RECOVERY MODE ACTIVATED ***',
        heavyRule,
        '',
        `${recoveryPrompt}`,
        '',
        lightRule,
        'MANDATORY FIRST ACTION (you MUST do this before ANYTHING else):',
        `${mandatoryAction}`,
        lightRule,
        '',
        `Original goal: ${sessionState.original_goal || 'See above'}`,
        `Escalation level: ${sessionState.escalation_count}/3 (MAXIMUM)`,
        '',
        'YOUR NEXT MESSAGE MUST:',
        '1. Acknowledge: "I understand I have drifted from the goal"',
        `2. State: "I will now ${mandatoryAction}"`,
        '3. Execute ONLY that action',
        '',
        'ANY OTHER RESPONSE WILL BE REJECTED.',
        heavyRule,
        '</grov_forced_recovery>',
        '',
        '',
    ];
    return rows.join('\n');
}
|
|
499
|
+
/**
 * Produce a forced-recovery payload locally, without calling the model.
 *
 * Uses the session's `original_goal`, or the phrase 'the original task' when
 * the goal is falsy, to fill a fixed recovery message and mandatory action.
 *
 * @param sessionState - Read for `original_goal`; also forwarded to
 *   `formatForcedRecoveryInjection`.
 * @returns An object with `recoveryPrompt`, `mandatoryAction` and `injectionText`.
 */
function createFallbackForcedRecovery(sessionState) {
    const target = sessionState.original_goal ? sessionState.original_goal : 'the original task';
    const recoveryPrompt = `You have completely drifted from your goal. Stop what you're doing immediately and refocus on: ${target}`;
    const mandatoryAction = `Stop current work and return to: ${target}`;
    const injectionText = formatForcedRecoveryInjection(recoveryPrompt, mandatoryAction, sessionState);
    return { recoveryPrompt, mandatoryAction, injectionText };
}
|
|
@@ -1,31 +1,12 @@
|
|
|
1
|
-
import type { SessionState, StepRecord } from '
|
|
1
|
+
import type { SessionState, StepRecord } from '../store/store.js';
|
|
2
2
|
import type { ReasoningTraceEntry } from '@grov/shared';
|
|
3
|
-
export
|
|
4
|
-
goal: string;
|
|
5
|
-
expected_scope: string[];
|
|
6
|
-
constraints: string[];
|
|
7
|
-
success_criteria?: string[];
|
|
8
|
-
keywords: string[];
|
|
9
|
-
}
|
|
10
|
-
/**
|
|
11
|
-
* Extract intent from first user prompt using Haiku
|
|
12
|
-
* Called once at session start to populate session_states
|
|
13
|
-
* Falls back to basic extraction if API unavailable (for hook compatibility)
|
|
14
|
-
*/
|
|
15
|
-
export declare function extractIntent(firstPrompt: string): Promise<ExtractedIntent>;
|
|
16
|
-
/**
|
|
17
|
-
* Check if intent extraction is available
|
|
18
|
-
*/
|
|
19
|
-
export declare function isIntentExtractionAvailable(): boolean;
|
|
20
|
-
/**
|
|
21
|
-
* Check if session summary generation is available
|
|
22
|
-
*/
|
|
23
|
-
export declare function isSummaryAvailable(): boolean;
|
|
3
|
+
export type RequestHeaders = Record<string, string | string[] | undefined>;
|
|
24
4
|
/**
|
|
25
5
|
* Generate session summary for CLEAR operation
|
|
26
6
|
* Reference: plan_proxy_local.md Section 2.3, 4.5
|
|
27
7
|
*/
|
|
28
|
-
export declare function generateSessionSummary(sessionState: SessionState, steps: StepRecord[], maxTokens
|
|
8
|
+
export declare function generateSessionSummary(sessionState: SessionState, steps: StepRecord[], maxTokens: number | undefined, // Default 800, CLEAR mode uses 15000
|
|
9
|
+
headers: RequestHeaders): Promise<string>;
|
|
29
10
|
/**
|
|
30
11
|
* Task analysis result from Haiku
|
|
31
12
|
*/
|
|
@@ -37,6 +18,7 @@ export interface TaskAnalysis {
|
|
|
37
18
|
parent_task_id?: string;
|
|
38
19
|
reasoning: string;
|
|
39
20
|
step_reasoning?: string;
|
|
21
|
+
constraints?: string[];
|
|
40
22
|
}
|
|
41
23
|
/**
|
|
42
24
|
* Conversation message for task analysis
|
|
@@ -45,16 +27,12 @@ export interface ConversationMessage {
|
|
|
45
27
|
role: 'user' | 'assistant';
|
|
46
28
|
content: string;
|
|
47
29
|
}
|
|
48
|
-
/**
|
|
49
|
-
* Check if task analysis is available
|
|
50
|
-
*/
|
|
51
|
-
export declare function isTaskAnalysisAvailable(): boolean;
|
|
52
30
|
/**
|
|
53
31
|
* Analyze task context to determine task status
|
|
54
32
|
* Called after each main model response to orchestrate sessions
|
|
55
33
|
* Also compresses reasoning for steps if assistantResponse > 1000 chars
|
|
56
34
|
*/
|
|
57
|
-
export declare function analyzeTaskContext(currentSession: SessionState | null, latestUserMessage: string, recentSteps: StepRecord[], assistantResponse: string, conversationHistory
|
|
35
|
+
export declare function analyzeTaskContext(currentSession: SessionState | null, latestUserMessage: string, recentSteps: StepRecord[], assistantResponse: string, conversationHistory: ConversationMessage[] | undefined, headers: RequestHeaders): Promise<TaskAnalysis>;
|
|
58
36
|
export interface ExtractedReasoningAndDecisions {
|
|
59
37
|
system_name: string | null;
|
|
60
38
|
summary: string | null;
|
|
@@ -66,10 +44,6 @@ export interface ExtractedReasoningAndDecisions {
|
|
|
66
44
|
reason: string;
|
|
67
45
|
}>;
|
|
68
46
|
}
|
|
69
|
-
/**
|
|
70
|
-
* Check if reasoning extraction is available
|
|
71
|
-
*/
|
|
72
|
-
export declare function isReasoningExtractionAvailable(): boolean;
|
|
73
47
|
/**
|
|
74
48
|
* Extract reasoning trace and decisions from steps
|
|
75
49
|
* Called at task_complete to populate team memory with rich context
|
|
@@ -77,7 +51,7 @@ export declare function isReasoningExtractionAvailable(): boolean;
|
|
|
77
51
|
* @param formattedSteps - Pre-formatted XML string with grouped steps and actions
|
|
78
52
|
* @param originalGoal - The original task goal
|
|
79
53
|
*/
|
|
80
|
-
export declare function extractReasoningAndDecisions(formattedSteps: string, originalGoal: string): Promise<ExtractedReasoningAndDecisions>;
|
|
54
|
+
export declare function extractReasoningAndDecisions(formattedSteps: string, originalGoal: string, headers: RequestHeaders): Promise<ExtractedReasoningAndDecisions>;
|
|
81
55
|
/**
|
|
82
56
|
* Evolution step in memory history
|
|
83
57
|
*/
|
|
@@ -129,8 +103,4 @@ export interface SessionContext {
|
|
|
129
103
|
original_query: string;
|
|
130
104
|
files_touched: string[];
|
|
131
105
|
}
|
|
132
|
-
|
|
133
|
-
* Check if shouldUpdateMemory is available
|
|
134
|
-
*/
|
|
135
|
-
export declare function isShouldUpdateAvailable(): boolean;
|
|
136
|
-
export declare function shouldUpdateMemory(existingMemory: ExistingMemory, newData: ExtractedReasoningAndDecisions, sessionContext: SessionContext): Promise<ShouldUpdateResult>;
|
|
106
|
+
export declare function shouldUpdateMemory(existingMemory: ExistingMemory, newData: ExtractedReasoningAndDecisions, sessionContext: SessionContext, headers: RequestHeaders): Promise<ShouldUpdateResult>;
|