glitool 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +115 -48
  2. package/dist/agent.js +232 -37
  3. package/dist/agents/coder.js +46 -34
  4. package/dist/agents/debugger.js +111 -0
  5. package/dist/agents/explainer.js +2 -5
  6. package/dist/agents/git-agent.js +90 -0
  7. package/dist/agents/graph.js +214 -23
  8. package/dist/agents/judge.js +61 -0
  9. package/dist/agents/planner.js +31 -12
  10. package/dist/agents/planningAgent.js +41 -0
  11. package/dist/agents/refactorer.js +97 -0
  12. package/dist/agents/reviewer-agent.js +87 -0
  13. package/dist/agents/reviewer.js +6 -9
  14. package/dist/agents/types.js +1 -0
  15. package/dist/agents/validator.js +93 -0
  16. package/dist/agents/workflow.js +45 -0
  17. package/dist/auth.js +87 -0
  18. package/dist/commands/version.js +1 -0
  19. package/dist/config.js +4 -1
  20. package/dist/confirmHandler.js +4 -2
  21. package/dist/index.js +12 -25
  22. package/dist/llm/classifier.js +61 -0
  23. package/dist/llm/factory.js +50 -0
  24. package/dist/llm/router.js +191 -22
  25. package/dist/logger.js +25 -0
  26. package/dist/processEvents.js +1 -0
  27. package/dist/tools/bashTool.js +90 -0
  28. package/dist/tools/editFileTool.js +14 -3
  29. package/dist/tools/index.js +3 -1
  30. package/dist/tools/listFilesTool.js +19 -21
  31. package/dist/tools/processRegistry.js +36 -0
  32. package/dist/tools/readBackgroundOutput.js +29 -0
  33. package/dist/tools/readFileTool.js +64 -9
  34. package/dist/tools/searchCodeTool.js +14 -4
  35. package/dist/tools/webFetchTool.js +45 -0
  36. package/dist/tools/writeFileTool.js +9 -5
  37. package/dist/trust/riskScorer.js +29 -2
  38. package/dist/ui/App.js +384 -47
  39. package/dist/ui/AuthFlow.js +76 -0
  40. package/dist/ui/ConfirmCard.js +53 -0
  41. package/dist/ui/EscalationCard.js +22 -0
  42. package/dist/ui/ExplainCard.js +5 -0
  43. package/dist/ui/Pipeline.js +37 -0
  44. package/dist/ui/ProcessTrace.js +79 -0
  45. package/dist/ui/RoleRow.js +16 -0
  46. package/dist/ui/RoleRow.test.js +8 -0
  47. package/dist/ui/SlashPalette.js +32 -0
  48. package/dist/ui/StatusBar.js +44 -0
  49. package/dist/ui/ToolLog.js +62 -0
  50. package/dist/ui/Welcome.js +11 -0
  51. package/dist/ui/renderMarkdown.js +41 -0
  52. package/dist/ui/symbols.js +19 -0
  53. package/dist/ui/tokens.js +13 -0
  54. package/dist/version.js +1 -0
  55. package/package.json +56 -54
@@ -0,0 +1,111 @@
1
+ import { makeLlm } from '../llm/factory.js';
2
+ import { createReactAgent } from '@langchain/langgraph/prebuilt';
3
+ import { SystemMessage, HumanMessage } from '@langchain/core/messages';
4
+ import { listFilesTool, readFileTool, searchCodeTool, bashTool, editFileTool, } from '../tools/index.js';
5
+ const DEBUG_SYSTEM_PROMPT = `You are a debugging agent. You investigate first, then patch.
6
+
7
+ ═══════════════════════════════════════════════════════
8
+ DEFAULT BEHAVIOR — START INVESTIGATING.
9
+ ═══════════════════════════════════════════════════════
10
+
11
+ For almost every prompt, you SHOULD:
12
+ 1. Run the failing command via bash, OR
13
+ 2. Read the file the user mentioned, OR
14
+ 3. Search the codebase for the symbol they mentioned.
15
+
16
+ Do NOT ask the user for clarification before trying. A prompt like "npm test is failing", "fix the readFile bug", or "the auth flow throws on logout" is enough signal to begin. Run something concrete first, THEN decide if you need more info.
17
+
18
+ ═══════════════════════════════════════════════════════
19
+ HARD STOPS — apply ONLY after you've started investigating.
20
+ ═══════════════════════════════════════════════════════
21
+
22
+ After your investigation has begun, stop if ANY of these is true:
23
+
24
+ 1. After running the failing command, the output reveals the project is missing setup, not broken:
25
+ - \`npm test\` returns "Error: no test specified" (npm placeholder)
26
+ - The named tool/script doesn't exist at all
27
+ - The user is asking for a feature to be added, not a bug fixed
28
+ → Tell the user: "This isn't a debugger task — [specifics]. Use /coder to add it."
29
+ → DO NOT edit package.json, install dependencies, or scaffold new files.
30
+
31
+ 2. You called the same tool with the same arguments twice and got the same result.
32
+ → Stop. Report what you tried. Ask for more context.
33
+
34
+ 3. \`editFile\` returned an error twice in a row for the same file.
35
+ → Stop. The file content doesn't match your expectation. Read the file fresh with readFile, OR give up and report.
36
+
37
+ 4. You have used 10 tool calls. → Wrap up with what you have.
38
+
39
+ 5. You investigated for 3+ tool calls and STILL don't know what's failing.
40
+ → Now you can ask the user for a specific reproduction step. Not before.
41
+
42
+ ═══════════════════════════════════════════════════════
43
+
44
+ Tool scope:
45
+ - You can: readFile, listFiles, searchCode, bash, editFile
46
+ - You CANNOT: create new files, install dependencies, modify package.json scripts
47
+ - "Minimal patch" = the smallest change that makes the failing thing pass. Adding features is /coder's job. Restructuring code is /refactor's job.
48
+
49
+ Workflow:
50
+
51
+ 1. REPRODUCE — run the failing command via bash (\`npm test\`, \`npx tsc --noEmit\`, etc.), OR readFile the file the user mentioned. Get concrete output.
52
+
53
+ 2. INVESTIGATE — use searchCode + readFile on the actual error symbols.
54
+
55
+ 3. DIAGNOSE — BEFORE editing anything, output:
56
+ ## Diagnosis
57
+ - **Root cause:** one sentence
58
+ - **Where:** file.ts:LINE
59
+ - **Why it fails:** short explanation
60
+
61
+ 4. PATCH — one minimal editFile.
62
+
63
+ 5. VERIFY — re-run the exact command from step 1.
64
+
65
+ Final response format:
66
+
67
+ ## Diagnosis
68
+ ...
69
+
70
+ ## Fix
71
+ - file.ts:LINE — what changed
72
+
73
+ ## Verification
74
+ - ran: \`<command>\`
75
+ - result: pass | fail (with details)
76
+ `;
77
/**
 * Runs the debugging agent on a single user message.
 *
 * Builds a ReAct agent (via createReactAgent) with the list/read/search/bash/edit
 * tools and DEBUG_SYSTEM_PROMPT, streams its events, and keeps the content of the
 * most recent `on_chat_model_end` event as the reply.
 *
 * @param userMessage Text wrapped in a HumanMessage and sent to the agent.
 * @param onToolCall  Called as `onToolCall(eventName, data.input)` for every
 *                    `on_tool_start` event.
 * @param model       Passed straight through to makeLlm.
 * @returns The last model reply, or 'Debugger produced no output.' when that
 *          reply is empty or no reply was seen.
 */
export async function runDebugger(userMessage, onToolCall, model) {
    const agent = createReactAgent({
        llm: makeLlm(model),
        tools: [listFilesTool, readFileTool, searchCodeTool, bashTool, editFileTool],
        stateModifier: new SystemMessage(DEBUG_SYSTEM_PROMPT),
    });
    const events = agent.streamEvents(
        { messages: [new HumanMessage(userMessage)] },
        { version: 'v2', recursionLimit: 50 },
    );
    let lastReply = '';
    for await (const evt of events) {
        switch (evt.event) {
            case 'on_tool_start':
                onToolCall(evt.name, evt.data.input);
                break;
            case 'on_chat_model_end': {
                const content = evt.data.output?.content;
                if (typeof content === 'string') {
                    lastReply = content;
                }
                else if (Array.isArray(content)) {
                    // Content given as a list of parts: keep only the text parts.
                    const pieces = [];
                    for (const part of content) {
                        if (part.type === 'text') {
                            pieces.push(part.text ?? '');
                        }
                    }
                    lastReply = pieces.join('');
                }
                break;
            }
            default:
                break;
        }
    }
    return lastReply || 'Debugger produced no output.';
}
@@ -1,9 +1,6 @@
1
- import { ChatOpenAI } from "@langchain/openai";
1
+ import { makeLlm } from '../llm/factory.js';
2
2
  import { SystemMessage, HumanMessage } from "@langchain/core/messages";
3
- const explainerLlm = new ChatOpenAI({
4
- model: 'gpt-4o-mini',
5
- apiKey: process.env.OPENAI_API_KEY
6
- });
3
+ const explainerLlm = makeLlm('meta-llama/Llama-3.3-70B-Instruct-Turbo');
7
4
  export async function explainResponse(response) {
8
5
  if (!response || response.length < 50)
9
6
  return '';
@@ -0,0 +1,90 @@
1
+ import { makeLlm } from '../llm/factory.js';
2
+ import { createReactAgent } from '@langchain/langgraph/prebuilt';
3
+ import { SystemMessage, HumanMessage } from '@langchain/core/messages';
4
+ import { bashTool } from '../tools/index.js';
5
+ const GIT_SYSTEM_PROMPT = `You are a git agent. Your ONLY tool is bash, and you may only use it for git operations.
6
+
7
+ HARD STOP CONDITIONS:
8
+ - One git command per piece of information you need. Don't re-run git status repeatedly.
9
+ - If you've called bash with the same git command twice, STOP — report what you found.
10
+ - Total tool calls per run: maximum 8.
11
+
12
+ Strict rules:
13
+ - You cannot read, write, edit, or search files directly. You have no file tools.
14
+ - Every shell command you run must start with \`git\` (or be \`git\` piped through grep/head/tail/wc for filtering output).
15
+ - If the user asks for anything that is not a git operation, refuse and tell them which agent to use:
16
+ - "what does this file do" → /explain
17
+ - "fix this bug" → /debug
18
+ - "review my code" → /review
19
+ - "add a feature" → /coder
20
+
21
+ Capabilities:
22
+ - Status: \`git status\`, \`git log --oneline -n 20\`, \`git branch -vv\`
23
+ - Diffs: \`git diff\`, \`git diff --staged\`, \`git diff main..HEAD\`
24
+ - Stage / unstage: \`git add <files>\`, \`git restore --staged <files>\`
25
+ - Commit: \`git commit -m "<message>"\` — only after the user agrees with the message
26
+ - Push / pull / fetch (these will trigger a confirm prompt — that's fine)
27
+ - Branching: \`git switch\`, \`git switch -c\`, \`git branch\`
28
+ - Stash: \`git stash\`, \`git stash pop\`, \`git stash list\`
29
+ - History: \`git show <ref>\`, \`git blame\`
30
+
31
+ Workflow patterns:
32
+
33
+ When the user asks to commit (any phrasing: "commit", "commit everything", "save my work", etc.):
34
+ 1. Run \`git status\` to see staged, unstaged, and untracked files.
35
+ 2. If there are unstaged or untracked files, run \`git add -A\` to stage everything — unless the user specified particular files, in which case stage only those.
36
+ 3. Run \`git diff --staged\` to see exactly what will be committed.
37
+ 4. Run \`git log --oneline -n 5\` to match the project's commit style.
38
+ 5. Compose a concise commit message — imperative mood, one-line subject, optional body.
39
+ 6. SHOW the message to the user and ask for confirmation. Do NOT commit silently.
40
+ 7. On user confirmation, run \`git commit -m "<message>"\`.
41
+
42
+ When the user asks to push (any phrasing: "push", "push my changes", "push current state", etc.):
43
+ 1. Run \`git status\` to check for unstaged or uncommitted changes.
44
+ 2. If there are unstaged or untracked files → run \`git add -A\` to stage everything.
45
+ 3. Run \`git diff --staged\` to see what is staged.
46
+ 4. If there is anything staged → compose a commit message (imperative mood, one-line subject + optional body).
47
+ Show the message to the user and ask for confirmation. Do NOT commit silently.
48
+ 5. On confirmation → run \`git commit -m "<message>"\`.
49
+ 6. Run \`git remote -v\` to confirm a remote exists. If no remote is listed, stop and tell the user to provide a URL.
50
+ 7. Run \`git push\` — bashTool will surface the confirm gate.
51
+
52
+ If there is nothing to commit (working tree clean) → skip steps 2–5 and go straight to step 6.
53
+
54
+ When the user asks "what changed":
55
+ 1. Run \`git status\` first.
56
+ 2. Then \`git diff --stat\` for the summary.
57
+ 3. Show \`git diff\` only if they ask for details.
58
+
59
+ Always quote ref names and file paths if they contain spaces.
60
+
61
+ Response format: short and direct. Show the relevant command output, then a one-line summary. Don't narrate every step.`;
62
/**
 * Runs the git agent on a single user message.
 *
 * The agent is created with bashTool as its only tool and GIT_SYSTEM_PROMPT as
 * its system message. Events are streamed; the content of the most recent
 * `on_chat_model_end` event becomes the reply.
 *
 * @param userMessage Text wrapped in a HumanMessage and sent to the agent.
 * @param onToolCall  Called as `onToolCall(eventName, data.input)` for every
 *                    `on_tool_start` event.
 * @param model       Passed straight through to makeLlm.
 * @returns The last model reply, or 'Git agent produced no output.' when that
 *          reply is empty or no reply was seen.
 */
export async function runGitAgent(userMessage, onToolCall, model) {
    const agent = createReactAgent({
        llm: makeLlm(model),
        tools: [bashTool],
        stateModifier: new SystemMessage(GIT_SYSTEM_PROMPT),
    });
    const request = { messages: [new HumanMessage(userMessage)] };
    const options = { version: 'v2', recursionLimit: 50 };
    let answer = '';
    for await (const evt of agent.streamEvents(request, options)) {
        if (evt.event === 'on_tool_start') {
            onToolCall(evt.name, evt.data.input);
            continue;
        }
        if (evt.event !== 'on_chat_model_end') {
            continue;
        }
        const content = evt.data.output?.content;
        if (typeof content === 'string') {
            answer = content;
        }
        else if (Array.isArray(content)) {
            // Content given as a list of parts: concatenate the text parts only.
            const textParts = [];
            for (const part of content) {
                if (part.type === 'text') {
                    textParts.push(part.text ?? '');
                }
            }
            answer = textParts.join('');
        }
    }
    return answer || 'Git agent produced no output.';
}
@@ -1,28 +1,219 @@
1
- import { runPlanner } from './planner.js';
2
- import { runCoder } from './coder.js';
3
- import { runReviewer } from './reviewer.js';
4
- const MAX_ITERATIONS = 1;
5
- export async function runAgentGraph(userMessage, systemPrompt, onToolCall, onStatus) {
6
- onStatus('Planning...');
7
- const plan = await runPlanner(userMessage, systemPrompt);
8
- if (plan.trim() === 'SIMPLE') {
1
+ import { Annotation, StateGraph, START, END, MemorySaver } from "@langchain/langgraph";
2
+ import { runPlanner } from "./planner.js";
3
+ import { runCoder } from "./coder.js";
4
+ import { runJudge } from "./judge.js";
5
+ import { runValidator, formatValidationErrors } from "./validator.js";
6
+ import { buildTopology, formatTopologyAsPlan, topologySummary } from "./workflow.js";
7
+ import { getModelForTier } from "../llm/router.js";
8
+ import fg from 'fast-glob';
9
+ const MAX_CODER_RETRY = 2;
10
+ const MAX_JUDGE_ITERATIONS = 3;
11
// State schema for the agent graph. Every channel uses a last-write-wins reducer:
// the reducer ignores the previous value and returns the incoming one.
const GraphState = (() => {
    const takeLatest = (_previous, incoming) => incoming;
    // Channel without a default value.
    const plain = () => Annotation({ reducer: takeLatest });
    // Channel whose initial value comes from the given factory.
    const startingWith = (makeDefault) => Annotation({ reducer: takeLatest, default: makeDefault });
    const nothing = () => null;
    const emptyText = () => '';
    const zero = () => 0;
    return Annotation.Root({
        userMessage: plain(),
        systemPrompt: plain(),
        decision: plain(),
        steps: startingWith(nothing),
        topology: startingWith(nothing),
        plan: startingWith(emptyText),
        coderOutput: startingWith(emptyText),
        validationResult: startingWith(nothing),
        judgeResult: startingWith(nothing),
        coderRetry: startingWith(zero),
        judgeLoop: startingWith(zero),
        lastFailurePoint: startingWith(nothing),
        plannerHint: startingWith(emptyText),
        finalOutput: startingWith(nothing),
        trajectory: startingWith(() => []),
        escalated: startingWith(() => false),
    });
})();
29
/**
 * Derives a short display target from a tool call's arguments.
 *
 * Only the first value of `args` is inspected. It may be a JSON string, a plain
 * string, an object, or a primitive. For JSON strings and objects the first
 * defined field among `command`, `filePath`, `pattern`, `query` is used.
 *
 * @param args Tool input object (may be null/undefined).
 * @returns The chosen field; otherwise the raw string, the first 50 characters
 *          of the object's JSON, or the stringified primitive. Empty string
 *          when there is nothing to show.
 */
function extractTarget(args) {
    if (!args)
        return '';
    // Same field priority for both the JSON-string form and the object form,
    // so search tools (pattern/query) are labelled the same way in either shape.
    const pickField = (obj) => obj.command ?? obj.filePath ?? obj.pattern ?? obj.query;
    const first = Object.values(args)[0];
    if (typeof first === 'string') {
        let parsed;
        try {
            parsed = JSON.parse(first);
        }
        catch {
            // Not JSON: the string itself is the target.
            return first;
        }
        // JSON that decodes to null or a primitive carries no fields to pick from.
        if (parsed === null || typeof parsed !== 'object')
            return first;
        return pickField(parsed) ?? first;
    }
    if (typeof first === 'object' && first !== null) {
        // Fall back to a truncated JSON dump when no known field is present.
        return pickField(first) ?? JSON.stringify(first).slice(0, 50);
    }
    return String(first ?? '');
}
47
+ export async function runAgentGraph(userMessage, systemPrompt, onToolCall, onStatus, decision, onStageEvent) {
48
+ const plannerModel = getModelForTier('complex');
49
+ const coderModel = decision.recommendedModel;
50
+ const judgeModel = getModelForTier('complex');
51
+ async function plannerNode(state) {
52
+ onStatus('Planning...');
53
+ onStageEvent?.({ type: 'stage_start', stage: 'planner' });
54
+ const files = await fg(['**/*.{ts,tsx,js,jsx}'], {
55
+ cwd: process.cwd(),
56
+ ignore: ['node_modules/**', 'dist/**', '.next/**', '.git/**', 'build/**'],
57
+ onlyFiles: true,
58
+ suppressErrors: true,
59
+ });
60
+ const fileTree = files.slice(0, 200).join('\n');
61
+ const groundedSystemPrompt = `${state.systemPrompt}\n\n=== Project file tree (use exact paths from this list when planning edits) ===\n${fileTree}`;
62
+ const prompt = state.plannerHint
63
+ ? `${state.userMessage}\n\nPrevious attempt failed. Fix hint: ${state.plannerHint}`
64
+ : state.userMessage;
65
+ const steps = await runPlanner(prompt, groundedSystemPrompt, plannerModel);
66
+ if (steps?.length) {
67
+ const planText = steps
68
+ .map((s) => `${s.id}. ${s.action} ${s.target} — ${s.why}`)
69
+ .join('\n');
70
+ onStageEvent?.({ type: 'reasoning', stage: 'planner', text: planText });
71
+ }
72
+ onStageEvent?.({ type: 'stage_done', stage: 'planner' });
73
+ return { steps, coderRetry: 0, plannerHint: '' };
10
74
  }
11
- let coderOutPut = '';
12
- let finalResponse = '';
13
- let approved = false;
14
- let iteration = 0;
15
- while (!approved && iteration < MAX_ITERATIONS) {
16
- onStatus(`Executing plan${iteration > 0 ? ' (fixing issues)' : ''}...`);
17
- coderOutPut = await runCoder(plan, userMessage, onToolCall);
18
- onStatus('Reviewing...');
19
- const review = await runReviewer(plan, coderOutPut, userMessage);
20
- approved = review.approved;
21
- finalResponse = review.finalResponse;
22
- if (!approved) {
23
- onStatus(`Reviewer founder issues: ${review.feedback}`);
75
+ async function workflowNode(state) {
76
+ onStatus('Building execution plan...');
77
+ const topology = buildTopology(state.steps);
78
+ const plan = formatTopologyAsPlan(topology, state.steps);
79
+ onStatus(`Workflow: ${topologySummary(topology)}`);
80
+ return { topology, plan };
81
+ }
82
+ async function coderNode(state) {
83
+ onStatus(`Executing plan${state.coderRetry > 0 ? ' (fixing issues)' : ''}...`);
84
+ onStageEvent?.({ type: 'stage_start', stage: 'coder' });
85
+ const wrappedOnToolCall = (name, args) => {
86
+ onStageEvent?.({ type: 'tool', stage: 'coder', tool: name, target: extractTarget(args) });
87
+ onToolCall(name, args);
88
+ };
89
+ const coderOutput = await runCoder(state.plan, state.userMessage, wrappedOnToolCall, coderModel, (text) => onStageEvent?.({ type: 'reasoning', stage: 'coder', text }));
90
+ onStageEvent?.({ type: 'stage_done', stage: 'coder' });
91
+ return { coderOutput };
92
+ }
93
+ async function validatorNode(state) {
94
+ onStatus('Validating...');
95
+ onStageEvent?.({ type: 'stage_start', stage: 'validator' });
96
+ const validationResult = await runValidator();
97
+ const errCount = validationResult.tsc.errors.length + validationResult.eslint.errors.length;
98
+ const summary = validationResult.overallOk
99
+ ? 'No errors found.'
100
+ : `${errCount} error(s) found — will retry.`;
101
+ onStageEvent?.({ type: 'reasoning', stage: 'validator', text: summary });
102
+ onStageEvent?.({ type: 'stage_done', stage: 'validator' });
103
+ if (validationResult.overallOk)
104
+ return { validationResult };
105
+ const newRetry = state.coderRetry + 1;
106
+ if (newRetry >= MAX_CODER_RETRY) {
107
+ return { validationResult, coderRetry: newRetry };
108
+ }
109
+ const feedback = formatValidationErrors(validationResult);
110
+ onStatus(`Validation failed - retry ${newRetry}`);
111
+ return {
112
+ validationResult,
113
+ coderRetry: newRetry,
114
+ plan: `${state.plan}\n\nFix these issues:\n${feedback}`,
115
+ };
116
+ }
117
+ async function judgeNode(state) {
118
+ onStatus('Judging...');
119
+ onStageEvent?.({ type: 'stage_start', stage: 'judge' });
120
+ const judgment = await runJudge({
121
+ userMessage: state.userMessage,
122
+ plan: state.plan,
123
+ steps: state.steps,
124
+ topology: state.topology,
125
+ coderOutput: state.coderOutput,
126
+ validationResult: state.validationResult,
127
+ loopCount: state.judgeLoop,
128
+ }, judgeModel);
129
+ const verdictText = judgment.verdict === 'ok'
130
+ ? `All tasks complete. ${judgment.reason}`
131
+ : `Issue: ${judgment.reason}`;
132
+ onStageEvent?.({ type: 'reasoning', stage: 'judge', text: verdictText });
133
+ onStageEvent?.({ type: 'stage_done', stage: 'judge' });
134
+ const newJudgeLoop = state.judgeLoop + 1;
135
+ const repeated = judgment.failure_point === state.lastFailurePoint;
136
+ const escalate = newJudgeLoop >= MAX_JUDGE_ITERATIONS || repeated;
137
+ const newTrajectory = [
138
+ ...state.trajectory,
139
+ {
140
+ iteration: newJudgeLoop,
141
+ verdict: judgment.verdict,
142
+ failure_point: judgment.failure_point,
143
+ reason: judgment.reason,
144
+ fix_hint: judgment.fix_hint,
145
+ },
146
+ ];
147
+ if (judgment.verdict === 'ok' || escalate) {
148
+ const didEscalate = escalate && judgment.verdict !== 'ok';
149
+ if (didEscalate) {
150
+ onStatus(`Escalating after ${newJudgeLoop} attempts: ${judgment.reason}`);
151
+ }
152
+ return {
153
+ judgeResult: judgment,
154
+ finalOutput: judgment.finalResponse || state.coderOutput,
155
+ judgeLoop: newJudgeLoop,
156
+ trajectory: newTrajectory,
157
+ escalated: didEscalate,
158
+ };
159
+ }
160
+ onStatus(`Judge: ${judgment.failure_point} - ${judgment.reason}`);
161
+ if (judgment.failure_point === 'plan') {
162
+ return {
163
+ judgeResult: judgment,
164
+ judgeLoop: newJudgeLoop,
165
+ lastFailurePoint: judgment.failure_point,
166
+ plannerHint: judgment.fix_hint,
167
+ steps: null,
168
+ plan: '',
169
+ coderRetry: 0,
170
+ trajectory: newTrajectory,
171
+ };
24
172
  }
25
- iteration++;
173
+ return {
174
+ judgeResult: judgment,
175
+ judgeLoop: newJudgeLoop,
176
+ lastFailurePoint: judgment.failure_point,
177
+ plan: `${state.plan}\n\nJudge feedback (attempt ${state.judgeLoop + 1}): ${judgment.fix_hint}`,
178
+ coderRetry: 0,
179
+ trajectory: newTrajectory,
180
+ };
26
181
  }
27
- return finalResponse || coderOutPut;
182
+ const routeAfterPlanner = (state) => state.steps === null ? END : 'workflow';
183
+ const routeAfterValidator = (state) => {
184
+ if (state.validationResult?.overallOk)
185
+ return 'judge';
186
+ if (state.coderRetry >= MAX_CODER_RETRY)
187
+ return 'judge';
188
+ return 'coder';
189
+ };
190
+ const routeAfterJudge = (state) => {
191
+ if (state.finalOutput !== null)
192
+ return END;
193
+ if (state.judgeResult?.failure_point === 'plan')
194
+ return 'planner';
195
+ return 'coder';
196
+ };
197
+ const app = new StateGraph(GraphState)
198
+ .addNode('planner', plannerNode)
199
+ .addNode('workflow', workflowNode)
200
+ .addNode('coder', coderNode)
201
+ .addNode('validator', validatorNode)
202
+ .addNode('judge', judgeNode)
203
+ .addEdge(START, 'planner')
204
+ .addConditionalEdges('planner', routeAfterPlanner)
205
+ .addEdge('workflow', 'coder')
206
+ .addEdge('coder', 'validator')
207
+ .addConditionalEdges('validator', routeAfterValidator)
208
+ .addConditionalEdges('judge', routeAfterJudge)
209
+ .compile({ checkpointer: new MemorySaver() });
210
+ const threadId = `graph-${Date.now()}-${Math.random().toString(36).slice(2, 7)}`;
211
+ const result = await app.invoke({ userMessage, systemPrompt, decision }, { configurable: { thread_id: threadId }, recursionLimit: 50 });
212
+ return {
213
+ finalOutput: result.finalOutput ?? null,
214
+ escalated: result.escalated ?? false,
215
+ trajectory: result.trajectory ?? [],
216
+ userMessage: result.userMessage,
217
+ plan: result.plan,
218
+ };
28
219
  }
@@ -0,0 +1,61 @@
1
+ import { makeLlm } from '../llm/factory.js';
2
+ import { SystemMessage, HumanMessage } from "@langchain/core/messages";
3
/**
 * Asks the judge model whether the coder fulfilled the user's request.
 *
 * Sends the user request, plan, a (possibly truncated) coder summary, a
 * validation summary and the attempt number to the model, and expects a JSON
 * judgment back.
 *
 * @param input Object read for: userMessage, plan, coderOutput,
 *              validationResult ({ overallOk, tsc: { ok, ran, errors },
 *              eslint: { ok, errors } }) and loopCount.
 * @param model Passed straight through to makeLlm.
 * @returns The parsed judgment object. When the reply cannot be parsed into an
 *          object, an "ok" fallback whose finalResponse is the coder output.
 */
export async function runJudge(input, model) {
    const llm = makeLlm(model);
    // Keep the prompt small: only the first 500 characters of the coder's summary.
    const outputSummary = input.coderOutput.length > 500 ? input.coderOutput.slice(0, 500) + '... (truncated)' : input.coderOutput;
    const tsc = input.validationResult.tsc;
    const validationSummary = input.validationResult.overallOk
        ? `All checks passed.${tsc.ran ? ' TypeScript compiled successfully.' : ' (no tsconfig found — skipped)'}`
        : [
            !tsc.ok && tsc.ran ? `TS errors: ${tsc.errors.slice(0, 3).join(' | ')}` : '',
            !input.validationResult.eslint.ok ? `lint errors: ${input.validationResult.eslint.errors.slice(0, 3).join(' | ')}` : '',
        ].filter(Boolean).join(', ');
    const response = await llm.invoke([
        new SystemMessage(`You are a code quality judge for an AI coding agent. Evaluate whether the coder fulfilled the user's request.

IMPORTANT — how the coder works:
- The coder writes files using tools (writeFile, editFile). It does NOT print file contents in its response.
- "What the coder did" is a short text summary — the actual code is on disk, not in this text.
- If validation ran and passed (TypeScript compiled successfully), the files exist and are correct. Trust this.
- Only fail if: validation found real errors, OR the coder explicitly said it could not complete a step.
- Do NOT fail because the coder's text response did not show file contents. That is correct behaviour.

Return JSON only:
{
"verdict": "ok" or "fail",
"failure_point": "plan" | "workflow" | "executor" | "final_output" | null,
"failure_step_id": number or null,
"reason": "short explanation",
"fix_hint": "specific instruction to fix the problem, empty if ok",
"confidence": 0.0 to 1.0,
"finalResponse": "message to show the user — summarize what was done or what went wrong"
}

failure_point meanings:
- "plan": the plan itself was wrong or incomplete — needs replanning
- "workflow": wrong execution order caused the failure
- "executor": a specific step failed — use failure_step_id
- "final_output": code runs but doesn't meet the requirement
- null: everything ok, verdict must be ok`),
        new HumanMessage(`User request: ${input.userMessage}\n\n` +
            `Plan:\n${input.plan}\n\n` +
            `What the coder did:\n${outputSummary}\n\n` +
            `Validation: ${validationSummary}\n\n` +
            `Attempt: ${input.loopCount + 1}`)
    ]);
    // Message content can be a plain string or a list of parts; flatten the text
    // parts so a list-shaped reply is parsed instead of tripping the fallback.
    const rawContent = response.content;
    let replyText = '';
    if (typeof rawContent === 'string') {
        replyText = rawContent;
    }
    else if (Array.isArray(rawContent)) {
        replyText = rawContent
            .filter((c) => c.type === 'text')
            .map((c) => c.text ?? '')
            .join('');
    }
    try {
        // Strip Markdown code fences the model may wrap around the JSON.
        const cleaned = replyText.replace(/```json|```/g, '').trim();
        const parsed = JSON.parse(cleaned);
        // Only a JSON object can be a judgment; anything else counts as unparseable.
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
            return parsed;
        }
    }
    catch {
        // Deliberate best-effort: an unparseable reply falls through to the fallback.
    }
    return {
        verdict: 'ok',
        failure_point: null,
        failure_step_id: null,
        reason: '',
        fix_hint: '',
        confidence: 0.5,
        finalResponse: input.coderOutput,
    };
}
@@ -1,20 +1,39 @@
1
- import { ChatOpenAI } from "@langchain/openai";
1
+ import { makeLlm } from '../llm/factory.js';
2
2
  import { SystemMessage, HumanMessage } from "@langchain/core/messages";
3
- const plannerLlm = new ChatOpenAI({
4
- model: 'gpt-5.4',
5
- apiKey: process.env.OPENAI_API_KEY
6
- });
7
- export async function runPlanner(userMessage, context) {
8
- const response = await plannerLlm.invoke([
9
- new SystemMessage(`You are a coding task planner. Given a user request, output a clear numbered plan.
3
+ export async function runPlanner(userMessage, context, model) {
4
+ const llm = makeLlm(model);
5
+ const response = await llm.invoke([
6
+ new SystemMessage(`You are a coding task planner. Output a structured JSON plan.
7
+
10
8
  Rules:
9
+ - If the task is ONLY a question or explanation with NO file creation or code execution needed, output exactly: SIMPLE
10
+ - Otherwise output a JSON array of steps with this shape:
11
+ [{ "id": 1, "action": "read|edit|create|run|search", "target": "file/path or command", "depends_on": [], "why": "reason" }]
11
12
  - Be specific about which files to read, edit, or create
12
13
  - Do NOT write any code — only plan the steps
13
14
  - Keep it to 3-6 steps maximum
14
- - If the task is ONLY a question or explanation with NO file creation or code execution needed, output exactly: SIMPLE
15
- - Any task that requires creating, editing, or writing files must NEVER be SIMPLE
16
- `),
15
+ - Steps that can run independently should have no shared depends_on
16
+ - depends_on contains the ids of steps that must finish before this one`),
17
17
  new HumanMessage(`Context:\n${context}\n\nUser request: ${userMessage}`)
18
18
  ]);
19
- return response.content;
19
+ const content = response.content.trim();
20
+ if (content === 'SIMPLE')
21
+ return null;
22
+ try {
23
+ const cleaned = content.replace(/```json|```/g, '').trim();
24
+ const steps = JSON.parse(cleaned);
25
+ if (Array.isArray(steps) && steps.length > 0)
26
+ return steps;
27
+ throw new Error('empty array');
28
+ }
29
+ catch {
30
+ // Fallback — treat the whole response as a single unstructured step
31
+ return [{
32
+ id: 1,
33
+ action: 'edit',
34
+ target: 'project',
35
+ depends_on: [],
36
+ why: content,
37
+ }];
38
+ }
20
39
  }
@@ -0,0 +1,41 @@
1
+ import { makeLlm } from '../llm/factory.js';
2
+ import { SystemMessage, HumanMessage } from "@langchain/core/messages";
3
+ import { existsSync, readFileSync, writeFileSync } from "fs";
4
+ import { join } from "path";
5
+ const PLAN_FILE = "plan.md";
6
+ const BLOCKED_EXTENSIONS = ['.ts', '.js', '.tsx', '.jsx', '.py', '.go', '.rs'];
7
/**
 * Creates or updates plan.md in the current working directory from a user request.
 *
 * Reads the existing plan (if the file exists), asks the model for a plan, splits
 * the reply at the last "\n---\n" separator into plan body and change summary,
 * and writes the plan body to disk.
 *
 * @param userMessage The user's planning request.
 * @param onUsage     Optional callback, called as `onUsage(inputTokens, outputTokens)`
 *                    when the response carries usage metadata.
 * @returns A Markdown status line ("created" or "updated") followed by the
 *          change summary, or 'Plan saved.' when no separator was found.
 */
export async function runPlanningAgent(userMessage, onUsage) {
    const llm = makeLlm('deepseek-ai/DeepSeek-V3');
    const planPath = join(process.cwd(), PLAN_FILE);
    const existingPlan = existsSync(planPath) ? readFileSync(planPath, 'utf-8') : null;
    // NOTE(review): when a plan already exists the system prompt is the literal
    // text "...existing logic..." — this looks like an unfinished placeholder; confirm.
    const systemPrompt = existingPlan ? `...existing logic...` : `You are a planning assistant. Create a clear structured plan based on user's request.

BEFORE writing a plan:
- Look at the user's request and identify the key feature names, file paths, or concepts mentioned.
- If the request mentions specific files (e.g. "graph.ts", "EscalationCard"), ASSUME those already exist and may already implement what's described.
- If the request describes a feature without a clear "build me X" verb (no "design", "create", "implement", "plan"), assume the user is asking for ANALYSIS of an existing thing, not net-new work.

In your plan, do NOT invent assumptions. If you don't know something, write "QUESTION: ..." and ask the user.

Rules:
- Use Markdown with clear sections and numbered steps
- Be specific: name files, components, decisions, trade-offs
- If the request is vague, prefer asking 1-2 clarifying questions over guessing
- After the plan, write exactly "---" on its own line, then 1-3 bullet points summarising what you created
`;
    const userContent = existingPlan ? `Current plan:\n\n${existingPlan}\n\nUser request:${userMessage}` : userMessage;
    const response = await llm.invoke([
        new SystemMessage(systemPrompt),
        new HumanMessage(userContent)
    ]);
    const usage = response.usage_metadata;
    if (usage)
        onUsage?.(usage.input_tokens ?? 0, usage.output_tokens ?? 0);
    // Message content can be a plain string or a list of parts; flatten the text
    // parts so the string operations below never run on an array.
    const rawContent = response.content;
    let content = '';
    if (typeof rawContent === 'string') {
        content = rawContent;
    }
    else if (Array.isArray(rawContent)) {
        content = rawContent
            .filter((c) => c.type === 'text')
            .map((c) => c.text ?? '')
            .join('');
    }
    // Everything before the last "---" line is the plan; what follows is the summary.
    const splitIndex = content.lastIndexOf('\n---\n');
    const planBody = splitIndex !== -1 ? content.slice(0, splitIndex).trim() : content.trim();
    // 5 === '\n---\n'.length
    const changeSummary = splitIndex !== -1 ? content.slice(splitIndex + 5).trim() : 'Plan saved.';
    writeFileSync(planPath, planBody, 'utf-8');
    const action = existingPlan ? 'updated' : 'created';
    return `**plan.md ${action}**\n\n${changeSummary}`;
}