glitool 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +115 -48
  2. package/dist/agent.js +232 -37
  3. package/dist/agents/coder.js +46 -34
  4. package/dist/agents/debugger.js +111 -0
  5. package/dist/agents/explainer.js +2 -5
  6. package/dist/agents/git-agent.js +90 -0
  7. package/dist/agents/graph.js +214 -23
  8. package/dist/agents/judge.js +61 -0
  9. package/dist/agents/planner.js +31 -12
  10. package/dist/agents/planningAgent.js +41 -0
  11. package/dist/agents/refactorer.js +97 -0
  12. package/dist/agents/reviewer-agent.js +87 -0
  13. package/dist/agents/reviewer.js +6 -9
  14. package/dist/agents/types.js +1 -0
  15. package/dist/agents/validator.js +93 -0
  16. package/dist/agents/workflow.js +45 -0
  17. package/dist/auth.js +87 -0
  18. package/dist/commands/version.js +1 -0
  19. package/dist/config.js +4 -1
  20. package/dist/confirmHandler.js +4 -2
  21. package/dist/index.js +12 -25
  22. package/dist/llm/classifier.js +61 -0
  23. package/dist/llm/factory.js +50 -0
  24. package/dist/llm/router.js +191 -22
  25. package/dist/logger.js +25 -0
  26. package/dist/processEvents.js +1 -0
  27. package/dist/tools/bashTool.js +90 -0
  28. package/dist/tools/editFileTool.js +14 -3
  29. package/dist/tools/index.js +3 -1
  30. package/dist/tools/listFilesTool.js +19 -21
  31. package/dist/tools/processRegistry.js +36 -0
  32. package/dist/tools/readBackgroundOutput.js +29 -0
  33. package/dist/tools/readFileTool.js +64 -9
  34. package/dist/tools/searchCodeTool.js +14 -4
  35. package/dist/tools/webFetchTool.js +45 -0
  36. package/dist/tools/writeFileTool.js +9 -5
  37. package/dist/trust/riskScorer.js +29 -2
  38. package/dist/ui/App.js +384 -47
  39. package/dist/ui/AuthFlow.js +76 -0
  40. package/dist/ui/ConfirmCard.js +53 -0
  41. package/dist/ui/EscalationCard.js +22 -0
  42. package/dist/ui/ExplainCard.js +5 -0
  43. package/dist/ui/Pipeline.js +37 -0
  44. package/dist/ui/ProcessTrace.js +79 -0
  45. package/dist/ui/RoleRow.js +16 -0
  46. package/dist/ui/RoleRow.test.js +8 -0
  47. package/dist/ui/SlashPalette.js +32 -0
  48. package/dist/ui/StatusBar.js +44 -0
  49. package/dist/ui/ToolLog.js +62 -0
  50. package/dist/ui/Welcome.js +11 -0
  51. package/dist/ui/renderMarkdown.js +41 -0
  52. package/dist/ui/symbols.js +19 -0
  53. package/dist/ui/tokens.js +13 -0
  54. package/dist/version.js +1 -0
  55. package/package.json +56 -54
package/README.md CHANGED
@@ -1,48 +1,115 @@
1
- # glitool
2
-
3
- AI coding assistant for your terminal. Powered by OpenAI.
4
-
5
-
6
- ## Install
7
-
8
- ```bash
9
- npm install -g glitool
10
- ```
11
-
12
- ## Setup
13
-
14
- On first run, glitool will ask for your OpenAI API key. Get one at https://platform.openai.com/api-keys
15
-
16
- Or set it manually:
17
-
18
- ```bash
19
- mkdir ~/.glitool
20
- echo "OPENAI_API_KEY=sk-..." > ~/.glitool/.env
21
- ```
22
-
23
- ## Usage
24
-
25
- ```bash
26
- glitool # start AI chat session
27
- glitool --explain # explain every change in simple language
28
- glitool config --set-name "Your Name"
29
- glitool config --set-model gpt-4o
30
- glitool config --show
31
- ```
32
-
33
- ## Commands (inside chat)
34
-
35
- | Command | Description |
36
- |---------|-------------|
37
- | /help | Show available commands |
38
- | /clear | Clear current session |
39
- | /reset | Clear session + memory |
40
- | /exit | Save and exit |
41
-
42
- ## Requirements
43
- - Node.js 22 or higher
44
- - npm install -g glitool
45
-
46
-
47
- curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash -
48
- sudo apt-get install -y nodejs
1
+ # glitool
2
+
3
+ AI coding assistant for your terminal. Multi-agent pipeline, smart routing,
4
+ and a live process trace — all without leaving your terminal.
5
+
6
+ ## Install
7
+
8
+ ```bash
9
+ npm install -g glitool
10
+ ```
11
+
12
+ ## Setup
13
+
14
+ ```bash
15
+ mkdir -p ~/.glitool
16
+ echo "OPENAI_API_KEY=sk-..." > ~/.glitool/.env
17
+ ```
18
+
19
+ Then start:
20
+
21
+ ```bash
22
+ glitool
23
+ ```
24
+
25
+ ## Slash Commands
26
+
27
+ | Command | Description |
28
+ |---------|-------------|
29
+ | `/plan` | Create a structured plan for a complex task |
30
+ | `/coder` | Run the full multi-agent coding pipeline |
31
+ | `/debug` | Diagnose errors and broken behavior |
32
+ | `/refactor` | Restructure code without changing behavior |
33
+ | `/review` | Audit code for bugs, security, and quality |
34
+ | `/git` | Commit, push, diff, branch — full git operations |
35
+ | `/explain` | Explain a concept or file (no file edits) |
36
+ | `/quick` | Fast chat, cheapest model, no pipeline |
37
+ | `/model` | Show or switch the active model |
38
+ | `/memory` | View project memory and session summary |
39
+ | `/tools` | List available tools |
40
+ | `/clear` | Clear session (keeps memory) |
41
+ | `/reset` | Clear session and wipe memory |
42
+ | `/exit` | Save summary and quit |
43
+
44
+ ## Smart Routing
45
+
46
+ You don't need slash commands. Glitool reads your message and picks the
47
+ right agent automatically:
48
+
49
+ ```
50
+ why is my server crashing? → DEBUGGER
51
+ review src/auth.ts → REVIEWER
52
+ refactor the parser module → REFACTORER
53
+ commit my changes → GIT AGENT
54
+ how does useEffect work? → EXPLAINER
55
+ add a rate limiter → CODER (full pipeline)
56
+ ```
57
+
58
+ ## Multi-Agent Pipeline
59
+
60
+ For coding tasks, four agents run in sequence:
61
+
62
+ ```
63
+ PLANNER → CODER → VALIDATOR → JUDGE
64
+ ```
65
+
66
+ - **PLANNER** reads your request and produces a numbered step-by-step plan
67
+ - **CODER** executes the plan using file and shell tools
68
+ - **VALIDATOR** runs TypeScript and ESLint checks on the result
69
+ - **JUDGE** reviews the output and decides if it meets the requirement
70
+
71
+ Each stage is shown live in the terminal as it runs — with reasoning text
72
+ and every tool call displayed in order.
73
+
74
+ ## Tools
75
+
76
+ Agents have access to:
77
+
78
+ | Tool | What it does |
79
+ |------|-------------|
80
+ | `readFile` | Read any file in the project |
81
+ | `listFiles` | List files matching a glob pattern |
82
+ | `searchCode` | Search source files for a string or pattern |
83
+ | `writeFile` | Create a new file |
84
+ | `editFile` | Edit an existing file |
85
+ | `bash` | Run shell commands (risk-gated) |
86
+ | `webFetch` | Fetch a URL and read its content |
87
+
88
+ Dangerous shell commands (`rm -rf /`, `sudo`, `curl \| sh`) are blocked.
89
+ Sensitive commands (`git push`, `npm publish`) require your confirmation.
90
+
91
+ ## Memory
92
+
93
+ Glitool remembers context across sessions:
94
+
95
+ - **Session memory** — last 40 messages saved per project, auto-summarized
96
+ - **Project memory** — tech stack, architecture decisions, and TODOs
97
+ extracted from your conversations, stored in `.glitool/memory.json`
98
+
99
+ ## Configuration
100
+
101
+ Config file: `~/.glitool/config.json`
102
+
103
+ ```json
104
+ {
105
+ "name": "Developer",
106
+ "preferredLanguage": "TypeScript",
107
+ "codingStyle": "spaces",
108
+ "preferredModel": "gpt-4o-mini"
109
+ }
110
+ ```
111
+
112
+ ## Requirements
113
+
114
+ - Node.js 18 or higher
115
+ - OpenAI API key ([get one here](https://platform.openai.com/api-keys))
package/dist/agent.js CHANGED
@@ -1,4 +1,4 @@
1
- import { writeFileTool, analyzeProjectTool, listFilesTool, readFileTool, searchCodeTool, editFileTool } from "./tools/index.js";
1
+ import { writeFileTool, listFilesTool, readFileTool, searchCodeTool, editFileTool, bashTool, readBackgroundOutputTool, webFetchTool, } from "./tools/index.js";
2
2
  import { AIMessage, BaseMessage, HumanMessage, SystemMessage } from "@langchain/core/messages";
3
3
  import { StructuredTool } from "@langchain/core/tools";
4
4
  import { createReactAgent } from '@langchain/langgraph/prebuilt';
@@ -9,28 +9,39 @@ import { loadProjectMemory } from "./projectMemory.js";
9
9
  import { config as loadEnv } from 'dotenv';
10
10
  import { fileURLToPath } from 'url';
11
11
  import { dirname, join } from 'path';
12
- import { route } from './llm/router.js';
12
+ import { route, stripExplicitPrefix } from './llm/router.js';
13
13
  import { logRouting } from './llm/telemetry.js';
14
14
  import { runAgentGraph } from "./agents/graph.js";
15
+ import { runReviewer } from "./agents/reviewer-agent.js";
15
16
  import os from 'os';
17
+ import { cleanupAll } from "./tools/processRegistry.js";
18
+ import { runPlanningAgent } from "./agents/planningAgent.js";
19
+ import { runDebugger } from "./agents/debugger.js";
20
+ import { runRefactorer } from "./agents/refactorer.js";
21
+ import { runGitAgent } from "./agents/git-agent.js";
22
+ import { ToolMessage } from "@langchain/core/messages";
23
+ import { makeLlm } from './llm/factory.js';
16
24
  const __filename = fileURLToPath(import.meta.url);
17
25
  const __dirname = dirname(__filename);
18
26
  loadEnv({ path: join(os.homedir(), '.glitool', '.env') });
19
- const simpleLlm = new ChatOpenAI({
20
- model: 'gpt-4o-mini',
21
- apiKey: process.env.OPENAI_API_KEY
22
- });
23
- export const llm = new ChatOpenAI({
24
- model: 'gpt-4o-mini',
25
- apiKey: process.env.OPENAI_API_KEY
26
- });
27
- const config = loadConfig();
28
- const tools = [listFilesTool, readFileTool, searchCodeTool, writeFileTool, analyzeProjectTool, editFileTool];
27
+ const MAX_HISTORY_CHARS = 60_000;
28
+ // const simpleLlm = makeLlm('meta-llama/Llama-3.3-70B-Instruct-Turbo');
29
+ export const llm = createLlm('meta-llama/Llama-3.3-70B-Instruct-Turbo');
30
+ function createLlm(model) {
31
+ return makeLlm(model);
32
+ }
33
+ // const config = loadConfig();
34
+ const tools = [listFilesTool, readFileTool, searchCodeTool, writeFileTool, editFileTool, bashTool, readBackgroundOutputTool, webFetchTool];
35
+ process.on('exit', cleanupAll);
36
+ process.on('SIGINT', () => { cleanupAll(); process.exit(0); });
37
+ process.on('SIGTERM', () => { cleanupAll(); process.exit(0); });
29
38
  export const sessionMessages = loadSession();
30
39
  export function clearSession() {
31
40
  sessionMessages.length = 0;
32
41
  saveSession(sessionMessages);
33
42
  }
43
+ const MAX_SUMMARY_CHARS = 2_000;
44
+ const MAX_PROJECT_FACTS_CHARS = 3_000;
34
45
  function buildSystemPrompt() {
35
46
  let summary = loadSummary();
36
47
  const project = loadProjectMemory();
@@ -41,41 +52,216 @@ function buildSystemPrompt() {
41
52
  summary = loadSummary();
42
53
  }
43
54
  }
44
- // const project = loadProjectMemory();
45
55
  let prompt = `You are an expert coding assistant. Be concise and code-focused.
56
+
57
+ CRITICAL — file operations:
58
+ - When the user asks to read, show, view, or display a file, you MUST call the readFile tool. NEVER answer from memory or guess at file contents.
59
+ - When the user asks if a file exists, you MUST call listFiles or readFile to verify. NEVER claim a file is missing without checking.
60
+ - For "read <name>" prompts, call readFile with the bare name — the tool will search the project automatically.
61
+
46
62
  IMPORTANT: If any tool returns USER_CANCELLED, immediately stop all tool calls and tell the user the operation was cancelled. Never retry a cancelled operation.`;
47
- if (summary)
48
- prompt += `\n\nPrevious session summary:\n${summary}`;
49
- if (project)
50
- prompt += `\n\nProject facts:\n${JSON.stringify(project, null, 2)}`;
63
+ if (summary) {
64
+ const capped = summary.length > MAX_SUMMARY_CHARS
65
+ ? summary.slice(0, MAX_SUMMARY_CHARS) + '\n…[summary truncated]'
66
+ : summary;
67
+ prompt += `\n\nPrevious session summary:\n${capped}`;
68
+ }
69
+ if (project) {
70
+ const json = JSON.stringify(project, null, 2);
71
+ const capped = json.length > MAX_PROJECT_FACTS_CHARS
72
+ ? json.slice(0, MAX_PROJECT_FACTS_CHARS) + '\n…[truncated]'
73
+ : json;
74
+ prompt += `\n\nProject facts:\n${capped}`;
75
+ }
51
76
  return prompt;
52
77
  }
53
78
  const systemPrompt = await buildSystemPrompt();
54
- const simpleAgent = createReactAgent({
55
- llm: simpleLlm,
56
- tools,
57
- stateModifier: new SystemMessage(buildSystemPrompt())
58
- });
59
- const complexAgent = createReactAgent({
60
- llm,
61
- tools,
62
- stateModifier: new SystemMessage(buildSystemPrompt())
63
- });
64
- export async function chat(userInput, onToolCall, onStatus, onToken) {
65
- const decision = route(userInput);
79
+ async function tryDirectReadShortcut(prompt, onToolCall) {
80
+ const match = prompt.trim().match(/^(?:read|show|open|cat|view|display|print)\s+(.+?)$/i);
81
+ if (!match)
82
+ return null;
83
+ const target = match[1].trim().replace(/^["']|["']$/g, '');
84
+ if (!target || target.includes(' '))
85
+ return null;
86
+ onToolCall('readFile', { filePath: target });
87
+ let raw;
88
+ try {
89
+ raw = await readFileTool.invoke({ filePath: target });
90
+ }
91
+ catch (err) {
92
+ return `Could not read ${target}: ${err?.message ?? 'unknown error'}`;
93
+ }
94
+ if (typeof raw !== 'string')
95
+ raw = String(raw);
96
+ // Strip the smart-resolve header if present and remember the real path.
97
+ let resolvedPath = target;
98
+ let body = raw;
99
+ const resolveMatch = raw.match(/^\[resolved ".*?" → (.+?)\]\n\n([\s\S]*)$/);
100
+ if (resolveMatch) {
101
+ resolvedPath = resolveMatch[1];
102
+ body = resolveMatch[2];
103
+ }
104
+ const allLines = body.split('\n');
105
+ const totalLines = allLines.length;
106
+ const PREVIEW_LINES = 40;
107
+ const preview = allLines.slice(0, PREVIEW_LINES).join('\n');
108
+ const more = totalLines > PREVIEW_LINES
109
+ ? `\n\n[...${totalLines - PREVIEW_LINES} more lines — open ${resolvedPath} in your editor for the full file, or ask me a question about it]`
110
+ : '';
111
+ return `Read ${resolvedPath} (${totalLines} lines):\n\n${preview}${more}`;
112
+ }
113
+ function trimHistory(messages) {
114
+ // Pass 1: keep only well-formed turns (HumanMessage + final non-tool AIMessage).
115
+ // Drop empty AI messages and any AIMessage that requested a tool — they'd be orphaned without their ToolMessage.
116
+ const cleaned = [];
117
+ for (const m of messages) {
118
+ if (m instanceof HumanMessage) {
119
+ cleaned.push(m);
120
+ continue;
121
+ }
122
+ if (m instanceof AIMessage) {
123
+ const hasToolCalls = (Array.isArray(m.tool_calls) && m.tool_calls.length > 0) ||
124
+ (Array.isArray(m.additional_kwargs?.tool_calls) &&
125
+ m.additional_kwargs.tool_calls.length > 0);
126
+ if (!hasToolCalls && typeof m.content === 'string' && m.content.trim()) {
127
+ cleaned.push(m);
128
+ }
129
+ }
130
+ // ToolMessage and anything else: drop
131
+ }
132
+ // Pass 2: char budget, walking backwards.
133
+ let totalChars = 0;
134
+ const kept = [];
135
+ for (let i = cleaned.length - 1; i >= 0; i--) {
136
+ const content = typeof cleaned[i].content === 'string'
137
+ ? cleaned[i].content
138
+ : JSON.stringify(cleaned[i].content);
139
+ totalChars += content.length;
140
+ if (totalChars > MAX_HISTORY_CHARS)
141
+ break;
142
+ kept.unshift(cleaned[i]);
143
+ }
144
+ return kept;
145
+ }
146
+ const COST_PER_TOKEN = {
147
+ 'gpt-4o-mini': { input: 0.15 / 1_000_000, output: 0.60 / 1_000_000 },
148
+ 'gpt-5.4-mini': { input: 0.75 / 1_000_000, output: 4.50 / 1_000_000 },
149
+ 'gpt-5.4': { input: 2.50 / 1_000_000, output: 15.00 / 1_000_000 },
150
+ 'gpt-5.5': { input: 5.00 / 1_000_000, output: 30.00 / 1_000_000 },
151
+ };
152
+ function estimateCost(model, inputTokens, outputTokens) {
153
+ const rates = COST_PER_TOKEN[model] ?? COST_PER_TOKEN['gpt-4o-mini'];
154
+ return inputTokens * rates.input + outputTokens * rates.output;
155
+ }
156
+ function extractTarget(args) {
157
+ if (!args)
158
+ return '';
159
+ const first = Object.values(args)[0];
160
+ if (typeof first === 'string') {
161
+ try {
162
+ const p = JSON.parse(first);
163
+ return p.command ?? p.filePath ?? p.pattern ?? p.query ?? first;
164
+ }
165
+ catch {
166
+ return first;
167
+ }
168
+ }
169
+ if (typeof first === 'object' && first !== null) {
170
+ return first.command ?? first.filePath ?? JSON.stringify(first).slice(0, 50);
171
+ }
172
+ return String(first ?? '');
173
+ }
174
+ export async function chat(userInput, onToolCall, onStatus, onToken, onEscalation, onUsage, onStageEvent) {
175
+ const decision = await route(userInput, sessionMessages.slice(-6));
66
176
  logRouting(userInput, decision);
67
- sessionMessages.push(new HumanMessage(userInput));
68
- if (decision.domain === 'coding' || decision.tier === 'complex') {
69
- const result = await runAgentGraph(userInput, buildSystemPrompt(), onToolCall, onStatus ?? (() => { }));
70
- if (result !== null && result !== undefined) {
71
- sessionMessages.push(new AIMessage(result));
177
+ const cleanedInput = decision.source === 'explicit' ? stripExplicitPrefix(userInput) : userInput;
178
+ sessionMessages.push(new HumanMessage(cleanedInput));
179
+ const shortcut = await tryDirectReadShortcut(cleanedInput, onToolCall);
180
+ if (shortcut !== null) {
181
+ sessionMessages.push(new AIMessage(shortcut));
182
+ saveSession(sessionMessages);
183
+ return shortcut;
184
+ }
185
+ if (decision.domain === 'planning') {
186
+ onStatus?.('Planning...');
187
+ const result = await runPlanningAgent(cleanedInput, (inputTokens, outputTokens) => {
188
+ onUsage?.(inputTokens + outputTokens, estimateCost('gpt-5.4', inputTokens, outputTokens));
189
+ });
190
+ sessionMessages.push(new AIMessage(result));
191
+ saveSession(sessionMessages);
192
+ return result;
193
+ }
194
+ if (decision.domain === 'review') {
195
+ onStageEvent?.({ type: 'stage_start', stage: 'reviewer' });
196
+ const result = await runReviewer(cleanedInput, (name, args) => {
197
+ onStageEvent?.({ type: 'tool', stage: 'reviewer', tool: name, target: extractTarget(args) });
198
+ onToolCall(name, args);
199
+ }, decision.recommendedModel);
200
+ onStageEvent?.({ type: 'stage_done', stage: 'reviewer' });
201
+ sessionMessages.push(new AIMessage(result));
202
+ saveSession(sessionMessages);
203
+ return result;
204
+ }
205
+ if (decision.domain === 'debugging') {
206
+ onStageEvent?.({ type: 'stage_start', stage: 'debugger' });
207
+ const result = await runDebugger(cleanedInput, (name, args) => {
208
+ onStageEvent?.({ type: 'tool', stage: 'debugger', tool: name, target: extractTarget(args) });
209
+ onToolCall(name, args);
210
+ }, decision.recommendedModel);
211
+ onStageEvent?.({ type: 'stage_done', stage: 'debugger' });
212
+ sessionMessages.push(new AIMessage(result));
213
+ saveSession(sessionMessages);
214
+ return result;
215
+ }
216
+ if (decision.domain === 'refactoring') {
217
+ onStageEvent?.({ type: 'stage_start', stage: 'refactorer' });
218
+ const result = await runRefactorer(cleanedInput, (name, args) => {
219
+ onStageEvent?.({ type: 'tool', stage: 'refactorer', tool: name, target: extractTarget(args) });
220
+ onToolCall(name, args);
221
+ }, decision.recommendedModel);
222
+ onStageEvent?.({ type: 'stage_done', stage: 'refactorer' });
223
+ sessionMessages.push(new AIMessage(result));
224
+ saveSession(sessionMessages);
225
+ return result;
226
+ }
227
+ if (decision.domain === 'git') {
228
+ onStageEvent?.({ type: 'stage_start', stage: 'git_agent' });
229
+ const result = await runGitAgent(cleanedInput, (name, args) => {
230
+ onStageEvent?.({ type: 'tool', stage: 'git_agent', tool: name, target: extractTarget(args) });
231
+ onToolCall(name, args);
232
+ }, decision.recommendedModel);
233
+ onStageEvent?.({ type: 'stage_done', stage: 'git_agent' });
234
+ sessionMessages.push(new AIMessage(result));
235
+ saveSession(sessionMessages);
236
+ return result;
237
+ }
238
+ if (decision.domain === 'coding') {
239
+ const graphResult = await runAgentGraph(cleanedInput, buildSystemPrompt(), onToolCall, onStatus ?? (() => { }), decision, onStageEvent // ← add this
240
+ );
241
+ if (graphResult.escalated && onEscalation) {
242
+ onEscalation({
243
+ userMessage: graphResult.userMessage,
244
+ plan: graphResult.plan,
245
+ trajectory: graphResult.trajectory,
246
+ finalOutput: graphResult.finalOutput ?? '',
247
+ });
248
+ }
249
+ if (graphResult.finalOutput) {
250
+ sessionMessages.push(new AIMessage(graphResult.finalOutput));
72
251
  saveSession(sessionMessages);
73
- return result;
252
+ return graphResult.finalOutput;
74
253
  }
75
254
  }
76
- ;
77
- const eventStrem = simpleAgent.streamEvents({ messages: sessionMessages }, { version: 'v2' });
255
+ const simpleAgent = createReactAgent({
256
+ llm: createLlm(decision.recommendedModel),
257
+ tools,
258
+ stateModifier: new SystemMessage(systemPrompt)
259
+ });
260
+ const trimmed = trimHistory(sessionMessages);
261
+ const eventStrem = simpleAgent.streamEvents({ messages: trimmed }, { version: 'v2' });
78
262
  let finalResponse = '';
263
+ let totalInputTokens = 0;
264
+ let totalOutputTokens = 0;
79
265
  for await (const { event, data, name: eventName } of eventStrem) {
80
266
  if (event === 'on_chat_model_stream') {
81
267
  const chunk = data.chunk;
@@ -100,6 +286,11 @@ export async function chat(userInput, onToolCall, onStatus, onToken) {
100
286
  onToolCall(eventName, data.input);
101
287
  }
102
288
  if (event === 'on_chat_model_end') {
289
+ const usage = data.output?.usage_metadata;
290
+ if (usage) {
291
+ totalInputTokens += usage.input_tokens ?? 0;
292
+ totalOutputTokens += usage.output_tokens ?? 0;
293
+ }
103
294
  if (!finalResponse) {
104
295
  const output = data.output;
105
296
  if (typeof output?.content === 'string') {
@@ -112,6 +303,10 @@ export async function chat(userInput, onToolCall, onStatus, onToken) {
112
303
  if (finalResponse) {
113
304
  sessionMessages.push(new AIMessage(finalResponse));
114
305
  }
306
+ if (onUsage && (totalInputTokens + totalOutputTokens) > 0) {
307
+ const model = decision.recommendedModel;
308
+ onUsage(totalInputTokens + totalOutputTokens, estimateCost(model, totalInputTokens, totalOutputTokens));
309
+ }
115
310
  saveSession(sessionMessages);
116
311
  return finalResponse;
117
312
  }
package/dist/agents/coder.js CHANGED
@@ -1,54 +1,66 @@
1
1
  import { createReactAgent } from "@langchain/langgraph/prebuilt";
2
- import { ChatOpenAI } from "@langchain/openai";
2
+ import { makeLlm } from '../llm/factory.js';
3
3
  import { SystemMessage, HumanMessage, BaseMessage } from "@langchain/core/messages";
4
4
  import { StructuredTool } from "@langchain/core/tools";
5
- import { listFilesTool, readFileTool, searchCodeTool, editFileTool, writeFileTool } from '../tools/index.js';
5
+ import { listFilesTool, readFileTool, searchCodeTool, editFileTool, writeFileTool, bashTool } from '../tools/index.js';
6
6
  import { scoreRisk, getRiskMessage } from "../trust/riskScorer.js";
7
- import { requestConfirm } from "../confirmHandler.js";
8
- const coderLlm = new ChatOpenAI({
9
- model: 'gpt-5.4-mini',
10
- apiKey: process.env.OPENAI_API_KEY
11
- });
12
- const coderAgent = createReactAgent({
13
- llm: coderLlm,
14
- tools: [listFilesTool, readFileTool, searchCodeTool, editFileTool, writeFileTool],
15
- stateModifier: new SystemMessage('You are a coding execution agent. Execute the given plan step by step using tools. Be precise and thorough.')
16
- });
17
- export async function runCoder(plan, userMessage, onToolCall) {
18
- const stream = await coderAgent.stream({
19
- messages: [new HumanMessage(`Plan to execute:\n${plan}\n\nOriginal request: ${userMessage}`)]
7
+ import { log } from "../logger.js";
8
+ export async function runCoder(plan, userMessage, onToolCall, model, onReasoning) {
9
+ const coderLlm = makeLlm(model);
10
+ const coderAgent = createReactAgent({
11
+ llm: coderLlm,
12
+ tools: [listFilesTool, readFileTool, searchCodeTool, editFileTool, writeFileTool, bashTool],
13
+ stateModifier: new SystemMessage(`You are a coding execution agent. Execute the given plan step by step using tools.
14
+
15
+ GROUNDING RULES these are not optional:
16
+
17
+ 1. BEFORE editing any file, READ it first with readFile to confirm structure.
18
+ 2. PREFER searchCode over readFile for navigation. Read whole files only when you'll actually edit them.
19
+ 3. For UI features (slash commands, menus, palettes), search src/ui/, src/components/, src/cli/ first — don't trust the plan's filename blindly.
20
+ 4. After every editFile, if the tool returned an error, STOP and read the file again. Do not retry with guesses.
21
+ 5. You MAY create package.json or tsconfig.json when building a new project from scratch. Never add dependencies to an EXISTING package.json unless explicitly asked. Never run npm install via bash.
22
+ 6. Maximum 5 file reads per task. If you need more, you're doing it wrong — use searchCode instead.
23
+ 7. If you can't safely complete the task, STOP and return a failure message. Do not invent.
24
+
25
+ Be surgical, not exhaustive. Most tasks need 2-4 tool calls, not 15. The validator will catch broken output — you don't need to over-verify.`)
20
26
  });
27
+ const stream = await coderAgent.stream({ messages: [new HumanMessage(`Plan to execute:\n${plan}\n\nOriginal request: ${userMessage}`)] }, { recursionLimit: 60, streamMode: 'updates' });
21
28
  let result = '';
29
+ let blocked = false;
22
30
  for await (const chunk of stream) {
23
- if (chunk.agent?.messages) {
24
- const msgs = chunk.agent.messages;
25
- const msg = msgs.at(-1);
26
- if (msg?.tool_calls?.length > 0) {
27
- const toolCall = msg.tool_calls[0];
31
+ if (blocked)
32
+ break;
33
+ // 'updates' mode gives one complete message per graph step.
34
+ // Agent node = LLM output (reasoning or tool call decision).
35
+ // Tools node = tool results — no useful trace info, skip.
36
+ const agentMsgs = chunk.agent?.messages;
37
+ if (!agentMsgs?.length) {
38
+ log('coder:chunk', { keys: Object.keys(chunk).join(',') });
39
+ continue;
40
+ }
41
+ for (const msg of agentMsgs) {
42
+ const toolCalls = msg.tool_calls;
43
+ const text = typeof msg.content === 'string' ? msg.content.trim() : '';
44
+ if (toolCalls?.length > 0) {
45
+ if (text)
46
+ onReasoning?.(text);
47
+ const toolCall = toolCalls[0];
28
48
  const risk = scoreRisk(toolCall.name, toolCall.args);
29
- const riskMsg = getRiskMessage(toolCall.name, risk, toolCall.args);
49
+ getRiskMessage(toolCall.name, risk, toolCall.args);
30
50
  if (risk === 'high') {
31
51
  onToolCall(toolCall.name, toolCall.args);
32
52
  result = `Blocked: I cannot write to sensitive files like ${toolCall.args?.filePath}.`;
53
+ blocked = true;
33
54
  break;
34
55
  }
35
56
  onToolCall(toolCall.name, toolCall.args);
36
57
  }
37
- else if (msg?.content) {
38
- result = msg.content;
58
+ else if (text) {
59
+ onReasoning?.(text);
60
+ result = text;
39
61
  }
40
62
  }
63
+ log('coder:chunk', { keys: Object.keys(chunk).join(',') });
41
64
  }
42
- // for await (const chunk of stream){
43
- // if(chunk.agent?.messages){
44
- // const msgs = chunk.agent.messages as BaseMessage[];
45
- // const msg = msgs.at(-1);
46
- // if((msg as any)?.tool_calls?.length > 0){
47
- // onToolCall((msg as any).tool_calls[0].name, (msg as any).tool_calls[0].args);
48
- // }else if (msg?.content){
49
- // result = msg.content as string;
50
- // }
51
- // }
52
- // }
53
65
  return result;
54
66
  }