@pentoshi/clai 0.13.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/clai.mjs +25 -0
- package/dist/agent/runner.d.ts +38 -1
- package/dist/agent/runner.js +472 -31
- package/dist/agent/runner.js.map +1 -1
- package/dist/commands/update.js +1 -1
- package/dist/commands/update.js.map +1 -1
- package/dist/llm/anthropic.js +31 -12
- package/dist/llm/anthropic.js.map +1 -1
- package/dist/llm/capabilities.d.ts +13 -0
- package/dist/llm/capabilities.js +107 -24
- package/dist/llm/capabilities.js.map +1 -1
- package/dist/llm/gemini.js +17 -4
- package/dist/llm/gemini.js.map +1 -1
- package/dist/llm/http.d.ts +12 -1
- package/dist/llm/http.js +50 -25
- package/dist/llm/http.js.map +1 -1
- package/dist/llm/ollama.js +16 -8
- package/dist/llm/ollama.js.map +1 -1
- package/dist/modes/agent.d.ts +2 -1
- package/dist/modes/agent.js.map +1 -1
- package/dist/modes/ask.d.ts +2 -1
- package/dist/modes/ask.js +5 -1
- package/dist/modes/ask.js.map +1 -1
- package/dist/os/cwd.d.ts +30 -0
- package/dist/os/cwd.js +76 -0
- package/dist/os/cwd.js.map +1 -0
- package/dist/os/detect.js +2 -1
- package/dist/os/detect.js.map +1 -1
- package/dist/prompts/index.d.ts +1 -1
- package/dist/prompts/index.js +66 -21
- package/dist/prompts/index.js.map +1 -1
- package/dist/repl.d.ts +10 -0
- package/dist/repl.js +260 -28
- package/dist/repl.js.map +1 -1
- package/dist/safety/classifier.js +121 -26
- package/dist/safety/classifier.js.map +1 -1
- package/dist/safety/patterns.d.ts +26 -0
- package/dist/safety/patterns.js +167 -0
- package/dist/safety/patterns.js.map +1 -1
- package/dist/store/config.js +2 -1
- package/dist/store/config.js.map +1 -1
- package/dist/store/history.js +19 -5
- package/dist/store/history.js.map +1 -1
- package/dist/store/plan.d.ts +43 -0
- package/dist/store/plan.js +201 -0
- package/dist/store/plan.js.map +1 -0
- package/dist/store/project.js +3 -2
- package/dist/store/project.js.map +1 -1
- package/dist/tools/capabilities.js +6 -1
- package/dist/tools/capabilities.js.map +1 -1
- package/dist/tools/fs.js +3 -2
- package/dist/tools/fs.js.map +1 -1
- package/dist/tools/image.d.ts +13 -0
- package/dist/tools/image.js +81 -0
- package/dist/tools/image.js.map +1 -0
- package/dist/tools/jobs.js +2 -1
- package/dist/tools/jobs.js.map +1 -1
- package/dist/tools/pdf.d.ts +18 -0
- package/dist/tools/pdf.js +200 -0
- package/dist/tools/pdf.js.map +1 -0
- package/dist/tools/registry.js +79 -7
- package/dist/tools/registry.js.map +1 -1
- package/dist/tools/shell.js +3 -2
- package/dist/tools/shell.js.map +1 -1
- package/dist/types.d.ts +16 -0
- package/dist/ui/keys.d.ts +1 -0
- package/dist/ui/keys.js +4 -0
- package/dist/ui/keys.js.map +1 -1
- package/dist/ui/mentions.d.ts +32 -1
- package/dist/ui/mentions.js +304 -27
- package/dist/ui/mentions.js.map +1 -1
- package/dist/ui/output-pane.js +11 -2
- package/dist/ui/output-pane.js.map +1 -1
- package/dist/ui/plan-pane.d.ts +19 -0
- package/dist/ui/plan-pane.js +101 -0
- package/dist/ui/plan-pane.js.map +1 -0
- package/package.json +4 -1
package/dist/agent/runner.js
CHANGED
|
@@ -19,12 +19,25 @@ import { ensureProviderConfigured } from "../commands/providers.js";
|
|
|
19
19
|
import { rememberThinkingFromText, renderThinkingSummary, } from "../ui/thinking.js";
|
|
20
20
|
import { renderMarkdown, indentAndWrapText } from "../ui/markdown.js";
|
|
21
21
|
import { startThinkingSpinner } from "../ui/spinner.js";
|
|
22
|
+
import { safeCwd } from "../os/cwd.js";
|
|
22
23
|
import { analyzeTask } from "./task-analyzer.js";
|
|
23
24
|
import { LoopGuard } from "./loop-guard.js";
|
|
25
|
+
import { createPlan, loadPlan, savePlan, markTask, } from "../store/plan.js";
|
|
26
|
+
import { renderPlanChecklist, renderPlanSidePane } from "../ui/plan-pane.js";
|
|
27
|
+
/**
 * Produce the terminal rendering of a plan.
 *
 * When stdout is a TTY the side-pane renderer is tried first, using the
 * reported column count (0 when the terminal does not report one). If stdout
 * is not a TTY, or the side-pane renderer yields null/undefined, the inline
 * checklist rendering is returned instead.
 *
 * @param plan - The plan object handed to the plan-pane renderers.
 * @returns The side-pane rendering when available, otherwise the checklist.
 */
function renderPlanForTerminal(plan) {
    if (process.stdout.isTTY) {
        const width = process.stdout.columns ?? 0;
        const sidePane = renderPlanSidePane(plan, width);
        if (sidePane !== undefined && sidePane !== null) {
            return sidePane;
        }
    }
    return renderPlanChecklist(plan);
}
|
|
24
35
|
/**
 * Create a fresh per-session policy object.
 *
 * @returns An object with:
 *   - `allow`: an empty Set,
 *   - `pentestAuthorized`: a mutable `{ value: false }` flag box,
 *   - `sessionId`: `sess-<base36 timestamp>-<up to 6 base36 random chars>`,
 *   - `planApproved`: a mutable `{ value: false }` flag box.
 */
export function createSessionPolicy() {
    // The id only needs to be unique enough to key session-scoped state;
    // Math.random() is not a cryptographic source.
    const timestampPart = Date.now().toString(36);
    const randomPart = Math.random().toString(36).slice(2, 8);
    const policy = {
        allow: new Set(),
        pentestAuthorized: { value: false },
        sessionId: `sess-${timestampPart}-${randomPart}`,
        planApproved: { value: false },
    };
    return policy;
}
|
|
30
43
|
function tryParseCall(raw) {
|
|
@@ -139,6 +152,147 @@ export function parseToolCall(text, options = {}) {
|
|
|
139
152
|
}
|
|
140
153
|
return undefined;
|
|
141
154
|
}
|
|
155
|
+
// Argument keys that the built-in tools accept. Used to recognize when a
// model emitted a bare args object (e.g. {"path":"file.pdf"}) — intending a
// tool call but forgetting the {"name","args"} wrapper and the ```tool fence.
//
// Keep this in sync with the args the tool handlers actually read: the plan
// tools read `detail` and `kind` (plan.create) and `note` (task.update), so
// those keys are listed too. Otherwise a bare plan/task args object carrying
// them would not be recognized and would be shown as a final answer.
const TOOL_ARG_KEYS = new Set([
    "command",
    "path",
    "paths",
    "url",
    "query",
    "target",
    "pattern",
    "tool",
    "tools",
    "files",
    "content",
    "calls",
    "record",
    "ports",
    "profile",
    "id",
    "lang",
    "dpi",
    "psm",
    "recursive",
    "oldText",
    "newText",
    "expectedReplacements",
    "goal",
    "detail",
    "kind",
    "tasks",
    "taskId",
    "state",
    "note",
    "method",
    "body",
    "headers",
    "maxBytes",
    "maxResults",
    "cwd",
    "name",
    "concurrency",
]);
|
|
195
|
+
/**
 * Unwrap text that consists of exactly one fenced code block.
 *
 * The input is trimmed first. If the trimmed text starts with ``` (optionally
 * followed by a letters-only language tag) and ends with ```, the content
 * between the fences is returned, trimmed. Otherwise the original text is
 * returned, trimmed.
 *
 * @param text - Raw model output.
 * @returns The de-fenced (or unchanged) text with surrounding whitespace removed.
 */
function stripLoneFence(text) {
    const fencePattern = /^```[a-zA-Z]*\s*\n?([\s\S]*?)\n?```$/;
    const match = fencePattern.exec(text.trim());
    // The capture group always participates in a match, so match[1] is a
    // string (possibly empty) whenever the pattern matched.
    const body = match ? match[1] : text;
    return body.trim();
}
|
|
205
|
+
/**
 * Classify model output that is nothing but a single JSON object.
 *
 * The text is de-fenced with stripLoneFence, must start with "{" and end with
 * "}", and must parse as a non-array JSON object. Then:
 *   - if tryParseCall accepts the text, the result is `{ call }`;
 *   - else if the object has between 1 and 6 keys and every key is listed in
 *     TOOL_ARG_KEYS, the result is `{ argsOnly: true }` so the caller can ask
 *     the model to re-emit a named, fenced tool call;
 *   - otherwise the result is undefined (treated as an ordinary answer).
 *
 * @param text - The visible assistant output.
 * @returns `{ call }`, `{ argsOnly: true }`, or undefined.
 */
export function recognizeBareToolJson(text) {
    const candidate = stripLoneFence(text);
    // Only a lone object spanning the whole (de-fenced) output qualifies.
    const spansWholeOutput = candidate.startsWith("{") && candidate.endsWith("}");
    if (!spansWholeOutput) {
        return undefined;
    }
    let value;
    try {
        value = JSON.parse(candidate);
    }
    catch {
        return undefined;
    }
    const isPlainObject = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isPlainObject) {
        return undefined;
    }
    // A complete {name, args} call that the earlier matchers did not catch.
    const recovered = tryParseCall(candidate);
    if (recovered) {
        return { call: recovered };
    }
    // Bare args object: a small object made up solely of known tool-arg keys.
    const argNames = Object.keys(value);
    const plausibleSize = argNames.length >= 1 && argNames.length <= 6;
    if (plausibleSize && argNames.every((argName) => TOOL_ARG_KEYS.has(argName))) {
        return { argsOnly: true };
    }
    return undefined;
}
|
|
246
|
+
/**
 * Heuristically detect a tool call that was started but never finished,
 * e.g. because the model's output was cut off mid-JSON.
 *
 * Two signals are checked, in order:
 *   1. A ```tool fence is present but no ```tool ... ``` pair can be matched.
 *   2. A `{"name":"...","args"` prefix is present and, scanning from it, the
 *      braces outside of string literals never return to depth zero.
 *
 * @param text - The visible assistant output.
 * @returns true when either signal fires, false otherwise.
 */
export function looksLikeTruncatedToolCall(text) {
    const hasOpeningFence = /```tool\s*\n?/i.test(text);
    const hasClosedFence = /```tool[\s\S]*?```/i.test(text);
    if (hasOpeningFence && !hasClosedFence) {
        return true;
    }
    const start = text.search(/\{\s*"name"\s*:\s*"[A-Za-z][\w.]*"\s*,\s*"args"/);
    if (start < 0) {
        return false;
    }
    // Walk the candidate JSON, tracking string state and backslash escapes so
    // braces inside string values do not affect the nesting depth.
    let nesting = 0;
    let insideString = false;
    let skipNext = false;
    for (let index = start; index < text.length; index += 1) {
        const ch = text[index];
        if (skipNext) {
            skipNext = false;
            continue;
        }
        if (ch === "\\") {
            skipNext = true;
            continue;
        }
        if (ch === '"') {
            insideString = !insideString;
            continue;
        }
        if (insideString) {
            continue;
        }
        if (ch === "{") {
            nesting += 1;
        }
        else if (ch === "}") {
            nesting -= 1;
            if (nesting === 0) {
                // The object closed properly, so it is not truncated.
                return false;
            }
        }
    }
    // Ran out of text before the outermost object closed.
    return true;
}
|
|
142
296
|
/** Extract the text before the tool call block for display purposes */
|
|
143
297
|
function textBeforeToolCall(text) {
|
|
144
298
|
const patterns = [
|
|
@@ -177,13 +331,17 @@ function formatToolArgs(call) {
|
|
|
177
331
|
if (call.name === "fs.writeMany") {
|
|
178
332
|
const files = Array.isArray(call.args.files) ? call.args.files : [];
|
|
179
333
|
const names = files
|
|
180
|
-
.map((f) => f && typeof f === "object"
|
|
334
|
+
.map((f) => f && typeof f === "object"
|
|
335
|
+
? String(f.path ?? "")
|
|
336
|
+
: "")
|
|
181
337
|
.filter(Boolean);
|
|
182
338
|
const preview = names.slice(0, 4).join(", ");
|
|
183
339
|
return `${names.length} file(s)${preview ? `: ${preview}${names.length > 4 ? ", …" : ""}` : ""}`;
|
|
184
340
|
}
|
|
185
341
|
if (call.name === "fs.search")
|
|
186
342
|
return String(call.args.pattern ?? "");
|
|
343
|
+
if (call.name === "image.ocr" || call.name === "pdf.read")
|
|
344
|
+
return String(call.args.path ?? "");
|
|
187
345
|
if (call.name === "http.fetch" || call.name === "web.fetch")
|
|
188
346
|
return String(call.args.url ?? "");
|
|
189
347
|
if (call.name === "web.search")
|
|
@@ -191,7 +349,7 @@ function formatToolArgs(call) {
|
|
|
191
349
|
if (call.name === "pkg.install")
|
|
192
350
|
return String(call.args.tool ?? "");
|
|
193
351
|
if (call.name === "fs.list")
|
|
194
|
-
return String(call.args.path ??
|
|
352
|
+
return String(call.args.path ?? safeCwd());
|
|
195
353
|
return JSON.stringify(call.args);
|
|
196
354
|
}
|
|
197
355
|
const VOLATILE_SIGNAL_RE = /\b(?:current(?:ly)?|latest|newest|today|now|right now|live|recent|breaking|news|release[sd]?|version|prices?|stocks?|market|rates?|weather|forecast|elections?|results?|rankings?|standings?|stats?|cve|advis(?:ory|ories)|vulnerabilit(?:y|ies))\b/i;
|
|
@@ -210,6 +368,12 @@ const BUILD_STACK_RE = /\b(?:react|next(?:\.?js)?|vue|svelte|angular|vite|webpac
|
|
|
210
368
|
// clearly mean "keep going with what we were doing".
|
|
211
369
|
const CONTINUATION_RE = /^(?:do\s+it|build\s+it|build\s+fully|build\s+it\s+fully|go\s+ahead|continue|proceed|keep\s+going|finish(?:\s+it)?|complete(?:\s+it)?|yes|ok(?:ay)?|make\s+it|run\s+it|next|on\s+your\s+own|build\s+(?:fully\s+)?on\s+your\s+own)\b/i;
|
|
212
370
|
const INCOMPLETE_RE = /\b(?:not\s+complete|incomplete|isn'?t\s+(?:done|complete|working|finished)|doesn'?t\s+work|still\s+(?:broken|missing|failing)|missing\s+(?:files?|parts?)|finish\s+(?:the|it)|complete\s+(?:the|it))\b/i;
|
|
371
|
+
// The synthetic message injected when the user runs /implement to approve a
|
|
372
|
+
// plan ("I approve the plan. Execute it now, task by task…"). It must always
|
|
373
|
+
// count as a build/continuation turn — it contains the word "now", which
|
|
374
|
+
// would otherwise trip the volatile-info freshness guard and divert the run
|
|
375
|
+
// into a pointless web.search instead of executing the plan.
|
|
376
|
+
const PLAN_EXECUTION_RE = /\b(?:approve the plan|execute it (?:now|task by task)|task by task|execute the plan|implement the plan)\b/i;
|
|
213
377
|
/**
|
|
214
378
|
* Decide whether this turn should get a generous step budget because it is
|
|
215
379
|
* a multi-file build, a continuation of one, or a "it's not done yet" nudge.
|
|
@@ -221,7 +385,8 @@ export function looksLikeBuildTask(prompt, history) {
|
|
|
221
385
|
if (BUILD_TASK_RE.test(text) ||
|
|
222
386
|
BUILD_STACK_RE.test(text) ||
|
|
223
387
|
CONTINUATION_RE.test(text) ||
|
|
224
|
-
INCOMPLETE_RE.test(text)
|
|
388
|
+
INCOMPLETE_RE.test(text) ||
|
|
389
|
+
PLAN_EXECUTION_RE.test(text)) {
|
|
225
390
|
return true;
|
|
226
391
|
}
|
|
227
392
|
// Inspect recent history: if the conversation was already about building
|
|
@@ -245,11 +410,34 @@ export function requiresFreshWebSearch(prompt) {
|
|
|
245
410
|
if (STATIC_DISAMBIGUATION_RE.test(text) || LOCAL_RUNTIME_RE.test(text)) {
|
|
246
411
|
return false;
|
|
247
412
|
}
|
|
413
|
+
// Plan-execution and terse continuation turns are never "fetch current
|
|
414
|
+
// info" turns, even when they contain words like "now". (We intentionally
|
|
415
|
+
// do NOT exclude on build-stack keywords here — "latest vite version" is a
|
|
416
|
+
// legitimate version lookup. The runAgentLoop caller additionally gates the
|
|
417
|
+
// guard on looksLikeBuildTask so a real scaffold turn never searches.)
|
|
418
|
+
if (PLAN_EXECUTION_RE.test(text) || CONTINUATION_RE.test(text)) {
|
|
419
|
+
return false;
|
|
420
|
+
}
|
|
248
421
|
return (VOLATILE_SIGNAL_RE.test(text) ||
|
|
249
422
|
VOLATILE_ROLE_QUERY_RE.test(text) ||
|
|
250
423
|
ROLE_OF_ENTITY_RE.test(text) ||
|
|
251
424
|
EXPLICIT_WEB_LOOKUP_RE.test(text));
|
|
252
425
|
}
|
|
426
|
+
/**
 * Flag a plan whose only task crams the whole job into one step.
 *
 * Only single-task plans are inspected. The lone title counts as "lumped"
 * when any of the following holds: it contains two or more commas, it
 * contains the standalone word "and" (case-insensitive), it contains two or
 * more slashes, or it is longer than 90 characters.
 *
 * @param taskTitles - Array of task title strings.
 * @returns true when the plan is a single lumped task, false otherwise.
 */
export function isLumpedSingleTask(taskTitles) {
    if (taskTitles.length !== 1) {
        return false;
    }
    const [title] = taskTitles;
    // Number of occurrences of a single-character separator in the title.
    const occurrences = (separator) => title.split(separator).length - 1;
    if (occurrences(",") >= 2) {
        return true;
    }
    if (/\band\b/i.test(title)) {
        return true;
    }
    if (occurrences("/") >= 2) {
        return true;
    }
    return title.length > 90;
}
|
|
253
441
|
function freshnessGuardMessage(now = new Date()) {
|
|
254
442
|
return (`Freshness guard for this turn: the latest user prompt appears to ask for current, volatile, or externally verifiable information. The present moment is ${currentDateTimeContext(now)}. ` +
|
|
255
443
|
"Before answering, call web.search FIRST with a concise query derived from the user prompt. " +
|
|
@@ -378,12 +566,161 @@ async function confirmToolExecution(call, autoConfirm, session) {
|
|
|
378
566
|
default: true,
|
|
379
567
|
});
|
|
380
568
|
}
|
|
569
|
+
/**
 * Build the system-prompt section that describes the session's active plan.
 *
 * The section contains a header with the plan goal and status, the trimmed
 * plan detail (omitted when blank), a numbered task list showing each task's
 * id, state and title, and a closing instruction that depends on whether the
 * user has approved the plan.
 *
 * @param plan - Plan object with `goal`, `status`, `detail` and `tasks`
 *   (each task having `id`, `state`, `title`).
 * @param approved - Whether the user has approved the plan.
 * @returns The section text, lines joined with "\n".
 */
function planContextMessage(plan, approved) {
    const header = `ACTIVE PLAN for this session (goal: ${plan.goal}, status: ${plan.status}):`;
    const detail = plan.detail.trim();
    const taskLines = plan.tasks.map((task, index) => ` ${index + 1}. [${task.id}] (${task.state}) ${task.title}`);
    const executeNow = "The user APPROVED this plan. Execute it task by task NOW: before starting a task call " +
        'task.update with {"taskId":"<id>","state":"in_progress"}, do the work with real tool calls, ' +
        'then call task.update {"taskId":"<id>","state":"done"} (or "failed"/"skipped" with a note). ' +
        "Actually run installs and start servers — never claim something ran without a successful tool call. " +
        "When all tasks are done, verify and give a final summary.";
    const waitForApproval = "This plan is NOT yet approved. If the user is refining it, update it with plan.create again. " +
        "Do NOT execute tasks until the user runs /implement.";
    const sections = [
        header,
        // A blank detail contributes no line at all.
        ...(detail ? [detail] : []),
        "Tasks:",
        ...taskLines,
        approved ? executeNow : waitForApproval,
    ];
    return sections.join("\n");
}
|
|
592
|
+
/**
 * Execute the session-scoped planning tools inline.
 *
 * `plan.create` validates the goal and task titles, rejects a single lumped
 * task, creates and saves the plan, resets the session's approval flag, and
 * returns the rendered plan. Any other call name is treated as `task.update`:
 * the session's plan is loaded, the requested state is validated, the task is
 * marked, the plan status is advanced, and the plan is saved and re-rendered.
 *
 * Load and save errors are swallowed, so persistence is best-effort: a failed
 * load is reported as "no active plan" and a failed save is not reported.
 *
 * @param call - Tool call with `name` and `args`.
 * @param session - Session policy providing `sessionId` and `planApproved`.
 * @param ctx - Currently unused.
 * @returns `{ handled, ok, display, modelNote }` describing the outcome.
 */
async function handlePlanTool(call, session, ctx) {
    void ctx;
    // Read a string-valued argument, substituting `fallback` for anything else.
    const stringArg = (key, fallback) => typeof call.args[key] === "string" ? call.args[key] : fallback;
    // Shape of every failure result: red display text plus a note for the model.
    const failure = (displayText, modelNote) => ({
        handled: true,
        ok: false,
        display: chalk.red(displayText),
        modelNote,
    });
    if (call.name === "plan.create") {
        const goal = stringArg("goal", "");
        const detail = stringArg("detail", "");
        const kind = stringArg("kind", "general");
        const requestedTasks = Array.isArray(call.args.tasks) ? call.args.tasks : [];
        // Keep only non-empty string titles.
        const taskTitles = requestedTasks.filter((title) => typeof title === "string" && title !== "");
        if (!goal || taskTitles.length === 0) {
            return failure(" ✗ plan.create needs a non-empty goal and at least one task title\n", "plan.create failed: provide a string goal and a non-empty tasks array of step titles.");
        }
        // A lone task that enumerates the whole build is rejected so the
        // model produces a real step-by-step checklist instead.
        if (isLumpedSingleTask(taskTitles)) {
            return failure(" ✗ plan.create: that single task lumps the whole build into one step\n", "plan.create rejected: you put everything into ONE task. Break it into 3-8 SEPARATE, " +
                "ordered tasks — each a distinct action, e.g. 'scaffold package.json + vite config', " +
                "'create index.html + entry (main.jsx)', 'build App + Post components', 'add posts data + styles', " +
                "'install deps and run dev server to verify'. Call plan.create again with that tasks array.");
        }
        const created = createPlan({
            sessionId: session.sessionId,
            goal,
            detail,
            taskTitles,
            kind,
        });
        await savePlan(created).catch(() => undefined);
        // A freshly (re)created plan always needs a new /implement approval.
        session.planApproved.value = false;
        const renderedPlan = renderPlanForTerminal(created);
        return {
            handled: true,
            ok: true,
            display: `${chalk.cyan(" ● planning\n")}${renderedPlan}\n${chalk.dim(" ✦ plan created — press Ctrl+P to view it, or type /implement to approve and run it\n")}`,
            modelNote: `Plan saved with ${created.tasks.length} task(s). STOP here and wait. ` +
                "Do NOT start executing tasks until the user approves with /implement. " +
                "When approved you will receive a message telling you to begin; then work task by task, " +
                "calling task.update to mark each in_progress before and done after you finish it.",
        };
    }
    // task.update
    const activePlan = await loadPlan(session.sessionId).catch(() => undefined);
    if (!activePlan) {
        return failure(" ✗ task.update: no active plan — call plan.create first\n", "task.update failed: there is no active plan. Call plan.create first.");
    }
    const taskId = stringArg("taskId", "");
    const requestedState = stringArg("state", "");
    const note = stringArg("note", undefined);
    const validStates = ["pending", "in_progress", "done", "failed", "skipped"];
    if (!validStates.includes(requestedState)) {
        const allowed = validStates.join(", ");
        return failure(` ✗ task.update: state must be one of ${allowed}\n`, `task.update failed: state must be one of ${allowed}.`);
    }
    if (!markTask(activePlan, taskId, requestedState, note)) {
        const ids = activePlan.tasks.map((task) => task.id).join(", ");
        return failure(` ✗ task.update: unknown taskId "${taskId}" (have: ${ids})\n`, `task.update failed: unknown taskId. Valid ids: ${ids}.`);
    }
    // Any successful update moves a draft/approved plan into progress; once
    // every task is done, skipped or failed the plan counts as completed.
    if (activePlan.status === "draft" || activePlan.status === "approved") {
        activePlan.status = "in_progress";
    }
    const finishedStates = new Set(["done", "skipped", "failed"]);
    const everythingFinished = activePlan.tasks.every((task) => finishedStates.has(task.state));
    if (everythingFinished) {
        activePlan.status = "completed";
    }
    await savePlan(activePlan).catch(() => undefined);
    const renderedPlan = renderPlanForTerminal(activePlan);
    const modelNote = everythingFinished
        ? "Task updated. ALL tasks are now finished. Verify the result and give your final summary."
        : "Task updated. Continue with the next pending task.";
    return {
        handled: true,
        ok: true,
        display: `${renderedPlan}\n`,
        modelNote,
    };
}
|
|
381
710
|
export async function runAgentLoop(prompt, options = {}) {
|
|
382
711
|
const config = getConfig();
|
|
383
712
|
const maxSteps = options.maxSteps ?? 30;
|
|
384
713
|
const projectContext = await loadProjectContext();
|
|
385
714
|
const toolNames = availableToolNames();
|
|
386
|
-
|
|
715
|
+
// Build / scaffold / continuation turns must NEVER be diverted into a
|
|
716
|
+
// web.search for "current info". The /implement directive ("Execute it
|
|
717
|
+
// now…") and prompts like "create a react app" contain words such as
|
|
718
|
+
// "now"/"latest" that trip the volatile-info regex; without this guard the
|
|
719
|
+
// agent burns its turn searching the date instead of writing files.
|
|
720
|
+
const buildLikeTurn = looksLikeBuildTask(prompt, options.history);
|
|
721
|
+
const freshWebSearchRequired = !buildLikeTurn &&
|
|
722
|
+
toolNames.includes("web.search") &&
|
|
723
|
+
requiresFreshWebSearch(prompt);
|
|
387
724
|
const systemSections = [renderAgentSystemPrompt(toolNames.join(", "))];
|
|
388
725
|
if (projectContext) {
|
|
389
726
|
systemSections.push(`Project context from .clai/context.md:\n${projectContext}`);
|
|
@@ -391,17 +728,39 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
391
728
|
if (freshWebSearchRequired) {
|
|
392
729
|
systemSections.push(freshnessGuardMessage());
|
|
393
730
|
}
|
|
394
|
-
const fullSystemPrompt = systemSections.join("\n\n");
|
|
395
|
-
const messages = [
|
|
396
|
-
{ role: "system", content: fullSystemPrompt },
|
|
397
|
-
...(options.history ?? []),
|
|
398
|
-
{ role: "user", content: prompt },
|
|
399
|
-
];
|
|
400
731
|
let provider = options.provider ?? config.defaultProvider;
|
|
401
732
|
await ensureProviderConfigured(provider);
|
|
402
733
|
let model = options.model ?? config.defaultModel;
|
|
403
734
|
let lastAnswer = "";
|
|
404
735
|
const session = options.session ?? createSessionPolicy();
|
|
736
|
+
// ── Active plan context ────────────────────────────────────────────
|
|
737
|
+
// If this session already has a plan, inject it so the model keeps it in
|
|
738
|
+
// context. When the user has approved it (via /implement) we instruct the
|
|
739
|
+
// agent to execute task by task; otherwise the agent should refine/wait.
|
|
740
|
+
const activePlan = await loadPlan(session.sessionId).catch(() => undefined);
|
|
741
|
+
if (activePlan) {
|
|
742
|
+
systemSections.push(planContextMessage(activePlan, session.planApproved.value));
|
|
743
|
+
}
|
|
744
|
+
const fullSystemPrompt = systemSections.join("\n\n");
|
|
745
|
+
const userMessage = { role: "user", content: prompt };
|
|
746
|
+
if (options.images && options.images.length > 0) {
|
|
747
|
+
userMessage.images = options.images;
|
|
748
|
+
}
|
|
749
|
+
const messages = [
|
|
750
|
+
{ role: "system", content: fullSystemPrompt },
|
|
751
|
+
...(options.history ?? []),
|
|
752
|
+
userMessage,
|
|
753
|
+
];
|
|
754
|
+
const recoveryUserMessage = (content) => {
|
|
755
|
+
const message = { role: "user", content };
|
|
756
|
+
if (options.images && options.images.length > 0) {
|
|
757
|
+
// Some OpenAI-compatible gateways/models attend most strongly to the
|
|
758
|
+
// latest user turn. Keep the image attached on recovery nudges so a
|
|
759
|
+
// thinking-only retry does not degrade into OCR/tool guessing.
|
|
760
|
+
message.images = options.images;
|
|
761
|
+
}
|
|
762
|
+
return message;
|
|
763
|
+
};
|
|
405
764
|
// Track recent tool calls to detect models stuck in a loop calling the
|
|
406
765
|
// same tool with the same arguments over and over (e.g. pentest.recon
|
|
407
766
|
// called 3× on the same target without summarizing).
|
|
@@ -409,6 +768,13 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
409
768
|
// Track consecutive thinking-only responses so we can nudge the model
|
|
410
769
|
// to actually act instead of silently returning an empty answer.
|
|
411
770
|
let emptyVisibleRetries = 0;
|
|
771
|
+
// Track tool calls truncated by the token limit so we can ask the model
|
|
772
|
+
// to retry in smaller pieces instead of leaking broken JSON as an answer.
|
|
773
|
+
let truncatedToolRetries = 0;
|
|
774
|
+
// Track bare-args JSON tool calls (missing the {name,args} wrapper / fence)
|
|
775
|
+
// so we can nudge the model to re-emit a proper fenced call a few times
|
|
776
|
+
// before giving up, instead of leaking the JSON as a final answer.
|
|
777
|
+
let bareToolJsonRetries = 0;
|
|
412
778
|
// For volatile live-info prompts, make one corrective pass if a model
|
|
413
779
|
// ignores the freshness guard and tries to answer from stale memory.
|
|
414
780
|
let sawFreshWebSearch = false;
|
|
@@ -428,7 +794,7 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
428
794
|
// like a build/scaffold or a continuation of one.
|
|
429
795
|
const analysis = analyzeTask(prompt);
|
|
430
796
|
const hasHistory = (options.history?.length ?? 0) > 0;
|
|
431
|
-
const buildLike =
|
|
797
|
+
const buildLike = buildLikeTurn;
|
|
432
798
|
let stepBudget = analysis.complexity === "simple"
|
|
433
799
|
? 15
|
|
434
800
|
: analysis.complexity === "standard"
|
|
@@ -472,9 +838,11 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
472
838
|
temperature: 0.2,
|
|
473
839
|
// Reasoning models can spend a lot on hidden thinking; give
|
|
474
840
|
// them headroom so the visible answer / tool call isn't
|
|
475
|
-
// truncated to silence.
|
|
476
|
-
//
|
|
477
|
-
|
|
841
|
+
// truncated to silence. The non-thinking budget must be large
|
|
842
|
+
// enough for a multi-file fs.writeMany payload — a truncated
|
|
843
|
+
// tool-call JSON fails to parse and used to leak a broken
|
|
844
|
+
// ```tool block to the screen with no files written.
|
|
845
|
+
maxTokens: config.thinking?.enabled ? 16_384 : 8_192,
|
|
478
846
|
signal: options.signal,
|
|
479
847
|
thinking: config.thinking,
|
|
480
848
|
}, (token) => {
|
|
@@ -526,12 +894,10 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
526
894
|
process.stdout.write(`${renderThinkingSummary(assistantText.thinkContent)}\n`);
|
|
527
895
|
process.stdout.write(chalk.yellow(" ⚠ model produced only thinking — nudging it to take action\n"));
|
|
528
896
|
messages.push({ role: "assistant", content: completion.text });
|
|
529
|
-
messages.push(
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
"Do NOT just think — take action NOW.",
|
|
534
|
-
});
|
|
897
|
+
messages.push(recoveryUserMessage("You only produced internal reasoning with no visible answer or tool call. " +
|
|
898
|
+
"You MUST either call a tool using the ```tool format or provide your final answer. " +
|
|
899
|
+
"If images are attached, inspect them directly for visual details (text, colors, layout, spacing, style) instead of using OCR unless explicitly needed. " +
|
|
900
|
+
"Do NOT just think — take action NOW."));
|
|
535
901
|
continue;
|
|
536
902
|
}
|
|
537
903
|
// Exhausted retries — fall through to the normal empty-answer path
|
|
@@ -541,10 +907,42 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
541
907
|
// Reset the counter on any successful visible output.
|
|
542
908
|
emptyVisibleRetries = 0;
|
|
543
909
|
}
|
|
544
|
-
|
|
910
|
+
let call = parseToolCall(assistantText.visible, {
|
|
545
911
|
strict: getConfig().parserStrict,
|
|
546
912
|
});
|
|
913
|
+
// Recovery: the model meant to call a tool but emitted a bare JSON object
|
|
914
|
+
// with no ```tool fence — either a complete {name,args} the strict
|
|
915
|
+
// matchers missed (recover it directly), or just an args object like
|
|
916
|
+
// {"path":"file.pdf"} with the wrapper dropped (nudge a retry below so
|
|
917
|
+
// the requested action runs instead of the JSON leaking as the answer).
|
|
918
|
+
let bareArgsOnly = false;
|
|
919
|
+
let recoveredFromBareJson = false;
|
|
547
920
|
if (!call) {
|
|
921
|
+
const bare = recognizeBareToolJson(assistantText.visible);
|
|
922
|
+
if (bare?.call) {
|
|
923
|
+
call = bare.call;
|
|
924
|
+
recoveredFromBareJson = true;
|
|
925
|
+
process.stdout.write(chalk.dim(" ℹ recovered an unfenced tool call from bare JSON\n"));
|
|
926
|
+
}
|
|
927
|
+
else if (bare?.argsOnly) {
|
|
928
|
+
bareArgsOnly = true;
|
|
929
|
+
}
|
|
930
|
+
}
|
|
931
|
+
if (!call) {
|
|
932
|
+
if (bareArgsOnly) {
|
|
933
|
+
bareToolJsonRetries += 1;
|
|
934
|
+
if (bareToolJsonRetries <= 3) {
|
|
935
|
+
process.stdout.write(chalk.yellow(" ⚠ tool call missing its name/fence — asking the model to re-emit a proper ```tool block\n"));
|
|
936
|
+
messages.push({ role: "assistant", content: assistantText.visible });
|
|
937
|
+
messages.push(recoveryUserMessage("Your previous message was a bare JSON args object with no tool name and no ```tool fence, so NOTHING ran. " +
|
|
938
|
+
"Reply with ONLY a fenced ```tool block of the form " +
|
|
939
|
+
'`{"name": "<tool>", "args": { ... }}`. For example, to read a PDF:\n' +
|
|
940
|
+
'```tool\n{"name":"pdf.read","args":{"path":"/abs/file.pdf"}}\n```\n' +
|
|
941
|
+
"Choose the correct tool name for the task and include those args."));
|
|
942
|
+
continue;
|
|
943
|
+
}
|
|
944
|
+
// Exhausted retries — fall through to the malformed/truncated checks below, then the normal answer path.
|
|
945
|
+
}
|
|
548
946
|
// Detect the case where the model emitted sentinel-style tool-call
|
|
549
947
|
// markers but the body was malformed or truncated. Printing those
|
|
550
948
|
// raw tokens looks like a crash to the user — instead, ask the
|
|
@@ -552,15 +950,33 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
552
950
|
if (/<\|tool_call(?:s_section)?_begin\|>|<\|tool_call_argument_begin\|>/i.test(assistantText.visible)) {
|
|
553
951
|
process.stdout.write(chalk.yellow(" ⚠ tool call was malformed or cut off — asking the model to retry in JSON form\n"));
|
|
554
952
|
messages.push({ role: "assistant", content: assistantText.visible });
|
|
555
|
-
messages.push(
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
'of the form `{"name": "<tool>", "args": { ... }}`. ' +
|
|
560
|
-
"Do not use <|tool_call_begin|> markers.",
|
|
561
|
-
});
|
|
953
|
+
messages.push(recoveryUserMessage("Your previous tool call was malformed or truncated. " +
|
|
954
|
+
"Reply with ONLY a fenced ```tool block containing valid JSON " +
|
|
955
|
+
'of the form `{"name": "<tool>", "args": { ... }}`. ' +
|
|
956
|
+
"Do not use <|tool_call_begin|> markers."));
|
|
562
957
|
continue;
|
|
563
958
|
}
|
|
959
|
+
// Detect a tool call that opened but was cut off by the token limit
|
|
960
|
+
// (most common with a large multi-file fs.writeMany). Retrying with a
|
|
961
|
+
// nudge to split the work is far better than rendering broken JSON as
|
|
962
|
+
// a final answer and leaving the project half-created.
|
|
963
|
+
if (looksLikeTruncatedToolCall(assistantText.visible)) {
|
|
964
|
+
truncatedToolRetries += 1;
|
|
965
|
+
if (truncatedToolRetries <= 3) {
|
|
966
|
+
process.stdout.write(chalk.yellow(" ⚠ tool call was cut off (output too long) — asking the model to retry in smaller pieces\n"));
|
|
967
|
+
messages.push({ role: "assistant", content: assistantText.visible });
|
|
968
|
+
messages.push({
|
|
969
|
+
role: "user",
|
|
970
|
+
content: "Your previous tool call was cut off before it finished — the JSON was incomplete, so NOTHING ran. " +
|
|
971
|
+
"Retry now with a COMPLETE, valid ```tool block. " +
|
|
972
|
+
"If it was a large fs.writeMany, split it into SMALLER batches (3-5 files per call, and keep each file's content concise) " +
|
|
973
|
+
"so the whole JSON fits in one response. Do NOT claim any file was written until a tool call actually succeeds.",
|
|
974
|
+
});
|
|
975
|
+
continue;
|
|
976
|
+
}
|
|
977
|
+
// Exhausted retries — fall through so we don't loop forever, but the
|
|
978
|
+
// user at least sees the (broken) output and the stop notice.
|
|
979
|
+
}
|
|
564
980
|
// Normal final-answer path: strip any stray sentinel tokens that
|
|
565
981
|
// somehow leaked into prose so the answer renders cleanly.
|
|
566
982
|
const cleaned = stripSentinelTokens(assistantText.visible);
|
|
@@ -612,8 +1028,12 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
612
1028
|
if (loopCheck.reason) {
|
|
613
1029
|
process.stdout.write(chalk.dim(` ℹ ${loopCheck.reason}\n`));
|
|
614
1030
|
}
|
|
615
|
-
// Print only non-thinking text before the tool call.
|
|
616
|
-
|
|
1031
|
+
// Print only non-thinking text before the tool call. When the call was
|
|
1032
|
+
// recovered from a bare JSON object (the whole message WAS the call),
|
|
1033
|
+
// there is no prose to show — skip it so we don't echo the raw JSON.
|
|
1034
|
+
const beforeTool = recoveredFromBareJson
|
|
1035
|
+
? ""
|
|
1036
|
+
: textBeforeToolCall(assistantText.visible);
|
|
617
1037
|
if (beforeTool) {
|
|
618
1038
|
process.stdout.write(renderMarkdown(beforeTool) + "\n");
|
|
619
1039
|
}
|
|
@@ -621,6 +1041,25 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
621
1041
|
process.stdout.write(`${renderThinkingSummary(assistantText.thinkContent)}\n`);
|
|
622
1042
|
}
|
|
623
1043
|
messages.push({ role: "assistant", content: assistantText.visible });
|
|
1044
|
+
// ── Plan / task tools (session-scoped, handled inline) ─────────────
|
|
1045
|
+
// These don't go through the generic registry because they need the
|
|
1046
|
+
// session id and mutate the live plan that the user can view (Ctrl+P).
|
|
1047
|
+
if (call.name === "plan.create" || call.name === "task.update") {
|
|
1048
|
+
const planResult = await handlePlanTool(call, session, {
|
|
1049
|
+
loopGuard,
|
|
1050
|
+
step,
|
|
1051
|
+
});
|
|
1052
|
+
if (planResult.handled) {
|
|
1053
|
+
productiveSteps += 1;
|
|
1054
|
+
loopGuard.recordAttempt(step, call.name, call.args, planResult.ok, 0);
|
|
1055
|
+
process.stdout.write(planResult.display);
|
|
1056
|
+
messages.push({
|
|
1057
|
+
role: "tool",
|
|
1058
|
+
content: `Tool ${call.name} result (ok=${planResult.ok}):\n${planResult.modelNote}`,
|
|
1059
|
+
});
|
|
1060
|
+
continue;
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
624
1063
|
const scope = await loadScope();
|
|
625
1064
|
const decision = classifyToolCall(call, { scope });
|
|
626
1065
|
await auditLog("tool.classified", {
|
|
@@ -809,7 +1248,9 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
809
1248
|
? String(call.args.command ?? "").split(/\s+/)[0]
|
|
810
1249
|
: call.name === "net.scan"
|
|
811
1250
|
? "nmap"
|
|
812
|
-
:
|
|
1251
|
+
: call.name === "image.ocr"
|
|
1252
|
+
? "tesseract"
|
|
1253
|
+
: undefined;
|
|
813
1254
|
if (cmdName) {
|
|
814
1255
|
process.stdout.write(chalk.yellow(` ⚠ ${cmdName} not found — asking model to install and retry\n`));
|
|
815
1256
|
messages.push({
|