@workermill/agent 0.7.13 → 0.7.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/plan-validator.d.ts +1 -1
- package/dist/plan-validator.js +1 -1
- package/dist/planner.d.ts +3 -2
- package/dist/planner.js +29 -53
- package/package.json +1 -1
package/dist/plan-validator.d.ts
CHANGED
|
@@ -39,7 +39,7 @@ export interface CriticResult {
|
|
|
39
39
|
suggestedChanges?: string[];
|
|
40
40
|
}>;
|
|
41
41
|
}
|
|
42
|
-
declare const AUTO_APPROVAL_THRESHOLD =
|
|
42
|
+
declare const AUTO_APPROVAL_THRESHOLD = 80;
|
|
43
43
|
/**
|
|
44
44
|
* Parse execution plan JSON from raw Claude CLI output.
|
|
45
45
|
* Mirrors server-side parseExecutionPlan() in planning-agent-local.ts.
|
package/dist/plan-validator.js
CHANGED
|
@@ -16,7 +16,7 @@ import { generateText } from "./providers.js";
|
|
|
16
16
|
// CONSTANTS
|
|
17
17
|
// ============================================================================
|
|
18
18
|
const MAX_TARGET_FILES = 5;
|
|
19
|
-
const AUTO_APPROVAL_THRESHOLD =
|
|
19
|
+
const AUTO_APPROVAL_THRESHOLD = 80;
|
|
20
20
|
// ============================================================================
|
|
21
21
|
// PLAN PARSING
|
|
22
22
|
// ============================================================================
|
package/dist/planner.d.ts
CHANGED
|
@@ -31,8 +31,9 @@ export interface PlanningTask {
|
|
|
31
31
|
* 2. Run Claude CLI to generate plan
|
|
32
32
|
* 3. Parse plan, apply file cap (max 5 files per story)
|
|
33
33
|
* 4. Run critic validation via Claude CLI
|
|
34
|
-
* 5. If critic approves (score >=
|
|
34
|
+
* 5. If critic approves (score >= 80): post validated plan to API
|
|
35
35
|
* 6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
|
|
36
|
-
* 7. After MAX_ITERATIONS without approval:
|
|
36
|
+
* 7. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
|
|
37
|
+
* 8. If no plan scored >= 50: fail the task
|
|
37
38
|
*/
|
|
38
39
|
export declare function planTask(task: PlanningTask, config: AgentConfig, credentials?: ClaimCredentials): Promise<boolean>;
|
package/dist/planner.js
CHANGED
|
@@ -15,7 +15,6 @@
|
|
|
15
15
|
* sees the same planning progress as cloud mode.
|
|
16
16
|
*/
|
|
17
17
|
import chalk from "chalk";
|
|
18
|
-
import ora from "ora";
|
|
19
18
|
import { spawn, execSync } from "child_process";
|
|
20
19
|
import { findClaudePath } from "./config.js";
|
|
21
20
|
import { api } from "./api.js";
|
|
@@ -104,23 +103,6 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
104
103
|
let stderrOutput = "";
|
|
105
104
|
let charsReceived = 0;
|
|
106
105
|
let toolCallCount = 0;
|
|
107
|
-
// Live spinner — shows elapsed time, phase, and chars generated
|
|
108
|
-
const spinner = ora({
|
|
109
|
-
text: `${taskLabel} Initializing planner...`,
|
|
110
|
-
prefixText: "",
|
|
111
|
-
spinner: "dots",
|
|
112
|
-
}).start();
|
|
113
|
-
function updateSpinner() {
|
|
114
|
-
const elapsed = Math.round((Date.now() - startTime) / 1000);
|
|
115
|
-
const phaseIcon = currentPhase === "reading_repo" ? "๐" :
|
|
116
|
-
currentPhase === "analyzing" ? "๐" :
|
|
117
|
-
currentPhase === "generating_plan" ? "๐" :
|
|
118
|
-
currentPhase === "validating" ? "โ
" : "โณ";
|
|
119
|
-
const stats = chalk.dim(`${formatElapsed(elapsed)} ยท ${charsReceived} chars ยท ${toolCallCount} tools`);
|
|
120
|
-
spinner.text = `${taskLabel} ${phaseIcon} ${phaseLabel(currentPhase, elapsed)} ${stats}`;
|
|
121
|
-
}
|
|
122
|
-
// Update spinner every 500ms for smooth elapsed time display
|
|
123
|
-
const spinnerInterval = setInterval(updateSpinner, 500);
|
|
124
106
|
// Buffered text streaming — flush complete lines to dashboard every 1s.
|
|
125
107
|
// LLM deltas are tiny fragments; we accumulate until we see '\n', then
|
|
126
108
|
// a 1s interval flushes all complete lines as log entries. On exit we
|
|
@@ -136,10 +118,8 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
136
118
|
if (line.trim()) {
|
|
137
119
|
postLog(taskId, `${PREFIX} ${line}`, "output");
|
|
138
120
|
// Echo planner thoughts to local terminal
|
|
139
|
-
spinner.stop();
|
|
140
121
|
const truncated = line.trim().length > 160 ? line.trim().substring(0, 160) + "โฆ" : line.trim();
|
|
141
122
|
console.log(`${ts()} ${taskLabel} ${chalk.dim("๐ญ")} ${chalk.dim(truncated)}`);
|
|
142
|
-
spinner.start();
|
|
143
123
|
}
|
|
144
124
|
}
|
|
145
125
|
textBuffer = incomplete;
|
|
@@ -156,10 +136,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
156
136
|
const elapsed = Math.round((Date.now() - startTime) / 1000);
|
|
157
137
|
const msg = phaseLabel(newPhase, elapsed);
|
|
158
138
|
postLog(taskId, msg);
|
|
159
|
-
spinner.stop();
|
|
160
139
|
console.log(`${ts()} ${taskLabel} ${chalk.dim(msg)}`);
|
|
161
|
-
spinner.start();
|
|
162
|
-
updateSpinner();
|
|
163
140
|
}
|
|
164
141
|
// Flush buffered LLM text to dashboard every 1s (complete lines only)
|
|
165
142
|
const textFlushInterval = setInterval(() => flushTextBuffer(), 1_000);
|
|
@@ -185,9 +162,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
185
162
|
lastProgressLogAt = elapsed;
|
|
186
163
|
const msg = `${PREFIX} Planning in progress โ analyzing requirements and decomposing into steps (${formatElapsed(elapsed)} elapsed)`;
|
|
187
164
|
postLog(taskId, msg);
|
|
188
|
-
spinner.stop();
|
|
189
165
|
console.log(`${ts()} ${taskLabel} ${chalk.dim(msg)}`);
|
|
190
|
-
spinner.start();
|
|
191
166
|
}
|
|
192
167
|
}, 5_000);
|
|
193
168
|
// Parse streaming JSON lines from Claude CLI
|
|
@@ -280,9 +255,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
280
255
|
clearInterval(progressInterval);
|
|
281
256
|
clearInterval(sseProgressInterval);
|
|
282
257
|
clearInterval(textFlushInterval);
|
|
283
|
-
clearInterval(spinnerInterval);
|
|
284
258
|
flushTextBuffer(true);
|
|
285
|
-
spinner.stop();
|
|
286
259
|
}
|
|
287
260
|
const timeout = setTimeout(() => {
|
|
288
261
|
cleanupAll();
|
|
@@ -407,23 +380,12 @@ function runAnalyst(name, claudePath, model, prompt, repoPath, env, timeoutMs =
|
|
|
407
380
|
let toolCalls = 0;
|
|
408
381
|
let timedOut = false;
|
|
409
382
|
const startMs = Date.now();
|
|
410
|
-
// Live spinner for this analyst
|
|
411
|
-
const analystSpinner = ora({
|
|
412
|
-
text: `${label} Starting (${model})...`,
|
|
413
|
-
spinner: "dots",
|
|
414
|
-
}).start();
|
|
415
|
-
const analystSpinnerInterval = setInterval(() => {
|
|
416
|
-
const elapsed = Math.round((Date.now() - startMs) / 1000);
|
|
417
|
-
analystSpinner.text = `${label} ${chalk.dim(`${formatElapsed(elapsed)} ยท ${toolCalls} tools ยท ${fullText.length} chars`)}`;
|
|
418
|
-
}, 500);
|
|
419
383
|
proc.stderr.on("data", (chunk) => {
|
|
420
384
|
const text = chunk.toString();
|
|
421
385
|
stderrOutput += text;
|
|
422
386
|
// Show stderr in real-time so we can see what's happening
|
|
423
387
|
for (const line of text.split("\n").filter((l) => l.trim())) {
|
|
424
|
-
analystSpinner.stop();
|
|
425
388
|
console.log(`${ts()} ${label} ${chalk.red("stderr:")} ${line.trim()}`);
|
|
426
|
-
analystSpinner.start();
|
|
427
389
|
}
|
|
428
390
|
});
|
|
429
391
|
proc.stdout.on("data", (data) => {
|
|
@@ -446,11 +408,9 @@ function runAnalyst(name, claudePath, model, prompt, repoPath, env, timeoutMs =
|
|
|
446
408
|
// Log analyst reasoning (first line, truncated)
|
|
447
409
|
const thought = block.text.trim().split("\n")[0].substring(0, 120);
|
|
448
410
|
if (thought) {
|
|
449
|
-
analystSpinner.stop();
|
|
450
411
|
console.log(`${ts()} ${label} ${chalk.dim("๐ญ")} ${chalk.dim(thought)}`);
|
|
451
412
|
if (taskId)
|
|
452
413
|
postLog(taskId, `${PREFIX} [${name}] ๐ญ ${thought}`);
|
|
453
|
-
analystSpinner.start();
|
|
454
414
|
}
|
|
455
415
|
}
|
|
456
416
|
else if (block.type === "tool_use") {
|
|
@@ -459,11 +419,9 @@ function runAnalyst(name, claudePath, model, prompt, repoPath, env, timeoutMs =
|
|
|
459
419
|
// Show tool name + input preview (file path, pattern, etc.)
|
|
460
420
|
const inputStr = block.input ? JSON.stringify(block.input) : "";
|
|
461
421
|
const inputPreview = inputStr.length > 80 ? inputStr.substring(0, 80) + "โฆ" : inputStr;
|
|
462
|
-
analystSpinner.stop();
|
|
463
422
|
console.log(`${ts()} ${label} ${chalk.dim(`Tool: ${toolName}`)}${inputPreview ? chalk.dim(` ${inputPreview}`) : ""}`);
|
|
464
423
|
if (taskId)
|
|
465
424
|
postLog(taskId, `${PREFIX} [${name}] Tool: ${toolName} ${inputPreview}`);
|
|
466
|
-
analystSpinner.start();
|
|
467
425
|
}
|
|
468
426
|
}
|
|
469
427
|
}
|
|
@@ -478,11 +436,9 @@ function runAnalyst(name, claudePath, model, prompt, repoPath, env, timeoutMs =
|
|
|
478
436
|
else if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
|
|
479
437
|
toolCalls++;
|
|
480
438
|
const toolName = event.content_block?.name || "unknown";
|
|
481
|
-
analystSpinner.stop();
|
|
482
439
|
console.log(`${ts()} ${label} ${chalk.dim(`Tool: ${toolName}`)}`);
|
|
483
440
|
if (taskId)
|
|
484
441
|
postLog(taskId, `${PREFIX} [${name}] Tool: ${toolName}`);
|
|
485
|
-
analystSpinner.start();
|
|
486
442
|
}
|
|
487
443
|
else if (event.type === "result" && event.result) {
|
|
488
444
|
resultText =
|
|
@@ -496,8 +452,6 @@ function runAnalyst(name, claudePath, model, prompt, repoPath, env, timeoutMs =
|
|
|
496
452
|
});
|
|
497
453
|
const timeout = setTimeout(() => {
|
|
498
454
|
timedOut = true;
|
|
499
|
-
clearInterval(analystSpinnerInterval);
|
|
500
|
-
analystSpinner.stop();
|
|
501
455
|
proc.kill("SIGTERM");
|
|
502
456
|
const elapsed = Math.round((Date.now() - startMs) / 1000);
|
|
503
457
|
console.log(`${ts()} ${label} ${chalk.yellow("โ Timed out")} after ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${fullText.length} chars)`);
|
|
@@ -507,8 +461,6 @@ function runAnalyst(name, claudePath, model, prompt, repoPath, env, timeoutMs =
|
|
|
507
461
|
}, timeoutMs);
|
|
508
462
|
proc.on("exit", (code) => {
|
|
509
463
|
clearTimeout(timeout);
|
|
510
|
-
clearInterval(analystSpinnerInterval);
|
|
511
|
-
analystSpinner.stop();
|
|
512
464
|
const elapsed = Math.round((Date.now() - startMs) / 1000);
|
|
513
465
|
if (timedOut)
|
|
514
466
|
return; // already resolved
|
|
@@ -532,8 +484,6 @@ function runAnalyst(name, claudePath, model, prompt, repoPath, env, timeoutMs =
|
|
|
532
484
|
});
|
|
533
485
|
proc.on("error", (err) => {
|
|
534
486
|
clearTimeout(timeout);
|
|
535
|
-
clearInterval(analystSpinnerInterval);
|
|
536
|
-
analystSpinner.stop();
|
|
537
487
|
console.log(`${ts()} ${label} ${chalk.red("โ Spawn failed:")} ${err.message}`);
|
|
538
488
|
resolve("");
|
|
539
489
|
});
|
|
@@ -736,9 +686,10 @@ async function runTeamAnalysis(task, basePrompt, claudePath, model, env, repoPat
|
|
|
736
686
|
* 2. Run Claude CLI to generate plan
|
|
737
687
|
* 3. Parse plan, apply file cap (max 5 files per story)
|
|
738
688
|
* 4. Run critic validation via Claude CLI
|
|
739
|
-
* 5. If critic approves (score >=
|
|
689
|
+
* 5. If critic approves (score >= 80): post validated plan to API
|
|
740
690
|
* 6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
|
|
741
|
-
* 7. After MAX_ITERATIONS without approval:
|
|
691
|
+
* 7. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
|
|
692
|
+
* 8. If no plan scored >= 50: fail the task
|
|
742
693
|
*/
|
|
743
694
|
export async function planTask(task, config, credentials) {
|
|
744
695
|
const taskLabel = chalk.cyan(task.id.slice(0, 8));
|
|
@@ -972,7 +923,32 @@ export async function planTask(task, config, credentials) {
|
|
|
972
923
|
}
|
|
973
924
|
}
|
|
974
925
|
}
|
|
975
|
-
// All iterations exhausted —
|
|
926
|
+
// All iterations exhausted — try best-plan fallback before failing.
|
|
927
|
+
// If we have a plan that scored >= BEST_PLAN_FALLBACK_THRESHOLD, post it
|
|
928
|
+
// with a warning instead of discarding it entirely.
|
|
929
|
+
const BEST_PLAN_FALLBACK_THRESHOLD = 50;
|
|
930
|
+
if (bestPlan && bestScore >= BEST_PLAN_FALLBACK_THRESHOLD) {
|
|
931
|
+
const elapsed = Math.round((Date.now() - startTime) / 1000);
|
|
932
|
+
const msg = `${PREFIX} Best-plan fallback: posting plan with score ${bestScore}/100 (below ${AUTO_APPROVAL_THRESHOLD} threshold, above ${BEST_PLAN_FALLBACK_THRESHOLD} minimum)`;
|
|
933
|
+
console.log(`${ts()} ${taskLabel} ${chalk.yellow("โ ")} ${msg}`);
|
|
934
|
+
await postLog(task.id, msg);
|
|
935
|
+
const planningDurationMs = Date.now() - startTime;
|
|
936
|
+
return await postValidatedPlan(task.id, bestPlan, config.agentId, taskLabel, elapsed, bestScore, [`Best-plan fallback: critic rejected after ${MAX_ITERATIONS} iterations`], criticHistory, totalFileCapTruncations, planningDurationMs, MAX_ITERATIONS);
|
|
937
|
+
}
|
|
938
|
+
// No usable plan — report failure to server so the task doesn't
|
|
939
|
+
// stay in "planning" status forever (which causes an infinite retry loop).
|
|
940
|
+
try {
|
|
941
|
+
await api.post("/api/agent/plan-failed", {
|
|
942
|
+
taskId: task.id,
|
|
943
|
+
agentId: config.agentId,
|
|
944
|
+
reason: `Critic rejected after ${MAX_ITERATIONS} iterations (best score: ${bestScore}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}, fallback minimum: ${BEST_PLAN_FALLBACK_THRESHOLD})`,
|
|
945
|
+
criticHistory,
|
|
946
|
+
});
|
|
947
|
+
}
|
|
948
|
+
catch {
|
|
949
|
+
// Best-effort — if the endpoint doesn't exist yet, the task will still
|
|
950
|
+
// be picked up again, but at least we tried.
|
|
951
|
+
}
|
|
976
952
|
return false;
|
|
977
953
|
}
|
|
978
954
|
finally {
|