@workermill/agent 0.7.14 → 0.7.16
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/plan-validator.d.ts +1 -1
- package/dist/plan-validator.js +1 -1
- package/dist/planner.d.ts +3 -2
- package/dist/planner.js +43 -6
- package/package.json +1 -1
package/dist/plan-validator.d.ts
CHANGED
|
@@ -39,7 +39,7 @@ export interface CriticResult {
|
|
|
39
39
|
suggestedChanges?: string[];
|
|
40
40
|
}>;
|
|
41
41
|
}
|
|
42
|
-
declare const AUTO_APPROVAL_THRESHOLD =
|
|
42
|
+
declare const AUTO_APPROVAL_THRESHOLD = 80;
|
|
43
43
|
/**
|
|
44
44
|
* Parse execution plan JSON from raw Claude CLI output.
|
|
45
45
|
* Mirrors server-side parseExecutionPlan() in planning-agent-local.ts.
|
package/dist/plan-validator.js
CHANGED
|
@@ -16,7 +16,7 @@ import { generateText } from "./providers.js";
|
|
|
16
16
|
// CONSTANTS
|
|
17
17
|
// ============================================================================
|
|
18
18
|
const MAX_TARGET_FILES = 5;
|
|
19
|
-
const AUTO_APPROVAL_THRESHOLD =
|
|
19
|
+
const AUTO_APPROVAL_THRESHOLD = 80;
|
|
20
20
|
// ============================================================================
|
|
21
21
|
// PLAN PARSING
|
|
22
22
|
// ============================================================================
|
package/dist/planner.d.ts
CHANGED
|
@@ -31,8 +31,9 @@ export interface PlanningTask {
|
|
|
31
31
|
* 2. Run Claude CLI to generate plan
|
|
32
32
|
* 3. Parse plan, apply file cap (max 5 files per story)
|
|
33
33
|
* 4. Run critic validation via Claude CLI
|
|
34
|
-
* 5. If critic approves (score >=
|
|
34
|
+
* 5. If critic approves (score >= 80): post validated plan to API
|
|
35
35
|
* 6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
|
|
36
|
-
* 7. After MAX_ITERATIONS without approval:
|
|
36
|
+
* 7. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
|
|
37
|
+
* 8. If no plan scored >= 50: fail the task
|
|
37
38
|
*/
|
|
38
39
|
export declare function planTask(task: PlanningTask, config: AgentConfig, credentials?: ClaimCredentials): Promise<boolean>;
|
package/dist/planner.js
CHANGED
|
@@ -686,9 +686,10 @@ async function runTeamAnalysis(task, basePrompt, claudePath, model, env, repoPat
|
|
|
686
686
|
* 2. Run Claude CLI to generate plan
|
|
687
687
|
* 3. Parse plan, apply file cap (max 5 files per story)
|
|
688
688
|
* 4. Run critic validation via Claude CLI
|
|
689
|
-
* 5. If critic approves (score >=
|
|
689
|
+
* 5. If critic approves (score >= 80): post validated plan to API
|
|
690
690
|
* 6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
|
|
691
|
-
* 7. After MAX_ITERATIONS without approval:
|
|
691
|
+
* 7. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
|
|
692
|
+
* 8. If no plan scored >= 50: fail the task
|
|
692
693
|
*/
|
|
693
694
|
export async function planTask(task, config, credentials) {
|
|
694
695
|
const taskLabel = chalk.cyan(task.id.slice(0, 8));
|
|
@@ -922,7 +923,42 @@ export async function planTask(task, config, credentials) {
|
|
|
922
923
|
}
|
|
923
924
|
}
|
|
924
925
|
}
|
|
925
|
-
// All iterations exhausted —
|
|
926
|
+
// All iterations exhausted — try best-plan fallback before failing.
|
|
927
|
+
// If we have a plan that scored >= BEST_PLAN_FALLBACK_THRESHOLD, post it
|
|
928
|
+
// with a warning instead of discarding it entirely.
|
|
929
|
+
const BEST_PLAN_FALLBACK_THRESHOLD = 50;
|
|
930
|
+
if (bestPlan && bestScore >= BEST_PLAN_FALLBACK_THRESHOLD) {
|
|
931
|
+
const elapsed = Math.round((Date.now() - startTime) / 1000);
|
|
932
|
+
const msg = `${PREFIX} Best-plan fallback: posting plan with score ${bestScore}/100 (below ${AUTO_APPROVAL_THRESHOLD} threshold, above ${BEST_PLAN_FALLBACK_THRESHOLD} minimum)`;
|
|
933
|
+
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
|
|
934
|
+
await postLog(task.id, msg);
|
|
935
|
+
const planningDurationMs = Date.now() - startTime;
|
|
936
|
+
const fallbackPosted = await postValidatedPlan(task.id, bestPlan, config.agentId, taskLabel, elapsed, bestScore, [`Best-plan fallback: critic rejected after ${MAX_ITERATIONS} iterations`], criticHistory, totalFileCapTruncations, planningDurationMs, MAX_ITERATIONS);
|
|
937
|
+
if (fallbackPosted) {
|
|
938
|
+
return true;
|
|
939
|
+
}
|
|
940
|
+
// Fallback post failed (404, 409, etc.) — fall through to plan-failed
|
|
941
|
+
// so the task doesn't stay stuck in "planning" status forever.
|
|
942
|
+
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${PREFIX} Fallback post rejected by server, reporting plan-failed`);
|
|
943
|
+
await postLog(task.id, `${PREFIX} Fallback plan rejected by server — reporting failure`);
|
|
944
|
+
}
|
|
945
|
+
// No usable plan (or fallback rejected) — report failure to server so
|
|
946
|
+
// the task doesn't stay in "planning" status forever (infinite retry loop).
|
|
947
|
+
try {
|
|
948
|
+
const failReason = bestPlan && bestScore >= BEST_PLAN_FALLBACK_THRESHOLD
|
|
949
|
+
? `Best-plan fallback rejected by server after ${MAX_ITERATIONS} iterations (best score: ${bestScore}/100)`
|
|
950
|
+
: `Critic rejected after ${MAX_ITERATIONS} iterations (best score: ${bestScore}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}, fallback minimum: ${BEST_PLAN_FALLBACK_THRESHOLD})`;
|
|
951
|
+
await api.post("/api/agent/plan-failed", {
|
|
952
|
+
taskId: task.id,
|
|
953
|
+
agentId: config.agentId,
|
|
954
|
+
reason: failReason,
|
|
955
|
+
criticHistory,
|
|
956
|
+
});
|
|
957
|
+
}
|
|
958
|
+
catch {
|
|
959
|
+
// Best-effort — if the endpoint doesn't exist yet, the task will still
|
|
960
|
+
// be picked up again, but at least we tried.
|
|
961
|
+
}
|
|
926
962
|
return false;
|
|
927
963
|
}
|
|
928
964
|
finally {
|
|
@@ -964,9 +1000,10 @@ async function postValidatedPlan(taskId, plan, agentId, taskLabel, elapsed, crit
|
|
|
964
1000
|
}
|
|
965
1001
|
catch (error) {
|
|
966
1002
|
const err = error;
|
|
967
|
-
const detail = err.response?.data?.detail || String(error);
|
|
968
|
-
|
|
969
|
-
|
|
1003
|
+
const detail = err.response?.data?.error || err.response?.data?.detail || String(error);
|
|
1004
|
+
const statusCode = err.response?.status ? ` (${err.response.status})` : "";
|
|
1005
|
+
console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Server validation failed${statusCode}: ${detail.substring(0, 100)}`);
|
|
1006
|
+
await postLog(taskId, `${PREFIX} Server-side plan validation failed${statusCode}: ${detail.substring(0, 200)}`, "error", "error");
|
|
970
1007
|
return false;
|
|
971
1008
|
}
|
|
972
1009
|
}
|