@workermill/agent 0.8.3 → 0.8.5
This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/plan-validator.d.ts +2 -2
- package/dist/plan-validator.js +5 -4
- package/dist/planner.js +2 -2
- package/package.json +1 -1
package/dist/plan-validator.d.ts
CHANGED
|
@@ -18,7 +18,7 @@ export interface PlannedStory {
|
|
|
18
18
|
priority: number;
|
|
19
19
|
estimatedEffort: "small" | "medium" | "large";
|
|
20
20
|
dependencies: string[];
|
|
21
|
-
acceptanceCriteria
|
|
21
|
+
acceptanceCriteria?: string[];
|
|
22
22
|
targetFiles?: string[];
|
|
23
23
|
scope?: string;
|
|
24
24
|
}
|
|
@@ -39,7 +39,7 @@ export interface CriticResult {
|
|
|
39
39
|
suggestedChanges?: string[];
|
|
40
40
|
}>;
|
|
41
41
|
}
|
|
42
|
-
declare const AUTO_APPROVAL_THRESHOLD =
|
|
42
|
+
declare const AUTO_APPROVAL_THRESHOLD = 85;
|
|
43
43
|
/**
|
|
44
44
|
* Parse execution plan JSON from raw Claude CLI output.
|
|
45
45
|
* Mirrors server-side parseExecutionPlan() in planning-agent-local.ts.
|
package/dist/plan-validator.js
CHANGED
|
@@ -17,7 +17,7 @@ import { api } from "./api.js";
|
|
|
17
17
|
// CONSTANTS
|
|
18
18
|
// ============================================================================
|
|
19
19
|
const MAX_TARGET_FILES = 15;
|
|
20
|
-
const AUTO_APPROVAL_THRESHOLD =
|
|
20
|
+
const AUTO_APPROVAL_THRESHOLD = 85;
|
|
21
21
|
// ============================================================================
|
|
22
22
|
// PLAN PARSING
|
|
23
23
|
// ============================================================================
|
|
@@ -217,12 +217,13 @@ Review this execution plan against the PRD:
|
|
|
217
217
|
|
|
218
218
|
**DO check for:**
|
|
219
219
|
1. **Missing Requirements** - Does the plan cover what the PRD asks for?
|
|
220
|
-
2. **
|
|
220
|
+
2. **Scope Clarity** - Is each story's description a brief file scope label (1 line)? Stories should NOT rewrite ticket requirements.
|
|
221
221
|
3. **Security Issues** - Only for tasks involving auth, user data, or external input
|
|
222
|
-
4. **Unrealistic Scope** - Any step targeting >5 files MUST score below
|
|
222
|
+
4. **Unrealistic Scope** - Any step targeting >5 files MUST score below 85 (auto-rejection threshold). Each step should modify at most 5 files. If a step needs more, split it into multiple steps first.
|
|
223
223
|
5. **Missing Operational Steps** - If the PRD requires deployment, provisioning, migrations, or running commands, does the plan include operational steps? Writing code is not the same as deploying it.
|
|
224
224
|
6. **Overlapping File Scope** - If two or more steps share the same targetFiles, this causes parallel merge conflicts. Steps MUST NOT overlap on targetFiles. Deduct 10 points per shared file across steps.
|
|
225
225
|
7. **Serialization Bottleneck** - If more than half the stories depend on a single story that targets >5 files, the plan has a bottleneck. Deduct 15 points — split the foundation or allow more parallel work.
|
|
226
|
+
8. **Requirement Rewriting** - If any story description contains implementation details, acceptance criteria, or rewritten requirements from the PRD, deduct 15 points per offending story. Story descriptions must be ONE-LINE file scope labels (e.g., "Database layer — migrations and entity definitions"). The original ticket is the spec.
|
|
226
227
|
|
|
227
228
|
## Scoring Guide
|
|
228
229
|
|
|
@@ -237,7 +238,7 @@ Respond with ONLY a JSON object (no markdown, no explanation):
|
|
|
237
238
|
{"approved": boolean, "score": number, "risks": ["risk1", "risk2"], "suggestions": ["suggestion1", "suggestion2"], "storyFeedback": [{"storyId": "step-0", "feedback": "specific feedback", "suggestedChanges": ["change1"]}]}
|
|
238
239
|
|
|
239
240
|
Rules:
|
|
240
|
-
- approved = true if score >=
|
|
241
|
+
- approved = true if score >= 85 AND plan is right-sized for task
|
|
241
242
|
- risks = specific issues (empty array if none)
|
|
242
243
|
- suggestions = actionable improvements (empty array if none)
|
|
243
244
|
- storyFeedback = per-step feedback (optional, only for steps that need changes)`;
|
package/dist/planner.js
CHANGED
|
@@ -625,9 +625,9 @@ export async function planTask(task, config, credentials) {
|
|
|
625
625
|
// 2e. Check critic result
|
|
626
626
|
if (!criticResult) {
|
|
627
627
|
// Critic failed (timeout, parse error, etc.) — post plan without critic gate
|
|
628
|
-
const msg = `${PREFIX} Critic validation failed
|
|
628
|
+
const msg = `${PREFIX} ⚠️ CRITIC BYPASSED — Critic validation failed (timeout/parse error). Posting plan WITHOUT quality gate.`;
|
|
629
629
|
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
|
|
630
|
-
await postLog(task.id, msg);
|
|
630
|
+
await postLog(task.id, msg, "error", "warning");
|
|
631
631
|
const planningDurationMs = Date.now() - startTime;
|
|
632
632
|
return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed, undefined, undefined, criticHistory, totalFileCapTruncations, planningDurationMs, iteration);
|
|
633
633
|
}
|