@workermill/agent 0.8.3 → 0.8.5

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -18,7 +18,7 @@ export interface PlannedStory {
18
18
  priority: number;
19
19
  estimatedEffort: "small" | "medium" | "large";
20
20
  dependencies: string[];
21
- acceptanceCriteria: string[];
21
+ acceptanceCriteria?: string[];
22
22
  targetFiles?: string[];
23
23
  scope?: string;
24
24
  }
@@ -39,7 +39,7 @@ export interface CriticResult {
39
39
  suggestedChanges?: string[];
40
40
  }>;
41
41
  }
42
- declare const AUTO_APPROVAL_THRESHOLD = 80;
42
+ declare const AUTO_APPROVAL_THRESHOLD = 85;
43
43
  /**
44
44
  * Parse execution plan JSON from raw Claude CLI output.
45
45
  * Mirrors server-side parseExecutionPlan() in planning-agent-local.ts.
@@ -17,7 +17,7 @@ import { api } from "./api.js";
17
17
  // CONSTANTS
18
18
  // ============================================================================
19
19
  const MAX_TARGET_FILES = 15;
20
- const AUTO_APPROVAL_THRESHOLD = 80;
20
+ const AUTO_APPROVAL_THRESHOLD = 85;
21
21
  // ============================================================================
22
22
  // PLAN PARSING
23
23
  // ============================================================================
@@ -217,12 +217,13 @@ Review this execution plan against the PRD:
217
217
 
218
218
  **DO check for:**
219
219
  1. **Missing Requirements** - Does the plan cover what the PRD asks for?
220
- 2. **Vague Instructions** - Will the worker know what to do?
220
+ 2. **Scope Clarity** - Is each story's description a brief file scope label (1 line)? Stories should NOT rewrite ticket requirements.
221
221
  3. **Security Issues** - Only for tasks involving auth, user data, or external input
222
- 4. **Unrealistic Scope** - Any step targeting >5 files MUST score below 80 (auto-rejection threshold). Each step should modify at most 5 files. If a step needs more, split it into multiple steps first.
222
+ 4. **Unrealistic Scope** - Any step targeting >5 files MUST score below 85 (auto-rejection threshold). Each step should modify at most 5 files. If a step needs more, split it into multiple steps first.
223
223
  5. **Missing Operational Steps** - If the PRD requires deployment, provisioning, migrations, or running commands, does the plan include operational steps? Writing code is not the same as deploying it.
224
224
  6. **Overlapping File Scope** - If two or more steps share the same targetFiles, this causes parallel merge conflicts. Steps MUST NOT overlap on targetFiles. Deduct 10 points per shared file across steps.
225
225
  7. **Serialization Bottleneck** - If more than half the stories depend on a single story that targets >5 files, the plan has a bottleneck. Deduct 15 points — split the foundation or allow more parallel work.
226
+ 8. **Requirement Rewriting** - If any story description contains implementation details, acceptance criteria, or rewritten requirements from the PRD, deduct 15 points per offending story. Story descriptions must be ONE-LINE file scope labels (e.g., "Database layer — migrations and entity definitions"). The original ticket is the spec.
226
227
 
227
228
  ## Scoring Guide
228
229
 
@@ -237,7 +238,7 @@ Respond with ONLY a JSON object (no markdown, no explanation):
237
238
  {"approved": boolean, "score": number, "risks": ["risk1", "risk2"], "suggestions": ["suggestion1", "suggestion2"], "storyFeedback": [{"storyId": "step-0", "feedback": "specific feedback", "suggestedChanges": ["change1"]}]}
238
239
 
239
240
  Rules:
240
- - approved = true if score >= 80 AND plan is right-sized for task
241
+ - approved = true if score >= 85 AND plan is right-sized for task
241
242
  - risks = specific issues (empty array if none)
242
243
  - suggestions = actionable improvements (empty array if none)
243
244
  - storyFeedback = per-step feedback (optional, only for steps that need changes)`;
package/dist/planner.js CHANGED
@@ -625,9 +625,9 @@ export async function planTask(task, config, credentials) {
625
625
  // 2e. Check critic result
626
626
  if (!criticResult) {
627
627
  // Critic failed (timeout, parse error, etc.) — post plan without critic gate
628
- const msg = `${PREFIX} Critic validation failed posting plan without critic score`;
628
+ const msg = `${PREFIX} ⚠️ CRITIC BYPASSED — Critic validation failed (timeout/parse error). Posting plan WITHOUT quality gate.`;
629
629
  console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
630
- await postLog(task.id, msg);
630
+ await postLog(task.id, msg, "error", "warning");
631
631
  const planningDurationMs = Date.now() - startTime;
632
632
  return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed, undefined, undefined, criticHistory, totalFileCapTruncations, planningDurationMs, iteration);
633
633
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@workermill/agent",
3
- "version": "0.8.3",
3
+ "version": "0.8.5",
4
4
  "description": "WorkerMill Remote Agent - Run AI workers locally with your Claude Max subscription",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",