npm - goalbuddy - Versions diffs - 0.3.5 → 0.3.6 - Mend

goalbuddy 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/plugins/goalbuddy/skills/goalbuddy/scripts/check-goal-state.mjs CHANGED Viewed

@@ -369,6 +369,9 @@ for (const task of tasks) {
         errors.push(`Worker receipt for ${task.id} has non-passing command status: ${status}`);
       }
     }
+    if (task.receipt.scalar("needs_judge") === true) {
+      warnings.push(`Worker receipt for ${task.id} requests legacy needs_judge; GoalBuddy now lets the PM continue by default and reviews only at phase, risk, ambiguity, rejected-verification, or final-completion boundaries`);
+    }
   }
   if (task.type === "scout" && task.status === "done" && hasReceipt) {
     if (!task.receipt.has("summary")) errors.push(`Scout receipt for ${task.id} missing summary`);
@@ -381,6 +384,8 @@ for (const task of tasks) {
   }
 }
+warnings.push(...microSliceWarnings(tasks, activeTask, goalStatus));
 function validateSubgoal(task) {
   if (isChildCheck) {
     errors.push(`child task ${task.id} must not contain a nested subgoal`);
@@ -430,6 +435,77 @@ function validateSubgoal(task) {
   }
 }
// microSliceWarnings(tasks, activeTaskId, goalStatus) -> string[]
// Collects advisory warnings about a board that appears to be split into overly small tasks.
// Every message starts with the shared `guidance` sentence, and each return path de-duplicates
// the messages through a Set.
//
// Checks that always run:
//   - Among the last five tasks of type "worker" (in board order, any status), at least three
//     satisfy isTinyTask. The message says "Three" even if four or five matched.
//   - Any task of type "judge" whose `raw` text matches the legacy "pick small reviewable work" /
//     "select one narrow next task" phrasing (case-insensitive). Reported once; the loop breaks
//     on the first hit.
//
// The remaining checks run only when goalStatus === "active", activeTaskId is truthy, and a task
// with that id exists in `tasks`.
//   Active task is a worker:
//     - at least 10 tasks have status "done" and the active task lists 1 or 2 allowedFiles;
//     - nestedScalar("goal", "first_milestone_complete") is strictly true and the active task
//       satisfies isTinyTask;
//     - the active task satisfies isMicroWorkerTask.
//   Active task is a judge:
//     - walk backwards from the active index two positions at a time, treating tasks[index] as a
//       judge and tasks[index - 1] as the worker it reviews; count consecutive pairs accepted by
//       isMicroJudgeForWorker, stopping at the first pair that is not. Warn when two or more
//       pairs were counted.
//
// NOTE(review): nestedScalar is defined outside this hunk; its lookup semantics are not visible here.
// NOTE(review): `active.allowedFiles.length` presumes every parsed task carries an allowedFiles
// array - confirm against the task parser.
// NOTE(review): the visible call site passes a variable named `activeTask` as the second argument,
// while this function compares it to `task.id` - confirm that variable holds an id.
+function microSliceWarnings(tasks, activeTaskId, goalStatus) {
+  const found = [];
+  const guidance = "Board may be micro-slicing. Prefer the largest safe useful slice.";
+  const doneTasks = tasks.filter((task) => task.status === "done");
+  const workerTasks = tasks.filter((task) => task.type === "worker");
+  const recentTinyWorkers = workerTasks.slice(-5).filter((task) => isTinyTask(task));
+  const firstMilestoneComplete = nestedScalar("goal", "first_milestone_complete") === true;
+  if (recentTinyWorkers.length >= 3) {
+    found.push(`${guidance} Three recent Worker tasks look tiny.`);
+  }
+  for (const task of tasks) {
+    if (task.type === "judge" && /pick small reviewable work|select one narrow next task/i.test(task.raw)) {
+      found.push(`${guidance} Judge instructions still ask for small or narrow work.`);
+      break;
+    }
+  }
+  if (goalStatus !== "active" || !activeTaskId) return [...new Set(found)];
+  const activeIndex = tasks.findIndex((task) => task.id === activeTaskId);
+  if (activeIndex === -1) return [...new Set(found)];
+  const active = tasks[activeIndex];
+  if (active.type === "worker") {
+    if (doneTasks.length >= 10 && active.allowedFiles.length > 0 && active.allowedFiles.length <= 2) {
+      found.push(`${guidance} Active Worker ${active.id} has only ${active.allowedFiles.length} allowed_files after ${doneTasks.length} completed tasks.`);
+    }
+    if (firstMilestoneComplete && isTinyTask(active)) {
+      found.push(`${guidance} The first milestone is complete, so the active Worker should move toward the next real milestone.`);
+    }
+    if (isMicroWorkerTask(active)) {
+      found.push(`${guidance} Active Worker ${active.id} looks like another helper-sized slice.`);
+    }
+  }
+  if (active.type !== "judge") return [...new Set(found)];
+  let pairs = 0;
+  for (let index = activeIndex; index > 0; index -= 2) {
+    const judge = tasks[index];
+    const worker = tasks[index - 1];
+    if (!isMicroJudgeForWorker(judge, worker)) break;
+    pairs += 1;
+  }
+  if (pairs >= 2) {
+    found.push(`${guidance} Micro Worker/Judge loop detected ending at ${active.id}.`);
+  }
+  return [...new Set(found)];
+}
// isMicroJudgeForWorker(judge, worker) -> boolean
// True when `judge` looks like a review task aimed at a helper-sized `worker` task. All of the
// following must hold:
//   - both arguments are truthy, judge.type is "judge" and worker.type is "worker";
//   - judge.status is "active", "queued" or "done", and worker.status is "done";
//   - the lower-cased judge objective contains the lower-cased worker id;
//   - the lower-cased judge objective contains "audit", "review" or "approve" (substring match);
//   - isMicroWorkerTask(worker) is true.
// A missing judge objective is treated as the empty string.
// NOTE(review): `worker.id.toLowerCase()` presumes the id is a string; a missing or non-string id
// would throw here - confirm the parser guarantees string ids.
+function isMicroJudgeForWorker(judge, worker) {
+  if (!judge || !worker) return false;
+  if (judge.type !== "judge" || worker.type !== "worker") return false;
+  if (!["active", "queued", "done"].includes(judge.status) || worker.status !== "done") return false;
+  const objective = String(judge.objective || "").toLowerCase();
+  return objective.includes(worker.id.toLowerCase()) && /audit|review|approve/.test(objective) && isMicroWorkerTask(worker);
+}
// isMicroWorkerTask(task) -> boolean
// Heuristic on the objective text of a worker task. Returns false for a falsy task or any task
// whose type is not "worker". The objective (empty string when missing) is lower-cased, then:
//   - if it contains "collapsed", "batch", "package" or "tranche", the task is not micro;
//   - otherwise the task is micro when it contains one of the helper-sized phrases in the
//     final pattern ("one narrow", "single helper", "per-table", ...).
// Both patterns are plain substring matches with no word boundaries, so for example "packages"
// also triggers the exclusion.
+function isMicroWorkerTask(task) {
+  if (!task || task.type !== "worker") return false;
+  const objective = String(task.objective || "").toLowerCase();
+  if (/collapsed|batch|package|tranche/.test(objective)) return false;
+  return /one narrow|single helper|one helper|per[- ]helper|per[- ]table|projection helper/.test(objective);
+}
// isTinyTask(task) -> boolean
// Keyword heuristic for "this task looks tiny". Returns false for a falsy task. Otherwise the
// task's objective, raw text and receipt raw text are joined with spaces and lower-cased
// (Array.prototype.join renders undefined/null entries as empty strings), then:
//   - any occurrence of "collapsed", "batch", "package", "tranche", "vertical slice" or
//     "milestone" (substring match) means the task is not tiny;
//   - otherwise the task is tiny when one of the listed keywords appears as a whole word or
//     phrase (the alternation is wrapped in \b word boundaries).
// NOTE(review): `task.raw` is part of the scanned text, so a keyword anywhere in it counts, not
// only in the objective - presumably raw is the task's full source text; confirm.
+function isTinyTask(task) {
+  if (!task) return false;
+  const text = [task.objective, task.raw, task.receipt?.raw].join(" ").toLowerCase();
+  if (/collapsed|batch|package|tranche|vertical slice|milestone/.test(text)) return false;
+  return /\b(tiny|narrow|single helper|one helper|projection helper|projection function|contract file|read-only proof|doc note|validator|validation wrapper|pure helper|caller-input)\b/.test(text);
+}
 // matchesAllowedFile(file, allowedFiles) -> boolean
 // True when at least one entry of `allowedFiles` matches `file` according to
 // globMatch(pattern, file); an empty list therefore yields false.
 // NOTE(review): globMatch is defined outside this hunk; its pattern syntax is not visible here.
 function matchesAllowedFile(file, allowedFiles) {
   return allowedFiles.some((pattern) => globMatch(pattern, file));
 }

package/plugins/goalbuddy/skills/goalbuddy/scripts/render-task-prompt.mjs CHANGED Viewed

@@ -7,7 +7,7 @@ import { parseGoalStateText } from "../extend/local-goal-board/scripts/lib/goal-
 const ROLE_DEFAULTS = {
   scout: { agent: "goal_scout", reasoning: "low", sandbox: "read-only" },
   judge: { agent: "goal_judge", reasoning: "high", sandbox: "read-only" },
-  worker: { agent: "goal_worker", reasoning: "low", sandbox: "workspace-write" },
+  worker: { agent: "goal_worker", reasoning: "medium", sandbox: "workspace-write" },
   pm: { agent: "PM", reasoning: "medium", sandbox: "workspace-write" },
 };
@@ -39,11 +39,13 @@ export function renderTaskPrompt(options) {
     payload: {
       metadata: {
         recommended_agent: defaults.agent,
+        required_spawn_agent_type: defaults.agent === "PM" ? null : defaults.agent,
         recommended_reasoning: reasoning,
         sandbox: defaults.sandbox,
         fork_context_allowed: role !== "worker",
         board_path: board.path,
         child_board_paths: childBoardPaths(board),
+        slice_policy: board.document.rules?.slice_policy || null,
         warnings,
       },
       task: {
@@ -143,15 +145,53 @@ function promptWarnings(board, task) {
     if (stringList(task.allowed_files).length === 0) warnings.push(`Worker task ${task.id} has no allowed_files.`);
     if (stringList(task.verify).length === 0) warnings.push(`Worker task ${task.id} has no verify commands.`);
     if (stringList(task.stop_if).length === 0) warnings.push(`Worker task ${task.id} has no stop_if conditions.`);
+    if (isFalse(board.goal.full_outcome_complete)) {
+      warnings.push(`full_outcome_complete is false and ${task.id} is an active Worker; do not stop after rendering or repairing the board. Execute the Worker unless a stop_if condition applies.`);
+    }
   }
   for (const candidate of board.tasks) {
     if (candidate?.subgoal && Number(candidate.subgoal.depth) !== 1) {
       warnings.push(`Task ${candidate.id} has subgoal.depth ${candidate.subgoal.depth || "<missing>"}; only depth 1 is supported.`);
     }
   }
+  warnings.push(...microSliceWarnings(board, task));
   return warnings;
 }
// microSliceWarnings(board, task) -> string[]
// Builds advisory warnings for the task being rendered (`task`) on `board` when the board looks
// like it is being split into overly small pieces. The result is de-duplicated through a Set.
// Null/undefined entries in board.tasks are tolerated via optional chaining.
//
// Warnings, in the order they are evaluated:
//   - the bare micro-slicing sentence when at least three of the last five tasks whose
//     normalized role is "worker" satisfy isTinyTask;
//   - `task` is a worker, at least 10 board tasks have status "done", and `task` lists 1 or 2
//     allowed_files;
//   - `task` is a worker, board.goal.first_milestone_complete is true per isTrue, and `task`
//     satisfies isTinyTask;
//   - `task` is a judge and its objective or constraints contain the legacy "pick small
//     reviewable work" / "select one narrow next task" phrasing (case-insensitive).
//
// NOTE(review): `board.goal` is dereferenced without a guard; presumably the board parser always
// provides it - confirm.
+function microSliceWarnings(board, task) {
+  const warnings = [];
+  const doneTasks = board.tasks.filter((candidate) => candidate?.status === "done");
+  const recentWorkers = board.tasks
+    .filter((candidate) => normalizeRole(candidate?.type) === "worker")
+    .slice(-5);
+  const recentTinyWorkers = recentWorkers.filter((candidate) => isTinyTask(candidate));
+  const activeRole = normalizeRole(task.type);
+  const activeAllowedFiles = stringList(task.allowed_files);
+  const firstMilestoneComplete = isTrue(board.goal.first_milestone_complete);
+  const microWarning = "Board may be micro-slicing. Prefer the largest safe useful slice.";
+  if (recentTinyWorkers.length >= 3) warnings.push(microWarning);
+  if (doneTasks.length >= 10 && activeRole === "worker" && activeAllowedFiles.length > 0 && activeAllowedFiles.length <= 2) {
+    warnings.push(`${microWarning} Active Worker ${task.id} has only ${activeAllowedFiles.length} allowed_files after ${doneTasks.length} completed tasks.`);
+  }
+  if (firstMilestoneComplete && activeRole === "worker" && isTinyTask(task)) {
+    warnings.push(`${microWarning} The first milestone is complete, so the active Worker should move toward the next real milestone.`);
+  }
+  if (activeRole === "judge" && /pick small reviewable work|select one narrow next task/i.test(String(task.objective || "") + "\n" + stringList(task.constraints).join("\n"))) {
+    warnings.push(`${microWarning} Judge instructions still ask for small or narrow work.`);
+  }
+  return [...new Set(warnings)];
+}
// isTinyTask(task) -> boolean
// Keyword heuristic for "this task looks tiny", used when rendering prompts. The task's
// objective, its constraints (joined with spaces) and its receipt summary are concatenated and
// lower-cased; the task counts as tiny when one of the listed keywords appears as a whole word
// or phrase (the alternation is wrapped in \b word boundaries). Optional chaining makes a
// null/undefined task evaluate to false rather than throw.
// NOTE(review): the isTinyTask added to check-goal-state.mjs in this same diff first rules out
// text mentioning collapsed/batch/package/tranche/vertical slice/milestone; this version has no
// such exclusion, so the two scripts can disagree on the same task - confirm that is intended.
+function isTinyTask(task) {
+  const text = [
+    task?.objective,
+    stringList(task?.constraints).join(" "),
+    task?.receipt?.summary,
+  ].join(" ").toLowerCase();
+  return /\b(tiny|narrow|single helper|one helper|projection helper|projection function|contract file|read-only proof|doc note|validator|validation wrapper|pure helper|caller-input)\b/.test(text);
+}
 function normalizeRole(value) {
   const role = String(value || "pm").toLowerCase();
   return ROLE_DEFAULTS[role] ? role : "pm";
@@ -163,6 +203,14 @@ function normalizeReasoning(value, fallback) {
   return fallback;
 }
// isFalse(value) -> boolean
// True for the boolean `false` and for any value whose String() form equals "false" ignoring
// case (e.g. the strings "false" or "FALSE"). Everything else - including undefined, null, 0 and
// the empty string - is not considered false here.
+function isFalse(value) {
+  return value === false || String(value).toLowerCase() === "false";
+}
// isTrue(value) -> boolean
// True for the boolean `true` and for any value whose String() form equals "true" ignoring case
// (e.g. the strings "true" or "TRUE"). Other truthy values such as 1 or "yes" return false.
+function isTrue(value) {
+  return value === true || String(value).toLowerCase() === "true";
+}
 // stringList(value) -> string[]
 // Normalizes a possibly-missing list into an array of strings: a non-array input yields [],
 // null/undefined entries are dropped, and every remaining entry is converted with String().
 function stringList(value) {
   return Array.isArray(value) ? value.filter((item) => item !== null && item !== undefined).map(String) : [];
 }
@@ -175,15 +223,14 @@ function receiptSchema(role) {
       commands: [{ cmd: "<command>", status: "pass | fail | not_run" }],
       summary: "<=120 words",
       remaining_blockers: [],
-      needs_judge: false,
     };
   }
   if (role === "judge") {
     return {
       result: "done | blocked",
-      decision: "approve_next | reject_next | approve_subgoal | reject_subgoal | not_complete | complete",
+      decision: "approved | rejected | approve_subgoal | reject_subgoal | not_complete | complete",
+      full_outcome_complete: false,
       evidence: [],
-      next_allowed_task: null,
       blocked_tasks: [],
       required_board_updates: [],
     };
@@ -204,6 +251,7 @@ function formatPrompt(payload) {
     "",
     "Metadata:",
     `- recommended_agent: ${payload.metadata.recommended_agent}`,
+    `- required_spawn_agent_type: ${payload.metadata.required_spawn_agent_type || "PM fallback"}`,
     `- recommended_reasoning: ${payload.metadata.recommended_reasoning}`,
     `- sandbox: ${payload.metadata.sandbox}`,
     `- fork_context_allowed: ${payload.metadata.fork_context_allowed}`,
@@ -213,12 +261,21 @@ function formatPrompt(payload) {
     lines.push("- child_board_paths:");
     for (const path of payload.metadata.child_board_paths) lines.push(`  - ${path}`);
   }
+  if (payload.metadata.slice_policy) {
+    lines.push(`- slice_policy: ${JSON.stringify(payload.metadata.slice_policy)}`);
+  }
   if (payload.metadata.warnings.length) {
     lines.push("- warnings:");
     for (const warning of payload.metadata.warnings) lines.push(`  - ${warning}`);
   }
   lines.push(
+    "",
+    "Spawn contract:",
+    `- Codex spawn_agent agent_type: ${payload.metadata.required_spawn_agent_type || "do not spawn; run as PM"}`,
+    "- Do not substitute generic scout, worker, or judge agents for GoalBuddy agents.",
+    "- If the required GoalBuddy agent is unavailable, stop spawning and continue as PM fallback or install agents.",
+    "- After one wait_agent timeout with no visible allowed-file changes, stop waiting and recover deterministically.",
     "",
     "Task:",
     `- id: ${payload.task.id}`,

package/plugins/goalbuddy/skills/goalbuddy/templates/agents.md CHANGED Viewed

@@ -5,7 +5,7 @@ Use three generic agents. The main `/goal` thread remains PM and owns the board.
 | Agent | model_reasoning_effort | sandbox_mode | Purpose |
 |---|---:|---|---|
 | goal_scout | low | read-only | Targeted evidence mapping and candidate facts |
-| goal_worker | low | workspace-write | One bounded implementation/recovery task |
+| goal_worker | medium | workspace-write | One coherent bounded implementation/recovery slice |
 | goal_judge | high | read-only | Strategic review, escalation, completion skepticism |
 ## PM Thinking Policy
@@ -44,5 +44,6 @@ Rules:
 - Only the PM loop chooses active tasks, marks tasks done, or completes the goal.
 - Keep at most one write-capable Worker active unless disjoint write scopes are explicit in `state.yaml`.
+- Worker defaults to medium reasoning for implementation tasks and should complete the whole assigned slice.
 - Scout and Judge are read-only and safe to parallelize when their board inputs are clear.
-- Judge is high thinking.
+- Judge is high thinking and should choose the largest safe useful slice, not the narrowest helper.

package/plugins/goalbuddy/skills/goalbuddy/templates/goal.md CHANGED Viewed

@@ -25,7 +25,7 @@
 ## Current Tranche
-<What is enough for the full owner outcome, and what is the current safe slice? For execution goals, the default is continuous: discover enough evidence, choose a safe implementation slice, implement it, verify it, audit it, then immediately advance to the next safe slice until the full original outcome is complete. Plan-only or one-slice-only stopping is valid only when explicitly requested.>
+<What is enough for the full owner outcome, and what is the current largest reversible local work package? For execution goals, the default is continuous: discover enough evidence, choose a coherent work package, implement it, verify it, review only at phase/risk/final boundaries, then immediately advance to the next work package until the full original outcome is complete. Plan-only or one-package-only stopping is valid only when explicitly requested.>
 ## Non-Negotiable Constraints
@@ -37,7 +37,21 @@ Stop only when a final audit proves the full original outcome is complete.
 Do not stop after planning, discovery, or Judge selection if the user asked for working software or automation and a safe Worker task can be activated.
-Do not stop after a single verified Worker slice when the broader owner outcome still has safe local follow-up slices. After each slice audit, advance the board to the next highest-leverage safe Worker task and continue.
+Do not stop after a single verified Worker package when the broader owner outcome still has safe local follow-up work. Advance the board to the next highest-leverage safe Worker package and continue unless a phase, risk, rejected-verification, ambiguity, or final-completion review is due.
+Do not create one Worker/Judge pair per repeated file, table, route, or helper. Put repeated same-shape work into one Worker package and review the package as a whole.
+## Slice Sizing
+Safe means bounded, explicit, verified, and reversible. It does not mean tiny.
+A good task is the largest safe useful slice.
+Small is not the goal. Useful is the goal.
+A Worker should finish the whole assigned slice. A Judge should judge the whole assigned slice. A PM should reorient the board when tasks are safe but not moving the outcome.
+Tiny tasks are allowed when the failure is isolated, the risk is high, the scope is unknown, or the tiny task unlocks a larger slice. Tiny tasks are bad when they keep happening, do not change behavior, only add wrappers/contracts/proof files, or avoid the real milestone.
 Do not stop because a slice needs owner input, credentials, production access, destructive operations, or policy decisions. Mark that exact slice blocked with a receipt, create the smallest safe follow-up or workaround task, and continue all local, non-destructive work that can still move the goal toward the full outcome.
@@ -67,9 +81,9 @@ On every `/goal` continuation:
 6. Assign Scout, Judge, Worker, or PM according to the task.
 7. Write a compact task receipt.
 8. Update the board.
-9. If Judge selected a safe Worker task with `allowed_files`, `verify`, and `stop_if`, activate it and continue unless blocked.
+9. If safe local work remains, choose the next largest reversible Worker package and continue unless blocked.
 10. If a problem, suggestion, or follow-up should become a repo artifact, create an approved issue/PR or ask the operator whether to create one.
-11. Treat a slice audit as a checkpoint, not completion, unless it explicitly proves the full original outcome is complete.
+11. Review at phase, risk, rejected-verification, ambiguity, or final-completion boundaries; do not review every small Worker by habit.
 12. Finish only with a Judge/PM audit receipt that maps receipts and verification back to the original user outcome and records `full_outcome_complete: true`.
 Issue and PR handoffs are supporting artifacts. `state.yaml` remains authoritative, and every external artifact decision must be recorded in a task receipt.

package/plugins/goalbuddy/skills/goalbuddy/templates/state.yaml CHANGED Viewed

@@ -33,6 +33,11 @@ rules:
   missing_input_or_credentials_do_not_stop_goal: true
   preserve_and_validate_existing_plan: true
   intake_misfire_must_be_audited: true
+  slice_policy:
+    max_consecutive_tiny_tasks: 2
+    prefer_vertical_slices: true
+    judge_picks_largest_safe_slice: true
+    worker_completes_whole_slice: true
 agents:
   # installed | bundled_not_installed | missing | unknown
@@ -88,7 +93,7 @@ tasks:
       - "T001 receipt"
     constraints:
       - "Do not implement."
-      - "Pick small reviewable work."
+      - "Pick the largest safe useful slice with clear allowed_files, verify commands, and stop conditions."
     expected_output:
       - "Decision"
       - "Exact Worker objective"