goalbuddy 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/README.md +29 -4
  2. package/goalbuddy/SKILL.md +34 -13
  3. package/goalbuddy/agents/README.md +1 -1
  4. package/goalbuddy/agents/goal_judge.toml +8 -4
  5. package/goalbuddy/agents/goal_worker.toml +8 -5
  6. package/goalbuddy/extend/local-goal-board/scripts/lib/goal-board.mjs +248 -7
  7. package/goalbuddy/extend/local-goal-board/test/local-goal-board.test.mjs +59 -1
  8. package/goalbuddy/scripts/check-goal-state.mjs +76 -0
  9. package/goalbuddy/scripts/render-task-prompt.mjs +61 -4
  10. package/goalbuddy/templates/agents.md +3 -2
  11. package/goalbuddy/templates/goal.md +18 -4
  12. package/goalbuddy/templates/state.yaml +6 -1
  13. package/internal/cli/goal-maker.mjs +108 -8
  14. package/package.json +1 -1
  15. package/plugins/goalbuddy/.claude-plugin/plugin.json +1 -1
  16. package/plugins/goalbuddy/.codex-plugin/plugin.json +1 -1
  17. package/plugins/goalbuddy/agents/goal-judge.md +8 -4
  18. package/plugins/goalbuddy/agents/goal-worker.md +6 -4
  19. package/plugins/goalbuddy/skills/goalbuddy/SKILL.md +34 -13
  20. package/plugins/goalbuddy/skills/goalbuddy/agents/README.md +1 -1
  21. package/plugins/goalbuddy/skills/goalbuddy/agents/goal_judge.toml +8 -4
  22. package/plugins/goalbuddy/skills/goalbuddy/agents/goal_worker.toml +8 -5
  23. package/plugins/goalbuddy/skills/goalbuddy/extend/local-goal-board/scripts/lib/goal-board.mjs +248 -7
  24. package/plugins/goalbuddy/skills/goalbuddy/extend/local-goal-board/test/local-goal-board.test.mjs +59 -1
  25. package/plugins/goalbuddy/skills/goalbuddy/scripts/check-goal-state.mjs +76 -0
  26. package/plugins/goalbuddy/skills/goalbuddy/scripts/render-task-prompt.mjs +61 -4
  27. package/plugins/goalbuddy/skills/goalbuddy/templates/agents.md +3 -2
  28. package/plugins/goalbuddy/skills/goalbuddy/templates/goal.md +18 -4
  29. package/plugins/goalbuddy/skills/goalbuddy/templates/state.yaml +6 -1
@@ -369,6 +369,9 @@ for (const task of tasks) {
369
369
  errors.push(`Worker receipt for ${task.id} has non-passing command status: ${status}`);
370
370
  }
371
371
  }
372
+ if (task.receipt.scalar("needs_judge") === true) {
373
+ warnings.push(`Worker receipt for ${task.id} requests legacy needs_judge; GoalBuddy now lets the PM continue by default and reviews only at phase, risk, ambiguity, rejected-verification, or final-completion boundaries`);
374
+ }
372
375
  }
373
376
  if (task.type === "scout" && task.status === "done" && hasReceipt) {
374
377
  if (!task.receipt.has("summary")) errors.push(`Scout receipt for ${task.id} missing summary`);
@@ -381,6 +384,8 @@ for (const task of tasks) {
381
384
  }
382
385
  }
383
386
 
387
+ warnings.push(...microSliceWarnings(tasks, activeTask, goalStatus));
388
+
384
389
  function validateSubgoal(task) {
385
390
  if (isChildCheck) {
386
391
  errors.push(`child task ${task.id} must not contain a nested subgoal`);
@@ -430,6 +435,77 @@ function validateSubgoal(task) {
430
435
  }
431
436
  }
432
437
 
438
+ function microSliceWarnings(tasks, activeTaskId, goalStatus) {
439
+ const found = [];
440
+ const guidance = "Board may be micro-slicing. Prefer the largest safe useful slice.";
441
+ const doneTasks = tasks.filter((task) => task.status === "done");
442
+ const workerTasks = tasks.filter((task) => task.type === "worker");
443
+ const recentTinyWorkers = workerTasks.slice(-5).filter((task) => isTinyTask(task));
444
+ const firstMilestoneComplete = nestedScalar("goal", "first_milestone_complete") === true;
445
+
446
+ if (recentTinyWorkers.length >= 3) {
447
+ found.push(`${guidance} Three recent Worker tasks look tiny.`);
448
+ }
449
+
450
+ for (const task of tasks) {
451
+ if (task.type === "judge" && /pick small reviewable work|select one narrow next task/i.test(task.raw)) {
452
+ found.push(`${guidance} Judge instructions still ask for small or narrow work.`);
453
+ break;
454
+ }
455
+ }
456
+
457
+ if (goalStatus !== "active" || !activeTaskId) return [...new Set(found)];
458
+ const activeIndex = tasks.findIndex((task) => task.id === activeTaskId);
459
+ if (activeIndex === -1) return [...new Set(found)];
460
+ const active = tasks[activeIndex];
461
+ if (active.type === "worker") {
462
+ if (doneTasks.length >= 10 && active.allowedFiles.length > 0 && active.allowedFiles.length <= 2) {
463
+ found.push(`${guidance} Active Worker ${active.id} has only ${active.allowedFiles.length} allowed_files after ${doneTasks.length} completed tasks.`);
464
+ }
465
+ if (firstMilestoneComplete && isTinyTask(active)) {
466
+ found.push(`${guidance} The first milestone is complete, so the active Worker should move toward the next real milestone.`);
467
+ }
468
+ if (isMicroWorkerTask(active)) {
469
+ found.push(`${guidance} Active Worker ${active.id} looks like another helper-sized slice.`);
470
+ }
471
+ }
472
+ if (active.type !== "judge") return [...new Set(found)];
473
+
474
+ let pairs = 0;
475
+ for (let index = activeIndex; index > 0; index -= 2) {
476
+ const judge = tasks[index];
477
+ const worker = tasks[index - 1];
478
+ if (!isMicroJudgeForWorker(judge, worker)) break;
479
+ pairs += 1;
480
+ }
481
+ if (pairs >= 2) {
482
+ found.push(`${guidance} Micro Worker/Judge loop detected ending at ${active.id}.`);
483
+ }
484
+ return [...new Set(found)];
485
+ }
486
+
487
+ function isMicroJudgeForWorker(judge, worker) {
488
+ if (!judge || !worker) return false;
489
+ if (judge.type !== "judge" || worker.type !== "worker") return false;
490
+ if (!["active", "queued", "done"].includes(judge.status) || worker.status !== "done") return false;
491
+ const objective = String(judge.objective || "").toLowerCase();
492
+ return objective.includes(worker.id.toLowerCase()) && /audit|review|approve/.test(objective) && isMicroWorkerTask(worker);
493
+ }
494
+
495
+ function isMicroWorkerTask(task) {
496
+ if (!task || task.type !== "worker") return false;
497
+ const objective = String(task.objective || "").toLowerCase();
498
+ if (/collapsed|batch|package|tranche/.test(objective)) return false;
499
+ return /one narrow|single helper|one helper|per[- ]helper|per[- ]table|projection helper/.test(objective);
500
+ }
501
+
502
+ function isTinyTask(task) {
503
+ if (!task) return false;
504
+ const text = [task.objective, task.raw, task.receipt?.raw].join(" ").toLowerCase();
505
+ if (/collapsed|batch|package|tranche|vertical slice|milestone/.test(text)) return false;
506
+ return /\b(tiny|narrow|single helper|one helper|projection helper|projection function|contract file|read-only proof|doc note|validator|validation wrapper|pure helper|caller-input)\b/.test(text);
507
+ }
508
+
433
509
  function matchesAllowedFile(file, allowedFiles) {
434
510
  return allowedFiles.some((pattern) => globMatch(pattern, file));
435
511
  }
@@ -7,7 +7,7 @@ import { parseGoalStateText } from "../extend/local-goal-board/scripts/lib/goal-
7
7
  const ROLE_DEFAULTS = {
8
8
  scout: { agent: "goal_scout", reasoning: "low", sandbox: "read-only" },
9
9
  judge: { agent: "goal_judge", reasoning: "high", sandbox: "read-only" },
10
- worker: { agent: "goal_worker", reasoning: "low", sandbox: "workspace-write" },
10
+ worker: { agent: "goal_worker", reasoning: "medium", sandbox: "workspace-write" },
11
11
  pm: { agent: "PM", reasoning: "medium", sandbox: "workspace-write" },
12
12
  };
13
13
 
@@ -39,11 +39,13 @@ export function renderTaskPrompt(options) {
39
39
  payload: {
40
40
  metadata: {
41
41
  recommended_agent: defaults.agent,
42
+ required_spawn_agent_type: defaults.agent === "PM" ? null : defaults.agent,
42
43
  recommended_reasoning: reasoning,
43
44
  sandbox: defaults.sandbox,
44
45
  fork_context_allowed: role !== "worker",
45
46
  board_path: board.path,
46
47
  child_board_paths: childBoardPaths(board),
48
+ slice_policy: board.document.rules?.slice_policy || null,
47
49
  warnings,
48
50
  },
49
51
  task: {
@@ -143,15 +145,53 @@ function promptWarnings(board, task) {
143
145
  if (stringList(task.allowed_files).length === 0) warnings.push(`Worker task ${task.id} has no allowed_files.`);
144
146
  if (stringList(task.verify).length === 0) warnings.push(`Worker task ${task.id} has no verify commands.`);
145
147
  if (stringList(task.stop_if).length === 0) warnings.push(`Worker task ${task.id} has no stop_if conditions.`);
148
+ if (isFalse(board.goal.full_outcome_complete)) {
149
+ warnings.push(`full_outcome_complete is false and ${task.id} is an active Worker; do not stop after rendering or repairing the board. Execute the Worker unless a stop_if condition applies.`);
150
+ }
146
151
  }
147
152
  for (const candidate of board.tasks) {
148
153
  if (candidate?.subgoal && Number(candidate.subgoal.depth) !== 1) {
149
154
  warnings.push(`Task ${candidate.id} has subgoal.depth ${candidate.subgoal.depth || "<missing>"}; only depth 1 is supported.`);
150
155
  }
151
156
  }
157
+ warnings.push(...microSliceWarnings(board, task));
152
158
  return warnings;
153
159
  }
154
160
 
161
+ function microSliceWarnings(board, task) {
162
+ const warnings = [];
163
+ const doneTasks = board.tasks.filter((candidate) => candidate?.status === "done");
164
+ const recentWorkers = board.tasks
165
+ .filter((candidate) => normalizeRole(candidate?.type) === "worker")
166
+ .slice(-5);
167
+ const recentTinyWorkers = recentWorkers.filter((candidate) => isTinyTask(candidate));
168
+ const activeRole = normalizeRole(task.type);
169
+ const activeAllowedFiles = stringList(task.allowed_files);
170
+ const firstMilestoneComplete = isTrue(board.goal.first_milestone_complete);
171
+ const microWarning = "Board may be micro-slicing. Prefer the largest safe useful slice.";
172
+
173
+ if (recentTinyWorkers.length >= 3) warnings.push(microWarning);
174
+ if (doneTasks.length >= 10 && activeRole === "worker" && activeAllowedFiles.length > 0 && activeAllowedFiles.length <= 2) {
175
+ warnings.push(`${microWarning} Active Worker ${task.id} has only ${activeAllowedFiles.length} allowed_files after ${doneTasks.length} completed tasks.`);
176
+ }
177
+ if (firstMilestoneComplete && activeRole === "worker" && isTinyTask(task)) {
178
+ warnings.push(`${microWarning} The first milestone is complete, so the active Worker should move toward the next real milestone.`);
179
+ }
180
+ if (activeRole === "judge" && /pick small reviewable work|select one narrow next task/i.test(String(task.objective || "") + "\n" + stringList(task.constraints).join("\n"))) {
181
+ warnings.push(`${microWarning} Judge instructions still ask for small or narrow work.`);
182
+ }
183
+ return [...new Set(warnings)];
184
+ }
185
+
186
+ function isTinyTask(task) {
187
+ const text = [
188
+ task?.objective,
189
+ stringList(task?.constraints).join(" "),
190
+ task?.receipt?.summary,
191
+ ].join(" ").toLowerCase();
192
+ return /\b(tiny|narrow|single helper|one helper|projection helper|projection function|contract file|read-only proof|doc note|validator|validation wrapper|pure helper|caller-input)\b/.test(text);
193
+ }
194
+
155
195
  function normalizeRole(value) {
156
196
  const role = String(value || "pm").toLowerCase();
157
197
  return ROLE_DEFAULTS[role] ? role : "pm";
@@ -163,6 +203,14 @@ function normalizeReasoning(value, fallback) {
163
203
  return fallback;
164
204
  }
165
205
 
206
+ function isFalse(value) {
207
+ return value === false || String(value).toLowerCase() === "false";
208
+ }
209
+
210
+ function isTrue(value) {
211
+ return value === true || String(value).toLowerCase() === "true";
212
+ }
213
+
166
214
  function stringList(value) {
167
215
  return Array.isArray(value) ? value.filter((item) => item !== null && item !== undefined).map(String) : [];
168
216
  }
@@ -175,15 +223,14 @@ function receiptSchema(role) {
175
223
  commands: [{ cmd: "<command>", status: "pass | fail | not_run" }],
176
224
  summary: "<=120 words",
177
225
  remaining_blockers: [],
178
- needs_judge: false,
179
226
  };
180
227
  }
181
228
  if (role === "judge") {
182
229
  return {
183
230
  result: "done | blocked",
184
- decision: "approve_next | reject_next | approve_subgoal | reject_subgoal | not_complete | complete",
231
+ decision: "approved | rejected | approve_subgoal | reject_subgoal | not_complete | complete",
232
+ full_outcome_complete: false,
185
233
  evidence: [],
186
- next_allowed_task: null,
187
234
  blocked_tasks: [],
188
235
  required_board_updates: [],
189
236
  };
@@ -204,6 +251,7 @@ function formatPrompt(payload) {
204
251
  "",
205
252
  "Metadata:",
206
253
  `- recommended_agent: ${payload.metadata.recommended_agent}`,
254
+ `- required_spawn_agent_type: ${payload.metadata.required_spawn_agent_type || "PM fallback"}`,
207
255
  `- recommended_reasoning: ${payload.metadata.recommended_reasoning}`,
208
256
  `- sandbox: ${payload.metadata.sandbox}`,
209
257
  `- fork_context_allowed: ${payload.metadata.fork_context_allowed}`,
@@ -213,12 +261,21 @@ function formatPrompt(payload) {
213
261
  lines.push("- child_board_paths:");
214
262
  for (const path of payload.metadata.child_board_paths) lines.push(` - ${path}`);
215
263
  }
264
+ if (payload.metadata.slice_policy) {
265
+ lines.push(`- slice_policy: ${JSON.stringify(payload.metadata.slice_policy)}`);
266
+ }
216
267
  if (payload.metadata.warnings.length) {
217
268
  lines.push("- warnings:");
218
269
  for (const warning of payload.metadata.warnings) lines.push(` - ${warning}`);
219
270
  }
220
271
 
221
272
  lines.push(
273
+ "",
274
+ "Spawn contract:",
275
+ `- Codex spawn_agent agent_type: ${payload.metadata.required_spawn_agent_type || "do not spawn; run as PM"}`,
276
+ "- Do not substitute generic scout, worker, or judge agents for GoalBuddy agents.",
277
+ "- If the required GoalBuddy agent is unavailable, stop spawning and continue as PM fallback or install agents.",
278
+ "- After one wait_agent timeout with no visible allowed-file changes, stop waiting and recover deterministically.",
222
279
  "",
223
280
  "Task:",
224
281
  `- id: ${payload.task.id}`,
@@ -5,7 +5,7 @@ Use three generic agents. The main `/goal` thread remains PM and owns the board.
5
5
  | Agent | model_reasoning_effort | sandbox_mode | Purpose |
6
6
  |---|---:|---|---|
7
7
  | goal_scout | low | read-only | Targeted evidence mapping and candidate facts |
8
- | goal_worker | low | workspace-write | One bounded implementation/recovery task |
8
+ | goal_worker | medium | workspace-write | One coherent bounded implementation/recovery slice |
9
9
  | goal_judge | high | read-only | Strategic review, escalation, completion skepticism |
10
10
 
11
11
  ## PM Thinking Policy
@@ -44,5 +44,6 @@ Rules:
44
44
 
45
45
  - Only the PM loop chooses active tasks, marks tasks done, or completes the goal.
46
46
  - Keep at most one write-capable Worker active unless disjoint write scopes are explicit in `state.yaml`.
47
+ - Worker defaults to medium reasoning for implementation tasks and should complete the whole assigned slice.
47
48
  - Scout and Judge are read-only and safe to parallelize when their board inputs are clear.
48
- - Judge is high thinking.
49
+ - Judge is high thinking and should choose the largest safe useful slice, not the narrowest helper.
@@ -25,7 +25,7 @@
25
25
 
26
26
  ## Current Tranche
27
27
 
28
- <What is enough for the full owner outcome, and what is the current safe slice? For execution goals, the default is continuous: discover enough evidence, choose a safe implementation slice, implement it, verify it, audit it, then immediately advance to the next safe slice until the full original outcome is complete. Plan-only or one-slice-only stopping is valid only when explicitly requested.>
28
+ <What is enough for the full owner outcome, and what is the current largest reversible local work package? For execution goals, the default is continuous: discover enough evidence, choose a coherent work package, implement it, verify it, review only at phase/risk/final boundaries, then immediately advance to the next work package until the full original outcome is complete. Plan-only or one-package-only stopping is valid only when explicitly requested.>
29
29
 
30
30
  ## Non-Negotiable Constraints
31
31
 
@@ -37,7 +37,21 @@ Stop only when a final audit proves the full original outcome is complete.
37
37
 
38
38
  Do not stop after planning, discovery, or Judge selection if the user asked for working software or automation and a safe Worker task can be activated.
39
39
 
40
- Do not stop after a single verified Worker slice when the broader owner outcome still has safe local follow-up slices. After each slice audit, advance the board to the next highest-leverage safe Worker task and continue.
40
+ Do not stop after a single verified Worker package when the broader owner outcome still has safe local follow-up work. Advance the board to the next highest-leverage safe Worker package and continue unless a phase, risk, rejected-verification, ambiguity, or final-completion review is due.
41
+
42
+ Do not create one Worker/Judge pair per repeated file, table, route, or helper. Put repeated same-shape work into one Worker package and review the package as a whole.
43
+
44
+ ## Slice Sizing
45
+
46
+ Safe means bounded, explicit, verified, and reversible. It does not mean tiny.
47
+
48
+ A good task is the largest safe useful slice.
49
+
50
+ Small is not the goal. Useful is the goal.
51
+
52
+ A Worker should finish the whole assigned slice. A Judge should judge the whole assigned slice. A PM should reorient the board when tasks are safe but not moving the outcome.
53
+
54
+ Tiny tasks are allowed when the failure is isolated, the risk is high, the scope is unknown, or the tiny task unlocks a larger slice. Tiny tasks are bad when they keep happening, do not change behavior, only add wrappers/contracts/proof files, or avoid the real milestone.
41
55
 
42
56
  Do not stop because a slice needs owner input, credentials, production access, destructive operations, or policy decisions. Mark that exact slice blocked with a receipt, create the smallest safe follow-up or workaround task, and continue all local, non-destructive work that can still move the goal toward the full outcome.
43
57
 
@@ -67,9 +81,9 @@ On every `/goal` continuation:
67
81
  6. Assign Scout, Judge, Worker, or PM according to the task.
68
82
  7. Write a compact task receipt.
69
83
  8. Update the board.
70
- 9. If Judge selected a safe Worker task with `allowed_files`, `verify`, and `stop_if`, activate it and continue unless blocked.
84
+ 9. If safe local work remains, choose the next largest reversible Worker package and continue unless blocked.
71
85
  10. If a problem, suggestion, or follow-up should become a repo artifact, create an approved issue/PR or ask the operator whether to create one.
72
- 11. Treat a slice audit as a checkpoint, not completion, unless it explicitly proves the full original outcome is complete.
86
+ 11. Review at phase, risk, rejected-verification, ambiguity, or final-completion boundaries; do not review every small Worker by habit.
73
87
  12. Finish only with a Judge/PM audit receipt that maps receipts and verification back to the original user outcome and records `full_outcome_complete: true`.
74
88
 
75
89
  Issue and PR handoffs are supporting artifacts. `state.yaml` remains authoritative, and every external artifact decision must be recorded in a task receipt.
@@ -33,6 +33,11 @@ rules:
33
33
  missing_input_or_credentials_do_not_stop_goal: true
34
34
  preserve_and_validate_existing_plan: true
35
35
  intake_misfire_must_be_audited: true
36
+ slice_policy:
37
+ max_consecutive_tiny_tasks: 2
38
+ prefer_vertical_slices: true
39
+ judge_picks_largest_safe_slice: true
40
+ worker_completes_whole_slice: true
36
41
 
37
42
  agents:
38
43
  # installed | bundled_not_installed | missing | unknown
@@ -88,7 +93,7 @@ tasks:
88
93
  - "T001 receipt"
89
94
  constraints:
90
95
  - "Do not implement."
91
- - "Pick small reviewable work."
96
+ - "Pick the largest safe useful slice with clear allowed_files, verify commands, and stop conditions."
92
97
  expected_output:
93
98
  - "Decision"
94
99
  - "Exact Worker objective"