goalbuddy 0.3.5 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -4
- package/goalbuddy/SKILL.md +34 -13
- package/goalbuddy/agents/README.md +1 -1
- package/goalbuddy/agents/goal_judge.toml +8 -4
- package/goalbuddy/agents/goal_worker.toml +8 -5
- package/goalbuddy/extend/local-goal-board/scripts/lib/goal-board.mjs +248 -7
- package/goalbuddy/extend/local-goal-board/test/local-goal-board.test.mjs +59 -1
- package/goalbuddy/scripts/check-goal-state.mjs +76 -0
- package/goalbuddy/scripts/render-task-prompt.mjs +61 -4
- package/goalbuddy/templates/agents.md +3 -2
- package/goalbuddy/templates/goal.md +18 -4
- package/goalbuddy/templates/state.yaml +6 -1
- package/internal/cli/goal-maker.mjs +108 -8
- package/package.json +1 -1
- package/plugins/goalbuddy/.claude-plugin/plugin.json +1 -1
- package/plugins/goalbuddy/.codex-plugin/plugin.json +1 -1
- package/plugins/goalbuddy/agents/goal-judge.md +8 -4
- package/plugins/goalbuddy/agents/goal-worker.md +6 -4
- package/plugins/goalbuddy/skills/goalbuddy/SKILL.md +34 -13
- package/plugins/goalbuddy/skills/goalbuddy/agents/README.md +1 -1
- package/plugins/goalbuddy/skills/goalbuddy/agents/goal_judge.toml +8 -4
- package/plugins/goalbuddy/skills/goalbuddy/agents/goal_worker.toml +8 -5
- package/plugins/goalbuddy/skills/goalbuddy/extend/local-goal-board/scripts/lib/goal-board.mjs +248 -7
- package/plugins/goalbuddy/skills/goalbuddy/extend/local-goal-board/test/local-goal-board.test.mjs +59 -1
- package/plugins/goalbuddy/skills/goalbuddy/scripts/check-goal-state.mjs +76 -0
- package/plugins/goalbuddy/skills/goalbuddy/scripts/render-task-prompt.mjs +61 -4
- package/plugins/goalbuddy/skills/goalbuddy/templates/agents.md +3 -2
- package/plugins/goalbuddy/skills/goalbuddy/templates/goal.md +18 -4
- package/plugins/goalbuddy/skills/goalbuddy/templates/state.yaml +6 -1
|
@@ -369,6 +369,9 @@ for (const task of tasks) {
|
|
|
369
369
|
errors.push(`Worker receipt for ${task.id} has non-passing command status: ${status}`);
|
|
370
370
|
}
|
|
371
371
|
}
|
|
372
|
+
if (task.receipt.scalar("needs_judge") === true) {
|
|
373
|
+
warnings.push(`Worker receipt for ${task.id} requests legacy needs_judge; GoalBuddy now lets the PM continue by default and reviews only at phase, risk, ambiguity, rejected-verification, or final-completion boundaries`);
|
|
374
|
+
}
|
|
372
375
|
}
|
|
373
376
|
if (task.type === "scout" && task.status === "done" && hasReceipt) {
|
|
374
377
|
if (!task.receipt.has("summary")) errors.push(`Scout receipt for ${task.id} missing summary`);
|
|
@@ -381,6 +384,8 @@ for (const task of tasks) {
|
|
|
381
384
|
}
|
|
382
385
|
}
|
|
383
386
|
|
|
387
|
+
warnings.push(...microSliceWarnings(tasks, activeTask, goalStatus));
|
|
388
|
+
|
|
384
389
|
function validateSubgoal(task) {
|
|
385
390
|
if (isChildCheck) {
|
|
386
391
|
errors.push(`child task ${task.id} must not contain a nested subgoal`);
|
|
@@ -430,6 +435,77 @@ function validateSubgoal(task) {
|
|
|
430
435
|
}
|
|
431
436
|
}
|
|
432
437
|
|
|
438
|
+
/**
 * Collect advisory warnings suggesting the board is being cut into slices
 * that are too small.
 *
 * @param {Array<object>} tasks - Board tasks in board order.
 * @param {string} activeTaskId - Id of the currently active task (may be empty).
 * @param {string} goalStatus - Goal status; active-task checks only run for "active".
 * @returns {string[]} De-duplicated warning messages in first-seen order.
 */
function microSliceWarnings(tasks, activeTaskId, goalStatus) {
  const guidance = "Board may be micro-slicing. Prefer the largest safe useful slice.";
  // A Set keeps first-insertion order, so spreading it yields de-duplicated messages in emission order.
  const notes = new Set();
  const finish = () => [...notes];

  const completedCount = tasks.filter((entry) => entry.status === "done").length;
  let tinyRecentWorkers = 0;
  for (const entry of tasks.filter((candidate) => candidate.type === "worker").slice(-5)) {
    if (isTinyTask(entry)) tinyRecentWorkers += 1;
  }
  const milestoneReached = nestedScalar("goal", "first_milestone_complete") === true;

  if (tinyRecentWorkers >= 3) {
    notes.add(`${guidance} Three recent Worker tasks look tiny.`);
  }

  // One warning is enough no matter how many Judge tasks carry the legacy wording.
  const judgeAsksForSmallWork = tasks.some(
    (entry) => entry.type === "judge" && /pick small reviewable work|select one narrow next task/i.test(entry.raw),
  );
  if (judgeAsksForSmallWork) {
    notes.add(`${guidance} Judge instructions still ask for small or narrow work.`);
  }

  // The remaining checks only apply to a resolvable active task on an active goal.
  if (goalStatus !== "active" || !activeTaskId) return finish();
  const activeIndex = tasks.findIndex((entry) => entry.id === activeTaskId);
  if (activeIndex === -1) return finish();
  const active = tasks[activeIndex];

  if (active.type === "worker") {
    if (completedCount >= 10 && active.allowedFiles.length > 0 && active.allowedFiles.length <= 2) {
      notes.add(`${guidance} Active Worker ${active.id} has only ${active.allowedFiles.length} allowed_files after ${completedCount} completed tasks.`);
    }
    if (milestoneReached && isTinyTask(active)) {
      notes.add(`${guidance} The first milestone is complete, so the active Worker should move toward the next real milestone.`);
    }
    if (isMicroWorkerTask(active)) {
      notes.add(`${guidance} Active Worker ${active.id} looks like another helper-sized slice.`);
    }
  }
  if (active.type !== "judge") return finish();

  // Walk backwards two tasks at a time, counting consecutive (worker, judge) micro pairs.
  let pairs = 0;
  let cursor = activeIndex;
  while (cursor > 0 && isMicroJudgeForWorker(tasks[cursor], tasks[cursor - 1])) {
    pairs += 1;
    cursor -= 2;
  }
  if (pairs >= 2) {
    notes.add(`${guidance} Micro Worker/Judge loop detected ending at ${active.id}.`);
  }
  return finish();
}
|
|
486
|
+
|
|
487
|
+
/**
 * Decide whether `judge` is a review task for a helper-sized, completed `worker`.
 *
 * @param {object} judge - Candidate Judge task (reads type, status, objective).
 * @param {object} worker - Candidate Worker task (reads type, status, id).
 * @returns {boolean} True when the Judge objective names the Worker id, uses
 *   audit/review/approve wording, and the Worker is a micro task.
 */
function isMicroJudgeForWorker(judge, worker) {
  if (!judge || !worker) return false;
  if (judge.type !== "judge" || worker.type !== "worker") return false;
  if (!["active", "queued", "done"].includes(judge.status) || worker.status !== "done") return false;
  // Coerce the id so a missing or non-string id (e.g. a numeric YAML scalar)
  // cannot throw here; a blank id can never identify a worker, and an empty
  // string would otherwise match every objective via includes("").
  const workerId = String(worker.id ?? "").toLowerCase();
  if (workerId.trim() === "") return false;
  const objective = String(judge.objective || "").toLowerCase();
  return objective.includes(workerId) && /audit|review|approve/.test(objective) && isMicroWorkerTask(worker);
}
|
|
494
|
+
|
|
495
|
+
/**
 * Report whether a Worker task's objective describes a helper-sized slice.
 *
 * @param {object} task - Task to inspect (reads type and objective).
 * @returns {boolean} False for non-Worker tasks and for objectives that mention
 *   bundled work (collapsed/batch/package/tranche); otherwise true when the
 *   objective uses one of the helper-sized phrases.
 */
function isMicroWorkerTask(task) {
  if (task?.type !== "worker") return false;
  const objective = String(task.objective || "").toLowerCase();
  const mentionsBundledWork = /collapsed|batch|package|tranche/.test(objective);
  const mentionsHelperSizedWork = /one narrow|single helper|one helper|per[- ]helper|per[- ]table|projection helper/.test(objective);
  return !mentionsBundledWork && mentionsHelperSizedWork;
}
|
|
501
|
+
|
|
502
|
+
/**
 * Heuristically flag a task whose objective, raw text, or receipt text reads
 * like a very small unit of work.
 *
 * @param {object} task - Task to inspect (reads objective, raw, receipt.raw).
 * @returns {boolean} False for a missing task or when the text mentions
 *   bundled/milestone-sized work; otherwise true when a "tiny" keyword matches.
 */
function isTinyTask(task) {
  if (!task) return false;
  // Array join renders null/undefined parts as empty strings.
  const parts = [task.objective, task.raw, task.receipt?.raw];
  const haystack = parts.join(" ").toLowerCase();
  const mentionsLargeSlice = /collapsed|batch|package|tranche|vertical slice|milestone/.test(haystack);
  if (mentionsLargeSlice) return false;
  const tinyKeywords = /\b(tiny|narrow|single helper|one helper|projection helper|projection function|contract file|read-only proof|doc note|validator|validation wrapper|pure helper|caller-input)\b/;
  return tinyKeywords.test(haystack);
}
|
|
508
|
+
|
|
433
509
|
function matchesAllowedFile(file, allowedFiles) {
|
|
434
510
|
return allowedFiles.some((pattern) => globMatch(pattern, file));
|
|
435
511
|
}
|
|
@@ -7,7 +7,7 @@ import { parseGoalStateText } from "../extend/local-goal-board/scripts/lib/goal-
|
|
|
7
7
|
const ROLE_DEFAULTS = {
|
|
8
8
|
scout: { agent: "goal_scout", reasoning: "low", sandbox: "read-only" },
|
|
9
9
|
judge: { agent: "goal_judge", reasoning: "high", sandbox: "read-only" },
|
|
10
|
-
worker: { agent: "goal_worker", reasoning: "
|
|
10
|
+
worker: { agent: "goal_worker", reasoning: "medium", sandbox: "workspace-write" },
|
|
11
11
|
pm: { agent: "PM", reasoning: "medium", sandbox: "workspace-write" },
|
|
12
12
|
};
|
|
13
13
|
|
|
@@ -39,11 +39,13 @@ export function renderTaskPrompt(options) {
|
|
|
39
39
|
payload: {
|
|
40
40
|
metadata: {
|
|
41
41
|
recommended_agent: defaults.agent,
|
|
42
|
+
required_spawn_agent_type: defaults.agent === "PM" ? null : defaults.agent,
|
|
42
43
|
recommended_reasoning: reasoning,
|
|
43
44
|
sandbox: defaults.sandbox,
|
|
44
45
|
fork_context_allowed: role !== "worker",
|
|
45
46
|
board_path: board.path,
|
|
46
47
|
child_board_paths: childBoardPaths(board),
|
|
48
|
+
slice_policy: board.document.rules?.slice_policy || null,
|
|
47
49
|
warnings,
|
|
48
50
|
},
|
|
49
51
|
task: {
|
|
@@ -143,15 +145,53 @@ function promptWarnings(board, task) {
|
|
|
143
145
|
if (stringList(task.allowed_files).length === 0) warnings.push(`Worker task ${task.id} has no allowed_files.`);
|
|
144
146
|
if (stringList(task.verify).length === 0) warnings.push(`Worker task ${task.id} has no verify commands.`);
|
|
145
147
|
if (stringList(task.stop_if).length === 0) warnings.push(`Worker task ${task.id} has no stop_if conditions.`);
|
|
148
|
+
if (isFalse(board.goal.full_outcome_complete)) {
|
|
149
|
+
warnings.push(`full_outcome_complete is false and ${task.id} is an active Worker; do not stop after rendering or repairing the board. Execute the Worker unless a stop_if condition applies.`);
|
|
150
|
+
}
|
|
146
151
|
}
|
|
147
152
|
for (const candidate of board.tasks) {
|
|
148
153
|
if (candidate?.subgoal && Number(candidate.subgoal.depth) !== 1) {
|
|
149
154
|
warnings.push(`Task ${candidate.id} has subgoal.depth ${candidate.subgoal.depth || "<missing>"}; only depth 1 is supported.`);
|
|
150
155
|
}
|
|
151
156
|
}
|
|
157
|
+
warnings.push(...microSliceWarnings(board, task));
|
|
152
158
|
return warnings;
|
|
153
159
|
}
|
|
154
160
|
|
|
161
|
+
/**
 * Build advisory prompt warnings when the board looks like it is being cut
 * into slices that are too small.
 *
 * @param {object} board - Parsed board (reads board.tasks and board.goal).
 * @param {object} task - The task the prompt is being rendered for.
 * @returns {string[]} De-duplicated warning messages in first-seen order.
 */
function microSliceWarnings(board, task) {
  const microWarning = "Board may be micro-slicing. Prefer the largest safe useful slice.";

  const completedCount = board.tasks.filter((entry) => entry?.status === "done").length;
  // Only the last five Worker tasks on the board are considered "recent".
  const tinyRecentCount = board.tasks
    .filter((entry) => normalizeRole(entry?.type) === "worker")
    .slice(-5)
    .filter((entry) => isTinyTask(entry)).length;
  const role = normalizeRole(task.type);
  const allowedFiles = stringList(task.allowed_files);
  const milestoneDone = isTrue(board.goal.first_milestone_complete);
  const isWorker = role === "worker";

  // A Set keeps first-insertion order, so spreading it de-duplicates while preserving order.
  const unique = new Set();
  if (tinyRecentCount >= 3) {
    unique.add(microWarning);
  }
  if (completedCount >= 10 && isWorker && allowedFiles.length > 0 && allowedFiles.length <= 2) {
    unique.add(`${microWarning} Active Worker ${task.id} has only ${allowedFiles.length} allowed_files after ${completedCount} completed tasks.`);
  }
  if (milestoneDone && isWorker && isTinyTask(task)) {
    unique.add(`${microWarning} The first milestone is complete, so the active Worker should move toward the next real milestone.`);
  }
  if (role === "judge") {
    const instructions = `${String(task.objective || "")}\n${stringList(task.constraints).join("\n")}`;
    if (/pick small reviewable work|select one narrow next task/i.test(instructions)) {
      unique.add(`${microWarning} Judge instructions still ask for small or narrow work.`);
    }
  }
  return [...unique];
}
|
|
185
|
+
|
|
186
|
+
/**
 * Heuristically flag a task whose objective, constraints, or receipt summary
 * reads like a very small unit of work.
 *
 * Text that mentions bundled or milestone-sized work is never treated as tiny,
 * matching the exclusion the board checker applies, so both tools agree on
 * whether the same task deserves a micro-slicing warning.
 *
 * @param {object} [task] - Task to inspect (reads objective, constraints, receipt.summary).
 * @returns {boolean} True when a "tiny" keyword matches and no bundled-work keyword does.
 */
function isTinyTask(task) {
  // Array join renders null/undefined parts as empty strings.
  const text = [
    task?.objective,
    stringList(task?.constraints).join(" "),
    task?.receipt?.summary,
  ].join(" ").toLowerCase();
  if (/collapsed|batch|package|tranche|vertical slice|milestone/.test(text)) return false;
  return /\b(tiny|narrow|single helper|one helper|projection helper|projection function|contract file|read-only proof|doc note|validator|validation wrapper|pure helper|caller-input)\b/.test(text);
}
|
|
194
|
+
|
|
155
195
|
function normalizeRole(value) {
|
|
156
196
|
const role = String(value || "pm").toLowerCase();
|
|
157
197
|
return ROLE_DEFAULTS[role] ? role : "pm";
|
|
@@ -163,6 +203,14 @@ function normalizeReasoning(value, fallback) {
|
|
|
163
203
|
return fallback;
|
|
164
204
|
}
|
|
165
205
|
|
|
206
|
+
/**
 * Check for boolean false or its string spelling in any letter case.
 *
 * @param {*} value - Value to test.
 * @returns {boolean} True for `false` or a value whose string form is "false" (case-insensitive).
 */
function isFalse(value) {
  if (value === false) return true;
  const spelled = String(value).toLowerCase();
  return spelled === "false";
}
|
|
209
|
+
|
|
210
|
+
/**
 * Check for boolean true or its string spelling in any letter case.
 *
 * @param {*} value - Value to test.
 * @returns {boolean} True for `true` or a value whose string form is "true" (case-insensitive).
 */
function isTrue(value) {
  if (value === true) return true;
  const spelled = String(value).toLowerCase();
  return spelled === "true";
}
|
|
213
|
+
|
|
166
214
|
function stringList(value) {
|
|
167
215
|
return Array.isArray(value) ? value.filter((item) => item !== null && item !== undefined).map(String) : [];
|
|
168
216
|
}
|
|
@@ -175,15 +223,14 @@ function receiptSchema(role) {
|
|
|
175
223
|
commands: [{ cmd: "<command>", status: "pass | fail | not_run" }],
|
|
176
224
|
summary: "<=120 words",
|
|
177
225
|
remaining_blockers: [],
|
|
178
|
-
needs_judge: false,
|
|
179
226
|
};
|
|
180
227
|
}
|
|
181
228
|
if (role === "judge") {
|
|
182
229
|
return {
|
|
183
230
|
result: "done | blocked",
|
|
184
|
-
decision: "
|
|
231
|
+
decision: "approved | rejected | approve_subgoal | reject_subgoal | not_complete | complete",
|
|
232
|
+
full_outcome_complete: false,
|
|
185
233
|
evidence: [],
|
|
186
|
-
next_allowed_task: null,
|
|
187
234
|
blocked_tasks: [],
|
|
188
235
|
required_board_updates: [],
|
|
189
236
|
};
|
|
@@ -204,6 +251,7 @@ function formatPrompt(payload) {
|
|
|
204
251
|
"",
|
|
205
252
|
"Metadata:",
|
|
206
253
|
`- recommended_agent: ${payload.metadata.recommended_agent}`,
|
|
254
|
+
`- required_spawn_agent_type: ${payload.metadata.required_spawn_agent_type || "PM fallback"}`,
|
|
207
255
|
`- recommended_reasoning: ${payload.metadata.recommended_reasoning}`,
|
|
208
256
|
`- sandbox: ${payload.metadata.sandbox}`,
|
|
209
257
|
`- fork_context_allowed: ${payload.metadata.fork_context_allowed}`,
|
|
@@ -213,12 +261,21 @@ function formatPrompt(payload) {
|
|
|
213
261
|
lines.push("- child_board_paths:");
|
|
214
262
|
for (const path of payload.metadata.child_board_paths) lines.push(` - ${path}`);
|
|
215
263
|
}
|
|
264
|
+
if (payload.metadata.slice_policy) {
|
|
265
|
+
lines.push(`- slice_policy: ${JSON.stringify(payload.metadata.slice_policy)}`);
|
|
266
|
+
}
|
|
216
267
|
if (payload.metadata.warnings.length) {
|
|
217
268
|
lines.push("- warnings:");
|
|
218
269
|
for (const warning of payload.metadata.warnings) lines.push(` - ${warning}`);
|
|
219
270
|
}
|
|
220
271
|
|
|
221
272
|
lines.push(
|
|
273
|
+
"",
|
|
274
|
+
"Spawn contract:",
|
|
275
|
+
`- Codex spawn_agent agent_type: ${payload.metadata.required_spawn_agent_type || "do not spawn; run as PM"}`,
|
|
276
|
+
"- Do not substitute generic scout, worker, or judge agents for GoalBuddy agents.",
|
|
277
|
+
"- If the required GoalBuddy agent is unavailable, stop spawning and continue as PM fallback or install agents.",
|
|
278
|
+
"- After one wait_agent timeout with no visible allowed-file changes, stop waiting and recover deterministically.",
|
|
222
279
|
"",
|
|
223
280
|
"Task:",
|
|
224
281
|
`- id: ${payload.task.id}`,
|
|
@@ -5,7 +5,7 @@ Use three generic agents. The main `/goal` thread remains PM and owns the board.
|
|
|
5
5
|
| Agent | model_reasoning_effort | sandbox_mode | Purpose |
|
|
6
6
|
|---|---:|---|---|
|
|
7
7
|
| goal_scout | low | read-only | Targeted evidence mapping and candidate facts |
|
|
8
|
-
| goal_worker |
|
|
8
|
+
| goal_worker | medium | workspace-write | One coherent bounded implementation/recovery slice |
|
|
9
9
|
| goal_judge | high | read-only | Strategic review, escalation, completion skepticism |
|
|
10
10
|
|
|
11
11
|
## PM Thinking Policy
|
|
@@ -44,5 +44,6 @@ Rules:
|
|
|
44
44
|
|
|
45
45
|
- Only the PM loop chooses active tasks, marks tasks done, or completes the goal.
|
|
46
46
|
- Keep at most one write-capable Worker active unless disjoint write scopes are explicit in `state.yaml`.
|
|
47
|
+
- Worker defaults to medium reasoning for implementation tasks and should complete the whole assigned slice.
|
|
47
48
|
- Scout and Judge are read-only and safe to parallelize when their board inputs are clear.
|
|
48
|
-
- Judge is high thinking.
|
|
49
|
+
- Judge is high thinking and should choose the largest safe useful slice, not the narrowest helper.
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
|
|
26
26
|
## Current Tranche
|
|
27
27
|
|
|
28
|
-
<What is enough for the full owner outcome, and what is the current
|
|
28
|
+
<What is enough for the full owner outcome, and what is the current largest reversible local work package? For execution goals, the default is continuous: discover enough evidence, choose a coherent work package, implement it, verify it, review only at phase/risk/final boundaries, then immediately advance to the next work package until the full original outcome is complete. Plan-only or one-package-only stopping is valid only when explicitly requested.>
|
|
29
29
|
|
|
30
30
|
## Non-Negotiable Constraints
|
|
31
31
|
|
|
@@ -37,7 +37,21 @@ Stop only when a final audit proves the full original outcome is complete.
|
|
|
37
37
|
|
|
38
38
|
Do not stop after planning, discovery, or Judge selection if the user asked for working software or automation and a safe Worker task can be activated.
|
|
39
39
|
|
|
40
|
-
Do not stop after a single verified Worker
|
|
40
|
+
Do not stop after a single verified Worker package when the broader owner outcome still has safe local follow-up work. Advance the board to the next highest-leverage safe Worker package and continue unless a phase, risk, rejected-verification, ambiguity, or final-completion review is due.
|
|
41
|
+
|
|
42
|
+
Do not create one Worker/Judge pair per repeated file, table, route, or helper. Put repeated same-shape work into one Worker package and review the package as a whole.
|
|
43
|
+
|
|
44
|
+
## Slice Sizing
|
|
45
|
+
|
|
46
|
+
Safe means bounded, explicit, verified, and reversible. It does not mean tiny.
|
|
47
|
+
|
|
48
|
+
A good task is the largest safe useful slice.
|
|
49
|
+
|
|
50
|
+
Small is not the goal. Useful is the goal.
|
|
51
|
+
|
|
52
|
+
A Worker should finish the whole assigned slice. A Judge should judge the whole assigned slice. A PM should reorient the board when tasks are safe but not moving the outcome.
|
|
53
|
+
|
|
54
|
+
Tiny tasks are allowed when the failure is isolated, the risk is high, the scope is unknown, or the tiny task unlocks a larger slice. Tiny tasks are bad when they keep happening, do not change behavior, only add wrappers/contracts/proof files, or avoid the real milestone.
|
|
41
55
|
|
|
42
56
|
Do not stop because a slice needs owner input, credentials, production access, destructive operations, or policy decisions. Mark that exact slice blocked with a receipt, create the smallest safe follow-up or workaround task, and continue all local, non-destructive work that can still move the goal toward the full outcome.
|
|
43
57
|
|
|
@@ -67,9 +81,9 @@ On every `/goal` continuation:
|
|
|
67
81
|
6. Assign Scout, Judge, Worker, or PM according to the task.
|
|
68
82
|
7. Write a compact task receipt.
|
|
69
83
|
8. Update the board.
|
|
70
|
-
9. If
|
|
84
|
+
9. If safe local work remains, choose the next largest reversible Worker package and continue unless blocked.
|
|
71
85
|
10. If a problem, suggestion, or follow-up should become a repo artifact, create an approved issue/PR or ask the operator whether to create one.
|
|
72
|
-
11.
|
|
86
|
+
11. Review at phase, risk, rejected-verification, ambiguity, or final-completion boundaries; do not review every small Worker by habit.
|
|
73
87
|
12. Finish only with a Judge/PM audit receipt that maps receipts and verification back to the original user outcome and records `full_outcome_complete: true`.
|
|
74
88
|
|
|
75
89
|
Issue and PR handoffs are supporting artifacts. `state.yaml` remains authoritative, and every external artifact decision must be recorded in a task receipt.
|
|
@@ -33,6 +33,11 @@ rules:
|
|
|
33
33
|
missing_input_or_credentials_do_not_stop_goal: true
|
|
34
34
|
preserve_and_validate_existing_plan: true
|
|
35
35
|
intake_misfire_must_be_audited: true
|
|
36
|
+
slice_policy:
|
|
37
|
+
max_consecutive_tiny_tasks: 2
|
|
38
|
+
prefer_vertical_slices: true
|
|
39
|
+
judge_picks_largest_safe_slice: true
|
|
40
|
+
worker_completes_whole_slice: true
|
|
36
41
|
|
|
37
42
|
agents:
|
|
38
43
|
# installed | bundled_not_installed | missing | unknown
|
|
@@ -88,7 +93,7 @@ tasks:
|
|
|
88
93
|
- "T001 receipt"
|
|
89
94
|
constraints:
|
|
90
95
|
- "Do not implement."
|
|
91
|
-
- "Pick
|
|
96
|
+
- "Pick the largest safe useful slice with clear allowed_files, verify commands, and stop conditions."
|
|
92
97
|
expected_output:
|
|
93
98
|
- "Decision"
|
|
94
99
|
- "Exact Worker objective"
|
|
@@ -83,15 +83,23 @@ async function main() {
|
|
|
83
83
|
break;
|
|
84
84
|
case "install":
|
|
85
85
|
case "update":
|
|
86
|
+
if (wantsHelp()) {
|
|
87
|
+
usage();
|
|
88
|
+
break;
|
|
89
|
+
}
|
|
86
90
|
if (installTargetMode() === "all") {
|
|
87
91
|
await installEverywhere();
|
|
88
92
|
} else if (installTargetMode() === "codex") {
|
|
89
|
-
|
|
93
|
+
installPlugin();
|
|
90
94
|
} else {
|
|
91
95
|
await installClaudeAll();
|
|
92
96
|
}
|
|
93
97
|
break;
|
|
94
98
|
case "agents":
|
|
99
|
+
if (wantsHelp()) {
|
|
100
|
+
usage();
|
|
101
|
+
break;
|
|
102
|
+
}
|
|
95
103
|
if (targetMode() === "codex") {
|
|
96
104
|
installAgents();
|
|
97
105
|
} else {
|
|
@@ -99,6 +107,10 @@ async function main() {
|
|
|
99
107
|
}
|
|
100
108
|
break;
|
|
101
109
|
case "doctor":
|
|
110
|
+
if (wantsHelp()) {
|
|
111
|
+
usage();
|
|
112
|
+
break;
|
|
113
|
+
}
|
|
102
114
|
if (targetMode() === "codex") {
|
|
103
115
|
doctor();
|
|
104
116
|
} else {
|
|
@@ -110,6 +122,10 @@ async function main() {
|
|
|
110
122
|
checkUpdate();
|
|
111
123
|
break;
|
|
112
124
|
case "plugin":
|
|
125
|
+
if (wantsHelp()) {
|
|
126
|
+
pluginUsage();
|
|
127
|
+
break;
|
|
128
|
+
}
|
|
113
129
|
plugin();
|
|
114
130
|
break;
|
|
115
131
|
case "extend":
|
|
@@ -164,6 +180,10 @@ function hasFlag(name) {
|
|
|
164
180
|
return args.includes(name);
|
|
165
181
|
}
|
|
166
182
|
|
|
183
|
+
/**
 * Report whether a help flag was passed on the command line.
 *
 * @returns {boolean} True when `--help` or `-h` is present according to hasFlag.
 */
function wantsHelp() {
  // `some` stops at the first hit, so `-h` is only checked when `--help` is absent.
  return ["--help", "-h"].some((flag) => hasFlag(flag));
}
|
|
186
|
+
|
|
167
187
|
function positional(index) {
|
|
168
188
|
return positionalArgs()[index] || "";
|
|
169
189
|
}
|
|
@@ -552,7 +572,7 @@ This alias has the same invocation boundary as \`$${canonicalSkillName}\`: prepa
|
|
|
552
572
|
function installAgents({ quiet = false } = {}) {
|
|
553
573
|
const source = join(skillSource, "agents");
|
|
554
574
|
const target = join(codexHome(), "agents");
|
|
555
|
-
const force = hasFlag("--force") || command === "update" || command === "install";
|
|
575
|
+
const force = hasFlag("--force") || command === "update" || command === "install" || command === "default" || command === "plugin";
|
|
556
576
|
mkdirSync(target, { recursive: true });
|
|
557
577
|
|
|
558
578
|
const results = [];
|
|
@@ -601,6 +621,7 @@ async function installAll() {
|
|
|
601
621
|
function doctor() {
|
|
602
622
|
const skillPath = join(installedSkillRoot(), "SKILL.md");
|
|
603
623
|
const legacySkillPath = join(legacyInstalledSkillRoot(), "SKILL.md");
|
|
624
|
+
const plugin = installedCodexPlugin();
|
|
604
625
|
const agentsPath = join(codexHome(), "agents");
|
|
605
626
|
const installed = existsSync(skillPath);
|
|
606
627
|
const legacyInstalled = existsSync(legacySkillPath);
|
|
@@ -616,12 +637,38 @@ function doctor() {
|
|
|
616
637
|
});
|
|
617
638
|
const goalRuntime = codexGoalRuntimeStatus();
|
|
618
639
|
const warnings = [];
|
|
640
|
+
const errors = [];
|
|
619
641
|
if (!goalRuntime.ready) {
|
|
620
642
|
warnings.push("native Codex /goal runtime is not ready; run `codex login` and `codex features enable goals` before using /goal.");
|
|
621
643
|
}
|
|
644
|
+
if (!plugin.skill_installed && !installed) {
|
|
645
|
+
errors.push("Codex GoalBuddy plugin is not installed; run `npx goalbuddy --target codex`.");
|
|
646
|
+
}
|
|
647
|
+
if (plugin.skill_installed && !plugin.enabled) {
|
|
648
|
+
errors.push("Codex GoalBuddy plugin cache exists but is not enabled in config.toml; run `npx goalbuddy --target codex`.");
|
|
649
|
+
}
|
|
650
|
+
for (const file of missingAgents) {
|
|
651
|
+
errors.push(`Missing GoalBuddy Codex agent: ${file}; run \`npx goalbuddy --target codex\`.`);
|
|
652
|
+
}
|
|
653
|
+
for (const file of staleAgents) {
|
|
654
|
+
errors.push(`Stale GoalBuddy Codex agent: ${file}; run \`npx goalbuddy update --target codex\`.`);
|
|
655
|
+
}
|
|
656
|
+
if (hasFlag("--goal-ready") && !goalRuntime.ready) {
|
|
657
|
+
errors.push("Native Codex /goal runtime is not ready. GoalBuddy $goal-prep and local boards are separate from OpenAI-gated native /goal.");
|
|
658
|
+
}
|
|
622
659
|
|
|
623
660
|
console.log(JSON.stringify({
|
|
624
661
|
codex_home: codexHome(),
|
|
662
|
+
codex_install_model: "plugin",
|
|
663
|
+
expected_state: {
|
|
664
|
+
plugin_cache: true,
|
|
665
|
+
bundled_skill: "$goal-prep",
|
|
666
|
+
standalone_personal_skill: false,
|
|
667
|
+
compatibility_skill: false,
|
|
668
|
+
agents: requiredAgentFiles,
|
|
669
|
+
native_goal: "separate OpenAI-gated Codex feature",
|
|
670
|
+
},
|
|
671
|
+
plugin,
|
|
625
672
|
skill_installed: installed,
|
|
626
673
|
skill_path: skillPath,
|
|
627
674
|
compatibility_skill_installed: legacyInstalled,
|
|
@@ -631,11 +678,14 @@ function doctor() {
|
|
|
631
678
|
stale_agents: staleAgents,
|
|
632
679
|
goal_runtime: goalRuntime,
|
|
633
680
|
warnings,
|
|
681
|
+
errors,
|
|
634
682
|
}, null, 2));
|
|
635
683
|
|
|
636
|
-
const
|
|
684
|
+
const pluginOk = plugin.skill_installed && plugin.enabled;
|
|
685
|
+
const legacySkillOk = installed;
|
|
686
|
+
const installOk = (pluginOk || legacySkillOk) && missingAgents.length === 0 && staleAgents.length === 0;
|
|
637
687
|
const goalReadyOk = !hasFlag("--goal-ready") || goalRuntime.ready;
|
|
638
|
-
process.exit(installOk && goalReadyOk ? 0 : 1);
|
|
688
|
+
process.exit(installOk && goalReadyOk && errors.length === 0 ? 0 : 1);
|
|
639
689
|
}
|
|
640
690
|
|
|
641
691
|
function checkUpdate() {
|
|
@@ -680,6 +730,10 @@ function updateReport() {
|
|
|
680
730
|
|
|
681
731
|
function plugin() {
|
|
682
732
|
const subcommand = positional(1) || "";
|
|
733
|
+
if (wantsHelp()) {
|
|
734
|
+
pluginUsage();
|
|
735
|
+
return;
|
|
736
|
+
}
|
|
683
737
|
switch (subcommand) {
|
|
684
738
|
case "install":
|
|
685
739
|
installPlugin();
|
|
@@ -733,6 +787,7 @@ function installPlugin({ quiet = false } = {}) {
|
|
|
733
787
|
cleanupPreservedExtensions([preservedExtensions.tempPath]);
|
|
734
788
|
const removedLegacySkillPaths = cleanupLegacyCodexSkills();
|
|
735
789
|
const configPath = enablePluginConfig();
|
|
790
|
+
const agents = installAgents({ quiet: true });
|
|
736
791
|
|
|
737
792
|
const report = {
|
|
738
793
|
installed: true,
|
|
@@ -743,6 +798,7 @@ function installPlugin({ quiet = false } = {}) {
|
|
|
743
798
|
marketplace_source: source,
|
|
744
799
|
cache_path: pluginCachePath,
|
|
745
800
|
config_path: configPath,
|
|
801
|
+
agents,
|
|
746
802
|
preserved_extensions: preservedExtensions.ids,
|
|
747
803
|
removed_legacy_skill_paths: removedLegacySkillPaths,
|
|
748
804
|
};
|
|
@@ -758,6 +814,7 @@ function installPlugin({ quiet = false } = {}) {
|
|
|
758
814
|
console.log(`Marketplace: ${source}`);
|
|
759
815
|
console.log(`Cache: ${pluginCachePath}`);
|
|
760
816
|
console.log(`Config: ${configPath}`);
|
|
817
|
+
console.log(`Agents: ${summarizeStatuses(report.agents)}`);
|
|
761
818
|
if (report.preserved_extensions.length) {
|
|
762
819
|
console.log(`Preserved extensions: ${report.preserved_extensions.join(", ")}`);
|
|
763
820
|
}
|
|
@@ -1298,8 +1355,24 @@ function installedSkillRoot() {
|
|
|
1298
1355
|
}
|
|
1299
1356
|
|
|
1300
1357
|
/**
 * Return the skill path reported by installedCodexPlugin().
 *
 * @returns {string} The `skill_path` field of the installed-plugin report.
 */
function installedPluginSkillRoot() {
  const { skill_path: skillPath } = installedCodexPlugin();
  return skillPath;
}
|
|
1360
|
+
|
|
1361
|
+
function installedCodexPlugin() {
|
|
1301
1362
|
const root = join(codexHome(), "plugins", "cache", pluginName, pluginName);
|
|
1302
|
-
|
|
1363
|
+
const configPath = join(codexHome(), "config.toml");
|
|
1364
|
+
const base = {
|
|
1365
|
+
installed: false,
|
|
1366
|
+
enabled: pluginConfigEnabled(configPath),
|
|
1367
|
+
name: `${pluginName}@${pluginName}`,
|
|
1368
|
+
version: "",
|
|
1369
|
+
cache_path: "",
|
|
1370
|
+
manifest_path: "",
|
|
1371
|
+
skill_installed: false,
|
|
1372
|
+
skill_path: "",
|
|
1373
|
+
config_path: configPath,
|
|
1374
|
+
};
|
|
1375
|
+
if (!existsSync(root)) return base;
|
|
1303
1376
|
const versions = readdirSync(root, { withFileTypes: true })
|
|
1304
1377
|
.filter((entry) => entry.isDirectory())
|
|
1305
1378
|
.map((entry) => entry.name)
|
|
@@ -1307,10 +1380,37 @@ function installedPluginSkillRoot() {
|
|
|
1307
1380
|
.sort(compareVersions)
|
|
1308
1381
|
.reverse();
|
|
1309
1382
|
for (const version of versions) {
|
|
1310
|
-
const
|
|
1311
|
-
|
|
1383
|
+
const cachePath = join(root, version);
|
|
1384
|
+
const skillPath = join(cachePath, "skills", canonicalSkillDirectory);
|
|
1385
|
+
const manifestPath = join(cachePath, ".codex-plugin", "plugin.json");
|
|
1386
|
+
if (existsSync(join(skillPath, "SKILL.md"))) {
|
|
1387
|
+
return {
|
|
1388
|
+
...base,
|
|
1389
|
+
installed: true,
|
|
1390
|
+
version,
|
|
1391
|
+
cache_path: cachePath,
|
|
1392
|
+
manifest_path: manifestPath,
|
|
1393
|
+
skill_installed: true,
|
|
1394
|
+
skill_path: skillPath,
|
|
1395
|
+
};
|
|
1396
|
+
}
|
|
1397
|
+
}
|
|
1398
|
+
return base;
|
|
1399
|
+
}
|
|
1400
|
+
|
|
1401
|
+
/**
 * Report whether the plugin's table in the given config file sets `enabled = true`.
 *
 * @param {string} configPath - Path to the config file to scan.
 * @returns {boolean} False when the file or the plugin table header is missing,
 *   or when the table has no `enabled` key before the next `[`-prefixed line;
 *   otherwise whether the first `enabled` assignment in the table is `true`.
 */
function pluginConfigEnabled(configPath) {
  if (!existsSync(configPath)) return false;
  const rows = readFileSync(configPath, "utf8")
    .split(/\r?\n/)
    .map((row) => row.trim());
  const sectionHeader = `[plugins."${pluginName}@${pluginName}"]`;
  const headerAt = rows.indexOf(sectionHeader);
  if (headerAt === -1) return false;

  for (const row of rows.slice(headerAt + 1)) {
    // A line starting with "[" ends the plugin's table without an enabled key.
    if (row.startsWith("[")) return false;
    // The first `enabled` assignment decides the result; anything but `true` is disabled.
    if (/^enabled\s*=/.test(row)) return /^enabled\s*=\s*true\b/.test(row);
  }
  return false;
}
|
|
1315
1415
|
|
|
1316
1416
|
function activeSkillRoot() {
|
package/package.json
CHANGED
|
@@ -13,11 +13,16 @@ Hard contract:
|
|
|
13
13
|
- Read only. Do not edit, stage, install, or implement.
|
|
14
14
|
- Read state receipts before raw files. Then read only the inputs named in the Judge task.
|
|
15
15
|
- Be skeptical of progress. Lots of files, docs, or tests are not completion.
|
|
16
|
-
- A safe Worker
|
|
16
|
+
- A safe Worker package must include objective, allowed_files, verify commands, and stop_if, and should cover the largest reversible local work package at that boundary.
|
|
17
|
+
- Choose the largest safe useful slice: bounded, explicit, verified, reversible, and outcome-moving. Safety does not mean tiny.
|
|
18
|
+
- Judge a whole useful slice, not one helper at a time.
|
|
19
|
+
- Detect micro-slice loops. Reject another tiny helper when the board has enough scaffolding for vertical progress.
|
|
20
|
+
- Select PM reorientation when recent receipts are safe-looking but outcome-light.
|
|
21
|
+
- Prefer milestone reviews over helper reviews.
|
|
17
22
|
- A safe child board must be depth 1, inside `subgoals/`, non-recursive, and linked from exactly one parent task.
|
|
18
23
|
- Parallel Worker work is safe only with provably disjoint `allowed_files`. Separate boards alone are not proof.
|
|
19
24
|
- Reject completion unless the full original outcome is mapped to receipts and current verification.
|
|
20
|
-
- Do not choose the active task or mutate state.
|
|
25
|
+
- Do not generate routine next tasks, choose the active task, or mutate state. The PM owns continuation after your review.
|
|
21
26
|
|
|
22
27
|
Return exactly one parseable JSON receipt object:
|
|
23
28
|
|
|
@@ -27,11 +32,10 @@ Return exactly one parseable JSON receipt object:
|
|
|
27
32
|
"result": "done | blocked",
|
|
28
33
|
"task_id": "<T###>",
|
|
29
34
|
"board_path": "<path to state.yaml>",
|
|
30
|
-
"decision": "
|
|
35
|
+
"decision": "approved | rejected | approve_subgoal | reject_subgoal | not_complete | complete",
|
|
31
36
|
"full_outcome_complete": false,
|
|
32
37
|
"rationale": "<=120 words>",
|
|
33
38
|
"evidence": [],
|
|
34
|
-
"next_allowed_task": null,
|
|
35
39
|
"subgoal_contract": null,
|
|
36
40
|
"parallel_safety": null,
|
|
37
41
|
"blocked_tasks": [],
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: goal-worker
|
|
3
|
-
description: GoalBuddy Worker. Bounded writer for
|
|
3
|
+
description: GoalBuddy Worker. Bounded writer for one coherent reversible Worker work package. Edits only allowed_files, runs verify, returns receipt.
|
|
4
4
|
tools: Read, Edit, Write, Grep, Glob, Bash
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
You are Worker for GoalBuddy.
|
|
8
8
|
|
|
9
|
-
Default effort:
|
|
9
|
+
Default effort: medium for implementation tasks. Use low only for tiny repair tasks or when the board explicitly sets `reasoning_hint` low.
|
|
10
10
|
|
|
11
11
|
Hard contract:
|
|
12
12
|
|
|
@@ -18,7 +18,10 @@ Hard contract:
|
|
|
18
18
|
- Do not create child sub-goals unless the task explicitly allows it.
|
|
19
19
|
- Run the verify commands exactly as listed after edits. You may make at most two fix attempts.
|
|
20
20
|
- Stop immediately if required evidence is missing, a file outside `allowed_files` is needed, source/product/tests conflict, or verification still fails after two attempts.
|
|
21
|
-
-
|
|
21
|
+
- Do not request a Judge just because the package is done. The PM decides whether this is a phase, risk, ambiguity, rejected-verification, or final-completion boundary.
|
|
22
|
+
- Keep the diff coherent, bounded, and reversible. Do not shrink the assigned work below the largest safe useful slice.
|
|
23
|
+
- Complete the whole assigned slice. Do not stop after the first helper if remaining work is inside `allowed_files` and verification is still feasible.
|
|
24
|
+
- If the task asks for a vertical slice, complete the vertical slice.
|
|
22
25
|
|
|
23
26
|
Parallel safety:
|
|
24
27
|
|
|
@@ -39,7 +42,6 @@ Return exactly one parseable JSON receipt object:
|
|
|
39
42
|
"commands": [],
|
|
40
43
|
"summary": "<=120 words>",
|
|
41
44
|
"remaining_blockers": [],
|
|
42
|
-
"needs_judge": false,
|
|
43
45
|
"verification_attempts": 1,
|
|
44
46
|
"stopped_because": null
|
|
45
47
|
}
|