oh-my-opencode 4.5.12 → 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/opencode-qa/SKILL.md +194 -0
- package/.agents/skills/opencode-qa/references/cli-commands.md +188 -0
- package/.agents/skills/opencode-qa/references/db-investigation.md +197 -0
- package/.agents/skills/opencode-qa/references/events-hooks.md +110 -0
- package/.agents/skills/opencode-qa/references/sdk.md +96 -0
- package/.agents/skills/opencode-qa/references/server-api.md +200 -0
- package/.agents/skills/opencode-qa/references/testing-harness.md +218 -0
- package/.agents/skills/opencode-qa/references/tui-tmux.md +52 -0
- package/.agents/skills/opencode-qa/scripts/db-session-by-id.sh +53 -0
- package/.agents/skills/opencode-qa/scripts/db-session-by-name.sh +57 -0
- package/.agents/skills/opencode-qa/scripts/db-session-by-text.sh +158 -0
- package/.agents/skills/opencode-qa/scripts/export-roundtrip.sh +57 -0
- package/.agents/skills/opencode-qa/scripts/lib/common.sh +216 -0
- package/.agents/skills/opencode-qa/scripts/server-smoke.sh +64 -0
- package/.agents/skills/opencode-qa/scripts/sse-hook-probe.sh +106 -0
- package/.agents/skills/opencode-qa/scripts/tui-smoke.sh +89 -0
- package/README.ja.md +13 -3
- package/README.ko.md +13 -3
- package/README.md +24 -14
- package/README.ru.md +13 -3
- package/README.zh-cn.md +13 -3
- package/bin/oh-my-opencode.js +4 -3
- package/bin/oh-my-opencode.test.ts +35 -7
- package/bin/platform.d.ts +1 -1
- package/bin/platform.js +4 -4
- package/bin/platform.test.ts +31 -9
- package/bin/version-mismatch.js +47 -0
- package/bin/version-mismatch.test.ts +120 -0
- package/dist/cli/cleanup-command.d.ts +4 -0
- package/dist/cli/cleanup.d.ts +11 -0
- package/dist/cli/cli-program.d.ts +2 -1
- package/dist/cli/codex-ulw-loop.d.ts +12 -0
- package/dist/cli/doctor/checks/tui-plugin-config.d.ts +2 -0
- package/dist/cli/index.js +2189 -529
- package/dist/cli/install-codex/codex-cache.d.ts +1 -0
- package/dist/cli/install-codex/codex-cleanup-config.d.ts +6 -0
- package/dist/cli/install-codex/codex-cleanup.d.ts +21 -0
- package/dist/cli/install-codex/codex-config-permissions.d.ts +1 -0
- package/dist/cli/install-codex/codex-config-reasoning.d.ts +2 -0
- package/dist/cli/install-codex/codex-config-toml.d.ts +2 -1
- package/dist/cli/install-codex/codex-installation-detection.d.ts +36 -0
- package/dist/cli/install-codex/codex-model-catalog.d.ts +13 -0
- package/dist/cli/install-codex/codex-package-layout.d.ts +1 -0
- package/dist/cli/install-codex/codex-project-local-cleanup-best-effort.d.ts +7 -0
- package/dist/cli/install-codex/codex-project-local-cleanup.d.ts +35 -0
- package/dist/cli/install-codex/git-bash.d.ts +35 -0
- package/dist/cli/install-codex/index.d.ts +4 -0
- package/dist/cli/install-codex/toml-section-editor.d.ts +2 -0
- package/dist/cli/install-codex/types.d.ts +20 -0
- package/dist/cli/run/event-state.d.ts +1 -0
- package/dist/cli/run/poll-for-completion.d.ts +1 -0
- package/dist/cli/run/prompt-start.d.ts +7 -0
- package/dist/cli/star-request.d.ts +9 -0
- package/dist/config/schema/hooks.d.ts +0 -1
- package/dist/create-hooks.d.ts +0 -1
- package/dist/features/background-agent/concurrency.d.ts +1 -0
- package/dist/features/background-agent/process-cleanup.d.ts +6 -0
- package/dist/features/builtin-skills/skills/debugging.d.ts +2 -0
- package/dist/features/builtin-skills/skills/index.d.ts +1 -0
- package/dist/features/claude-code-session-state/state.d.ts +1 -0
- package/dist/features/opencode-skill-loader/index.d.ts +1 -0
- package/dist/features/opencode-skill-loader/opencode-config-skills-reader.d.ts +5 -0
- package/dist/features/tmux-subagent/attachable-session-status.d.ts +1 -1
- package/dist/features/tmux-subagent/session-status-parser.d.ts +1 -0
- package/dist/hooks/comment-checker/cli.d.ts +1 -0
- package/dist/hooks/index.d.ts +0 -1
- package/dist/hooks/tasks-todowrite-disabler/constants.d.ts +1 -1
- package/dist/index.js +1077 -563
- package/dist/plugin/hooks/create-core-hooks.d.ts +0 -1
- package/dist/plugin/hooks/create-session-hooks.d.ts +1 -2
- package/dist/plugin/messages-transform.d.ts +8 -1
- package/dist/plugin/user-abort-interrupted-recovery-guard.d.ts +6 -0
- package/dist/shared/command-executor/execute-hook-command.d.ts +2 -0
- package/dist/shared/prompt-async-gate/recent-dispatches.d.ts +14 -0
- package/dist/shared/prompt-async-gate/semantic-dedupe.d.ts +7 -0
- package/dist/shared/prompt-async-gate/session-idle-dispatch.d.ts +1 -0
- package/dist/shared/prompt-async-gate/timing.d.ts +1 -0
- package/dist/shared/prompt-async-gate/types.d.ts +2 -0
- package/dist/shared/prompt-async-gate.d.ts +1 -1
- package/dist/tools/skill/description-formatter.d.ts +5 -1
- package/dist/tools/skill/types.d.ts +1 -0
- package/package.json +22 -18
- package/packages/ast-grep-mcp/dist/cli.js +53 -9
- package/packages/git-bash-mcp/dist/cli.js +367 -0
- package/packages/lsp-tools-mcp/dist/lsp/process.js +1 -1
- package/packages/omo-codex/plugin/.mcp.json +11 -0
- package/packages/omo-codex/plugin/components/comment-checker/README.md +1 -1
- package/packages/omo-codex/plugin/components/git-bash/hooks/hooks.json +29 -0
- package/packages/omo-codex/plugin/components/git-bash/package.json +23 -0
- package/packages/omo-codex/plugin/components/git-bash/src/cli.ts +33 -0
- package/packages/omo-codex/plugin/components/git-bash/src/codex-hook.ts +180 -0
- package/packages/omo-codex/plugin/components/git-bash/src/index.ts +10 -0
- package/packages/omo-codex/plugin/components/git-bash/test/codex-hook.test.ts +195 -0
- package/packages/omo-codex/plugin/components/git-bash/tsconfig.build.json +13 -0
- package/packages/omo-codex/plugin/components/git-bash/tsconfig.json +25 -0
- package/packages/omo-codex/plugin/components/lsp/README.md +1 -1
- package/packages/omo-codex/plugin/components/lsp/src/cli.ts +5 -5
- package/packages/omo-codex/plugin/components/lsp/src/codex-hook-cli.ts +33 -0
- package/packages/omo-codex/plugin/components/lsp/src/codex-hook.ts +19 -27
- package/packages/omo-codex/plugin/components/lsp/test/codex-hook-cli.test.ts +28 -0
- package/packages/omo-codex/plugin/components/lsp/test/codex-hook-errors.test.ts +55 -0
- package/packages/omo-codex/plugin/components/lsp/test/package-smoke.test.ts +7 -5
- package/packages/omo-codex/plugin/components/rules/README.md +1 -1
- package/packages/omo-codex/plugin/components/rules/bundled-rules/hephaestus.md +6 -4
- package/packages/omo-codex/plugin/components/rules/bundled-rules/windows-git-bash.md +10 -0
- package/packages/omo-codex/plugin/components/rules/src/post-compact-budget.ts +0 -2
- package/packages/omo-codex/plugin/components/rules/test/package-smoke.test.ts +3 -1
- package/packages/omo-codex/plugin/components/rules/test/windows-git-bash-bundled-rule.test.ts +97 -0
- package/packages/omo-codex/plugin/components/start-work-continuation/directive.md +6 -5
- package/packages/omo-codex/plugin/components/start-work-continuation/test/codex-hook.test.ts +22 -0
- package/packages/omo-codex/plugin/components/ultrawork/CHANGELOG.md +1 -1
- package/packages/omo-codex/plugin/components/ultrawork/README.md +3 -3
- package/packages/omo-codex/plugin/components/ultrawork/agents/codex-ultrawork-reviewer.toml +4 -1
- package/packages/omo-codex/plugin/components/ultrawork/agents/librarian.toml +8 -7
- package/packages/omo-codex/plugin/components/ultrawork/agents/plan.toml +9 -8
- package/packages/omo-codex/plugin/components/ultrawork/directive.md +32 -6
- package/packages/omo-codex/plugin/components/ultrawork/test/codex-hook.test.ts +27 -4
- package/packages/omo-codex/plugin/components/ultrawork/test/package-smoke.test.ts +25 -0
- package/packages/omo-codex/plugin/components/ulw-loop/README.md +1 -1
- package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/SKILL.md +28 -205
- package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/references/full-workflow.md +231 -0
- package/packages/omo-codex/plugin/components/ulw-loop/src/checkpoint.ts +12 -1
- package/packages/omo-codex/plugin/components/ulw-loop/test/checkpoint.test.ts +19 -1
- package/packages/omo-codex/plugin/components/ulw-loop/test/package-smoke.test.ts +102 -5
- package/packages/omo-codex/plugin/hooks/hooks.json +35 -2
- package/packages/omo-codex/plugin/model-catalog.json +49 -0
- package/packages/omo-codex/plugin/package-lock.json +19 -0
- package/packages/omo-codex/plugin/package.json +3 -1
- package/packages/omo-codex/plugin/scripts/auto-update.mjs +159 -0
- package/packages/omo-codex/plugin/scripts/build-bundled-mcp-runtimes.mjs +16 -1
- package/packages/omo-codex/plugin/scripts/build-components.mjs +2 -1
- package/packages/omo-codex/plugin/scripts/migrate-codex-config.mjs +269 -0
- package/packages/omo-codex/plugin/scripts/sync-hook-status-messages.mjs +89 -0
- package/packages/omo-codex/plugin/scripts/sync-skills.mjs +6 -6
- package/packages/omo-codex/plugin/skills/init-deep/SKILL.md +6 -6
- package/packages/omo-codex/plugin/skills/lcx-report-bug/SKILL.md +127 -0
- package/packages/omo-codex/plugin/skills/lcx-report-bug/agents/openai.yaml +9 -0
- package/packages/omo-codex/plugin/skills/refactor/SKILL.md +6 -6
- package/packages/omo-codex/plugin/skills/remove-ai-slops/SKILL.md +6 -6
- package/packages/omo-codex/plugin/skills/review-work/SKILL.md +33 -8
- package/packages/omo-codex/plugin/skills/start-work/SKILL.md +25 -5
- package/packages/omo-codex/plugin/skills/ulw-loop/SKILL.md +28 -205
- package/packages/omo-codex/plugin/skills/ulw-loop/references/full-workflow.md +231 -0
- package/packages/omo-codex/plugin/skills/ulw-plan/SKILL.md +17 -17
- package/packages/omo-codex/plugin/test/aggregate.test.mjs +188 -20
- package/packages/omo-codex/plugin/test/auto-update.test.mjs +129 -0
- package/packages/omo-codex/plugin/test/hook-status-message.test.mjs +58 -11
- package/packages/omo-codex/plugin/test/install-time-build-runtime.test.mjs +34 -0
- package/packages/omo-codex/plugin/test/mcp-research-servers.test.mjs +21 -0
- package/packages/omo-codex/plugin/test/migrate-codex-config.test.mjs +146 -0
- package/packages/omo-codex/plugin/test/node-install-surface.test.mjs +48 -0
- package/packages/omo-codex/plugin/test/subagent-guidance.test.mjs +76 -0
- package/packages/omo-codex/plugin/test/sync-hook-status-messages.test.mjs +67 -0
- package/packages/omo-codex/plugin/test/sync-skills.test.mjs +54 -2
- package/packages/omo-codex/scripts/install/cache.mjs +5 -3
- package/packages/omo-codex/scripts/install/cli-args.mjs +112 -0
- package/packages/omo-codex/scripts/install/config.mjs +23 -1
- package/packages/omo-codex/scripts/install/delegated-command.mjs +25 -0
- package/packages/omo-codex/scripts/install/git-bash.mjs +99 -0
- package/packages/omo-codex/scripts/install/git-bash.test.mjs +174 -0
- package/packages/omo-codex/scripts/install/legacy-bins.mjs +1 -0
- package/packages/omo-codex/scripts/install/mcp-runtime-cache.mjs +5 -1
- package/packages/omo-codex/scripts/install/model-catalog.mjs +66 -0
- package/packages/omo-codex/scripts/install/multi-agent-v2-config.mjs +7 -1
- package/packages/omo-codex/scripts/install/permissions.d.mts +1 -0
- package/packages/omo-codex/scripts/install/permissions.mjs +26 -0
- package/packages/omo-codex/scripts/install/project-local-cleanup.mjs +229 -0
- package/packages/omo-codex/scripts/install/reasoning-config.mjs +72 -0
- package/packages/omo-codex/scripts/install/source-package-build.mjs +20 -0
- package/packages/omo-codex/scripts/install/toml-editor.mjs +19 -2
- package/packages/omo-codex/scripts/install-bin-links.test.mjs +23 -0
- package/packages/omo-codex/scripts/install-cli-args.test.mjs +146 -0
- package/packages/omo-codex/scripts/install-config-autonomous.test.mjs +48 -0
- package/packages/omo-codex/scripts/install-config-reasoning.test.mjs +141 -0
- package/packages/omo-codex/scripts/install-config.test.mjs +205 -0
- package/packages/omo-codex/scripts/install-local-entrypoint.test.mjs +157 -0
- package/packages/omo-codex/scripts/install-local-git-bash-preflight.test.mjs +145 -0
- package/packages/omo-codex/scripts/install-local.mjs +91 -8
- package/packages/omo-codex/scripts/install-local.test.mjs +15 -0
- package/packages/omo-codex/scripts/install-mcp-runtime.test.mjs +60 -0
- package/packages/omo-codex/scripts/install-packaged-local.test.mjs +67 -0
- package/packages/omo-codex/scripts/install-project-local-cleanup.test.mjs +277 -0
- package/packages/shared-skills/skills/lcx-report-bug/SKILL.md +127 -0
- package/packages/shared-skills/skills/lcx-report-bug/agents/openai.yaml +9 -0
- package/packages/shared-skills/skills/review-work/SKILL.md +33 -8
- package/packages/shared-skills/skills/start-work/SKILL.md +25 -5
- package/packages/shared-skills/skills/ulw-plan/SKILL.md +11 -11
- package/postinstall.mjs +36 -3
- package/dist/hooks/context-window-monitor.d.ts +0 -19
|
@@ -18,10 +18,11 @@ You are mid-flight on a Prometheus work plan. The turn just ended without finish
|
|
|
18
18
|
1. Read `{{PLAN_PATH}}` AND `{{LEDGER_PATH}}` first — ground truth for what remains and what evidence has already been recorded. The plan checkbox and the ledger are the only sources of truth; do not trust your own memory of prior turns.
|
|
19
19
|
2. Pick the FIRST unchecked top-level checkbox in `## TODOs` or `## Final Verification Wave`. Ignore nested checkboxes under Acceptance Criteria / Evidence / Definition of Done.
|
|
20
20
|
3. Follow the `start-work` skill in full. The skill is already loaded from your earlier turn — re-read its file at `packages/omo-codex/plugin/skills/start-work/SKILL.md` if you have lost context.
|
|
21
|
-
4. Decompose the checkbox into atomic sub-tasks. Dispatch them in PARALLEL via `spawn_agent` calls in this same response unless a sub-task has a NAMED blocking dependency (input from another sub-task or shared file).
|
|
22
|
-
5. Every sub-task message MUST include all 7 sections and name one Manual-QA channel with its exact tool and exact invocation (the literal `curl` / `send-keys` / `page.click` with concrete inputs and the binary PASS/FAIL observable), plus the applicable ultraqa adversarial classes, a captured artifact, and a cleanup receipt. Channels: HTTP call (`curl -i`); tmux (`send-keys` + `capture-pane`); browser use — use Chrome to drive the page, else download and use agent-browser (https://github.com/vercel-labs/agent-browser); computer use — OS-level GUI automation for a desktop app. Tests are the floor; the channel artifact plus probed adversarial classes are the ceiling. All are required.
|
|
23
|
-
6.
|
|
24
|
-
7.
|
|
21
|
+
4. Decompose the checkbox into atomic sub-tasks. Dispatch them in PARALLEL via `spawn_agent` calls in this same response unless a sub-task has a NAMED blocking dependency (input from another sub-task or shared file). Prefer `fork_turns: "none"` unless full history is truly required. Every dispatch sets `agent_type`; `model` + `reasoning_effort` alone creates a default agent, not the requested role.
|
|
22
|
+
5. Every sub-task message MUST be self-contained and start with `TASK: <imperative assignment>`, then name `DELIVERABLE`, `SCOPE`, and `VERIFY`. State that it is an executable assignment, not a context handoff. It must include all 7 sections and name one Manual-QA channel with its exact tool and exact invocation (the literal `curl` / `send-keys` / `page.click` with concrete inputs and the binary PASS/FAIL observable), plus the applicable ultraqa adversarial classes, a captured artifact, and a cleanup receipt. Channels: HTTP call (`curl -i`); tmux (`send-keys` + `capture-pane`); browser use — use Chrome to drive the page, else download and use agent-browser (https://github.com/vercel-labs/agent-browser); computer use — OS-level GUI automation for a desktop app. Tests are the floor; the channel artifact plus probed adversarial classes are the ceiling. All are required.
|
|
23
|
+
6. Use `wait_agent` for completion signals, but treat `wait_agent` as a mailbox signal, not proof of completion, content, or errors. After two waits with no substantive result, send one targeted followup: `TASK STILL ACTIVE: return <deliverable> or BLOCKED: <reason>`. If it is still silent or ack-only, record the result as inconclusive, do not count it as a pass or review approval, close it if safe, and respawn a smaller `fork_turns: "none"` task with the missing deliverable.
|
|
24
|
+
7. After verification of ALL sub-tasks under this checkbox: `apply_patch` the plan to change `- [ ]` → `- [x]`, re-read the plan to confirm the count decreased, append a `task-completed` line to the ledger, then continue.
|
|
25
|
+
8. Do not start fresh on a sub-agent failure. Re-dispatch the same `task_name` with a fix-message: `FAILED: <exact error>` + `Diagnosis: <observation>` + `Fix: <instruction>`.
|
|
25
26
|
|
|
26
27
|
# Hard constraints
|
|
27
28
|
|
|
@@ -36,7 +37,7 @@ You are mid-flight on a Prometheus work plan. The turn just ended without finish
|
|
|
36
37
|
# Stop conditions for THIS turn
|
|
37
38
|
|
|
38
39
|
- A top-level checkbox flipped to `- [x]` after the 5-phase QA gate (Phase 1 read, Phase 2 automated, Phase 3 channel scenario, Phase 4 adversarial-class probing, Phase 5 gate decision). Then the Stop hook will re-evaluate; if more checkboxes remain you will be continued again.
|
|
39
|
-
- 3 same-failure cycles on one sub-task → escalate via `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)` and stop dispatch.
|
|
40
|
+
- 3 same-failure cycles on one sub-task → escalate via `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)` and stop dispatch.
|
|
40
41
|
- Safety boundary (destructive command, secret exfiltration, production write) → stop and surface a safe substitute.
|
|
41
42
|
- All top-level checkboxes `- [x]` AND (if gate triggered) `codex-ultrawork-reviewer` approved unconditionally → print the ORCHESTRATION COMPLETE block and end.
|
|
42
43
|
|
package/packages/omo-codex/plugin/components/start-work-continuation/test/codex-hook.test.ts
CHANGED
|
@@ -49,6 +49,28 @@ describe("start-work Stop hook", () => {
|
|
|
49
49
|
expect(parsed.reason).toContain("- Your session id in boulder.json: `codex:sess_abc`");
|
|
50
50
|
});
|
|
51
51
|
|
|
52
|
+
it("#given active codex work #when continuation directive is emitted #then subagent guidance is reliable", () => {
|
|
53
|
+
// given
|
|
54
|
+
const fs = createMemoryFs({
|
|
55
|
+
[BOULDER_PATH]: createBoulderJson({
|
|
56
|
+
sessionIds: ["codex:sess_abc"],
|
|
57
|
+
status: "active",
|
|
58
|
+
}),
|
|
59
|
+
[PLAN_PATH]: ["# Plan", "", "## TODOs", "- [ ] First"].join("\n"),
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
// when
|
|
63
|
+
const output = runStopHook(createStopInput(), fs);
|
|
64
|
+
|
|
65
|
+
// then
|
|
66
|
+
const parsed = parseBlockOutput(output);
|
|
67
|
+
expect(parsed.reason).toMatch(/TASK:/);
|
|
68
|
+
expect(parsed.reason).toMatch(/fork_turns:\s*"none"/);
|
|
69
|
+
expect(parsed.reason).toMatch(/wait_agent.*signal, not proof/);
|
|
70
|
+
expect(parsed.reason).toMatch(/one targeted followup/);
|
|
71
|
+
expect(parsed.reason).toMatch(/respawn.*smaller/);
|
|
72
|
+
});
|
|
73
|
+
|
|
52
74
|
it("#given active work belongs to another harness #when hook runs #then returns empty output", () => {
|
|
53
75
|
// given
|
|
54
76
|
const fs = createMemoryFs({
|
|
@@ -21,5 +21,5 @@
|
|
|
21
21
|
Initial release.
|
|
22
22
|
|
|
23
23
|
- Codex `UserPromptSubmit` hook that detects `ultrawork` / `ulw` (word-bounded, case-insensitive) in the user prompt and injects the ultrawork orchestration directive.
|
|
24
|
-
- Directive enforces: goal + binding success criteria with manual-QA scenarios + evidence, durable `/tmp` notepad lifecycle, obsessive atomic todos, scenario-driven execution loop, and a
|
|
24
|
+
- Directive enforces: goal + binding success criteria with manual-QA scenarios + evidence, durable `/tmp` notepad lifecycle, obsessive atomic todos, scenario-driven execution loop, and a ChatGPT-compatible xhigh verification gate with no "false positive" escape hatch.
|
|
25
25
|
- Directive size: 5,775 chars across 143 lines.
|
|
@@ -13,14 +13,14 @@ Bundled Codex agent role TOMLs in `agents/` are installed into `CODEX_HOME/agent
|
|
|
13
13
|
| Surface + paired cleanup | Execution loop step 4 (**SURFACE-AS-SCENARIO**) runs the chosen channel scenario end-to-end. Step 5 (**CLEANUP, PAIRED**) tears down every QA-spawned process / tmux session / browser context / container / port / temp dir, with a one-line receipt appended to the notepad. Leftover state → NOT done. |
|
|
14
14
|
| Durable /tmp notepad | `mktemp -t ulw-$(date +%Y%m%d-%H%M%S).XXXXXX.md` with sections `Plan`, `Success criteria + QA scenarios`, `Now`, `Todo`, `Findings`, `Learnings`. **Append**, never rewrite. |
|
|
15
15
|
| Obsessive atomic todos | Every action — even one-line edits, `ls`, single test runs — becomes a todo. Format: `path: <action> for <criterion> — verify by <check>`. One in_progress at a time, mark completed immediately. |
|
|
16
|
-
|
|
|
16
|
+
| ChatGPT-compatible xhigh verification gate | Triggered automatically on user-requested rigor, 3+ files, 20+ turns, 30+ minutes, or refactor/migration/perf/security work. Use the bundled `codex-ultrawork-reviewer` agent role when available. Reviewer verdict is **binding**: no "false positive", no minimising, no arguing. Loop until **unconditional** approval. "Looks good but..." = REJECTION. |
|
|
17
17
|
|
|
18
18
|
The directive is currently 10,951 chars / 231 lines and follows the GPT-5.5 prompting structure (Role / Goal / Manual-QA channels / Bootstrap / Execution loop / Verification gate / Commits / Constraints / Output / Stop rules).
|
|
19
19
|
|
|
20
20
|
## Install (via this marketplace)
|
|
21
21
|
|
|
22
22
|
```bash
|
|
23
|
-
|
|
23
|
+
npx lazycodex-ai install
|
|
24
24
|
```
|
|
25
25
|
|
|
26
26
|
The installer copies the plugin into `~/.codex/plugins/cache/sisyphuslabs/omo/0.1.0`, writes the stable Codex marketplace snapshot at `~/.codex/.tmp/marketplaces/sisyphuslabs/`, registers the `sisyphuslabs` marketplace from the `lazycodex` Git repository, enables `omo@sisyphuslabs` in `~/.codex/config.toml`, registers the `UserPromptSubmit` hook, and installs the bundled agent TOMLs into `~/.codex/agents/` (symlinks on Unix, copies on Windows). A `.installed-agents.json` manifest is written next to the bundled TOMLs' source root for clean uninstall tracking.
|
|
@@ -49,7 +49,7 @@ Expect `<ultrawork-mode>` ... directive body.
|
|
|
49
49
|
|
|
50
50
|
## Agent role smoke test
|
|
51
51
|
|
|
52
|
-
Run `
|
|
52
|
+
Run `npx lazycodex-ai install`, then inspect `~/.codex/agents/`. On Linux / macOS you should see symlinks; on Windows you should see file copies. Each TOML should declare a non-empty `name`, `description`, and `developer_instructions`.
|
|
53
53
|
|
|
54
54
|
## License
|
|
55
55
|
|
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
name = "codex-ultrawork-reviewer"
|
|
2
2
|
description = "Strict ultrawork verification reviewer. Use after full QA evidence to audit the diff, goal, and scenario evidence before declaring done."
|
|
3
3
|
nickname_candidates = ["Verifier"]
|
|
4
|
-
model = "gpt-5.
|
|
4
|
+
model = "gpt-5.5"
|
|
5
5
|
model_reasoning_effort = "xhigh"
|
|
6
6
|
developer_instructions = """You are the ultrawork verification reviewer.
|
|
7
7
|
|
|
8
8
|
Review only. Do not implement.
|
|
9
9
|
|
|
10
|
+
The default model intentionally uses a ChatGPT-account-compatible frontier model. If a caller supplies a different supported reviewer model, follow the caller's assignment while preserving this review contract.
|
|
11
|
+
|
|
10
12
|
Input should include the goal, success criteria, full diff, QA evidence, and notepad path.
|
|
13
|
+
If Codex delivers parent review context as inter-agent commentary, treat the latest parent message with goal/diff/evidence as your active review assignment, not passive context.
|
|
11
14
|
|
|
12
15
|
Verdict rules:
|
|
13
16
|
- Return `UNCONDITIONAL APPROVAL` only when the diff satisfies every success criterion and the evidence proves the real surface works.
|
|
@@ -55,7 +55,7 @@ If the user names a version ("React 18", "Next.js 14", "v2.x"):
|
|
|
55
55
|
|
|
56
56
|
## Step 4 - targeted investigation
|
|
57
57
|
- `webfetch(<specific-doc-page-from-sitemap>)`.
|
|
58
|
-
- If
|
|
58
|
+
- If `context7` is available, query it for the specific topic. Otherwise rely on the sitemap-driven webfetch pages.
|
|
59
59
|
|
|
60
60
|
## Skip Phase 0.5 when
|
|
61
61
|
- TYPE B (implementation) - you're cloning the repo anyway.
|
|
@@ -70,7 +70,7 @@ If the user names a version ("React 18", "Next.js 14", "v2.x"):
|
|
|
70
70
|
Run Phase 0.5 first, then in parallel:
|
|
71
71
|
- `web_search` for current-year usage examples + best practices.
|
|
72
72
|
- `webfetch` for the targeted doc pages identified by the sitemap.
|
|
73
|
-
- `gh search code "<usage pattern>" --language <lang
|
|
73
|
+
- `grep_app` for broad GitHub code search; fall back to `gh search code "<usage pattern>" --language <lang>`.
|
|
74
74
|
|
|
75
75
|
## TYPE B - IMPLEMENTATION REFERENCE
|
|
76
76
|
Execute in sequence:
|
|
@@ -81,9 +81,9 @@ Execute in sequence:
|
|
|
81
81
|
|
|
82
82
|
Parallel acceleration (4+ calls in one batch when independent):
|
|
83
83
|
- Shallow clone.
|
|
84
|
-
- `gh search code "<function-name>" --repo <owner>/<repo>`.
|
|
84
|
+
- `grep_app` broad code search or `gh search code "<function-name>" --repo <owner>/<repo>`.
|
|
85
85
|
- `gh api repos/<owner>/<repo>/commits/HEAD --jq '.sha'`.
|
|
86
|
-
-
|
|
86
|
+
- `context7` or sitemap-targeted `webfetch` of the relevant docs page for the same API surface.
|
|
87
87
|
|
|
88
88
|
## TYPE C - CONTEXT & HISTORY
|
|
89
89
|
Execute in parallel (4+ calls):
|
|
@@ -100,7 +100,7 @@ For a specific issue / PR:
|
|
|
100
100
|
## TYPE D - COMPREHENSIVE
|
|
101
101
|
Run Phase 0.5 first, then execute 6+ parallel calls:
|
|
102
102
|
- 2 docs calls: `webfetch` targeted doc pages + (if available) a docs-indexer query.
|
|
103
|
-
- 2 code-search calls: `gh search code` with varied queries (different angles).
|
|
103
|
+
- 2 code-search calls: `grep_app` or `gh search code` with varied queries (different angles).
|
|
104
104
|
- 1 source clone for deep inspection.
|
|
105
105
|
- 1 issues/PRs query for context.
|
|
106
106
|
|
|
@@ -147,8 +147,9 @@ Never link to a branch name (`/blob/main/...`) - always pin to a SHA so the line
|
|
|
147
147
|
- Sitemap -> `webfetch(<base>/sitemap.xml)` (fallbacks: `/sitemap-0.xml`, `/sitemap_index.xml`).
|
|
148
148
|
- Read a specific page -> `webfetch(<page-url>)`.
|
|
149
149
|
- Latest info -> `web_search("<query> <CURRENT_YEAR>")`.
|
|
150
|
-
-
|
|
151
|
-
- Code search (
|
|
150
|
+
- Docs index -> `context7` when available; use sitemap-driven pages when it is not.
|
|
151
|
+
- Code search (fast, broad) -> `grep_app` for web-scale GitHub search; `gh search code "<query>" --language <lang>` when you need GitHub CLI filters.
|
|
152
|
+
- Code search (deep, repo-scoped) -> after cloning, `rg` / `ast_grep` over the clone.
|
|
152
153
|
- Clone -> `gh repo clone <o>/<r> "${TMPDIR:-/tmp}/<name>" -- --depth 1`.
|
|
153
154
|
- Issues / PRs -> `gh search issues|prs`, `gh issue|pr view <n> --comments`.
|
|
154
155
|
- Release info -> `gh api repos/<o>/<r>/releases/latest`.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
name = "plan"
|
|
2
|
-
description = "Strategic planning consultant. Produces a single executable work plan from a vague or large request. Planner only - never implements. Writes the plan to plans/<slug>.md."
|
|
2
|
+
description = "Strategic planning consultant. Produces a single executable work plan from a vague or large request. Planner only - never implements. Writes the plan to .omo/plans/<slug>.md."
|
|
3
3
|
nickname_candidates = ["Planner"]
|
|
4
4
|
model = "gpt-5.5"
|
|
5
5
|
model_reasoning_effort = "xhigh"
|
|
@@ -29,13 +29,14 @@ Never plan blind. Fire parallel research BEFORE drafting:
|
|
|
29
29
|
- Spawn parallel read-only subagents for internal-source aspects (codebase patterns, conventions, existing implementations, test infrastructure, naming/registration patterns). One subagent per aspect.
|
|
30
30
|
- Spawn parallel read-only subagents for external-source aspects (official docs, OSS reference implementations, API contracts, RFCs). One subagent per aspect.
|
|
31
31
|
- While they run, use direct read-only tools (`read`, `rg`, `ast_grep_search`, `lsp_*`) for immediate context. Do not idle.
|
|
32
|
-
- The role's own system prompt determines each subagent's output shape. Do not re-specify it; pass only
|
|
32
|
+
- The role's own system prompt determines each subagent's output shape. Do not re-specify it; pass only a self-contained `TASK: <question to answer now>`, the minimal context you have, `DELIVERABLE`, and what decision the answer informs.
|
|
33
|
+
- Prefer `fork_turns: "none"` for research subagents unless full history is truly required. Treat `wait_agent` as a signal, not proof; if a child is silent or ack-only after one targeted followup, mark that lane inconclusive and answer from direct evidence or respawn smaller.
|
|
33
34
|
|
|
34
35
|
Wait for context to converge before drafting. Rushed plans fail.
|
|
35
36
|
|
|
36
37
|
# Phase 2 - Plan output (single markdown file, single plan)
|
|
37
38
|
|
|
38
|
-
Write the plan to
|
|
39
|
+
Write the plan to `.omo/plans/<slug>.md` in the working tree (create the `.omo/plans/` directory if absent). One plan per request - no "Phase 1 plan / Phase 2 plan" splits. 50+ tasks is fine if the work demands it.
|
|
39
40
|
|
|
40
41
|
Use this template verbatim (fill the placeholders):
|
|
41
42
|
|
|
@@ -59,7 +60,7 @@ Use this template verbatim (fill the placeholders):
|
|
|
59
60
|
> Zero human intervention - all verification is agent-executed.
|
|
60
61
|
- Test decision: <TDD | tests-after | none> + framework
|
|
61
62
|
- QA policy: every task has agent-executed scenarios
|
|
62
|
-
- Evidence:
|
|
63
|
+
- Evidence: `.omo/evidence/task-<N>-<slug>.<ext>`
|
|
63
64
|
|
|
64
65
|
## Execution strategy
|
|
65
66
|
### Parallel execution waves
|
|
@@ -113,13 +114,13 @@ Critical path: Task 1 -> Task 2 -> Task 6
|
|
|
113
114
|
Tool: <bash | curl | tmux | playwright(real Chrome) | agent-browser | computer-use>
|
|
114
115
|
Steps: <exact command / API call / page action with concrete inputs - URL, payload, keystrokes, selectors>
|
|
115
116
|
Expected: <concrete, binary pass/fail observable>
|
|
116
|
-
Evidence: evidence/task-<N>-<slug>.<ext>
|
|
117
|
+
Evidence: .omo/evidence/task-<N>-<slug>.<ext>
|
|
117
118
|
|
|
118
119
|
Scenario: <failure / edge case>
|
|
119
120
|
Tool: <same, with exact invocation>
|
|
120
121
|
Steps: <trigger the error with specific inputs>
|
|
121
122
|
Expected: <graceful failure with the exact error message/code>
|
|
122
|
-
Evidence: evidence/task-<N>-<slug>-error.<ext>
|
|
123
|
+
Evidence: .omo/evidence/task-<N>-<slug>-error.<ext>
|
|
123
124
|
```
|
|
124
125
|
|
|
125
126
|
Commit: <YES|NO> | Message: `<type>(<scope>): <imperative summary>` | Files: [<paths>]
|
|
@@ -135,14 +136,14 @@ Critical path: Task 1 -> Task 2 -> Task 6
|
|
|
135
136
|
- One logical change per commit. Conventional Commits (`<type>(<scope>): <subject>` body + footer).
|
|
136
137
|
- Atomic: every commit builds and passes tests on its own.
|
|
137
138
|
- No "WIP" / "fix typo squash later" commits on the final branch - clean up before merge.
|
|
138
|
-
- Reference the plan file path in the final commit footer: `Plan: plans/<slug>.md`.
|
|
139
|
+
- Reference the plan file path in the final commit footer: `Plan: .omo/plans/<slug>.md`.
|
|
139
140
|
|
|
140
141
|
## Success criteria
|
|
141
142
|
- All Must-Have shipped; all QA scenarios pass with captured evidence; F1-F4 approved; commit history clean.
|
|
142
143
|
```
|
|
143
144
|
|
|
144
145
|
# Constraints
|
|
145
|
-
- READ + plan-file write only. Tools I will NEVER call: `edit`/`write`/`apply_patch` on anything outside
|
|
146
|
+
- READ + plan-file write only. Tools I will NEVER call: `edit`/`write`/`apply_patch` on anything outside `.omo/plans/<slug>.md`, or anything else that mutates non-plan files.
|
|
146
147
|
- DO NOT split work into multiple plans. ONE plan per request.
|
|
147
148
|
- DO NOT skip context gathering. NEVER plan blind.
|
|
148
149
|
- DO NOT include "user manually tests" as an acceptance criterion. Every check must be agent-executable.
|
|
@@ -185,8 +185,31 @@ Until every success-criteria scenario PASSES with BOTH evidence pieces:
|
|
|
185
185
|
|
|
186
186
|
Parallel-batch independent reads / searches / subagents within a step,
|
|
187
187
|
but NEVER parallelise RED and GREEN of the same criterion.
|
|
188
|
-
|
|
189
|
-
|
|
188
|
+
|
|
189
|
+
# Codex subagent reliability
|
|
190
|
+
Every `spawn_agent` message is self-contained and starts with
|
|
191
|
+
`TASK: <imperative assignment>`, then names `DELIVERABLE`, `SCOPE`, and
|
|
192
|
+
`VERIFY`. State that it is an executable assignment, not a context
|
|
193
|
+
handoff. Prefer `fork_turns: "none"` unless full history is truly
|
|
194
|
+
required; paste only the context the child needs. Full-history forks can
|
|
195
|
+
make the child continue old parent context instead of the delegated task.
|
|
196
|
+
|
|
197
|
+
Do not use `list_agents` as a polling or status tool in long or
|
|
198
|
+
high-context runs; it can replay large agent status and latest-message
|
|
199
|
+
payloads. Track spawned agent names locally. Plan and reviewer agents
|
|
200
|
+
may run for a long time; spawn them in the background, keep doing
|
|
201
|
+
independent root work, and poll with short wait_agent cycles. Never use
|
|
202
|
+
a single long blocking wait for them. Use `wait_agent` for completion
|
|
203
|
+
signals, but treat `wait_agent` as a mailbox signal, not proof of
|
|
204
|
+
completion, content, or errors. A worker/reviewer counts only after you
|
|
205
|
+
receive substantive output and verify its diff/evidence.
|
|
206
|
+
After two waits with no substantive result, send one targeted followup:
|
|
207
|
+
`TASK STILL ACTIVE: return <deliverable> or BLOCKED: <reason>`. If it is
|
|
208
|
+
still silent or ack-only, record the result as inconclusive, do not
|
|
209
|
+
count it as approval/pass, close it if safe, and respawn a smaller
|
|
210
|
+
`fork_turns: "none"` task with the missing deliverable. Use targeted
|
|
211
|
+
followups only when needed, and `close_agent` after integrating each
|
|
212
|
+
result.
|
|
190
213
|
|
|
191
214
|
# Verification gate (TRIGGERED, NOT OPTIONAL)
|
|
192
215
|
|
|
@@ -197,9 +220,12 @@ Trigger when ANY apply:
|
|
|
197
220
|
anything the user called deep.
|
|
198
221
|
|
|
199
222
|
Procedure (NON-NEGOTIABLE):
|
|
200
|
-
1. Spawn agent_type
|
|
201
|
-
|
|
202
|
-
|
|
223
|
+
1. Spawn `agent_type="codex-ultrawork-reviewer"` with
|
|
224
|
+
`fork_turns: "none"`. If unavailable, spawn `agent_type="worker"`
|
|
225
|
+
with a self-contained reviewer assignment and tight scope. `model` +
|
|
226
|
+
`reasoning_effort` alone creates a default agent, not a reviewer.
|
|
227
|
+
Pass: goal, success-criteria, scenario evidence, full diff, notepad
|
|
228
|
+
path.
|
|
203
229
|
2. Treat the reviewer's verdict as binding. There is NO "false
|
|
204
230
|
positive". Every concern is real. Do not argue. Do not minimise. Do
|
|
205
231
|
not explain it away.
|
|
@@ -215,7 +241,7 @@ Atomic, Conventional Commits (`<type>(<scope>): <imperative>` — feat /
|
|
|
215
241
|
fix / refactor / test / docs / chore / build / ci / perf). One logical
|
|
216
242
|
change per commit; each commit builds + tests green on its own. No WIP
|
|
217
243
|
on the final branch. If a plan file exists, final commit footer:
|
|
218
|
-
`Plan: plans/<slug>.md`. Do NOT auto-`git commit` unless the user
|
|
244
|
+
`Plan: .omo/plans/<slug>.md`. Do NOT auto-`git commit` unless the user
|
|
219
245
|
requested or preauthorised this session — default is stage + draft
|
|
220
246
|
message + present for approval.
|
|
221
247
|
|
|
@@ -161,11 +161,34 @@ describe("codex ultrawork hook", () => {
|
|
|
161
161
|
const directive = parsed.hookSpecificOutput.additionalContext;
|
|
162
162
|
expect(directive).toMatch(/list_agents/);
|
|
163
163
|
expect(directive).toMatch(/polling or status tool/);
|
|
164
|
-
expect(directive).toMatch(/replay large agent status and latest-message
|
|
164
|
+
expect(directive).toMatch(/replay large agent status and latest-message\s+payloads/);
|
|
165
165
|
expect(directive).toMatch(/Track spawned agent names locally/);
|
|
166
|
-
expect(directive).toMatch(/wait_agent
|
|
167
|
-
expect(directive).toMatch(/targeted
|
|
168
|
-
expect(directive).toMatch(/close_agent
|
|
166
|
+
expect(directive).toMatch(/wait_agent[\s\S]*completion/);
|
|
167
|
+
expect(directive).toMatch(/targeted\s+followups only when needed/);
|
|
168
|
+
expect(directive).toMatch(/close_agent[\s\S]*after integrating each\s+result/);
|
|
169
|
+
expect(directive).toMatch(/Plan and reviewer agents\s+may run for a long time/);
|
|
170
|
+
expect(directive).toMatch(/short wait_agent cycles/);
|
|
171
|
+
expect(directive).toMatch(/single long blocking wait/);
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
it("#given directive #when inspected #then hardens Codex subagent assignment ambiguity", () => {
|
|
175
|
+
// given
|
|
176
|
+
const payload = {
|
|
177
|
+
hook_event_name: "UserPromptSubmit",
|
|
178
|
+
prompt: "please ultrawork",
|
|
179
|
+
};
|
|
180
|
+
|
|
181
|
+
// when
|
|
182
|
+
const output = runUserPromptSubmitHook(payload);
|
|
183
|
+
const parsed = parseHookOutput(output);
|
|
184
|
+
|
|
185
|
+
// then
|
|
186
|
+
const directive = parsed.hookSpecificOutput.additionalContext;
|
|
187
|
+
expect(directive).toMatch(/TASK:/);
|
|
188
|
+
expect(directive).toMatch(/fork_turns:\s*"none"/);
|
|
189
|
+
expect(directive).toMatch(/wait_agent[\s\S]*signal, not\s+proof/);
|
|
190
|
+
expect(directive).toMatch(/one targeted followup/);
|
|
191
|
+
expect(directive).toMatch(/respawn.*smaller/);
|
|
169
192
|
});
|
|
170
193
|
});
|
|
171
194
|
|
|
@@ -34,6 +34,31 @@ describe("codex ultrawork package metadata", () => {
|
|
|
34
34
|
expect(hookCommands).toContain(`node "${pluginRoot}/dist/cli.js" hook user-prompt-submit`);
|
|
35
35
|
expect(hookCommands).not.toContainEqual(expect.stringMatching(/\bpython3?\b|ultrawork-detector\.py/));
|
|
36
36
|
});
|
|
37
|
+
|
|
38
|
+
it("#given explorer guidance #when inspected #then names the packaged code-search MCP surface", () => {
|
|
39
|
+
// given
|
|
40
|
+
const explorer = readFileSync("agents/explorer.toml", "utf8");
|
|
41
|
+
|
|
42
|
+
// when
|
|
43
|
+
const guidance = explorer.toLowerCase();
|
|
44
|
+
|
|
45
|
+
// then
|
|
46
|
+
expect(guidance).toContain("ast_grep");
|
|
47
|
+
expect(guidance).toContain("structural");
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
it("#given librarian guidance #when inspected #then names the packaged research MCP surfaces", () => {
|
|
51
|
+
// given
|
|
52
|
+
const librarian = readFileSync("agents/librarian.toml", "utf8");
|
|
53
|
+
|
|
54
|
+
// when
|
|
55
|
+
const guidance = librarian.toLowerCase();
|
|
56
|
+
|
|
57
|
+
// then
|
|
58
|
+
expect(guidance).toContain("grep_app");
|
|
59
|
+
expect(guidance).toContain("context7");
|
|
60
|
+
expect(guidance).toContain("ast_grep");
|
|
61
|
+
});
|
|
37
62
|
});
|
|
38
63
|
|
|
39
64
|
function readJson(path: string): unknown {
|
|
@@ -47,7 +47,7 @@ npm pack --dry-run
|
|
|
47
47
|
## Local Codex Installation
|
|
48
48
|
|
|
49
49
|
```bash
|
|
50
|
-
|
|
50
|
+
npx lazycodex-ai install
|
|
51
51
|
```
|
|
52
52
|
|
|
53
53
|
The installer builds and copies the plugin into `~/.codex/plugins/cache/sisyphuslabs/omo/0.1.0`, registers the `sisyphuslabs` marketplace from the `lazycodex` Git repository, installs runtime dependencies there, and enables:
|