oh-my-opencode 4.6.0 → 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/version-mismatch.js +47 -0
- package/bin/version-mismatch.test.ts +120 -0
- package/dist/cli/codex-ulw-loop.d.ts +12 -0
- package/dist/cli/doctor/checks/tui-plugin-config.d.ts +2 -0
- package/dist/cli/index.js +577 -304
- package/dist/cli/install-codex/codex-config-reasoning.d.ts +2 -1
- package/dist/cli/install-codex/codex-model-catalog.d.ts +13 -0
- package/dist/features/background-agent/concurrency.d.ts +1 -0
- package/dist/features/background-agent/process-cleanup.d.ts +6 -0
- package/dist/features/claude-code-session-state/state.d.ts +1 -0
- package/dist/features/opencode-skill-loader/index.d.ts +1 -0
- package/dist/features/opencode-skill-loader/opencode-config-skills-reader.d.ts +5 -0
- package/dist/features/tmux-subagent/attachable-session-status.d.ts +1 -1
- package/dist/features/tmux-subagent/session-status-parser.d.ts +1 -0
- package/dist/hooks/comment-checker/cli.d.ts +1 -0
- package/dist/hooks/tasks-todowrite-disabler/constants.d.ts +1 -1
- package/dist/index.js +811 -450
- package/dist/shared/command-executor/execute-hook-command.d.ts +2 -0
- package/dist/tools/skill/description-formatter.d.ts +5 -1
- package/dist/tools/skill/types.d.ts +1 -0
- package/package.json +12 -13
- package/packages/ast-grep-mcp/dist/cli.js +53 -9
- package/packages/lsp-tools-mcp/dist/lsp/process.js +1 -1
- package/packages/omo-codex/plugin/components/rules/bundled-rules/hephaestus.md +6 -4
- package/packages/omo-codex/plugin/components/rules/src/post-compact-budget.ts +0 -2
- package/packages/omo-codex/plugin/components/start-work-continuation/directive.md +1 -1
- package/packages/omo-codex/plugin/components/ultrawork/CHANGELOG.md +1 -1
- package/packages/omo-codex/plugin/components/ultrawork/README.md +1 -1
- package/packages/omo-codex/plugin/components/ultrawork/agents/codex-ultrawork-reviewer.toml +3 -1
- package/packages/omo-codex/plugin/components/ultrawork/agents/plan.toml +7 -7
- package/packages/omo-codex/plugin/components/ultrawork/directive.md +1 -1
- package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/SKILL.md +5 -4
- package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/references/full-workflow.md +4 -3
- package/packages/omo-codex/plugin/components/ulw-loop/src/checkpoint.ts +12 -1
- package/packages/omo-codex/plugin/components/ulw-loop/test/checkpoint.test.ts +19 -1
- package/packages/omo-codex/plugin/hooks/hooks.json +11 -0
- package/packages/omo-codex/plugin/model-catalog.json +49 -0
- package/packages/omo-codex/plugin/scripts/auto-update.mjs +159 -0
- package/packages/omo-codex/plugin/scripts/migrate-codex-config.mjs +269 -0
- package/packages/omo-codex/plugin/scripts/sync-hook-status-messages.mjs +3 -1
- package/packages/omo-codex/plugin/scripts/sync-skills.mjs +6 -6
- package/packages/omo-codex/plugin/skills/init-deep/SKILL.md +6 -6
- package/packages/omo-codex/plugin/skills/lcx-report-bug/SKILL.md +127 -0
- package/packages/omo-codex/plugin/skills/lcx-report-bug/agents/openai.yaml +9 -0
- package/packages/omo-codex/plugin/skills/refactor/SKILL.md +6 -6
- package/packages/omo-codex/plugin/skills/remove-ai-slops/SKILL.md +6 -6
- package/packages/omo-codex/plugin/skills/review-work/SKILL.md +7 -7
- package/packages/omo-codex/plugin/skills/start-work/SKILL.md +6 -6
- package/packages/omo-codex/plugin/skills/ulw-loop/SKILL.md +5 -4
- package/packages/omo-codex/plugin/skills/ulw-loop/references/full-workflow.md +4 -3
- package/packages/omo-codex/plugin/skills/ulw-plan/SKILL.md +17 -17
- package/packages/omo-codex/plugin/test/aggregate.test.mjs +172 -19
- package/packages/omo-codex/plugin/test/auto-update.test.mjs +129 -0
- package/packages/omo-codex/plugin/test/hook-status-message.test.mjs +2 -0
- package/packages/omo-codex/plugin/test/migrate-codex-config.test.mjs +146 -0
- package/packages/omo-codex/plugin/test/sync-hook-status-messages.test.mjs +1 -0
- package/packages/omo-codex/plugin/test/sync-skills.test.mjs +22 -0
- package/packages/omo-codex/scripts/install/cli-args.mjs +1 -1
- package/packages/omo-codex/scripts/install/config.mjs +2 -15
- package/packages/omo-codex/scripts/install/delegated-command.mjs +1 -1
- package/packages/omo-codex/scripts/install/legacy-bins.mjs +1 -0
- package/packages/omo-codex/scripts/install/model-catalog.mjs +66 -0
- package/packages/omo-codex/scripts/install/reasoning-config.mjs +65 -7
- package/packages/omo-codex/scripts/install-bin-links.test.mjs +23 -0
- package/packages/omo-codex/scripts/install-config-reasoning.test.mjs +82 -3
- package/packages/omo-codex/scripts/install-config.test.mjs +5 -6
- package/packages/omo-codex/scripts/install-local-entrypoint.test.mjs +30 -2
- package/packages/omo-codex/scripts/install-local.mjs +1 -1
- package/packages/shared-skills/skills/lcx-report-bug/SKILL.md +127 -0
- package/packages/shared-skills/skills/lcx-report-bug/agents/openai.yaml +9 -0
- package/packages/shared-skills/skills/review-work/SKILL.md +7 -7
- package/packages/shared-skills/skills/start-work/SKILL.md +6 -6
- package/packages/shared-skills/skills/ulw-plan/SKILL.md +11 -11
- package/postinstall.mjs +36 -3
- package/dist/cli/install-codex/codex-config-mcp.d.ts +0 -1
|
@@ -8,6 +8,8 @@ export interface ExecuteHookOptions {
|
|
|
8
8
|
zshPath?: string;
|
|
9
9
|
/** Timeout in milliseconds. Process is killed after this. Default: 30000 */
|
|
10
10
|
timeoutMs?: number;
|
|
11
|
+
/** Grace period before force-killing and resolving timed-out commands. Default: 5000 */
|
|
12
|
+
killGraceMs?: number;
|
|
11
13
|
/** When provided, scrub process.env to only include these vars plus HOME/PATH/etc. Used for plugin-sourced hooks. */
|
|
12
14
|
allowedEnvVars?: string[];
|
|
13
15
|
}
|
|
@@ -1,3 +1,7 @@
|
|
|
1
1
|
import type { SkillInfo } from "./types";
|
|
2
2
|
import type { CommandInfo } from "../slashcommand/types";
|
|
3
|
-
|
|
3
|
+
interface CombinedDescriptionOptions {
|
|
4
|
+
includeSkills?: boolean;
|
|
5
|
+
}
|
|
6
|
+
export declare function formatCombinedDescription(skills?: SkillInfo[], commands?: CommandInfo[], options?: CombinedDescriptionOptions): string;
|
|
7
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "oh-my-opencode",
|
|
3
|
-
"version": "4.6.0",
|
|
3
|
+
"version": "4.7.0",
|
|
4
4
|
"description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -106,7 +106,6 @@
|
|
|
106
106
|
"commander": "^14.0.3",
|
|
107
107
|
"detect-libc": "^2.1.2",
|
|
108
108
|
"diff": "^9.0.0",
|
|
109
|
-
"effect": "4.0.0-beta.65",
|
|
110
109
|
"js-yaml": "^4.1.1",
|
|
111
110
|
"jsonc-parser": "^3.3.1",
|
|
112
111
|
"picocolors": "^1.1.1",
|
|
@@ -136,17 +135,17 @@
|
|
|
136
135
|
"zod": "^4.4.3"
|
|
137
136
|
},
|
|
138
137
|
"optionalDependencies": {
|
|
139
|
-
"oh-my-opencode-darwin-arm64": "4.6.0",
|
|
140
|
-
"oh-my-opencode-darwin-x64": "4.6.0",
|
|
141
|
-
"oh-my-opencode-darwin-x64-baseline": "4.6.0",
|
|
142
|
-
"oh-my-opencode-linux-arm64": "4.6.0",
|
|
143
|
-
"oh-my-opencode-linux-arm64-musl": "4.6.0",
|
|
144
|
-
"oh-my-opencode-linux-x64": "4.6.0",
|
|
145
|
-
"oh-my-opencode-linux-x64-baseline": "4.6.0",
|
|
146
|
-
"oh-my-opencode-linux-x64-musl": "4.6.0",
|
|
147
|
-
"oh-my-opencode-linux-x64-musl-baseline": "4.6.0",
|
|
148
|
-
"oh-my-opencode-windows-x64": "4.6.0",
|
|
149
|
-
"oh-my-opencode-windows-x64-baseline": "4.6.0"
|
|
138
|
+
"oh-my-opencode-darwin-arm64": "4.7.0",
|
|
139
|
+
"oh-my-opencode-darwin-x64": "4.7.0",
|
|
140
|
+
"oh-my-opencode-darwin-x64-baseline": "4.7.0",
|
|
141
|
+
"oh-my-opencode-linux-arm64": "4.7.0",
|
|
142
|
+
"oh-my-opencode-linux-arm64-musl": "4.7.0",
|
|
143
|
+
"oh-my-opencode-linux-x64": "4.7.0",
|
|
144
|
+
"oh-my-opencode-linux-x64-baseline": "4.7.0",
|
|
145
|
+
"oh-my-opencode-linux-x64-musl": "4.7.0",
|
|
146
|
+
"oh-my-opencode-linux-x64-musl-baseline": "4.7.0",
|
|
147
|
+
"oh-my-opencode-windows-x64": "4.7.0",
|
|
148
|
+
"oh-my-opencode-windows-x64-baseline": "4.7.0"
|
|
150
149
|
},
|
|
151
150
|
"overrides": {
|
|
152
151
|
"hono": "^4.12.18",
|
|
@@ -342,13 +342,44 @@ function errorMessage(error) {
|
|
|
342
342
|
import { createRequire } from "module";
|
|
343
343
|
import { dirname, join as join2 } from "path";
|
|
344
344
|
import { existsSync, statSync as statSync2 } from "fs";
|
|
345
|
+
var WINDOWS_EXECUTABLE_EXTENSIONS = [".exe", ".cmd", ".bat"];
|
|
345
346
|
function isValidBinary(filePath) {
|
|
346
347
|
try {
|
|
347
|
-
|
|
348
|
+
const stats = statSync2(filePath);
|
|
349
|
+
if (!stats.isFile()) {
|
|
350
|
+
return false;
|
|
351
|
+
}
|
|
352
|
+
const size = stats.size;
|
|
353
|
+
const lowerPath = filePath.toLowerCase();
|
|
354
|
+
if (lowerPath.endsWith(".cmd") || lowerPath.endsWith(".bat")) {
|
|
355
|
+
return size > 0;
|
|
356
|
+
}
|
|
357
|
+
return size > 1e4;
|
|
348
358
|
} catch {
|
|
349
359
|
return false;
|
|
350
360
|
}
|
|
351
361
|
}
|
|
362
|
+
function executableCandidates(filePath, platform = process.platform) {
|
|
363
|
+
if (platform !== "win32")
|
|
364
|
+
return [filePath];
|
|
365
|
+
const candidates = [filePath];
|
|
366
|
+
const lowerPath = filePath.toLowerCase();
|
|
367
|
+
if (WINDOWS_EXECUTABLE_EXTENSIONS.some((extension) => lowerPath.endsWith(extension))) {
|
|
368
|
+
return candidates;
|
|
369
|
+
}
|
|
370
|
+
for (const extension of WINDOWS_EXECUTABLE_EXTENSIONS) {
|
|
371
|
+
candidates.push(`${filePath}${extension}`);
|
|
372
|
+
}
|
|
373
|
+
return candidates;
|
|
374
|
+
}
|
|
375
|
+
function findValidExecutable(filePath) {
|
|
376
|
+
for (const candidate of executableCandidates(filePath)) {
|
|
377
|
+
if (existsSync(candidate) && isValidBinary(candidate)) {
|
|
378
|
+
return candidate;
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
return null;
|
|
382
|
+
}
|
|
352
383
|
function getPlatformPackageName() {
|
|
353
384
|
const platform = process.platform;
|
|
354
385
|
const arch = process.arch;
|
|
@@ -363,29 +394,42 @@ function getPlatformPackageName() {
|
|
|
363
394
|
};
|
|
364
395
|
return platformMap[`${platform}-${arch}`] ?? null;
|
|
365
396
|
}
|
|
397
|
+
function isModuleResolutionFailure(error) {
|
|
398
|
+
return error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("Cannot find package"));
|
|
399
|
+
}
|
|
366
400
|
function findSgCliPathSync() {
|
|
367
|
-
const binaryName =
|
|
401
|
+
const binaryName = "sg";
|
|
368
402
|
try {
|
|
369
403
|
const require2 = createRequire(import.meta.url);
|
|
370
404
|
const cliPackageJsonPath = require2.resolve("@ast-grep/cli/package.json");
|
|
371
405
|
const cliDirectory = dirname(cliPackageJsonPath);
|
|
372
406
|
const sgPath = join2(cliDirectory, binaryName);
|
|
373
|
-
|
|
374
|
-
|
|
407
|
+
const validSgPath = findValidExecutable(sgPath);
|
|
408
|
+
if (validSgPath) {
|
|
409
|
+
return validSgPath;
|
|
410
|
+
}
|
|
411
|
+
} catch (error) {
|
|
412
|
+
if (!isModuleResolutionFailure(error)) {
|
|
413
|
+
throw error;
|
|
375
414
|
}
|
|
376
|
-
}
|
|
415
|
+
}
|
|
377
416
|
const platformPackage = getPlatformPackageName();
|
|
378
417
|
if (platformPackage) {
|
|
379
418
|
try {
|
|
380
419
|
const require2 = createRequire(import.meta.url);
|
|
381
420
|
const packageJsonPath = require2.resolve(`${platformPackage}/package.json`);
|
|
382
421
|
const packageDirectory = dirname(packageJsonPath);
|
|
383
|
-
const astGrepBinaryName =
|
|
422
|
+
const astGrepBinaryName = "ast-grep";
|
|
384
423
|
const binaryPath = join2(packageDirectory, astGrepBinaryName);
|
|
385
|
-
|
|
386
|
-
|
|
424
|
+
const validBinaryPath = findValidExecutable(binaryPath);
|
|
425
|
+
if (validBinaryPath) {
|
|
426
|
+
return validBinaryPath;
|
|
427
|
+
}
|
|
428
|
+
} catch (error) {
|
|
429
|
+
if (!isModuleResolutionFailure(error)) {
|
|
430
|
+
throw error;
|
|
387
431
|
}
|
|
388
|
-
}
|
|
432
|
+
}
|
|
389
433
|
}
|
|
390
434
|
if (process.platform === "darwin") {
|
|
391
435
|
const homebrewPaths = ["/opt/homebrew/bin/sg", "/usr/local/bin/sg"];
|
|
@@ -97,7 +97,7 @@ function getWindowsPathExtensions(env) {
|
|
|
97
97
|
.map((extension) => extension.trim())
|
|
98
98
|
.filter(Boolean)
|
|
99
99
|
.map((extension) => (extension.startsWith(".") ? extension : `.${extension}`));
|
|
100
|
-
return [...new Set([
|
|
100
|
+
return [...new Set([...extensions, ".exe", ".cmd", ".bat", ""])];
|
|
101
101
|
}
|
|
102
102
|
function resolveWindowsCommand(command, env) {
|
|
103
103
|
const hasPathSeparator = command.includes("/") || command.includes("\\");
|
|
@@ -79,13 +79,15 @@ omo-codex bundles three read-only Codex subagent roles in `CODEX_HOME/agents/`:
|
|
|
79
79
|
|
|
80
80
|
**Routing:**
|
|
81
81
|
|
|
82
|
-
- "Where is X?" / "Find code that does Y" -> `spawn_agent(agent_type="explorer", ...)`
|
|
83
|
-
- "How does library Z work?" / "What's the API contract?" -> `spawn_agent(agent_type="librarian", ...)`
|
|
84
|
-
- 5+ interdependent steps, ambiguous scope, multi-module work -> `spawn_agent(agent_type="plan", ...)`
|
|
85
|
-
- Heavy verification of a finished change -> `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)`
|
|
82
|
+
- "Where is X?" / "Find code that does Y" -> `spawn_agent(agent_type="explorer", fork_turns="none", ...)`
|
|
83
|
+
- "How does library Z work?" / "What's the API contract?" -> `spawn_agent(agent_type="librarian", fork_turns="none", ...)`
|
|
84
|
+
- 5+ interdependent steps, ambiguous scope, multi-module work -> `spawn_agent(agent_type="plan", fork_turns="none", ...)`
|
|
85
|
+
- Heavy verification of a finished change -> `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)`
|
|
86
86
|
|
|
87
87
|
**Don't duplicate.** Once a subagent is dispatched for a question, do not re-do the same search yourself. Once results return, do not re-verify by repeating their tool calls; integrate and move on.
|
|
88
88
|
|
|
89
|
+
**Keep parent liveness visible.** While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates. Do this during long `wait_agent` cycles so the session does not look idle while children are still running.
|
|
90
|
+
|
|
89
91
|
# Operating Loop
|
|
90
92
|
|
|
91
93
|
**Explore -> Plan -> Implement -> Verify -> Manually QA.** Loops are short and tight; do not loop back with a draft when the work is yours to do.
|
|
@@ -24,8 +24,6 @@ const MODEL_CONTEXT_BUDGETS: readonly ModelContextBudget[] = [
|
|
|
24
24
|
{ slug: "gpt-5.5", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
|
|
25
25
|
{ slug: "gpt-5.4", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
|
|
26
26
|
{ slug: "gpt-5.4-mini", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
|
|
27
|
-
{ slug: "gpt-5.3-codex", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
|
|
28
|
-
{ slug: "gpt-5.2", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
|
|
29
27
|
{
|
|
30
28
|
slug: "codex-auto-review",
|
|
31
29
|
contextWindowTokens: 272_000,
|
|
@@ -37,7 +37,7 @@ You are mid-flight on a Prometheus work plan. The turn just ended without finish
|
|
|
37
37
|
# Stop conditions for THIS turn
|
|
38
38
|
|
|
39
39
|
- A top-level checkbox flipped to `- [x]` after the 5-phase QA gate (Phase 1 read, Phase 2 automated, Phase 3 channel scenario, Phase 4 adversarial-class probing, Phase 5 gate decision). Then the Stop hook will re-evaluate; if more checkboxes remain you will be continued again.
|
|
40
|
-
- 3 same-failure cycles on one sub-task → escalate via `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)` and stop dispatch.
|
|
40
|
+
- 3 same-failure cycles on one sub-task → escalate via `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)` and stop dispatch.
|
|
41
41
|
- Safety boundary (destructive command, secret exfiltration, production write) → stop and surface a safe substitute.
|
|
42
42
|
- All top-level checkboxes `- [x]` AND (if gate triggered) `codex-ultrawork-reviewer` approved unconditionally → print the ORCHESTRATION COMPLETE block and end.
|
|
43
43
|
|
|
@@ -21,5 +21,5 @@
|
|
|
21
21
|
Initial release.
|
|
22
22
|
|
|
23
23
|
- Codex `UserPromptSubmit` hook that detects `ultrawork` / `ulw` (word-bounded, case-insensitive) in the user prompt and injects the ultrawork orchestration directive.
|
|
24
|
-
- Directive enforces: goal + binding success criteria with manual-QA scenarios + evidence, durable `/tmp` notepad lifecycle, obsessive atomic todos, scenario-driven execution loop, and a
|
|
24
|
+
- Directive enforces: goal + binding success criteria with manual-QA scenarios + evidence, durable `/tmp` notepad lifecycle, obsessive atomic todos, scenario-driven execution loop, and a ChatGPT-compatible xhigh verification gate with no "false positive" escape hatch.
|
|
25
25
|
- Directive size: 5,775 chars across 143 lines.
|
|
@@ -13,7 +13,7 @@ Bundled Codex agent role TOMLs in `agents/` are installed into `CODEX_HOME/agent
|
|
|
13
13
|
| Surface + paired cleanup | Execution loop step 4 (**SURFACE-AS-SCENARIO**) runs the chosen channel scenario end-to-end. Step 5 (**CLEANUP, PAIRED**) tears down every QA-spawned process / tmux session / browser context / container / port / temp dir, with a one-line receipt appended to the notepad. Leftover state → NOT done. |
|
|
14
14
|
| Durable /tmp notepad | `mktemp -t ulw-$(date +%Y%m%d-%H%M%S).XXXXXX.md` with sections `Plan`, `Success criteria + QA scenarios`, `Now`, `Todo`, `Findings`, `Learnings`. **Append**, never rewrite. |
|
|
15
15
|
| Obsessive atomic todos | Every action — even one-line edits, `ls`, single test runs — becomes a todo. Format: `path: <action> for <criterion> — verify by <check>`. One in_progress at a time, mark completed immediately. |
|
|
16
|
-
|
|
|
16
|
+
| ChatGPT-compatible xhigh verification gate | Triggered automatically on user-requested rigor, 3+ files, 20+ turns, 30+ minutes, or refactor/migration/perf/security work. Use the bundled `codex-ultrawork-reviewer` agent role when available. Reviewer verdict is **binding**: no "false positive", no minimising, no arguing. Loop until **unconditional** approval. "Looks good but..." = REJECTION. |
|
|
17
17
|
|
|
18
18
|
The directive is currently 10,951 chars / 231 lines and follows the GPT-5.5 prompting structure (Role / Goal / Manual-QA channels / Bootstrap / Execution loop / Verification gate / Commits / Constraints / Output / Stop rules).
|
|
19
19
|
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
name = "codex-ultrawork-reviewer"
|
|
2
2
|
description = "Strict ultrawork verification reviewer. Use after full QA evidence to audit the diff, goal, and scenario evidence before declaring done."
|
|
3
3
|
nickname_candidates = ["Verifier"]
|
|
4
|
-
model = "gpt-5.
|
|
4
|
+
model = "gpt-5.5"
|
|
5
5
|
model_reasoning_effort = "xhigh"
|
|
6
6
|
developer_instructions = """You are the ultrawork verification reviewer.
|
|
7
7
|
|
|
8
8
|
Review only. Do not implement.
|
|
9
9
|
|
|
10
|
+
The default model intentionally uses a ChatGPT account compatible frontier model. If a caller supplies a different supported reviewer model, follow the caller's assignment while preserving this review contract.
|
|
11
|
+
|
|
10
12
|
Input should include the goal, success criteria, full diff, QA evidence, and notepad path.
|
|
11
13
|
If Codex delivers parent review context as inter-agent commentary, treat the latest parent message with goal/diff/evidence as your active review assignment, not passive context.
|
|
12
14
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
name = "plan"
|
|
2
|
-
description = "Strategic planning consultant. Produces a single executable work plan from a vague or large request. Planner only - never implements. Writes the plan to plans/<slug>.md."
|
|
2
|
+
description = "Strategic planning consultant. Produces a single executable work plan from a vague or large request. Planner only - never implements. Writes the plan to .omo/plans/<slug>.md."
|
|
3
3
|
nickname_candidates = ["Planner"]
|
|
4
4
|
model = "gpt-5.5"
|
|
5
5
|
model_reasoning_effort = "xhigh"
|
|
@@ -36,7 +36,7 @@ Wait for context to converge before drafting. Rushed plans fail.
|
|
|
36
36
|
|
|
37
37
|
# Phase 2 - Plan output (single markdown file, single plan)
|
|
38
38
|
|
|
39
|
-
Write the plan to
|
|
39
|
+
Write the plan to `.omo/plans/<slug>.md` in the working tree (create the `.omo/plans/` directory if absent). One plan per request - no "Phase 1 plan / Phase 2 plan" splits. 50+ tasks is fine if the work demands it.
|
|
40
40
|
|
|
41
41
|
Use this template verbatim (fill the placeholders):
|
|
42
42
|
|
|
@@ -60,7 +60,7 @@ Use this template verbatim (fill the placeholders):
|
|
|
60
60
|
> Zero human intervention - all verification is agent-executed.
|
|
61
61
|
- Test decision: <TDD | tests-after | none> + framework
|
|
62
62
|
- QA policy: every task has agent-executed scenarios
|
|
63
|
-
- Evidence:
|
|
63
|
+
- Evidence: `.omo/evidence/task-<N>-<slug>.<ext>`
|
|
64
64
|
|
|
65
65
|
## Execution strategy
|
|
66
66
|
### Parallel execution waves
|
|
@@ -114,13 +114,13 @@ Critical path: Task 1 -> Task 2 -> Task 6
|
|
|
114
114
|
Tool: <bash | curl | tmux | playwright(real Chrome) | agent-browser | computer-use>
|
|
115
115
|
Steps: <exact command / API call / page action with concrete inputs - URL, payload, keystrokes, selectors>
|
|
116
116
|
Expected: <concrete, binary pass/fail observable>
|
|
117
|
-
Evidence: evidence/task-<N>-<slug>.<ext>
|
|
117
|
+
Evidence: .omo/evidence/task-<N>-<slug>.<ext>
|
|
118
118
|
|
|
119
119
|
Scenario: <failure / edge case>
|
|
120
120
|
Tool: <same, with exact invocation>
|
|
121
121
|
Steps: <trigger the error with specific inputs>
|
|
122
122
|
Expected: <graceful failure with the exact error message/code>
|
|
123
|
-
Evidence: evidence/task-<N>-<slug>-error.<ext>
|
|
123
|
+
Evidence: .omo/evidence/task-<N>-<slug>-error.<ext>
|
|
124
124
|
```
|
|
125
125
|
|
|
126
126
|
Commit: <YES|NO> | Message: `<type>(<scope>): <imperative summary>` | Files: [<paths>]
|
|
@@ -136,14 +136,14 @@ Critical path: Task 1 -> Task 2 -> Task 6
|
|
|
136
136
|
- One logical change per commit. Conventional Commits (`<type>(<scope>): <subject>` body + footer).
|
|
137
137
|
- Atomic: every commit builds and passes tests on its own.
|
|
138
138
|
- No "WIP" / "fix typo squash later" commits on the final branch - clean up before merge.
|
|
139
|
-
- Reference the plan file path in the final commit footer: `Plan: plans/<slug>.md`.
|
|
139
|
+
- Reference the plan file path in the final commit footer: `Plan: .omo/plans/<slug>.md`.
|
|
140
140
|
|
|
141
141
|
## Success criteria
|
|
142
142
|
- All Must-Have shipped; all QA scenarios pass with captured evidence; F1-F4 approved; commit history clean.
|
|
143
143
|
```
|
|
144
144
|
|
|
145
145
|
# Constraints
|
|
146
|
-
- READ + plan-file write only. Tools I will NEVER call: `edit`/`write`/`apply_patch` on anything outside
|
|
146
|
+
- READ + plan-file write only. Tools I will NEVER call: `edit`/`write`/`apply_patch` on anything outside `.omo/plans/<slug>.md`, anything that mutates non-plan files.
|
|
147
147
|
- DO NOT split work into multiple plans. ONE plan per request.
|
|
148
148
|
- DO NOT skip context gathering. NEVER plan blind.
|
|
149
149
|
- DO NOT include "user manually tests" as an acceptance criterion. Every check must be agent-executable.
|
|
@@ -241,7 +241,7 @@ Atomic, Conventional Commits (`<type>(<scope>): <imperative>` — feat /
|
|
|
241
241
|
fix / refactor / test / docs / chore / build / ci / perf). One logical
|
|
242
242
|
change per commit; each commit builds + tests green on its own. No WIP
|
|
243
243
|
on the final branch. If a plan file exists, final commit footer:
|
|
244
|
-
`Plan: plans/<slug>.md`. Do NOT auto-`git commit` unless the user
|
|
244
|
+
`Plan: .omo/plans/<slug>.md`. Do NOT auto-`git commit` unless the user
|
|
245
245
|
requested or preauthorised this session — default is stage + draft
|
|
246
246
|
message + present for approval.
|
|
247
247
|
|
|
@@ -25,6 +25,7 @@ This Codex skill is intentionally compact to avoid adding a large operating manu
|
|
|
25
25
|
- Delegate code edits, test writes, fixes, and QA execution to right-sized Codex subagents when the workflow requires it.
|
|
26
26
|
- Every `spawn_agent` message starts with `TASK:`, then names `DELIVERABLE`, `SCOPE`, and `VERIFY`; role selection requires `agent_type`, while `model` + `reasoning_effort` alone creates a default agent, not a reviewer or worker; prefer `fork_turns: "none"` unless full history is truly required.
|
|
27
27
|
- Plan and reviewer agents may run for a long time; spawn them in the background, keep doing independent root work, and poll with short wait_agent cycles. Never use a single long blocking wait for them.
|
|
28
|
+
- While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates.
|
|
28
29
|
- Avoid `list_agents` as a polling or status tool in large runs; it can replay large agent status and latest-message payloads. Track spawned agent names locally, use `wait_agent` for completion signals, targeted followups only when needed, and `close_agent` after integrating each result.
|
|
29
30
|
- Treat `wait_agent` as a mailbox signal, not proof of completion, content, or errors. After two waits with no substantive result, send one targeted followup, then record inconclusive and respawn a smaller `fork_turns: "none"` task if the child stays silent or ack-only.
|
|
30
31
|
|
|
@@ -34,10 +35,10 @@ The full workflow may mention OpenCode-style orchestration examples. In Codex, t
|
|
|
34
35
|
|
|
35
36
|
| Workflow intent | Codex tool |
|
|
36
37
|
| --- | --- |
|
|
37
|
-
| Plan agent | `spawn_agent(agent_type="plan", ...)` |
|
|
38
|
-
| Search/read-only worker | `spawn_agent(agent_type="explorer", ...)` |
|
|
39
|
-
| Implementation or QA worker | `spawn_agent(agent_type="worker", ...)` |
|
|
40
|
-
| Final verification reviewer | `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)` |
|
|
38
|
+
| Plan agent | `spawn_agent(agent_type="plan", fork_turns="none", ...)` |
|
|
39
|
+
| Search/read-only worker | `spawn_agent(agent_type="explorer", fork_turns="none", ...)` |
|
|
40
|
+
| Implementation or QA worker | `spawn_agent(agent_type="worker", fork_turns="none", ...)` |
|
|
41
|
+
| Final verification reviewer | `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)` |
|
|
41
42
|
| Wait for background result | `wait_agent(...)` |
|
|
42
43
|
| Clean up finished worker | `close_agent(...)` |
|
|
43
44
|
|
package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/references/full-workflow.md
CHANGED
|
@@ -33,9 +33,9 @@ Size each worker to the task — never spend `xhigh` on a one-liner, never send
|
|
|
33
33
|
| Task shape | agent_type | model | reasoning_effort |
|
|
34
34
|
|---|---|---|---|
|
|
35
35
|
| Trivial / mechanical (rename, move, obvious one-liner, config edit) | `worker` | `gpt-5.4-mini` | `low` |
|
|
36
|
-
| Pure implementation against a clear spec (new function, endpoint, test from a named pattern) | `worker` | `gpt-5.
|
|
36
|
+
| Pure implementation against a clear spec (new function, endpoint, test from a named pattern) | `worker` | `gpt-5.4` | `high` |
|
|
37
37
|
| Deep debugging / race / perf / subtle cross-module reasoning | `worker` | `gpt-5.5` | `xhigh` |
|
|
38
|
-
| QA execution (drive a channel, capture evidence) | `worker` | `gpt-5.
|
|
38
|
+
| QA execution (drive a channel, capture evidence) | `worker` | `gpt-5.4` | `high` |
|
|
39
39
|
| Read-only codebase search | `explorer` | role default | role default |
|
|
40
40
|
| External library / docs research | `librarian` | role default | role default |
|
|
41
41
|
| Final verification audit | `codex-ultrawork-reviewer` | role default | role default |
|
|
@@ -48,6 +48,7 @@ Codex subagent reliability:
|
|
|
48
48
|
- Start every `spawn_agent` message with `TASK: <imperative assignment>`, then name `DELIVERABLE`, `SCOPE`, and `VERIFY`. State that it is an executable assignment, not a context handoff.
|
|
49
49
|
- Prefer `fork_turns: "none"` unless full history is truly required; paste only the context the child needs. Full-history forks can make the child continue old parent context instead of the delegated task.
|
|
50
50
|
- Plan and reviewer agents may run for a long time; spawn them in the background, keep doing independent root work, and poll with short wait_agent cycles. Never use a single long blocking wait for them.
|
|
51
|
+
- While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates.
|
|
51
52
|
- Do not use `list_agents` as a polling or status tool in long or high-context runs; it can replay large agent status and latest-message payloads. Track spawned agent names locally, use `wait_agent` for completion signals, targeted followups only when needed, and `close_agent` after integrating each result.
|
|
52
53
|
- Treat `wait_agent` as a mailbox signal, not proof of completion, content, or errors. After two waits with no substantive result, send one targeted followup: `TASK STILL ACTIVE: return <deliverable> or BLOCKED: <reason>`. If still silent or ack-only, record inconclusive, do not count it as pass/review approval, close if safe, and respawn a smaller `fork_turns: "none"` task with the missing deliverable.
|
|
53
54
|
|
|
@@ -147,7 +148,7 @@ Loop per goal. Cap at 5 cycles per goal. Cap identical same-criterion failures a
|
|
|
147
148
|
2. Register atomic todos: `path: <action> for <criterion> - verify by <check>`.
|
|
148
149
|
3. DELEGATE-IN-PARALLEL: dispatch every independent task in the wave at once via right-sized `spawn_agent` workers (Delegation table). Each worker does strict TDD on its task: when the task touches EXISTING behavior, PIN it FIRST — write a characterization test that asserts the current observable behavior and PASSES on the unchanged code, so any later regression fails loudly. Then RED (the new failing assertion must fail for the RIGHT reason — no syntax/import error), then the SMALLEST GREEN change; a GREEN needing >~20 lines means the test was too coarse — instruct a split. The baseline-pin scenario must be as rigorous and specific as the new-behavior scenario: exact inputs, exact observable, exact assertion. Serialize only on a NAMED dependency.
|
|
149
150
|
4. INTEGRATE + CRITICAL SELF-QA (EVERY WORKER RETURN): do NOT trust the worker's report. Read the diff yourself, re-run its tests, and run LSP diagnostics on the changed files. Treat "done" as a claim to disprove. If the diff drifts, the test is hollow, or evidence is missing, RESPAWN the worker with the specific failure context. Forward every finding/learning to subsequent workers.
|
|
150
|
-
5. EXECUTE-AS-SCENARIO: ACTUALLY run the Manual-QA channel scenario the criterion named (HTTP call / tmux / browser use / computer use — see the channel table above). Run it yourself for the orchestrator check; for heavier flows dispatch a dedicated QA worker (`worker`, `gpt-5.
|
|
151
|
+
5. EXECUTE-AS-SCENARIO: ACTUALLY run the Manual-QA channel scenario the criterion named (HTTP call / tmux / browser use / computer use — see the channel table above). Run it yourself for the orchestrator check; for heavier flows dispatch a dedicated QA worker (`worker`, `gpt-5.4`, `high`) whose ONLY job is to drive the channel and write the artifact to the named evidence path. The unit suite being green is NEVER a substitute. If the scenario FAILS, respawn the implementing worker with the captured failure — do not hand-patch around it.
|
|
151
152
|
6. CAPTURE: collect the observable artifact path: transcript, stdout, screenshot, assertion, status+body, diff, or parsed dump. No artifact written at the evidence path — not done; record BLOCKED and respawn QA.
|
|
152
153
|
7. CLEAN (PAIRED, NEVER SKIP): tear down every runtime artifact step 5 spawned BEFORE recording — server PIDs (`kill`, verify `kill -0` fails), `tmux` sessions (`tmux kill-session -t ulw-qa-<criterion>`; confirm `tmux ls`), browser / Playwright contexts (`.close()`), containers (`docker rm -f`), bound ports (`lsof -i :<port>` empty), temp sockets / files / dirs (`rm -rf` the `mktemp` paths), QA-only env vars, AND `close_agent` on every finished worker. Register each teardown as its own todo the moment the QA spawns the resource (scripts, tmux assets, browsers / agent-browser sessions, PIDs, ports) so none is forgotten. Embed a one-line cleanup receipt in the evidence string, e.g. `cleanup: killed 12345; tmux kill-session ulw-qa-foo; rm -rf /tmp/ulw.aB12cD; close_agent w-3`. Missing receipt → record BLOCKED, not PASS.
|
|
153
154
|
8. RECORD exactly one result:
|
|
@@ -54,6 +54,14 @@ async function canReconcileCompletedTaskScopedAggregateSnapshot(repoRoot: string
|
|
|
54
54
|
return snapshotObjectiveMapsToUlwLoopPlan(repoRoot, snapshotObjective, scope);
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
async function canReconcileActiveFinalTaskScopedAggregateSnapshot(repoRoot: string, plan: UlwLoopPlan, goal: UlwLoopItem, snapshotObjective: string, evidence: string, scope?: UlwLoopScope): Promise<boolean> {
|
|
58
|
+
if (codexGoalMode(plan) !== "aggregate") return false;
|
|
59
|
+
if (goal.status !== "in_progress" || plan.activeGoalId !== goal.id) return false;
|
|
60
|
+
if (!isFinalRunCompletionCandidate(plan, goal)) return false;
|
|
61
|
+
if (!textHasCompletionValidationEvidence(evidence)) return false;
|
|
62
|
+
return snapshotObjectiveMapsToUlwLoopPlan(repoRoot, snapshotObjective, scope);
|
|
63
|
+
}
|
|
64
|
+
|
|
57
65
|
function buildCompletedLegacyGoalRemediation(goal: UlwLoopItem): string {
|
|
58
66
|
return [
|
|
59
67
|
"If get_goal returns a different completed legacy/thread objective, do not repeat --status complete in this thread.",
|
|
@@ -130,7 +138,10 @@ export async function checkpointUlwLoop(repoRoot: string, args: CheckpointUlwLoo
|
|
|
130
138
|
codexGoal = reconciliation.snapshot.raw;
|
|
131
139
|
if (!reconciliation.ok) {
|
|
132
140
|
const objective = snapshot?.objective;
|
|
133
|
-
const
|
|
141
|
+
const mismatchedTaskObjective = snapshot?.available === true && objective !== undefined && normalizeObjective(objective) !== normalizeObjective(expectedCodexObjective(plan, goal));
|
|
142
|
+
const completedTaskScoped = mismatchedTaskObjective && snapshot.status === "complete" && await canReconcileCompletedTaskScopedAggregateSnapshot(repoRoot, plan, goal, objective, evidence, scope);
|
|
143
|
+
const activeFinalTaskScoped = mismatchedTaskObjective && snapshot.status === "active" && await canReconcileActiveFinalTaskScopedAggregateSnapshot(repoRoot, plan, goal, objective, evidence, scope);
|
|
144
|
+
const taskScoped = completedTaskScoped || activeFinalTaskScoped;
|
|
134
145
|
if (!taskScoped) throw new UlwLoopError(`${formatCodexGoalReconciliation(reconciliation)}${aggregate && snapshot?.status === "complete" && objective !== undefined ? buildTaskScopedAggregateReconciliationHint(goal, final) : ""}`, "ulw_loop_codex_snapshot_mismatch");
|
|
135
146
|
aggregateCompletion = makeAggregateCompletion(now, evidence, codexGoal);
|
|
136
147
|
}
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import { mkdir, mkdtemp, readFile, writeFile } from "node:fs/promises";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { join } from "node:path";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
5
6
|
import { describe, expect, it } from "vitest";
|
|
6
7
|
|
|
7
8
|
import { checkpointUlwLoop } from "../src/checkpoint.js";
|
|
@@ -12,7 +13,7 @@ import type { UlwLoopItem, UlwLoopLedgerEntry, UlwLoopPlan, UlwLoopSuccessCriter
|
|
|
12
13
|
import { UlwLoopError } from "../src/types.js";
|
|
13
14
|
|
|
14
15
|
const NOW = "2026-05-23T00:00:00.000Z";
|
|
15
|
-
const QUALITY_GATE_PATH =
|
|
16
|
+
const QUALITY_GATE_PATH = fileURLToPath(new URL("./fixtures/sample-quality-gate.json", import.meta.url));
|
|
16
17
|
|
|
17
18
|
function criterion(id: string, status: UlwLoopSuccessCriterion["status"]): UlwLoopSuccessCriterion {
|
|
18
19
|
return { id, scenario: `${id} scenario`, userModel: "happy", expectedEvidence: `${id} proof`, capturedEvidence: status === "pass" ? `${id} passed` : null, status };
|
|
@@ -142,6 +143,23 @@ describe("checkpointUlwLoop final story", () => {
|
|
|
142
143
|
expect(result.ledgerEntry.kind).toBe("aggregate_completed");
|
|
143
144
|
});
|
|
144
145
|
|
|
146
|
+
it("ACCEPTS complete when active task-scoped Codex objective maps to the ulw-loop brief", async () => {
|
|
147
|
+
const taskObjective = "Create only research artifacts with source evidence";
|
|
148
|
+
const repo = await repoWith(plan([passGoal("G001")], { activeGoalId: "G001" }));
|
|
149
|
+
await writeFile(ulwLoopBriefPath(repo), `${taskObjective}\n`, "utf8");
|
|
150
|
+
|
|
151
|
+
const result = await checkpointUlwLoop(repo, {
|
|
152
|
+
goalId: "G001",
|
|
153
|
+
status: "complete",
|
|
154
|
+
evidence: "final implementation complete and quality gate passed",
|
|
155
|
+
codexGoalJson: snapshot("active", taskObjective),
|
|
156
|
+
qualityGateJson: QUALITY_GATE_PATH,
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
expect(result.aggregateCompletion?.status).toBe("complete");
|
|
160
|
+
expect(result.ledgerEntry.kind).toBe("aggregate_completed");
|
|
161
|
+
});
|
|
162
|
+
|
|
145
163
|
it("explains final task-scoped objective mapping when completed Codex objective is unrelated", async () => {
|
|
146
164
|
const repo = await repoWith(plan([passGoal("G001")], { activeGoalId: "G001" }));
|
|
147
165
|
await writeFile(ulwLoopBriefPath(repo), "Fix ulw-loop objective mismatch and install local ulw\n", "utf8");
|
|
@@ -20,6 +20,17 @@
|
|
|
20
20
|
"statusMessage": "LazyCodex(0.1.0): Recording Session Telemetry"
|
|
21
21
|
}
|
|
22
22
|
]
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"matcher": "^startup$",
|
|
26
|
+
"hooks": [
|
|
27
|
+
{
|
|
28
|
+
"type": "command",
|
|
29
|
+
"command": "node \"${PLUGIN_ROOT}/scripts/auto-update.mjs\" hook session-start",
|
|
30
|
+
"timeout": 5,
|
|
31
|
+
"statusMessage": "LazyCodex(0.1.0): Checking Auto Update"
|
|
32
|
+
}
|
|
33
|
+
]
|
|
23
34
|
}
|
|
24
35
|
],
|
|
25
36
|
"UserPromptSubmit": [
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "2026-06-03.gpt-5.5-400k",
|
|
3
|
+
"current": {
|
|
4
|
+
"model": "gpt-5.5",
|
|
5
|
+
"model_context_window": 400000,
|
|
6
|
+
"model_reasoning_effort": "high",
|
|
7
|
+
"plan_mode_reasoning_effort": "xhigh"
|
|
8
|
+
},
|
|
9
|
+
"roles": {
|
|
10
|
+
"default": {
|
|
11
|
+
"model": "gpt-5.5",
|
|
12
|
+
"model_context_window": 400000,
|
|
13
|
+
"model_reasoning_effort": "high",
|
|
14
|
+
"plan_mode_reasoning_effort": "xhigh"
|
|
15
|
+
},
|
|
16
|
+
"verifier": {
|
|
17
|
+
"model": "gpt-5.5",
|
|
18
|
+
"model_reasoning_effort": "xhigh"
|
|
19
|
+
},
|
|
20
|
+
"worker": {
|
|
21
|
+
"model": "gpt-5.4",
|
|
22
|
+
"model_reasoning_effort": "high"
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"managedProfiles": [
|
|
26
|
+
{
|
|
27
|
+
"version": "legacy.gpt-5.2",
|
|
28
|
+
"match": {
|
|
29
|
+
"model": "gpt-5.2"
|
|
30
|
+
}
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"version": "legacy.gpt-5.4-1m",
|
|
34
|
+
"match": {
|
|
35
|
+
"model": "gpt-5.4",
|
|
36
|
+
"model_context_window": 1000000,
|
|
37
|
+
"model_reasoning_effort": "high",
|
|
38
|
+
"plan_mode_reasoning_effort": "xhigh"
|
|
39
|
+
}
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"version": "legacy.gpt-5.5-272k",
|
|
43
|
+
"match": {
|
|
44
|
+
"model": "gpt-5.5",
|
|
45
|
+
"model_context_window": 272000
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
]
|
|
49
|
+
}
|