npm - oh-my-opencode - Versions diffs - 4.6.0 → 4.7.1 - Mend

oh-my-opencode 4.6.0 → 4.7.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (90)

package/packages/omo-codex/plugin/components/lsp/test/codex-hook-unavailable.test.ts ADDED Viewed

@@ -0,0 +1,206 @@
+import { mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import path from "node:path";
+import { afterEach, describe, expect, it } from "vitest";
+import { runLspPostCompactHook, runLspPostToolUseHook } from "../src/codex-hook.js";
+const MARKSMAN_INITIALIZE_TIMEOUT = [
+	"LSP request timeout (method: initialize)",
+	'recent stderr: [01:16:41 INF] <LSP Entry> Starting Marksman LSP server: {"arch":"Arm64"}',
+	'[01:16:41 INF] <Folder> Loading folder documents: {"uri":"file:///repo"}',
+].join("\n");
+const tempDirs: string[] = [];
+afterEach(() => {
+	for (const tempDir of tempDirs.splice(0)) {
+		rmSync(tempDir, { recursive: true, force: true });
+	}
+});
+describe("codex PostToolUse unavailable LSP suppression", () => {
+	it("#given unavailable markdown LSP in one session #when PostToolUse repeats #then suppresses feedback and skips the cached extension", async () => {
+		// given
+		const pluginData = tempPluginData();
+		const input = postToolUseInput("session-unavailable", ".omo/ulw-loop/evidence/note.md");
+		let calls = 0;
+		await withPluginData(pluginData, async () => {
+			// when
+			const firstOutput = await runLspPostToolUseHook(input, async () => {
+				calls += 1;
+				return MARKSMAN_INITIALIZE_TIMEOUT;
+			});
+			const secondOutput = await runLspPostToolUseHook(input, async () => {
+				calls += 1;
+				return "error[markdown] (1000) at 1:1: second call should have been skipped.";
+			});
+			// then
+			expect(firstOutput).toBe("");
+			expect(secondOutput).toBe("");
+			expect(calls).toBe(1);
+		});
+	});
+	it("#given cached unavailable LSP after PostCompact #when the next PostToolUse runs #then probes once and suppresses again", async () => {
+		// given
+		const pluginData = tempPluginData();
+		const input = postToolUseInput("session-compact", ".omo/ulw-loop/evidence/note.md");
+		let calls = 0;
+		await withPluginData(pluginData, async () => {
+			await runLspPostToolUseHook(input, async () => {
+				calls += 1;
+				return MARKSMAN_INITIALIZE_TIMEOUT;
+			});
+			await runLspPostToolUseHook(input, async () => {
+				calls += 1;
+				return "error[markdown] (1000) at 1:1: cached call should have been skipped.";
+			});
+			// when
+			const compactInput = {
+				cwd: "/repo",
+				hook_event_name: "PostCompact",
+				model: "gpt-5.5",
+				session_id: "session-compact",
+				transcript_path: null,
+				trigger: "manual",
+				turn_id: "turn-compact",
+			};
+			const compactOutput = await runLspPostCompactHook(compactInput);
+			const afterCompactOutput = await runLspPostToolUseHook(input, async () => {
+				calls += 1;
+				return MARKSMAN_INITIALIZE_TIMEOUT;
+			});
+			await runLspPostToolUseHook(input, async () => {
+				calls += 1;
+				return "error[markdown] (1000) at 1:1: post-compact cached call should have been skipped.";
+			});
+			// then
+			expect(compactOutput).toBe("");
+			expect(afterCompactOutput).toBe("");
+			expect(calls).toBe(2);
+		});
+	});
+	it("#given cached unavailable LSP after PostCompact #when the probe is clean #then clears the unavailable cache", async () => {
+		// given
+		const pluginData = tempPluginData();
+		const input = postToolUseInput("session-compact-clean", ".omo/ulw-loop/evidence/note.md");
+		let calls = 0;
+		await withPluginData(pluginData, async () => {
+			await runLspPostToolUseHook(input, async () => {
+				calls += 1;
+				return MARKSMAN_INITIALIZE_TIMEOUT;
+			});
+			await runLspPostCompactHook({ session_id: "session-compact-clean" });
+			// when
+			const cleanProbeOutput = await runLspPostToolUseHook(input, async () => {
+				calls += 1;
+				return "No diagnostics found";
+			});
+			const laterDiagnosticOutput = await runLspPostToolUseHook(input, async () => {
+				calls += 1;
+				return "error[markdown] (1000) at 1:1: recovered markdown diagnostic.";
+			});
+			// then
+			expect(cleanProbeOutput).toBe("");
+			expect(laterDiagnosticOutput).toContain("recovered markdown diagnostic");
+			expect(calls).toBe(3);
+		});
+	});
+	it("#given markdown LSP is cached unavailable #when TypeScript diagnostics run #then real diagnostics still block", async () => {
+		// given
+		const pluginData = tempPluginData();
+		const markdownInput = postToolUseInput("session-real-diagnostics", "README.md");
+		const typescriptInput = postToolUseInput("session-real-diagnostics", "src/broken.ts");
+		await withPluginData(pluginData, async () => {
+			await runLspPostToolUseHook(markdownInput, async () => MARKSMAN_INITIALIZE_TIMEOUT);
+			// when
+			const output = await runLspPostToolUseHook(
+				typescriptInput,
+				async () => "error[typescript] (2304) at 1:1: Cannot find name 'missing'.",
+			);
+			// then
+			const parsed: unknown = JSON.parse(output);
+			if (!isPostToolUseHookOutput(parsed)) throw new TypeError("Expected PostToolUse hook output");
+			expect(parsed.reason).toBe(
+				"LSP diagnostics after editing src/broken.ts:\n\n" +
+					"- error[typescript] (2304) at 1:1: Cannot find name 'missing'.",
+			);
+		});
+	});
+});
+function postToolUseInput(sessionId: string, filePath: string) {
+	return {
+		cwd: "/repo",
+		hook_event_name: "PostToolUse",
+		model: "gpt-5.5",
+		permission_mode: "default",
+		session_id: sessionId,
+		tool_input: { path: filePath },
+		tool_name: "write",
+		tool_response: { ok: true },
+		tool_use_id: "tool-use-1",
+		transcript_path: null,
+		turn_id: "turn-1",
+	};
+}
+async function withPluginData(pluginData: string, fn: () => Promise<void>): Promise<void> {
+	const previous = process.env["PLUGIN_DATA"];
+	process.env["PLUGIN_DATA"] = pluginData;
+	try {
+		await fn();
+	} finally {
+		if (previous === undefined) {
+			delete process.env["PLUGIN_DATA"];
+		} else {
+			process.env["PLUGIN_DATA"] = previous;
+		}
+	}
+}
+function tempPluginData(): string {
+	const dir = mkdtempSync(path.join(tmpdir(), "codex-lsp-unavailable-"));
+	tempDirs.push(dir);
+	return dir;
+}
+interface PostToolUseHookOutput {
+	readonly decision: "block";
+	readonly reason: string;
+	readonly hookSpecificOutput: {
+		readonly hookEventName: "PostToolUse";
+		readonly additionalContext: string;
+	};
+}
+function isPostToolUseHookOutput(value: unknown): value is PostToolUseHookOutput {
+	if (!isRecord(value)) return false;
+	const hookSpecificOutput = value["hookSpecificOutput"];
+	return (
+		value["decision"] === "block" &&
+		typeof value["reason"] === "string" &&
+		isRecord(hookSpecificOutput) &&
+		hookSpecificOutput["hookEventName"] === "PostToolUse" &&
+		typeof hookSpecificOutput["additionalContext"] === "string"
+	);
+}
+function isRecord(value: unknown): value is Record<string, unknown> {
+	return typeof value === "object" && value !== null && !Array.isArray(value);
+}

package/packages/omo-codex/plugin/components/lsp/test/package-smoke.test.ts CHANGED Viewed

@@ -61,7 +61,8 @@ describe("plugin package metadata", () => {
 		const sourceFiles = readdirSync("src");
 		// when
-		const command = hooksJson.hooks["PostToolUse"]?.[0]?.hooks[0]?.command;
+		const postToolUseCommand = hooksJson.hooks["PostToolUse"]?.[0]?.hooks[0]?.command;
+		const postCompactCommand = hooksJson.hooks["PostCompact"]?.[0]?.hooks[0]?.command;
 		const lspServer = mcpJson.mcpServers["lsp"];
 		const pluginRoot = ["$", "{PLUGIN_ROOT}"].join("");
@@ -75,8 +76,9 @@ describe("plugin package metadata", () => {
 		expect(packageJson.bin["codex-lsp"]).toBeUndefined();
 		expect(packageJson.scripts["build"]).toBe("node scripts/clean-dist.mjs && tsc -p tsconfig.build.json");
 		expect(cliSource.startsWith("#!/usr/bin/env node")).toBe(true);
-		expect(cliSource).toContain("Usage: omo-lsp [mcp | hook post-tool-use]");
-		expect(command).toBe(`node "${pluginRoot}/dist/cli.js" hook post-tool-use`);
+		expect(cliSource).toContain("Usage: omo-lsp [mcp | hook post-tool-use | hook post-compact]");
+		expect(postToolUseCommand).toBe(`node "${pluginRoot}/dist/cli.js" hook post-tool-use`);
+		expect(postCompactCommand).toBe(`node "${pluginRoot}/dist/cli.js" hook post-compact`);
 		expect(lspServer?.command).toBe("node");
 		expect(lspServer?.args).toEqual(["../../../../lsp-tools-mcp/dist/cli.js", "mcp"]);
 		expect(cliSource).not.toContain("./lazy-lsp-mcp.js");

package/packages/omo-codex/plugin/components/rules/bundled-rules/hephaestus.md CHANGED Viewed

@@ -79,13 +79,15 @@ omo-codex bundles three read-only Codex subagent roles in `CODEX_HOME/agents/`:
 **Routing:**
-- "Where is X?" / "Find code that does Y" -> `spawn_agent(agent_type="explorer", ...)`
-- "How does library Z work?" / "What's the API contract?" -> `spawn_agent(agent_type="librarian", ...)`
-- 5+ interdependent steps, ambiguous scope, multi-module work -> `spawn_agent(agent_type="plan", ...)`
-- Heavy verification of a finished change -> `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)`
+- "Where is X?" / "Find code that does Y" -> `spawn_agent(agent_type="explorer", fork_turns="none", ...)`
+- "How does library Z work?" / "What's the API contract?" -> `spawn_agent(agent_type="librarian", fork_turns="none", ...)`
+- 5+ interdependent steps, ambiguous scope, multi-module work -> `spawn_agent(agent_type="plan", fork_turns="none", ...)`
+- Heavy verification of a finished change -> `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)`
 **Don't duplicate.** Once a subagent is dispatched for a question, do not re-do the same search yourself. Once results return, do not re-verify by repeating their tool calls; integrate and move on.
+**Keep parent liveness visible.** While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates. Do this during long `wait_agent` cycles so the session does not look idle while children are still running.
 # Operating Loop
 **Explore -> Plan -> Implement -> Verify -> Manually QA.** Loops are short and tight; do not loop back with a draft when the work is yours to do.

package/packages/omo-codex/plugin/components/rules/src/codex-hook-options.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 export interface CodexRulesHookOptions {
 	env?: NodeJS.ProcessEnv;
 	pluginDataRoot?: string;
+	platform?: NodeJS.Platform;
 }

package/packages/omo-codex/plugin/components/rules/src/post-compact-budget.ts CHANGED Viewed

@@ -24,8 +24,6 @@ const MODEL_CONTEXT_BUDGETS: readonly ModelContextBudget[] = [
 	{ slug: "gpt-5.5", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
 	{ slug: "gpt-5.4", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
 	{ slug: "gpt-5.4-mini", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
-	{ slug: "gpt-5.3-codex", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
-	{ slug: "gpt-5.2", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
 	{
 		slug: "codex-auto-review",
 		contextWindowTokens: 272_000,

package/packages/omo-codex/plugin/components/rules/src/rules/finder.ts CHANGED Viewed

@@ -36,6 +36,7 @@ export interface FinderOptions {
 	skipUserHome?: boolean;
 	/** Plugin root directory. Defaults to PLUGIN_ROOT env or this package root. */
 	pluginRoot?: string;
+	platform?: NodeJS.Platform;
 	cache?: RuleDiscoveryCache;
 }
@@ -43,8 +44,11 @@ interface PluginBundledFinderOptions {
 	readonly disabledSources?: ReadonlySet<string>;
 	readonly cache?: RuleDiscoveryCache;
 	readonly pluginRoot?: string;
+	readonly platform?: NodeJS.Platform;
 }
+const WINDOWS_GIT_BASH_BUNDLED_RULE_PATH = "bundled-rules/windows-git-bash.md";
 export function findRuleCandidates(options: FinderOptions): RuleCandidate[] {
 	const skipUserHome = options.skipUserHome ?? false;
 	const disabledSources = options.disabledSources ?? new Set<string>();
@@ -61,6 +65,7 @@ export function findRuleCandidates(options: FinderOptions): RuleCandidate[] {
 		disabledSources,
 		...(options.cache === undefined ? {} : { cache: options.cache }),
 		...(options.pluginRoot === undefined ? {} : { pluginRoot: options.pluginRoot }),
+		...(options.platform === undefined ? {} : { platform: options.platform }),
 	};
 	candidates.push(...findPluginBundledCandidates(pluginBundledOptions));
@@ -78,9 +83,10 @@ export function findPluginBundledCandidates(options: PluginBundledFinderOptions
 	const pluginRoot = resolvePluginRulesRoot(options.pluginRoot);
 	const ruleDirectory = join(pluginRoot, BUNDLED_RULE_SUBDIR);
+	const platform = options.platform ?? process.platform;
 	const candidates: RuleCandidate[] = [];
 	for (const scannedFile of scanRuleFilesCached(ruleDirectory, options.cache)) {
-		candidates.push({
+		const candidate: RuleCandidate = {
 			path: scannedFile.path,
 			realPath: scannedFile.realPath,
 			source: "plugin-bundled",
@@ -88,11 +94,18 @@ export function findPluginBundledCandidates(options: PluginBundledFinderOptions
 			isGlobal: true,
 			isSingleFile: false,
 			relativePath: toRelativePath(pluginRoot, scannedFile.path),
-		});
+		};
+		if (isPluginBundledCandidateEnabled(candidate, platform)) {
+			candidates.push(candidate);
+		}
 	}
 	return candidates;
 }
+function isPluginBundledCandidateEnabled(candidate: RuleCandidate, platform: NodeJS.Platform): boolean {
+	return candidate.relativePath !== WINDOWS_GIT_BASH_BUNDLED_RULE_PATH || platform === "win32";
+}
 function findProjectCandidates(
 	projectRoot: string,
 	targetFile: string | null,

package/packages/omo-codex/plugin/components/rules/src/rules-engine-factory.ts CHANGED Viewed

@@ -7,11 +7,14 @@ import { findProjectRoot } from "./rules/project-root.js";
 interface RulesEngineFactoryOptions {
 	env?: NodeJS.ProcessEnv;
+	platform?: NodeJS.Platform;
 }
 export function createRulesEngine(options: RulesEngineFactoryOptions, config = configFromEnvironment(options.env)) {
+	const platform = options.platform ?? process.platform;
 	return createEngine(config, {
-		findCandidates: findRuleCandidates,
+		findCandidates: (finderOptions) => findRuleCandidates({ ...finderOptions, platform }),
 		findProjectRoot,
 		readFile: (path) => {
 			try {

package/packages/omo-codex/plugin/components/rules/test/windows-git-bash-bundled-rule.test.ts CHANGED Viewed

@@ -3,7 +3,7 @@ import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterEach, describe, expect, it } from "vitest";
-import { runSessionStartHook, type CodexSessionStartInput } from "../src/codex-hook.js";
+import { type CodexSessionStartInput, runSessionStartHook } from "../src/codex-hook.js";
 import { findPluginBundledCandidates } from "../src/rules/finder.js";
 const WINDOWS_RULE_DESCRIPTION = "Windows Git Bash guidance for Codex";
@@ -61,34 +61,57 @@ function occurrenceCount(value: string, search: string): number {
 describe("Windows Git Bash bundled rule", () => {
 	it("#given packaged bundled rules #when discovering plugin-bundled candidates #then Windows Git Bash rule is included", () => {
-		const candidates = findPluginBundledCandidates({ pluginRoot: process.cwd() });
+		const candidates = findPluginBundledCandidates({ pluginRoot: process.cwd(), platform: "win32" });
 		expect(candidates.map((candidate) => candidate.relativePath)).toContain(WINDOWS_RULE_PATH);
 	});
-	it("#given bundled rules enabled #when SessionStart runs #then Windows Git Bash guidance is injected once", async () => {
+	it("#given packaged bundled rules off Windows #when discovering plugin-bundled candidates #then Windows Git Bash rule is excluded", () => {
+		const candidates = findPluginBundledCandidates({ pluginRoot: process.cwd(), platform: "darwin" });
+		expect(candidates.map((candidate) => candidate.relativePath)).not.toContain(WINDOWS_RULE_PATH);
+	});
+	it("#given bundled rules enabled on Windows #when SessionStart runs #then Windows Git Bash guidance is injected once", async () => {
 		const { root, pluginData } = makeProject();
 		const output = await runSessionStartHook(sessionStartInput(root), {
 			pluginDataRoot: pluginData,
 			env: BUNDLED_ONLY_ENV,
+			platform: "win32",
 		});
 		expect(occurrenceCount(output, WINDOWS_GUIDANCE)).toBe(1);
 	});
-	it("#given project rule with same description #when static rules load #then project guidance overrides bundled guidance", async () => {
+	it("#given bundled rules enabled off Windows #when SessionStart runs #then Windows Git Bash guidance is not injected", async () => {
+		const { root, pluginData } = makeProject();
+		const output = await runSessionStartHook(sessionStartInput(root), {
+			pluginDataRoot: pluginData,
+			env: BUNDLED_ONLY_ENV,
+			platform: "darwin",
+		});
+		expect(output).not.toContain(WINDOWS_GUIDANCE);
+		expect(output).not.toContain(WINDOWS_RULE_PATH);
+	});
+	it("#given project rule with same description on Windows #when static rules load #then project guidance overrides bundled guidance", async () => {
 		const { root, pluginData } = makeProject();
 		const projectGuidance = "Project-specific Windows shell policy.";
 		mkdirSync(join(root, ".omo", "rules"), { recursive: true });
 		writeFileSync(
 			join(root, ".omo", "rules", "windows-git-bash.md"),
-			["---", `description: ${WINDOWS_RULE_DESCRIPTION}`, "alwaysApply: true", "---", "", projectGuidance].join("\n"),
+			["---", `description: ${WINDOWS_RULE_DESCRIPTION}`, "alwaysApply: true", "---", "", projectGuidance].join(
+				"\n",
+			),
 		);
 		const output = await runSessionStartHook(sessionStartInput(root), {
 			pluginDataRoot: pluginData,
 			env: PROJECT_AND_BUNDLED_ENV,
+			platform: "win32",
 		});
 		expect(output).toContain(projectGuidance);

package/packages/omo-codex/plugin/components/start-work-continuation/directive.md CHANGED Viewed

@@ -37,7 +37,7 @@ You are mid-flight on a Prometheus work plan. The turn just ended without finish
 # Stop conditions for THIS turn
 - A top-level checkbox flipped to `- [x]` after the 5-phase QA gate (Phase 1 read, Phase 2 automated, Phase 3 channel scenario, Phase 4 adversarial-class probing, Phase 5 gate decision). Then the Stop hook will re-evaluate; if more checkboxes remain you will be continued again.
-- 3 same-failure cycles on one sub-task → escalate via `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)` and stop dispatch.
+- 3 same-failure cycles on one sub-task → escalate via `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)` and stop dispatch.
 - Safety boundary (destructive command, secret exfiltration, production write) → stop and surface a safe substitute.
 - All top-level checkboxes `- [x]` AND (if gate triggered) `codex-ultrawork-reviewer` approved unconditionally → print the ORCHESTRATION COMPLETE block and end.

package/packages/omo-codex/plugin/components/ultrawork/CHANGELOG.md CHANGED Viewed

@@ -21,5 +21,5 @@
 Initial release.
 - Codex `UserPromptSubmit` hook that detects `ultrawork` / `ulw` (word-bounded, case-insensitive) in the user prompt and injects the ultrawork orchestration directive.
-- Directive enforces: goal + binding success criteria with manual-QA scenarios + evidence, durable `/tmp` notepad lifecycle, obsessive atomic todos, scenario-driven execution loop, and a GPT-5.2 xhigh verification gate with no "false positive" escape hatch.
+- Directive enforces: goal + binding success criteria with manual-QA scenarios + evidence, durable `/tmp` notepad lifecycle, obsessive atomic todos, scenario-driven execution loop, and a ChatGPT-compatible xhigh verification gate with no "false positive" escape hatch.
 - Directive size: 5,775 chars across 143 lines.

package/packages/omo-codex/plugin/components/ultrawork/README.md CHANGED Viewed

@@ -13,7 +13,7 @@ Bundled Codex agent role TOMLs in `agents/` are installed into `CODEX_HOME/agent
 | Surface + paired cleanup | Execution loop step 4 (**SURFACE-AS-SCENARIO**) runs the chosen channel scenario end-to-end. Step 5 (**CLEANUP, PAIRED**) tears down every QA-spawned process / tmux session / browser context / container / port / temp dir, with a one-line receipt appended to the notepad. Leftover state → NOT done. |
 | Durable /tmp notepad | `mktemp -t ulw-$(date +%Y%m%d-%H%M%S).XXXXXX.md` with sections `Plan`, `Success criteria + QA scenarios`, `Now`, `Todo`, `Findings`, `Learnings`. **Append**, never rewrite. |
 | Obsessive atomic todos | Every action — even one-line edits, `ls`, single test runs — becomes a todo. Format: `path: <action> for <criterion> — verify by <check>`. One in_progress at a time, mark completed immediately. |
-| GPT-5.2 xhigh verification gate | Triggered automatically on user-requested rigor, 3+ files, 20+ turns, 30+ minutes, or refactor/migration/perf/security work. Use the bundled `codex-ultrawork-reviewer` agent role when available. Reviewer verdict is **binding** — no "false positive", no minimising, no arguing. Loop until **unconditional** approval. "Looks good but…" = REJECTION. |
+| ChatGPT-compatible xhigh verification gate | Triggered automatically on user-requested rigor, 3+ files, 20+ turns, 30+ minutes, or refactor/migration/perf/security work. Use the bundled `codex-ultrawork-reviewer` agent role when available. Reviewer verdict is **binding**: no "false positive", no minimising, no arguing. Loop until **unconditional** approval. "Looks good but..." = REJECTION. |
 The directive is currently 10,951 chars / 231 lines and follows the GPT-5.5 prompting structure (Role / Goal / Manual-QA channels / Bootstrap / Execution loop / Verification gate / Commits / Constraints / Output / Stop rules).

package/packages/omo-codex/plugin/components/ultrawork/agents/codex-ultrawork-reviewer.toml CHANGED Viewed

@@ -1,12 +1,14 @@
 name = "codex-ultrawork-reviewer"
 description = "Strict ultrawork verification reviewer. Use after full QA evidence to audit the diff, goal, and scenario evidence before declaring done."
 nickname_candidates = ["Verifier"]
-model = "gpt-5.2"
+model = "gpt-5.5"
 model_reasoning_effort = "xhigh"
 developer_instructions = """You are the ultrawork verification reviewer.
 Review only. Do not implement.
+The default model intentionally uses a ChatGPT account compatible frontier model. If a caller supplies a different supported reviewer model, follow the caller's assignment while preserving this review contract.
 Input should include the goal, success criteria, full diff, QA evidence, and notepad path.
 If Codex delivers parent review context as inter-agent commentary, treat the latest parent message with goal/diff/evidence as your active review assignment, not passive context.

package/packages/omo-codex/plugin/components/ultrawork/agents/plan.toml CHANGED Viewed

@@ -1,5 +1,5 @@
 name = "plan"
-description = "Strategic planning consultant. Produces a single executable work plan from a vague or large request. Planner only - never implements. Writes the plan to plans/<slug>.md."
+description = "Strategic planning consultant. Produces a single executable work plan from a vague or large request. Planner only - never implements. Writes the plan to .omo/plans/<slug>.md."
 nickname_candidates = ["Planner"]
 model = "gpt-5.5"
 model_reasoning_effort = "xhigh"
@@ -36,7 +36,7 @@ Wait for context to converge before drafting. Rushed plans fail.
 # Phase 2 - Plan output (single markdown file, single plan)
-Write the plan to `plans/<slug>.md` in the working tree (create the `plans/` directory if absent). One plan per request - no "Phase 1 plan / Phase 2 plan" splits. 50+ tasks is fine if the work demands it.
+Write the plan to `.omo/plans/<slug>.md` in the working tree (create the `.omo/plans/` directory if absent). One plan per request - no "Phase 1 plan / Phase 2 plan" splits. 50+ tasks is fine if the work demands it.
 Use this template verbatim (fill the placeholders):
@@ -60,7 +60,7 @@ Use this template verbatim (fill the placeholders):
 > Zero human intervention - all verification is agent-executed.
 - Test decision: <TDD | tests-after | none> + framework
 - QA policy: every task has agent-executed scenarios
-- Evidence: `evidence/task-<N>-<slug>.<ext>`
+- Evidence: `.omo/evidence/task-<N>-<slug>.<ext>`
 ## Execution strategy
 ### Parallel execution waves
@@ -114,13 +114,13 @@ Critical path: Task 1 -> Task 2 -> Task 6
     Tool:     <bash | curl | tmux | playwright(real Chrome) | agent-browser | computer-use>
     Steps:    <exact command / API call / page action with concrete inputs - URL, payload, keystrokes, selectors>
     Expected: <concrete, binary pass/fail observable>
-    Evidence: evidence/task-<N>-<slug>.<ext>
+    Evidence: .omo/evidence/task-<N>-<slug>.<ext>
   Scenario: <failure / edge case>
     Tool:     <same, with exact invocation>
     Steps:    <trigger the error with specific inputs>
     Expected: <graceful failure with the exact error message/code>
-    Evidence: evidence/task-<N>-<slug>-error.<ext>
+    Evidence: .omo/evidence/task-<N>-<slug>-error.<ext>
   ```
   Commit: <YES|NO> | Message: `<type>(<scope>): <imperative summary>` | Files: [<paths>]
@@ -136,14 +136,14 @@ Critical path: Task 1 -> Task 2 -> Task 6
 - One logical change per commit. Conventional Commits (`<type>(<scope>): <subject>` body + footer).
 - Atomic: every commit builds and passes tests on its own.
 - No "WIP" / "fix typo squash later" commits on the final branch - clean up before merge.
-- Reference the plan file path in the final commit footer: `Plan: plans/<slug>.md`.
+- Reference the plan file path in the final commit footer: `Plan: .omo/plans/<slug>.md`.
 ## Success criteria
 - All Must-Have shipped; all QA scenarios pass with captured evidence; F1-F4 approved; commit history clean.
 ```
 # Constraints
-- READ + plan-file write only. Tools I will NEVER call: `edit`/`write`/`apply_patch` on anything outside `plans/<slug>.md`, anything that mutates non-plan files.
+- READ + plan-file write only. Tools I will NEVER call: `edit`/`write`/`apply_patch` on anything outside `.omo/plans/<slug>.md`, anything that mutates non-plan files.
 - DO NOT split work into multiple plans. ONE plan per request.
 - DO NOT skip context gathering. NEVER plan blind.
 - DO NOT include "user manually tests" as an acceptance criterion. Every check must be agent-executable.

package/packages/omo-codex/plugin/components/ultrawork/directive.md CHANGED Viewed

@@ -241,7 +241,7 @@ Atomic, Conventional Commits (`<type>(<scope>): <imperative>` — feat /
 fix / refactor / test / docs / chore / build / ci / perf). One logical
 change per commit; each commit builds + tests green on its own. No WIP
 on the final branch. If a plan file exists, final commit footer:
-`Plan: plans/<slug>.md`. Do NOT auto-`git commit` unless the user
+`Plan: .omo/plans/<slug>.md`. Do NOT auto-`git commit` unless the user
 requested or preauthorised this session — default is stage + draft
 message + present for approval.

package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/SKILL.md CHANGED Viewed

@@ -25,6 +25,7 @@ This Codex skill is intentionally compact to avoid adding a large operating manu
 - Delegate code edits, test writes, fixes, and QA execution to right-sized Codex subagents when the workflow requires it.
 - Every `spawn_agent` message starts with `TASK:`, then names `DELIVERABLE`, `SCOPE`, and `VERIFY`; role selection requires `agent_type`, while `model` + `reasoning_effort` alone creates a default agent, not a reviewer or worker; prefer `fork_turns: "none"` unless full history is truly required.
 - Plan and reviewer agents may run for a long time; spawn them in the background, keep doing independent root work, and poll with short wait_agent cycles. Never use a single long blocking wait for them.
+- While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates.
 - Avoid `list_agents` as a polling or status tool in large runs; it can replay large agent status and latest-message payloads. Track spawned agent names locally, use `wait_agent` for completion signals, targeted followups only when needed, and `close_agent` after integrating each result.
 - Treat `wait_agent` as a mailbox signal, not proof of completion, content, or errors. After two waits with no substantive result, send one targeted followup, then record inconclusive and respawn a smaller `fork_turns: "none"` task if the child stays silent or ack-only.
@@ -34,10 +35,10 @@ The full workflow may mention OpenCode-style orchestration examples. In Codex, t
 | Workflow intent | Codex tool |
 | --- | --- |
-| Plan agent | `spawn_agent(agent_type="plan", ...)` |
-| Search/read-only worker | `spawn_agent(agent_type="explorer", ...)` |
-| Implementation or QA worker | `spawn_agent(agent_type="worker", ...)` |
-| Final verification reviewer | `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)` |
+| Plan agent | `spawn_agent(agent_type="plan", fork_turns="none", ...)` |
+| Search/read-only worker | `spawn_agent(agent_type="explorer", fork_turns="none", ...)` |
+| Implementation or QA worker | `spawn_agent(agent_type="worker", fork_turns="none", ...)` |
+| Final verification reviewer | `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)` |
 | Wait for background result | `wait_agent(...)` |
 | Clean up finished worker | `close_agent(...)` |

package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/references/full-workflow.md CHANGED Viewed

@@ -33,9 +33,9 @@ Size each worker to the task — never spend `xhigh` on a one-liner, never send
 | Task shape | agent_type | model | reasoning_effort |
 |---|---|---|---|
 | Trivial / mechanical (rename, move, obvious one-liner, config edit) | `worker` | `gpt-5.4-mini` | `low` |
-| Pure implementation against a clear spec (new function, endpoint, test from a named pattern) | `worker` | `gpt-5.3-codex` | `high` |
+| Pure implementation against a clear spec (new function, endpoint, test from a named pattern) | `worker` | `gpt-5.4` | `high` |
 | Deep debugging / race / perf / subtle cross-module reasoning | `worker` | `gpt-5.5` | `xhigh` |
-| QA execution (drive a channel, capture evidence) | `worker` | `gpt-5.3-codex` | `high` |
+| QA execution (drive a channel, capture evidence) | `worker` | `gpt-5.4` | `high` |
 | Read-only codebase search | `explorer` | role default | role default |
 | External library / docs research | `librarian` | role default | role default |
 | Final verification audit | `codex-ultrawork-reviewer` | role default | role default |
@@ -48,6 +48,7 @@ Codex subagent reliability:
 - Start every `spawn_agent` message with `TASK: <imperative assignment>`, then name `DELIVERABLE`, `SCOPE`, and `VERIFY`. State that it is an executable assignment, not a context handoff.
 - Prefer `fork_turns: "none"` unless full history is truly required; paste only the context the child needs. Full-history forks can make the child continue old parent context instead of the delegated task.
 - Plan and reviewer agents may run for a long time; spawn them in the background, keep doing independent root work, and poll with short wait_agent cycles. Never use a single long blocking wait for them.
+- While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates.
 - Do not use `list_agents` as a polling or status tool in long or high-context runs; it can replay large agent status and latest-message payloads. Track spawned agent names locally, use `wait_agent` for completion signals, targeted followups only when needed, and `close_agent` after integrating each result.
 - Treat `wait_agent` as a mailbox signal, not proof of completion, content, or errors. After two waits with no substantive result, send one targeted followup: `TASK STILL ACTIVE: return <deliverable> or BLOCKED: <reason>`. If still silent or ack-only, record inconclusive, do not count it as pass/review approval, close if safe, and respawn a smaller `fork_turns: "none"` task with the missing deliverable.
@@ -147,7 +148,7 @@ Loop per goal. Cap at 5 cycles per goal. Cap identical same-criterion failures a
 2. Register atomic todos: `path: <action> for <criterion> - verify by <check>`.
 3. DELEGATE-IN-PARALLEL: dispatch every independent task in the wave at once via right-sized `spawn_agent` workers (Delegation table). Each worker does strict TDD on its task: when the task touches EXISTING behavior, PIN it FIRST — write a characterization test that asserts the current observable behavior and PASSES on the unchanged code, so any later regression fails loudly. Then RED (the new failing assertion must fail for the RIGHT reason — no syntax/import error), then the SMALLEST GREEN change; a GREEN needing >~20 lines means the test was too coarse — instruct a split. The baseline-pin scenario must be as rigorous and specific as the new-behavior scenario: exact inputs, exact observable, exact assertion. Serialize only on a NAMED dependency.
 4. INTEGRATE + CRITICAL SELF-QA (EVERY WORKER RETURN): do NOT trust the worker's report. Read the diff yourself, re-run its tests, and run LSP diagnostics on the changed files. Treat "done" as a claim to disprove. If the diff drifts, the test is hollow, or evidence is missing, RESPAWN the worker with the specific failure context. Forward every finding/learning to subsequent workers.
-5. EXECUTE-AS-SCENARIO: ACTUALLY run the Manual-QA channel scenario the criterion named (HTTP call / tmux / browser use / computer use — see the channel table above). Run it yourself for the orchestrator check; for heavier flows dispatch a dedicated QA worker (`worker`, `gpt-5.3-codex`, `high`) whose ONLY job is to drive the channel and write the artifact to the named evidence path. The unit suite being green is NEVER substitute. If the scenario FAILS, respawn the implementing worker with the captured failure — do not hand-patch around it.
+5. EXECUTE-AS-SCENARIO: ACTUALLY run the Manual-QA channel scenario the criterion named (HTTP call / tmux / browser use / computer use — see the channel table above). Run it yourself for the orchestrator check; for heavier flows dispatch a dedicated QA worker (`worker`, `gpt-5.4`, `high`) whose ONLY job is to drive the channel and write the artifact to the named evidence path. The unit suite being green is NEVER substitute. If the scenario FAILS, respawn the implementing worker with the captured failure — do not hand-patch around it.
 6. CAPTURE: collect the observable artifact path: transcript, stdout, screenshot, assertion, status+body, diff, or parsed dump. No artifact written at the evidence path — not done; record BLOCKED and respawn QA.
 7. CLEAN (PAIRED, NEVER SKIP): tear down every runtime artifact step 5 spawned BEFORE recording — server PIDs (`kill`, verify `kill -0` fails), `tmux` sessions (`tmux kill-session -t ulw-qa-<criterion>`; confirm `tmux ls`), browser / Playwright contexts (`.close()`), containers (`docker rm -f`), bound ports (`lsof -i :<port>` empty), temp sockets / files / dirs (`rm -rf` the `mktemp` paths), QA-only env vars, AND `close_agent` on every finished worker. Register each teardown as its own todo the moment the QA spawns the resource (scripts, tmux assets, browsers / agent-browser sessions, PIDs, ports) so none is forgotten. Embed a one-line cleanup receipt in the evidence string, e.g. `cleanup: killed 12345; tmux kill-session ulw-qa-foo; rm -rf /tmp/ulw.aB12cD; close_agent w-3`. Missing receipt → record BLOCKED, not PASS.
 8. RECORD exactly one result:

package/packages/omo-codex/plugin/components/ulw-loop/src/checkpoint.ts CHANGED Viewed

@@ -54,6 +54,14 @@ async function canReconcileCompletedTaskScopedAggregateSnapshot(repoRoot: string
 	return snapshotObjectiveMapsToUlwLoopPlan(repoRoot, snapshotObjective, scope);
 }
+async function canReconcileActiveFinalTaskScopedAggregateSnapshot(repoRoot: string, plan: UlwLoopPlan, goal: UlwLoopItem, snapshotObjective: string, evidence: string, scope?: UlwLoopScope): Promise<boolean> { // Resolves true only when every guard below passes and the snapshot objective maps to the plan; checkpointUlwLoop calls it when the snapshot status is "active" and its objective differs from the expected one.
+	if (codexGoalMode(plan) !== "aggregate") return false; // Only plans whose codex goal mode is "aggregate" qualify.
+	if (goal.status !== "in_progress" || plan.activeGoalId !== goal.id) return false; // The goal must be in progress AND be the plan's currently active goal.
+	if (!isFinalRunCompletionCandidate(plan, goal)) return false; // Reject unless the helper reports this goal as a final-run completion candidate (helper semantics not visible here).
+	if (!textHasCompletionValidationEvidence(evidence)) return false; // The supplied evidence text must pass the completion-validation check.
+	return snapshotObjectiveMapsToUlwLoopPlan(repoRoot, snapshotObjective, scope); // Final decision is delegated: does the snapshot's objective map to the plan for this repo/scope?
+}
 function buildCompletedLegacyGoalRemediation(goal: UlwLoopItem): string {
 	return [
 		"If get_goal returns a different completed legacy/thread objective, do not repeat --status complete in this thread.",
@@ -130,7 +138,10 @@ export async function checkpointUlwLoop(repoRoot: string, args: CheckpointUlwLoo
 			codexGoal = reconciliation.snapshot.raw;
 			if (!reconciliation.ok) {
 				const objective = snapshot?.objective;
-				const taskScoped = snapshot?.available === true && snapshot.status === "complete" && objective !== undefined && normalizeObjective(objective) !== normalizeObjective(expectedCodexObjective(plan, goal)) && await canReconcileCompletedTaskScopedAggregateSnapshot(repoRoot, plan, goal, objective, evidence, scope);
+				const mismatchedTaskObjective = snapshot?.available === true && objective !== undefined && normalizeObjective(objective) !== normalizeObjective(expectedCodexObjective(plan, goal));
+				const completedTaskScoped = mismatchedTaskObjective && snapshot.status === "complete" && await canReconcileCompletedTaskScopedAggregateSnapshot(repoRoot, plan, goal, objective, evidence, scope);
+				const activeFinalTaskScoped = mismatchedTaskObjective && snapshot.status === "active" && await canReconcileActiveFinalTaskScopedAggregateSnapshot(repoRoot, plan, goal, objective, evidence, scope);
+				const taskScoped = completedTaskScoped || activeFinalTaskScoped;
 				if (!taskScoped) throw new UlwLoopError(`${formatCodexGoalReconciliation(reconciliation)}${aggregate && snapshot?.status === "complete" && objective !== undefined ? buildTaskScopedAggregateReconciliationHint(goal, final) : ""}`, "ulw_loop_codex_snapshot_mismatch");
 				aggregateCompletion = makeAggregateCompletion(now, evidence, codexGoal);
 			}