npm - oh-my-opencode - Versions diffs - 4.6.0 → 4.7.0 - Mend

oh-my-opencode 4.6.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75)

package/dist/shared/command-executor/execute-hook-command.d.ts CHANGED Viewed

@@ -8,6 +8,8 @@ export interface ExecuteHookOptions {
     zshPath?: string;
     /** Timeout in milliseconds. Process is killed after this. Default: 30000 */
     timeoutMs?: number;
+    /** Grace period before force-killing and resolving timed-out commands. Default: 5000 */
+    killGraceMs?: number;
     /** When provided, scrub process.env to only include these vars plus HOME/PATH/etc. Used for plugin-sourced hooks. */
     allowedEnvVars?: string[];
 }

package/dist/tools/skill/description-formatter.d.ts CHANGED Viewed

@@ -1,3 +1,7 @@
 import type { SkillInfo } from "./types";
 import type { CommandInfo } from "../slashcommand/types";
-export declare function formatCombinedDescription(skills?: SkillInfo[], commands?: CommandInfo[]): string;
+interface CombinedDescriptionOptions {
+    includeSkills?: boolean;
+}
+export declare function formatCombinedDescription(skills?: SkillInfo[], commands?: CommandInfo[], options?: CombinedDescriptionOptions): string;
+export {};

package/dist/tools/skill/types.d.ts CHANGED Viewed

@@ -66,4 +66,5 @@ export interface SkillLoadOptions {
         } | undefined>;
         dirs(): string[] | Promise<string[]>;
     };
+    includeSkillsInDescription?: boolean;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "oh-my-opencode",
-  "version": "4.6.0",
+  "version": "4.7.0",
   "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
   "main": "./dist/index.js",
   "types": "dist/index.d.ts",
@@ -106,7 +106,6 @@
     "commander": "^14.0.3",
     "detect-libc": "^2.1.2",
     "diff": "^9.0.0",
-    "effect": "4.0.0-beta.65",
     "js-yaml": "^4.1.1",
     "jsonc-parser": "^3.3.1",
     "picocolors": "^1.1.1",
@@ -136,17 +135,17 @@
     "zod": "^4.4.3"
   },
   "optionalDependencies": {
-    "oh-my-opencode-darwin-arm64": "4.6.0",
-    "oh-my-opencode-darwin-x64": "4.6.0",
-    "oh-my-opencode-darwin-x64-baseline": "4.6.0",
-    "oh-my-opencode-linux-arm64": "4.6.0",
-    "oh-my-opencode-linux-arm64-musl": "4.6.0",
-    "oh-my-opencode-linux-x64": "4.6.0",
-    "oh-my-opencode-linux-x64-baseline": "4.6.0",
-    "oh-my-opencode-linux-x64-musl": "4.6.0",
-    "oh-my-opencode-linux-x64-musl-baseline": "4.6.0",
-    "oh-my-opencode-windows-x64": "4.6.0",
-    "oh-my-opencode-windows-x64-baseline": "4.6.0"
+    "oh-my-opencode-darwin-arm64": "4.7.0",
+    "oh-my-opencode-darwin-x64": "4.7.0",
+    "oh-my-opencode-darwin-x64-baseline": "4.7.0",
+    "oh-my-opencode-linux-arm64": "4.7.0",
+    "oh-my-opencode-linux-arm64-musl": "4.7.0",
+    "oh-my-opencode-linux-x64": "4.7.0",
+    "oh-my-opencode-linux-x64-baseline": "4.7.0",
+    "oh-my-opencode-linux-x64-musl": "4.7.0",
+    "oh-my-opencode-linux-x64-musl-baseline": "4.7.0",
+    "oh-my-opencode-windows-x64": "4.7.0",
+    "oh-my-opencode-windows-x64-baseline": "4.7.0"
   },
   "overrides": {
     "hono": "^4.12.18",

package/packages/ast-grep-mcp/dist/cli.js CHANGED Viewed

@@ -342,13 +342,44 @@ function errorMessage(error) {
 import { createRequire } from "module";
 import { dirname, join as join2 } from "path";
 import { existsSync, statSync as statSync2 } from "fs";
+var WINDOWS_EXECUTABLE_EXTENSIONS = [".exe", ".cmd", ".bat"];
 function isValidBinary(filePath) {
   try {
-    return statSync2(filePath).size > 1e4;
+    const stats = statSync2(filePath);
+    if (!stats.isFile()) {
+      return false;
+    }
+    const size = stats.size;
+    const lowerPath = filePath.toLowerCase();
+    if (lowerPath.endsWith(".cmd") || lowerPath.endsWith(".bat")) {
+      return size > 0;
+    }
+    return size > 1e4;
   } catch {
     return false;
   }
 }
+function executableCandidates(filePath, platform = process.platform) {
+  if (platform !== "win32")
+    return [filePath];
+  const candidates = [filePath];
+  const lowerPath = filePath.toLowerCase();
+  if (WINDOWS_EXECUTABLE_EXTENSIONS.some((extension) => lowerPath.endsWith(extension))) {
+    return candidates;
+  }
+  for (const extension of WINDOWS_EXECUTABLE_EXTENSIONS) {
+    candidates.push(`${filePath}${extension}`);
+  }
+  return candidates;
+}
+function findValidExecutable(filePath) {
+  for (const candidate of executableCandidates(filePath)) {
+    if (existsSync(candidate) && isValidBinary(candidate)) {
+      return candidate;
+    }
+  }
+  return null;
+}
 function getPlatformPackageName() {
   const platform = process.platform;
   const arch = process.arch;
@@ -363,29 +394,42 @@ function getPlatformPackageName() {
   };
   return platformMap[`${platform}-${arch}`] ?? null;
 }
+function isModuleResolutionFailure(error) {
+  return error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("Cannot find package"));
+}
 function findSgCliPathSync() {
-  const binaryName = process.platform === "win32" ? "sg.exe" : "sg";
+  const binaryName = "sg";
   try {
     const require2 = createRequire(import.meta.url);
     const cliPackageJsonPath = require2.resolve("@ast-grep/cli/package.json");
     const cliDirectory = dirname(cliPackageJsonPath);
     const sgPath = join2(cliDirectory, binaryName);
-    if (existsSync(sgPath) && isValidBinary(sgPath)) {
-      return sgPath;
+    const validSgPath = findValidExecutable(sgPath);
+    if (validSgPath) {
+      return validSgPath;
+    }
+  } catch (error) {
+    if (!isModuleResolutionFailure(error)) {
+      throw error;
     }
-  } catch {}
+  }
   const platformPackage = getPlatformPackageName();
   if (platformPackage) {
     try {
       const require2 = createRequire(import.meta.url);
       const packageJsonPath = require2.resolve(`${platformPackage}/package.json`);
       const packageDirectory = dirname(packageJsonPath);
-      const astGrepBinaryName = process.platform === "win32" ? "ast-grep.exe" : "ast-grep";
+      const astGrepBinaryName = "ast-grep";
       const binaryPath = join2(packageDirectory, astGrepBinaryName);
-      if (existsSync(binaryPath) && isValidBinary(binaryPath)) {
-        return binaryPath;
+      const validBinaryPath = findValidExecutable(binaryPath);
+      if (validBinaryPath) {
+        return validBinaryPath;
+      }
+    } catch (error) {
+      if (!isModuleResolutionFailure(error)) {
+        throw error;
       }
-    } catch {}
+    }
   }
   if (process.platform === "darwin") {
     const homebrewPaths = ["/opt/homebrew/bin/sg", "/usr/local/bin/sg"];

package/packages/lsp-tools-mcp/dist/lsp/process.js CHANGED Viewed

@@ -97,7 +97,7 @@ function getWindowsPathExtensions(env) {
         .map((extension) => extension.trim())
         .filter(Boolean)
         .map((extension) => (extension.startsWith(".") ? extension : `.${extension}`));
-    return [...new Set(["", ...extensions, ".exe", ".cmd", ".bat"])];
+    return [...new Set([...extensions, ".exe", ".cmd", ".bat", ""])];
 }
 function resolveWindowsCommand(command, env) {
     const hasPathSeparator = command.includes("/") || command.includes("\\");

package/packages/omo-codex/plugin/components/rules/bundled-rules/hephaestus.md CHANGED Viewed

@@ -79,13 +79,15 @@ omo-codex bundles three read-only Codex subagent roles in `CODEX_HOME/agents/`:
 **Routing:**
-- "Where is X?" / "Find code that does Y" -> `spawn_agent(agent_type="explorer", ...)`
-- "How does library Z work?" / "What's the API contract?" -> `spawn_agent(agent_type="librarian", ...)`
-- 5+ interdependent steps, ambiguous scope, multi-module work -> `spawn_agent(agent_type="plan", ...)`
-- Heavy verification of a finished change -> `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)`
+- "Where is X?" / "Find code that does Y" -> `spawn_agent(agent_type="explorer", fork_turns="none", ...)`
+- "How does library Z work?" / "What's the API contract?" -> `spawn_agent(agent_type="librarian", fork_turns="none", ...)`
+- 5+ interdependent steps, ambiguous scope, multi-module work -> `spawn_agent(agent_type="plan", fork_turns="none", ...)`
+- Heavy verification of a finished change -> `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)`
 **Don't duplicate.** Once a subagent is dispatched for a question, do not re-do the same search yourself. Once results return, do not re-verify by repeating their tool calls; integrate and move on.
+**Keep parent liveness visible.** While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates. Do this during long `wait_agent` cycles so the session does not look idle while children are still running.
 # Operating Loop
 **Explore -> Plan -> Implement -> Verify -> Manually QA.** Loops are short and tight; do not loop back with a draft when the work is yours to do.

package/packages/omo-codex/plugin/components/rules/src/post-compact-budget.ts CHANGED Viewed

@@ -24,8 +24,6 @@ const MODEL_CONTEXT_BUDGETS: readonly ModelContextBudget[] = [
 	{ slug: "gpt-5.5", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
 	{ slug: "gpt-5.4", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
 	{ slug: "gpt-5.4-mini", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
-	{ slug: "gpt-5.3-codex", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
-	{ slug: "gpt-5.2", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
 	{
 		slug: "codex-auto-review",
 		contextWindowTokens: 272_000,

package/packages/omo-codex/plugin/components/start-work-continuation/directive.md CHANGED Viewed

@@ -37,7 +37,7 @@ You are mid-flight on a Prometheus work plan. The turn just ended without finish
 # Stop conditions for THIS turn
 - A top-level checkbox flipped to `- [x]` after the 5-phase QA gate (Phase 1 read, Phase 2 automated, Phase 3 channel scenario, Phase 4 adversarial-class probing, Phase 5 gate decision). Then the Stop hook will re-evaluate; if more checkboxes remain you will be continued again.
-- 3 same-failure cycles on one sub-task → escalate via `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)` and stop dispatch.
+- 3 same-failure cycles on one sub-task → escalate via `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)` and stop dispatch.
 - Safety boundary (destructive command, secret exfiltration, production write) → stop and surface a safe substitute.
 - All top-level checkboxes `- [x]` AND (if gate triggered) `codex-ultrawork-reviewer` approved unconditionally → print the ORCHESTRATION COMPLETE block and end.

package/packages/omo-codex/plugin/components/ultrawork/CHANGELOG.md CHANGED Viewed

@@ -21,5 +21,5 @@
 Initial release.
 - Codex `UserPromptSubmit` hook that detects `ultrawork` / `ulw` (word-bounded, case-insensitive) in the user prompt and injects the ultrawork orchestration directive.
-- Directive enforces: goal + binding success criteria with manual-QA scenarios + evidence, durable `/tmp` notepad lifecycle, obsessive atomic todos, scenario-driven execution loop, and a GPT-5.2 xhigh verification gate with no "false positive" escape hatch.
+- Directive enforces: goal + binding success criteria with manual-QA scenarios + evidence, durable `/tmp` notepad lifecycle, obsessive atomic todos, scenario-driven execution loop, and a ChatGPT-compatible xhigh verification gate with no "false positive" escape hatch.
 - Directive size: 5,775 chars across 143 lines.

package/packages/omo-codex/plugin/components/ultrawork/README.md CHANGED Viewed

@@ -13,7 +13,7 @@ Bundled Codex agent role TOMLs in `agents/` are installed into `CODEX_HOME/agent
 | Surface + paired cleanup | Execution loop step 4 (**SURFACE-AS-SCENARIO**) runs the chosen channel scenario end-to-end. Step 5 (**CLEANUP, PAIRED**) tears down every QA-spawned process / tmux session / browser context / container / port / temp dir, with a one-line receipt appended to the notepad. Leftover state → NOT done. |
 | Durable /tmp notepad | `mktemp -t ulw-$(date +%Y%m%d-%H%M%S).XXXXXX.md` with sections `Plan`, `Success criteria + QA scenarios`, `Now`, `Todo`, `Findings`, `Learnings`. **Append**, never rewrite. |
 | Obsessive atomic todos | Every action — even one-line edits, `ls`, single test runs — becomes a todo. Format: `path: <action> for <criterion> — verify by <check>`. One in_progress at a time, mark completed immediately. |
-| GPT-5.2 xhigh verification gate | Triggered automatically on user-requested rigor, 3+ files, 20+ turns, 30+ minutes, or refactor/migration/perf/security work. Use the bundled `codex-ultrawork-reviewer` agent role when available. Reviewer verdict is **binding** — no "false positive", no minimising, no arguing. Loop until **unconditional** approval. "Looks good but…" = REJECTION. |
+| ChatGPT-compatible xhigh verification gate | Triggered automatically on user-requested rigor, 3+ files, 20+ turns, 30+ minutes, or refactor/migration/perf/security work. Use the bundled `codex-ultrawork-reviewer` agent role when available. Reviewer verdict is **binding**: no "false positive", no minimising, no arguing. Loop until **unconditional** approval. "Looks good but..." = REJECTION. |
 The directive is currently 10,951 chars / 231 lines and follows the GPT-5.5 prompting structure (Role / Goal / Manual-QA channels / Bootstrap / Execution loop / Verification gate / Commits / Constraints / Output / Stop rules).

package/packages/omo-codex/plugin/components/ultrawork/agents/codex-ultrawork-reviewer.toml CHANGED Viewed

@@ -1,12 +1,14 @@
 name = "codex-ultrawork-reviewer"
 description = "Strict ultrawork verification reviewer. Use after full QA evidence to audit the diff, goal, and scenario evidence before declaring done."
 nickname_candidates = ["Verifier"]
-model = "gpt-5.2"
+model = "gpt-5.5"
 model_reasoning_effort = "xhigh"
 developer_instructions = """You are the ultrawork verification reviewer.
 Review only. Do not implement.
+The default model intentionally uses a ChatGPT account compatible frontier model. If a caller supplies a different supported reviewer model, follow the caller's assignment while preserving this review contract.
 Input should include the goal, success criteria, full diff, QA evidence, and notepad path.
 If Codex delivers parent review context as inter-agent commentary, treat the latest parent message with goal/diff/evidence as your active review assignment, not passive context.

package/packages/omo-codex/plugin/components/ultrawork/agents/plan.toml CHANGED Viewed

@@ -1,5 +1,5 @@
 name = "plan"
-description = "Strategic planning consultant. Produces a single executable work plan from a vague or large request. Planner only - never implements. Writes the plan to plans/<slug>.md."
+description = "Strategic planning consultant. Produces a single executable work plan from a vague or large request. Planner only - never implements. Writes the plan to .omo/plans/<slug>.md."
 nickname_candidates = ["Planner"]
 model = "gpt-5.5"
 model_reasoning_effort = "xhigh"
@@ -36,7 +36,7 @@ Wait for context to converge before drafting. Rushed plans fail.
 # Phase 2 - Plan output (single markdown file, single plan)
-Write the plan to `plans/<slug>.md` in the working tree (create the `plans/` directory if absent). One plan per request - no "Phase 1 plan / Phase 2 plan" splits. 50+ tasks is fine if the work demands it.
+Write the plan to `.omo/plans/<slug>.md` in the working tree (create the `.omo/plans/` directory if absent). One plan per request - no "Phase 1 plan / Phase 2 plan" splits. 50+ tasks is fine if the work demands it.
 Use this template verbatim (fill the placeholders):
@@ -60,7 +60,7 @@ Use this template verbatim (fill the placeholders):
 > Zero human intervention - all verification is agent-executed.
 - Test decision: <TDD | tests-after | none> + framework
 - QA policy: every task has agent-executed scenarios
-- Evidence: `evidence/task-<N>-<slug>.<ext>`
+- Evidence: `.omo/evidence/task-<N>-<slug>.<ext>`
 ## Execution strategy
 ### Parallel execution waves
@@ -114,13 +114,13 @@ Critical path: Task 1 -> Task 2 -> Task 6
     Tool:     <bash | curl | tmux | playwright(real Chrome) | agent-browser | computer-use>
     Steps:    <exact command / API call / page action with concrete inputs - URL, payload, keystrokes, selectors>
     Expected: <concrete, binary pass/fail observable>
-    Evidence: evidence/task-<N>-<slug>.<ext>
+    Evidence: .omo/evidence/task-<N>-<slug>.<ext>
   Scenario: <failure / edge case>
     Tool:     <same, with exact invocation>
     Steps:    <trigger the error with specific inputs>
     Expected: <graceful failure with the exact error message/code>
-    Evidence: evidence/task-<N>-<slug>-error.<ext>
+    Evidence: .omo/evidence/task-<N>-<slug>-error.<ext>
   ```
   Commit: <YES|NO> | Message: `<type>(<scope>): <imperative summary>` | Files: [<paths>]
@@ -136,14 +136,14 @@ Critical path: Task 1 -> Task 2 -> Task 6
 - One logical change per commit. Conventional Commits (`<type>(<scope>): <subject>` body + footer).
 - Atomic: every commit builds and passes tests on its own.
 - No "WIP" / "fix typo squash later" commits on the final branch - clean up before merge.
-- Reference the plan file path in the final commit footer: `Plan: plans/<slug>.md`.
+- Reference the plan file path in the final commit footer: `Plan: .omo/plans/<slug>.md`.
 ## Success criteria
 - All Must-Have shipped; all QA scenarios pass with captured evidence; F1-F4 approved; commit history clean.
 ```
 # Constraints
-- READ + plan-file write only. Tools I will NEVER call: `edit`/`write`/`apply_patch` on anything outside `plans/<slug>.md`, anything that mutates non-plan files.
+- READ + plan-file write only. Tools I will NEVER call: `edit`/`write`/`apply_patch` on anything outside `.omo/plans/<slug>.md`, anything that mutates non-plan files.
 - DO NOT split work into multiple plans. ONE plan per request.
 - DO NOT skip context gathering. NEVER plan blind.
 - DO NOT include "user manually tests" as an acceptance criterion. Every check must be agent-executable.

package/packages/omo-codex/plugin/components/ultrawork/directive.md CHANGED Viewed

@@ -241,7 +241,7 @@ Atomic, Conventional Commits (`<type>(<scope>): <imperative>` — feat /
 fix / refactor / test / docs / chore / build / ci / perf). One logical
 change per commit; each commit builds + tests green on its own. No WIP
 on the final branch. If a plan file exists, final commit footer:
-`Plan: plans/<slug>.md`. Do NOT auto-`git commit` unless the user
+`Plan: .omo/plans/<slug>.md`. Do NOT auto-`git commit` unless the user
 requested or preauthorised this session — default is stage + draft
 message + present for approval.

package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/SKILL.md CHANGED Viewed

@@ -25,6 +25,7 @@ This Codex skill is intentionally compact to avoid adding a large operating manu
 - Delegate code edits, test writes, fixes, and QA execution to right-sized Codex subagents when the workflow requires it.
 - Every `spawn_agent` message starts with `TASK:`, then names `DELIVERABLE`, `SCOPE`, and `VERIFY`; role selection requires `agent_type`, while `model` + `reasoning_effort` alone creates a default agent, not a reviewer or worker; prefer `fork_turns: "none"` unless full history is truly required.
 - Plan and reviewer agents may run for a long time; spawn them in the background, keep doing independent root work, and poll with short wait_agent cycles. Never use a single long blocking wait for them.
+- While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates.
 - Avoid `list_agents` as a polling or status tool in large runs; it can replay large agent status and latest-message payloads. Track spawned agent names locally, use `wait_agent` for completion signals, targeted followups only when needed, and `close_agent` after integrating each result.
 - Treat `wait_agent` as a mailbox signal, not proof of completion, content, or errors. After two waits with no substantive result, send one targeted followup, then record inconclusive and respawn a smaller `fork_turns: "none"` task if the child stays silent or ack-only.
@@ -34,10 +35,10 @@ The full workflow may mention OpenCode-style orchestration examples. In Codex, t
 | Workflow intent | Codex tool |
 | --- | --- |
-| Plan agent | `spawn_agent(agent_type="plan", ...)` |
-| Search/read-only worker | `spawn_agent(agent_type="explorer", ...)` |
-| Implementation or QA worker | `spawn_agent(agent_type="worker", ...)` |
-| Final verification reviewer | `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)` |
+| Plan agent | `spawn_agent(agent_type="plan", fork_turns="none", ...)` |
+| Search/read-only worker | `spawn_agent(agent_type="explorer", fork_turns="none", ...)` |
+| Implementation or QA worker | `spawn_agent(agent_type="worker", fork_turns="none", ...)` |
+| Final verification reviewer | `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)` |
 | Wait for background result | `wait_agent(...)` |
 | Clean up finished worker | `close_agent(...)` |

package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/references/full-workflow.md CHANGED Viewed

@@ -33,9 +33,9 @@ Size each worker to the task — never spend `xhigh` on a one-liner, never send
 | Task shape | agent_type | model | reasoning_effort |
 |---|---|---|---|
 | Trivial / mechanical (rename, move, obvious one-liner, config edit) | `worker` | `gpt-5.4-mini` | `low` |
-| Pure implementation against a clear spec (new function, endpoint, test from a named pattern) | `worker` | `gpt-5.3-codex` | `high` |
+| Pure implementation against a clear spec (new function, endpoint, test from a named pattern) | `worker` | `gpt-5.4` | `high` |
 | Deep debugging / race / perf / subtle cross-module reasoning | `worker` | `gpt-5.5` | `xhigh` |
-| QA execution (drive a channel, capture evidence) | `worker` | `gpt-5.3-codex` | `high` |
+| QA execution (drive a channel, capture evidence) | `worker` | `gpt-5.4` | `high` |
 | Read-only codebase search | `explorer` | role default | role default |
 | External library / docs research | `librarian` | role default | role default |
 | Final verification audit | `codex-ultrawork-reviewer` | role default | role default |
@@ -48,6 +48,7 @@ Codex subagent reliability:
 - Start every `spawn_agent` message with `TASK: <imperative assignment>`, then name `DELIVERABLE`, `SCOPE`, and `VERIFY`. State that it is an executable assignment, not a context handoff.
 - Prefer `fork_turns: "none"` unless full history is truly required; paste only the context the child needs. Full-history forks can make the child continue old parent context instead of the delegated task.
 - Plan and reviewer agents may run for a long time; spawn them in the background, keep doing independent root work, and poll with short wait_agent cycles. Never use a single long blocking wait for them.
+- While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates.
 - Do not use `list_agents` as a polling or status tool in long or high-context runs; it can replay large agent status and latest-message payloads. Track spawned agent names locally, use `wait_agent` for completion signals, targeted followups only when needed, and `close_agent` after integrating each result.
 - Treat `wait_agent` as a mailbox signal, not proof of completion, content, or errors. After two waits with no substantive result, send one targeted followup: `TASK STILL ACTIVE: return <deliverable> or BLOCKED: <reason>`. If still silent or ack-only, record inconclusive, do not count it as pass/review approval, close if safe, and respawn a smaller `fork_turns: "none"` task with the missing deliverable.
@@ -147,7 +148,7 @@ Loop per goal. Cap at 5 cycles per goal. Cap identical same-criterion failures a
 2. Register atomic todos: `path: <action> for <criterion> - verify by <check>`.
 3. DELEGATE-IN-PARALLEL: dispatch every independent task in the wave at once via right-sized `spawn_agent` workers (Delegation table). Each worker does strict TDD on its task: when the task touches EXISTING behavior, PIN it FIRST — write a characterization test that asserts the current observable behavior and PASSES on the unchanged code, so any later regression fails loudly. Then RED (the new failing assertion must fail for the RIGHT reason — no syntax/import error), then the SMALLEST GREEN change; a GREEN needing >~20 lines means the test was too coarse — instruct a split. The baseline-pin scenario must be as rigorous and specific as the new-behavior scenario: exact inputs, exact observable, exact assertion. Serialize only on a NAMED dependency.
 4. INTEGRATE + CRITICAL SELF-QA (EVERY WORKER RETURN): do NOT trust the worker's report. Read the diff yourself, re-run its tests, and run LSP diagnostics on the changed files. Treat "done" as a claim to disprove. If the diff drifts, the test is hollow, or evidence is missing, RESPAWN the worker with the specific failure context. Forward every finding/learning to subsequent workers.
-5. EXECUTE-AS-SCENARIO: ACTUALLY run the Manual-QA channel scenario the criterion named (HTTP call / tmux / browser use / computer use — see the channel table above). Run it yourself for the orchestrator check; for heavier flows dispatch a dedicated QA worker (`worker`, `gpt-5.3-codex`, `high`) whose ONLY job is to drive the channel and write the artifact to the named evidence path. The unit suite being green is NEVER substitute. If the scenario FAILS, respawn the implementing worker with the captured failure — do not hand-patch around it.
+5. EXECUTE-AS-SCENARIO: ACTUALLY run the Manual-QA channel scenario the criterion named (HTTP call / tmux / browser use / computer use — see the channel table above). Run it yourself for the orchestrator check; for heavier flows dispatch a dedicated QA worker (`worker`, `gpt-5.4`, `high`) whose ONLY job is to drive the channel and write the artifact to the named evidence path. The unit suite being green is NEVER substitute. If the scenario FAILS, respawn the implementing worker with the captured failure — do not hand-patch around it.
 6. CAPTURE: collect the observable artifact path: transcript, stdout, screenshot, assertion, status+body, diff, or parsed dump. No artifact written at the evidence path — not done; record BLOCKED and respawn QA.
 7. CLEAN (PAIRED, NEVER SKIP): tear down every runtime artifact step 5 spawned BEFORE recording — server PIDs (`kill`, verify `kill -0` fails), `tmux` sessions (`tmux kill-session -t ulw-qa-<criterion>`; confirm `tmux ls`), browser / Playwright contexts (`.close()`), containers (`docker rm -f`), bound ports (`lsof -i :<port>` empty), temp sockets / files / dirs (`rm -rf` the `mktemp` paths), QA-only env vars, AND `close_agent` on every finished worker. Register each teardown as its own todo the moment the QA spawns the resource (scripts, tmux assets, browsers / agent-browser sessions, PIDs, ports) so none is forgotten. Embed a one-line cleanup receipt in the evidence string, e.g. `cleanup: killed 12345; tmux kill-session ulw-qa-foo; rm -rf /tmp/ulw.aB12cD; close_agent w-3`. Missing receipt → record BLOCKED, not PASS.
 8. RECORD exactly one result:

package/packages/omo-codex/plugin/components/ulw-loop/src/checkpoint.ts CHANGED Viewed

@@ -54,6 +54,14 @@ async function canReconcileCompletedTaskScopedAggregateSnapshot(repoRoot: string
 	return snapshotObjectiveMapsToUlwLoopPlan(repoRoot, snapshotObjective, scope);
 }
+async function canReconcileActiveFinalTaskScopedAggregateSnapshot(repoRoot: string, plan: UlwLoopPlan, goal: UlwLoopItem, snapshotObjective: string, evidence: string, scope?: UlwLoopScope): Promise<boolean> {
+	if (codexGoalMode(plan) !== "aggregate") return false;
+	if (goal.status !== "in_progress" || plan.activeGoalId !== goal.id) return false;
+	if (!isFinalRunCompletionCandidate(plan, goal)) return false;
+	if (!textHasCompletionValidationEvidence(evidence)) return false;
+	return snapshotObjectiveMapsToUlwLoopPlan(repoRoot, snapshotObjective, scope);
+}
 function buildCompletedLegacyGoalRemediation(goal: UlwLoopItem): string {
 	return [
 		"If get_goal returns a different completed legacy/thread objective, do not repeat --status complete in this thread.",
@@ -130,7 +138,10 @@ export async function checkpointUlwLoop(repoRoot: string, args: CheckpointUlwLoo
 			codexGoal = reconciliation.snapshot.raw;
 			if (!reconciliation.ok) {
 				const objective = snapshot?.objective;
-				const taskScoped = snapshot?.available === true && snapshot.status === "complete" && objective !== undefined && normalizeObjective(objective) !== normalizeObjective(expectedCodexObjective(plan, goal)) && await canReconcileCompletedTaskScopedAggregateSnapshot(repoRoot, plan, goal, objective, evidence, scope);
+				const mismatchedTaskObjective = snapshot?.available === true && objective !== undefined && normalizeObjective(objective) !== normalizeObjective(expectedCodexObjective(plan, goal));
+				const completedTaskScoped = mismatchedTaskObjective && snapshot.status === "complete" && await canReconcileCompletedTaskScopedAggregateSnapshot(repoRoot, plan, goal, objective, evidence, scope);
+				const activeFinalTaskScoped = mismatchedTaskObjective && snapshot.status === "active" && await canReconcileActiveFinalTaskScopedAggregateSnapshot(repoRoot, plan, goal, objective, evidence, scope);
+				const taskScoped = completedTaskScoped || activeFinalTaskScoped;
 				if (!taskScoped) throw new UlwLoopError(`${formatCodexGoalReconciliation(reconciliation)}${aggregate && snapshot?.status === "complete" && objective !== undefined ? buildTaskScopedAggregateReconciliationHint(goal, final) : ""}`, "ulw_loop_codex_snapshot_mismatch");
 				aggregateCompletion = makeAggregateCompletion(now, evidence, codexGoal);
 			}

package/packages/omo-codex/plugin/components/ulw-loop/test/checkpoint.test.ts CHANGED Viewed

@@ -2,6 +2,7 @@
 import { mkdir, mkdtemp, readFile, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
+import { fileURLToPath } from "node:url";
 import { describe, expect, it } from "vitest";
 import { checkpointUlwLoop } from "../src/checkpoint.js";
@@ -12,7 +13,7 @@ import type { UlwLoopItem, UlwLoopLedgerEntry, UlwLoopPlan, UlwLoopSuccessCriter
 import { UlwLoopError } from "../src/types.js";
 const NOW = "2026-05-23T00:00:00.000Z";
-const QUALITY_GATE_PATH = join(process.cwd(), "test", "fixtures", "sample-quality-gate.json");
+const QUALITY_GATE_PATH = fileURLToPath(new URL("./fixtures/sample-quality-gate.json", import.meta.url));
 function criterion(id: string, status: UlwLoopSuccessCriterion["status"]): UlwLoopSuccessCriterion {
 	return { id, scenario: `${id} scenario`, userModel: "happy", expectedEvidence: `${id} proof`, capturedEvidence: status === "pass" ? `${id} passed` : null, status };
@@ -142,6 +143,23 @@ describe("checkpointUlwLoop final story", () => {
 		expect(result.ledgerEntry.kind).toBe("aggregate_completed");
 	});
+	it("ACCEPTS complete when active task-scoped Codex objective maps to the ulw-loop brief", async () => {
+		const taskObjective = "Create only research artifacts with source evidence";
+		const repo = await repoWith(plan([passGoal("G001")], { activeGoalId: "G001" }));
+		await writeFile(ulwLoopBriefPath(repo), `${taskObjective}\n`, "utf8");
+		const result = await checkpointUlwLoop(repo, {
+			goalId: "G001",
+			status: "complete",
+			evidence: "final implementation complete and quality gate passed",
+			codexGoalJson: snapshot("active", taskObjective),
+			qualityGateJson: QUALITY_GATE_PATH,
+		});
+		expect(result.aggregateCompletion?.status).toBe("complete");
+		expect(result.ledgerEntry.kind).toBe("aggregate_completed");
+	});
 	it("explains final task-scoped objective mapping when completed Codex objective is unrelated", async () => {
 		const repo = await repoWith(plan([passGoal("G001")], { activeGoalId: "G001" }));
 		await writeFile(ulwLoopBriefPath(repo), "Fix ulw-loop objective mismatch and install local ulw\n", "utf8");

package/packages/omo-codex/plugin/hooks/hooks.json CHANGED Viewed

@@ -20,6 +20,17 @@
 						"statusMessage": "LazyCodex(0.1.0): Recording Session Telemetry"
 					}
 				]
+			},
+			{
+				"matcher": "^startup$",
+				"hooks": [
+					{
+						"type": "command",
+						"command": "node \"${PLUGIN_ROOT}/scripts/auto-update.mjs\" hook session-start",
+						"timeout": 5,
+						"statusMessage": "LazyCodex(0.1.0): Checking Auto Update"
+					}
+				]
 			}
 		],
 		"UserPromptSubmit": [

package/packages/omo-codex/plugin/model-catalog.json ADDED Viewed

@@ -0,0 +1,49 @@
+{
+	"version": "2026-06-03.gpt-5.5-400k",
+	"current": {
+		"model": "gpt-5.5",
+		"model_context_window": 400000,
+		"model_reasoning_effort": "high",
+		"plan_mode_reasoning_effort": "xhigh"
+	},
+	"roles": {
+		"default": {
+			"model": "gpt-5.5",
+			"model_context_window": 400000,
+			"model_reasoning_effort": "high",
+			"plan_mode_reasoning_effort": "xhigh"
+		},
+		"verifier": {
+			"model": "gpt-5.5",
+			"model_reasoning_effort": "xhigh"
+		},
+		"worker": {
+			"model": "gpt-5.4",
+			"model_reasoning_effort": "high"
+		}
+	},
+	"managedProfiles": [
+		{
+			"version": "legacy.gpt-5.2",
+			"match": {
+				"model": "gpt-5.2"
+			}
+		},
+		{
+			"version": "legacy.gpt-5.4-1m",
+			"match": {
+				"model": "gpt-5.4",
+				"model_context_window": 1000000,
+				"model_reasoning_effort": "high",
+				"plan_mode_reasoning_effort": "xhigh"
+			}
+		},
+		{
+			"version": "legacy.gpt-5.5-272k",
+			"match": {
+				"model": "gpt-5.5",
+				"model_context_window": 272000
+			}
+		}
+	]
+}