oh-my-opencode 4.6.0 → 4.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/version-mismatch.js +47 -0
- package/bin/version-mismatch.test.ts +120 -0
- package/dist/cli/codex-ulw-loop.d.ts +12 -0
- package/dist/cli/doctor/checks/tui-plugin-config.d.ts +2 -0
- package/dist/cli/index.js +5999 -5542
- package/dist/cli/install-codex/codex-config-reasoning.d.ts +2 -1
- package/dist/cli/install-codex/codex-model-catalog.d.ts +13 -0
- package/dist/features/background-agent/concurrency.d.ts +1 -0
- package/dist/features/background-agent/process-cleanup.d.ts +6 -0
- package/dist/features/claude-code-session-state/state.d.ts +1 -0
- package/dist/features/opencode-skill-loader/index.d.ts +1 -0
- package/dist/features/opencode-skill-loader/opencode-config-skills-reader.d.ts +5 -0
- package/dist/features/tmux-subagent/attachable-session-status.d.ts +1 -1
- package/dist/features/tmux-subagent/session-status-parser.d.ts +1 -0
- package/dist/hooks/comment-checker/cli.d.ts +1 -0
- package/dist/hooks/tasks-todowrite-disabler/constants.d.ts +1 -1
- package/dist/index.js +4250 -3776
- package/dist/shared/command-executor/execute-hook-command.d.ts +2 -0
- package/dist/tools/skill/description-formatter.d.ts +5 -1
- package/dist/tools/skill/types.d.ts +1 -0
- package/package.json +13 -14
- package/packages/ast-grep-mcp/dist/cli.js +53 -9
- package/packages/lsp-tools-mcp/dist/lsp/process.js +1 -1
- package/packages/omo-codex/plugin/components/lsp/hooks/hooks.json +13 -0
- package/packages/omo-codex/plugin/components/lsp/src/cli.ts +6 -2
- package/packages/omo-codex/plugin/components/lsp/src/codex-hook-cli.ts +13 -2
- package/packages/omo-codex/plugin/components/lsp/src/codex-hook.ts +30 -79
- package/packages/omo-codex/plugin/components/lsp/src/lsp-session-state.ts +116 -0
- package/packages/omo-codex/plugin/components/lsp/src/mutated-file-paths.ts +88 -0
- package/packages/omo-codex/plugin/components/lsp/test/codex-hook-unavailable.test.ts +206 -0
- package/packages/omo-codex/plugin/components/lsp/test/package-smoke.test.ts +5 -3
- package/packages/omo-codex/plugin/components/rules/bundled-rules/hephaestus.md +6 -4
- package/packages/omo-codex/plugin/components/rules/src/codex-hook-options.ts +1 -0
- package/packages/omo-codex/plugin/components/rules/src/post-compact-budget.ts +0 -2
- package/packages/omo-codex/plugin/components/rules/src/rules/finder.ts +15 -2
- package/packages/omo-codex/plugin/components/rules/src/rules-engine-factory.ts +4 -1
- package/packages/omo-codex/plugin/components/rules/test/windows-git-bash-bundled-rule.test.ts +28 -5
- package/packages/omo-codex/plugin/components/start-work-continuation/directive.md +1 -1
- package/packages/omo-codex/plugin/components/ultrawork/CHANGELOG.md +1 -1
- package/packages/omo-codex/plugin/components/ultrawork/README.md +1 -1
- package/packages/omo-codex/plugin/components/ultrawork/agents/codex-ultrawork-reviewer.toml +3 -1
- package/packages/omo-codex/plugin/components/ultrawork/agents/plan.toml +7 -7
- package/packages/omo-codex/plugin/components/ultrawork/directive.md +1 -1
- package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/SKILL.md +5 -4
- package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/references/full-workflow.md +4 -3
- package/packages/omo-codex/plugin/components/ulw-loop/src/checkpoint.ts +12 -1
- package/packages/omo-codex/plugin/components/ulw-loop/test/checkpoint.test.ts +19 -1
- package/packages/omo-codex/plugin/hooks/hooks.json +24 -2
- package/packages/omo-codex/plugin/model-catalog.json +49 -0
- package/packages/omo-codex/plugin/scripts/auto-update.mjs +159 -0
- package/packages/omo-codex/plugin/scripts/migrate-codex-config.mjs +269 -0
- package/packages/omo-codex/plugin/scripts/sync-hook-status-messages.mjs +4 -9
- package/packages/omo-codex/plugin/scripts/sync-skills.mjs +6 -6
- package/packages/omo-codex/plugin/skills/init-deep/SKILL.md +6 -6
- package/packages/omo-codex/plugin/skills/lcx-report-bug/SKILL.md +127 -0
- package/packages/omo-codex/plugin/skills/lcx-report-bug/agents/openai.yaml +9 -0
- package/packages/omo-codex/plugin/skills/refactor/SKILL.md +6 -6
- package/packages/omo-codex/plugin/skills/remove-ai-slops/SKILL.md +6 -6
- package/packages/omo-codex/plugin/skills/review-work/SKILL.md +7 -7
- package/packages/omo-codex/plugin/skills/start-work/SKILL.md +6 -6
- package/packages/omo-codex/plugin/skills/ulw-loop/SKILL.md +5 -4
- package/packages/omo-codex/plugin/skills/ulw-loop/references/full-workflow.md +4 -3
- package/packages/omo-codex/plugin/skills/ulw-plan/SKILL.md +17 -17
- package/packages/omo-codex/plugin/test/aggregate.test.mjs +188 -19
- package/packages/omo-codex/plugin/test/auto-update.test.mjs +129 -0
- package/packages/omo-codex/plugin/test/hook-status-message.test.mjs +7 -27
- package/packages/omo-codex/plugin/test/migrate-codex-config.test.mjs +146 -0
- package/packages/omo-codex/plugin/test/sync-hook-status-messages.test.mjs +27 -1
- package/packages/omo-codex/plugin/test/sync-skills.test.mjs +22 -0
- package/packages/omo-codex/scripts/install/cli-args.mjs +1 -1
- package/packages/omo-codex/scripts/install/config.mjs +2 -15
- package/packages/omo-codex/scripts/install/delegated-command.mjs +1 -1
- package/packages/omo-codex/scripts/install/legacy-bins.mjs +1 -0
- package/packages/omo-codex/scripts/install/model-catalog.mjs +66 -0
- package/packages/omo-codex/scripts/install/permissions.mjs +11 -0
- package/packages/omo-codex/scripts/install/reasoning-config.mjs +65 -7
- package/packages/omo-codex/scripts/install-bin-links.test.mjs +23 -0
- package/packages/omo-codex/scripts/install-config-autonomous-features.test.mjs +83 -0
- package/packages/omo-codex/scripts/install-config-reasoning.test.mjs +82 -3
- package/packages/omo-codex/scripts/install-config.test.mjs +5 -6
- package/packages/omo-codex/scripts/install-local-entrypoint.test.mjs +30 -2
- package/packages/omo-codex/scripts/install-local.mjs +1 -1
- package/packages/omo-codex/scripts/install-local.test.mjs +3 -1
- package/packages/shared-skills/skills/lcx-report-bug/SKILL.md +127 -0
- package/packages/shared-skills/skills/lcx-report-bug/agents/openai.yaml +9 -0
- package/packages/shared-skills/skills/review-work/SKILL.md +7 -7
- package/packages/shared-skills/skills/start-work/SKILL.md +6 -6
- package/packages/shared-skills/skills/ulw-plan/SKILL.md +11 -11
- package/postinstall.mjs +36 -3
- package/dist/cli/install-codex/codex-config-mcp.d.ts +0 -1
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
|
|
5
|
+
import { afterEach, describe, expect, it } from "vitest";
|
|
6
|
+
|
|
7
|
+
import { runLspPostCompactHook, runLspPostToolUseHook } from "../src/codex-hook.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Fixture: diagnostics text describing a Marksman (markdown) language server
 * whose `initialize` request timed out, followed by two lines of its stderr.
 *
 * The tests return this string from the injected diagnostics callback to
 * simulate an unavailable LSP and expect the hook to answer with empty output.
 */
const MARKSMAN_INITIALIZE_TIMEOUT = [
  "LSP request timeout (method: initialize)",
  'recent stderr: [01:16:41 INF] <LSP Entry> Starting Marksman LSP server: {"arch":"Arm64"}',
  '[01:16:41 INF] <Folder> Loading folder documents: {"uri":"file:///repo"}',
].join("\n");
|
|
14
|
+
|
|
15
|
+
/** Temporary directories registered by the running test; emptied by the `afterEach` hook below. */
const tempDirs: string[] = [];

afterEach(() => {
  // `splice(0)` hands back every registered directory and empties the shared
  // list in the same step, so nothing carries over into the next test.
  for (const tempDir of tempDirs.splice(0)) {
    // `force: true` keeps cleanup from throwing when the directory is already gone.
    rmSync(tempDir, { recursive: true, force: true });
  }
});
|
|
22
|
+
|
|
23
|
+
/**
 * Behavior of the LSP PostToolUse hook when a language server is unavailable.
 *
 * Each test injects a diagnostics callback as the second argument of
 * `runLspPostToolUseHook` and counts its invocations in `calls`, so the
 * assertions can tell a real probe from a call that was skipped.
 */
describe("codex PostToolUse unavailable LSP suppression", () => {
  it("#given unavailable markdown LSP in one session #when PostToolUse repeats #then suppresses feedback and skips the cached extension", async () => {
    // given
    const pluginData = tempPluginData();
    const input = postToolUseInput("session-unavailable", ".omo/ulw-loop/evidence/note.md");
    let calls = 0;

    await withPluginData(pluginData, async () => {
      // when
      const firstOutput = await runLspPostToolUseHook(input, async () => {
        calls += 1;
        return MARKSMAN_INITIALIZE_TIMEOUT;
      });
      const secondOutput = await runLspPostToolUseHook(input, async () => {
        calls += 1;
        return "error[markdown] (1000) at 1:1: second call should have been skipped.";
      });

      // then
      expect(firstOutput).toBe("");
      expect(secondOutput).toBe("");
      // Only the first callback may run; the second hook call must not probe.
      expect(calls).toBe(1);
    });
  });

  it("#given cached unavailable LSP after PostCompact #when the next PostToolUse runs #then probes once and suppresses again", async () => {
    // given
    const pluginData = tempPluginData();
    const input = postToolUseInput("session-compact", ".omo/ulw-loop/evidence/note.md");
    let calls = 0;

    await withPluginData(pluginData, async () => {
      await runLspPostToolUseHook(input, async () => {
        calls += 1;
        return MARKSMAN_INITIALIZE_TIMEOUT;
      });
      await runLspPostToolUseHook(input, async () => {
        calls += 1;
        return "error[markdown] (1000) at 1:1: cached call should have been skipped.";
      });

      // when
      const compactInput = {
        cwd: "/repo",
        hook_event_name: "PostCompact",
        model: "gpt-5.5",
        session_id: "session-compact",
        transcript_path: null,
        trigger: "manual",
        turn_id: "turn-compact",
      };
      const compactOutput = await runLspPostCompactHook(compactInput);
      const afterCompactOutput = await runLspPostToolUseHook(input, async () => {
        calls += 1;
        return MARKSMAN_INITIALIZE_TIMEOUT;
      });
      await runLspPostToolUseHook(input, async () => {
        calls += 1;
        return "error[markdown] (1000) at 1:1: post-compact cached call should have been skipped.";
      });

      // then
      expect(compactOutput).toBe("");
      expect(afterCompactOutput).toBe("");
      // One probe before the compact and exactly one re-probe after it.
      expect(calls).toBe(2);
    });
  });

  it("#given cached unavailable LSP after PostCompact #when the probe is clean #then clears the unavailable cache", async () => {
    // given
    const pluginData = tempPluginData();
    const input = postToolUseInput("session-compact-clean", ".omo/ulw-loop/evidence/note.md");
    let calls = 0;

    await withPluginData(pluginData, async () => {
      await runLspPostToolUseHook(input, async () => {
        calls += 1;
        return MARKSMAN_INITIALIZE_TIMEOUT;
      });
      await runLspPostCompactHook({ session_id: "session-compact-clean" });

      // when
      const cleanProbeOutput = await runLspPostToolUseHook(input, async () => {
        calls += 1;
        return "No diagnostics found";
      });
      const laterDiagnosticOutput = await runLspPostToolUseHook(input, async () => {
        calls += 1;
        return "error[markdown] (1000) at 1:1: recovered markdown diagnostic.";
      });

      // then
      expect(cleanProbeOutput).toBe("");
      expect(laterDiagnosticOutput).toContain("recovered markdown diagnostic");
      // Every callback ran: the initial failure, the clean probe, and the later diagnostic.
      expect(calls).toBe(3);
    });
  });

  it("#given markdown LSP is cached unavailable #when TypeScript diagnostics run #then real diagnostics still block", async () => {
    // given
    const pluginData = tempPluginData();
    const markdownInput = postToolUseInput("session-real-diagnostics", "README.md");
    const typescriptInput = postToolUseInput("session-real-diagnostics", "src/broken.ts");

    await withPluginData(pluginData, async () => {
      await runLspPostToolUseHook(markdownInput, async () => MARKSMAN_INITIALIZE_TIMEOUT);

      // when
      const output = await runLspPostToolUseHook(
        typescriptInput,
        async () => "error[typescript] (2304) at 1:1: Cannot find name 'missing'.",
      );

      // then
      const parsed: unknown = JSON.parse(output);
      if (!isPostToolUseHookOutput(parsed)) throw new TypeError("Expected PostToolUse hook output");
      expect(parsed.reason).toBe(
        "LSP diagnostics after editing src/broken.ts:\n\n" +
          "- error[typescript] (2304) at 1:1: Cannot find name 'missing'.",
      );
    });
  });
});
|
|
146
|
+
|
|
147
|
+
/**
 * Builds a PostToolUse hook payload for a `write` tool call.
 *
 * Only the session id and the written path vary between tests; every other
 * field is a fixed placeholder.
 *
 * @param sessionId - Value stored in `session_id`.
 * @param filePath - Value stored in `tool_input.path`.
 * @returns A fresh payload object on every call.
 */
function postToolUseInput(sessionId: string, filePath: string) {
  return {
    cwd: "/repo",
    hook_event_name: "PostToolUse",
    model: "gpt-5.5",
    permission_mode: "default",
    session_id: sessionId,
    tool_input: { path: filePath },
    tool_name: "write",
    tool_response: { ok: true },
    tool_use_id: "tool-use-1",
    transcript_path: null,
    turn_id: "turn-1",
  };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Runs `fn` with `process.env.PLUGIN_DATA` set to `pluginData`, then restores
 * the variable to its previous state, even when `fn` rejects.
 *
 * @param pluginData - Value assigned to `PLUGIN_DATA` while `fn` runs.
 * @param fn - Async callback executed with the override in place.
 * @returns Whatever `fn` resolves to (nothing for the usual `Promise<void>` callbacks).
 */
async function withPluginData<T = void>(pluginData: string, fn: () => Promise<T>): Promise<T> {
  const previous = process.env["PLUGIN_DATA"];
  process.env["PLUGIN_DATA"] = pluginData;
  try {
    return await fn();
  } finally {
    if (previous === undefined) {
      // Assigning `undefined` to an env entry would store the string "undefined",
      // so an originally unset variable has to be deleted instead.
      delete process.env["PLUGIN_DATA"];
    } else {
      process.env["PLUGIN_DATA"] = previous;
    }
  }
}
|
|
176
|
+
|
|
177
|
+
/**
 * Creates a unique temporary directory (prefix `codex-lsp-unavailable-`) under
 * the OS temp dir and registers it in `tempDirs` so `afterEach` removes it.
 *
 * @returns Path of the newly created directory.
 */
function tempPluginData(): string {
  const dir = mkdtempSync(path.join(tmpdir(), "codex-lsp-unavailable-"));
  tempDirs.push(dir);
  return dir;
}
|
|
182
|
+
|
|
183
|
+
/** Shape the tests expect from the hook's parsed JSON output when it blocks. */
interface PostToolUseHookOutput {
  readonly decision: "block";
  readonly reason: string;
  readonly hookSpecificOutput: {
    readonly hookEventName: "PostToolUse";
    readonly additionalContext: string;
  };
}

/**
 * Type guard for {@link PostToolUseHookOutput}.
 *
 * Accepts a value only when it is a plain object with `decision === "block"`,
 * a string `reason`, and a `hookSpecificOutput` object whose `hookEventName`
 * is `"PostToolUse"` and whose `additionalContext` is a string. Extra
 * properties are not rejected.
 */
function isPostToolUseHookOutput(value: unknown): value is PostToolUseHookOutput {
  if (!isRecord(value)) return false;
  const hookSpecificOutput = value["hookSpecificOutput"];
  return (
    value["decision"] === "block" &&
    typeof value["reason"] === "string" &&
    isRecord(hookSpecificOutput) &&
    hookSpecificOutput["hookEventName"] === "PostToolUse" &&
    typeof hookSpecificOutput["additionalContext"] === "string"
  );
}
|
|
203
|
+
|
|
204
|
+
/**
 * Type guard for a non-null, non-array object, so its properties can be read
 * by string key as `unknown`.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
|
|
@@ -61,7 +61,8 @@ describe("plugin package metadata", () => {
|
|
|
61
61
|
const sourceFiles = readdirSync("src");
|
|
62
62
|
|
|
63
63
|
// when
|
|
64
|
-
const
|
|
64
|
+
const postToolUseCommand = hooksJson.hooks["PostToolUse"]?.[0]?.hooks[0]?.command;
|
|
65
|
+
const postCompactCommand = hooksJson.hooks["PostCompact"]?.[0]?.hooks[0]?.command;
|
|
65
66
|
const lspServer = mcpJson.mcpServers["lsp"];
|
|
66
67
|
const pluginRoot = ["$", "{PLUGIN_ROOT}"].join("");
|
|
67
68
|
|
|
@@ -75,8 +76,9 @@ describe("plugin package metadata", () => {
|
|
|
75
76
|
expect(packageJson.bin["codex-lsp"]).toBeUndefined();
|
|
76
77
|
expect(packageJson.scripts["build"]).toBe("node scripts/clean-dist.mjs && tsc -p tsconfig.build.json");
|
|
77
78
|
expect(cliSource.startsWith("#!/usr/bin/env node")).toBe(true);
|
|
78
|
-
expect(cliSource).toContain("Usage: omo-lsp [mcp | hook post-tool-use]");
|
|
79
|
-
expect(
|
|
79
|
+
expect(cliSource).toContain("Usage: omo-lsp [mcp | hook post-tool-use | hook post-compact]");
|
|
80
|
+
expect(postToolUseCommand).toBe(`node "${pluginRoot}/dist/cli.js" hook post-tool-use`);
|
|
81
|
+
expect(postCompactCommand).toBe(`node "${pluginRoot}/dist/cli.js" hook post-compact`);
|
|
80
82
|
expect(lspServer?.command).toBe("node");
|
|
81
83
|
expect(lspServer?.args).toEqual(["../../../../lsp-tools-mcp/dist/cli.js", "mcp"]);
|
|
82
84
|
expect(cliSource).not.toContain("./lazy-lsp-mcp.js");
|
|
@@ -79,13 +79,15 @@ omo-codex bundles three read-only Codex subagent roles in `CODEX_HOME/agents/`:
|
|
|
79
79
|
|
|
80
80
|
**Routing:**
|
|
81
81
|
|
|
82
|
-
- "Where is X?" / "Find code that does Y" -> `spawn_agent(agent_type="explorer", ...)`
|
|
83
|
-
- "How does library Z work?" / "What's the API contract?" -> `spawn_agent(agent_type="librarian", ...)`
|
|
84
|
-
- 5+ interdependent steps, ambiguous scope, multi-module work -> `spawn_agent(agent_type="plan", ...)`
|
|
85
|
-
- Heavy verification of a finished change -> `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)`
|
|
82
|
+
- "Where is X?" / "Find code that does Y" -> `spawn_agent(agent_type="explorer", fork_turns="none", ...)`
|
|
83
|
+
- "How does library Z work?" / "What's the API contract?" -> `spawn_agent(agent_type="librarian", fork_turns="none", ...)`
|
|
84
|
+
- 5+ interdependent steps, ambiguous scope, multi-module work -> `spawn_agent(agent_type="plan", fork_turns="none", ...)`
|
|
85
|
+
- Heavy verification of a finished change -> `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)`
|
|
86
86
|
|
|
87
87
|
**Don't duplicate.** Once a subagent is dispatched for a question, do not re-do the same search yourself. Once results return, do not re-verify by repeating their tool calls; integrate and move on.
|
|
88
88
|
|
|
89
|
+
**Keep parent liveness visible.** While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates. Do this during long `wait_agent` cycles so the session does not look idle while children are still running.
|
|
90
|
+
|
|
89
91
|
# Operating Loop
|
|
90
92
|
|
|
91
93
|
**Explore -> Plan -> Implement -> Verify -> Manually QA.** Loops are short and tight; do not loop back with a draft when the work is yours to do.
|
|
@@ -24,8 +24,6 @@ const MODEL_CONTEXT_BUDGETS: readonly ModelContextBudget[] = [
|
|
|
24
24
|
{ slug: "gpt-5.5", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
|
|
25
25
|
{ slug: "gpt-5.4", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
|
|
26
26
|
{ slug: "gpt-5.4-mini", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
|
|
27
|
-
{ slug: "gpt-5.3-codex", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
|
|
28
|
-
{ slug: "gpt-5.2", contextWindowTokens: 272_000, effectivePercent: DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT },
|
|
29
27
|
{
|
|
30
28
|
slug: "codex-auto-review",
|
|
31
29
|
contextWindowTokens: 272_000,
|
|
@@ -36,6 +36,7 @@ export interface FinderOptions {
|
|
|
36
36
|
skipUserHome?: boolean;
|
|
37
37
|
/** Plugin root directory. Defaults to PLUGIN_ROOT env or this package root. */
|
|
38
38
|
pluginRoot?: string;
|
|
39
|
+
platform?: NodeJS.Platform;
|
|
39
40
|
cache?: RuleDiscoveryCache;
|
|
40
41
|
}
|
|
41
42
|
|
|
@@ -43,8 +44,11 @@ interface PluginBundledFinderOptions {
|
|
|
43
44
|
readonly disabledSources?: ReadonlySet<string>;
|
|
44
45
|
readonly cache?: RuleDiscoveryCache;
|
|
45
46
|
readonly pluginRoot?: string;
|
|
47
|
+
readonly platform?: NodeJS.Platform;
|
|
46
48
|
}
|
|
47
49
|
|
|
50
|
+
const WINDOWS_GIT_BASH_BUNDLED_RULE_PATH = "bundled-rules/windows-git-bash.md";
|
|
51
|
+
|
|
48
52
|
export function findRuleCandidates(options: FinderOptions): RuleCandidate[] {
|
|
49
53
|
const skipUserHome = options.skipUserHome ?? false;
|
|
50
54
|
const disabledSources = options.disabledSources ?? new Set<string>();
|
|
@@ -61,6 +65,7 @@ export function findRuleCandidates(options: FinderOptions): RuleCandidate[] {
|
|
|
61
65
|
disabledSources,
|
|
62
66
|
...(options.cache === undefined ? {} : { cache: options.cache }),
|
|
63
67
|
...(options.pluginRoot === undefined ? {} : { pluginRoot: options.pluginRoot }),
|
|
68
|
+
...(options.platform === undefined ? {} : { platform: options.platform }),
|
|
64
69
|
};
|
|
65
70
|
candidates.push(...findPluginBundledCandidates(pluginBundledOptions));
|
|
66
71
|
|
|
@@ -78,9 +83,10 @@ export function findPluginBundledCandidates(options: PluginBundledFinderOptions
|
|
|
78
83
|
|
|
79
84
|
const pluginRoot = resolvePluginRulesRoot(options.pluginRoot);
|
|
80
85
|
const ruleDirectory = join(pluginRoot, BUNDLED_RULE_SUBDIR);
|
|
86
|
+
const platform = options.platform ?? process.platform;
|
|
81
87
|
const candidates: RuleCandidate[] = [];
|
|
82
88
|
for (const scannedFile of scanRuleFilesCached(ruleDirectory, options.cache)) {
|
|
83
|
-
|
|
89
|
+
const candidate: RuleCandidate = {
|
|
84
90
|
path: scannedFile.path,
|
|
85
91
|
realPath: scannedFile.realPath,
|
|
86
92
|
source: "plugin-bundled",
|
|
@@ -88,11 +94,18 @@ export function findPluginBundledCandidates(options: PluginBundledFinderOptions
|
|
|
88
94
|
isGlobal: true,
|
|
89
95
|
isSingleFile: false,
|
|
90
96
|
relativePath: toRelativePath(pluginRoot, scannedFile.path),
|
|
91
|
-
}
|
|
97
|
+
};
|
|
98
|
+
if (isPluginBundledCandidateEnabled(candidate, platform)) {
|
|
99
|
+
candidates.push(candidate);
|
|
100
|
+
}
|
|
92
101
|
}
|
|
93
102
|
return candidates;
|
|
94
103
|
}
|
|
95
104
|
|
|
105
|
+
/**
 * Decides whether a plugin-bundled rule candidate is kept for `platform`.
 *
 * The candidate whose `relativePath` equals `WINDOWS_GIT_BASH_BUNDLED_RULE_PATH`
 * is kept only when `platform` is `"win32"`; every other candidate is kept on
 * all platforms.
 */
function isPluginBundledCandidateEnabled(candidate: RuleCandidate, platform: NodeJS.Platform): boolean {
  return candidate.relativePath !== WINDOWS_GIT_BASH_BUNDLED_RULE_PATH || platform === "win32";
}
|
|
108
|
+
|
|
96
109
|
function findProjectCandidates(
|
|
97
110
|
projectRoot: string,
|
|
98
111
|
targetFile: string | null,
|
|
@@ -7,11 +7,14 @@ import { findProjectRoot } from "./rules/project-root.js";
|
|
|
7
7
|
|
|
8
8
|
interface RulesEngineFactoryOptions {
|
|
9
9
|
env?: NodeJS.ProcessEnv;
|
|
10
|
+
platform?: NodeJS.Platform;
|
|
10
11
|
}
|
|
11
12
|
|
|
12
13
|
export function createRulesEngine(options: RulesEngineFactoryOptions, config = configFromEnvironment(options.env)) {
|
|
14
|
+
const platform = options.platform ?? process.platform;
|
|
15
|
+
|
|
13
16
|
return createEngine(config, {
|
|
14
|
-
findCandidates: findRuleCandidates,
|
|
17
|
+
findCandidates: (finderOptions) => findRuleCandidates({ ...finderOptions, platform }),
|
|
15
18
|
findProjectRoot,
|
|
16
19
|
readFile: (path) => {
|
|
17
20
|
try {
|
package/packages/omo-codex/plugin/components/rules/test/windows-git-bash-bundled-rule.test.ts
CHANGED
|
@@ -3,7 +3,7 @@ import { tmpdir } from "node:os";
|
|
|
3
3
|
import { join } from "node:path";
|
|
4
4
|
import { afterEach, describe, expect, it } from "vitest";
|
|
5
5
|
|
|
6
|
-
import {
|
|
6
|
+
import { type CodexSessionStartInput, runSessionStartHook } from "../src/codex-hook.js";
|
|
7
7
|
import { findPluginBundledCandidates } from "../src/rules/finder.js";
|
|
8
8
|
|
|
9
9
|
const WINDOWS_RULE_DESCRIPTION = "Windows Git Bash guidance for Codex";
|
|
@@ -61,34 +61,57 @@ function occurrenceCount(value: string, search: string): number {
|
|
|
61
61
|
|
|
62
62
|
describe("Windows Git Bash bundled rule", () => {
|
|
63
63
|
it("#given packaged bundled rules #when discovering plugin-bundled candidates #then Windows Git Bash rule is included", () => {
|
|
64
|
-
const candidates = findPluginBundledCandidates({ pluginRoot: process.cwd() });
|
|
64
|
+
const candidates = findPluginBundledCandidates({ pluginRoot: process.cwd(), platform: "win32" });
|
|
65
65
|
|
|
66
66
|
expect(candidates.map((candidate) => candidate.relativePath)).toContain(WINDOWS_RULE_PATH);
|
|
67
67
|
});
|
|
68
68
|
|
|
69
|
-
it("#given bundled rules
|
|
69
|
+
it("#given packaged bundled rules off Windows #when discovering plugin-bundled candidates #then Windows Git Bash rule is excluded", () => {
|
|
70
|
+
const candidates = findPluginBundledCandidates({ pluginRoot: process.cwd(), platform: "darwin" });
|
|
71
|
+
|
|
72
|
+
expect(candidates.map((candidate) => candidate.relativePath)).not.toContain(WINDOWS_RULE_PATH);
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
it("#given bundled rules enabled on Windows #when SessionStart runs #then Windows Git Bash guidance is injected once", async () => {
|
|
70
76
|
const { root, pluginData } = makeProject();
|
|
71
77
|
|
|
72
78
|
const output = await runSessionStartHook(sessionStartInput(root), {
|
|
73
79
|
pluginDataRoot: pluginData,
|
|
74
80
|
env: BUNDLED_ONLY_ENV,
|
|
81
|
+
platform: "win32",
|
|
75
82
|
});
|
|
76
83
|
|
|
77
84
|
expect(occurrenceCount(output, WINDOWS_GUIDANCE)).toBe(1);
|
|
78
85
|
});
|
|
79
86
|
|
|
80
|
-
it("#given
|
|
87
|
+
it("#given bundled rules enabled off Windows #when SessionStart runs #then Windows Git Bash guidance is not injected", async () => {
|
|
88
|
+
const { root, pluginData } = makeProject();
|
|
89
|
+
|
|
90
|
+
const output = await runSessionStartHook(sessionStartInput(root), {
|
|
91
|
+
pluginDataRoot: pluginData,
|
|
92
|
+
env: BUNDLED_ONLY_ENV,
|
|
93
|
+
platform: "darwin",
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
expect(output).not.toContain(WINDOWS_GUIDANCE);
|
|
97
|
+
expect(output).not.toContain(WINDOWS_RULE_PATH);
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
it("#given project rule with same description on Windows #when static rules load #then project guidance overrides bundled guidance", async () => {
|
|
81
101
|
const { root, pluginData } = makeProject();
|
|
82
102
|
const projectGuidance = "Project-specific Windows shell policy.";
|
|
83
103
|
mkdirSync(join(root, ".omo", "rules"), { recursive: true });
|
|
84
104
|
writeFileSync(
|
|
85
105
|
join(root, ".omo", "rules", "windows-git-bash.md"),
|
|
86
|
-
["---", `description: ${WINDOWS_RULE_DESCRIPTION}`, "alwaysApply: true", "---", "", projectGuidance].join(
|
|
106
|
+
["---", `description: ${WINDOWS_RULE_DESCRIPTION}`, "alwaysApply: true", "---", "", projectGuidance].join(
|
|
107
|
+
"\n",
|
|
108
|
+
),
|
|
87
109
|
);
|
|
88
110
|
|
|
89
111
|
const output = await runSessionStartHook(sessionStartInput(root), {
|
|
90
112
|
pluginDataRoot: pluginData,
|
|
91
113
|
env: PROJECT_AND_BUNDLED_ENV,
|
|
114
|
+
platform: "win32",
|
|
92
115
|
});
|
|
93
116
|
|
|
94
117
|
expect(output).toContain(projectGuidance);
|
|
@@ -37,7 +37,7 @@ You are mid-flight on a Prometheus work plan. The turn just ended without finish
|
|
|
37
37
|
# Stop conditions for THIS turn
|
|
38
38
|
|
|
39
39
|
- A top-level checkbox flipped to `- [x]` after the 5-phase QA gate (Phase 1 read, Phase 2 automated, Phase 3 channel scenario, Phase 4 adversarial-class probing, Phase 5 gate decision). Then the Stop hook will re-evaluate; if more checkboxes remain you will be continued again.
|
|
40
|
-
- 3 same-failure cycles on one sub-task → escalate via `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)` and stop dispatch.
|
|
40
|
+
- 3 same-failure cycles on one sub-task → escalate via `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)` and stop dispatch.
|
|
41
41
|
- Safety boundary (destructive command, secret exfiltration, production write) → stop and surface a safe substitute.
|
|
42
42
|
- All top-level checkboxes `- [x]` AND (if gate triggered) `codex-ultrawork-reviewer` approved unconditionally → print the ORCHESTRATION COMPLETE block and end.
|
|
43
43
|
|
|
@@ -21,5 +21,5 @@
|
|
|
21
21
|
Initial release.
|
|
22
22
|
|
|
23
23
|
- Codex `UserPromptSubmit` hook that detects `ultrawork` / `ulw` (word-bounded, case-insensitive) in the user prompt and injects the ultrawork orchestration directive.
|
|
24
|
-
- Directive enforces: goal + binding success criteria with manual-QA scenarios + evidence, durable `/tmp` notepad lifecycle, obsessive atomic todos, scenario-driven execution loop, and a
|
|
24
|
+
- Directive enforces: goal + binding success criteria with manual-QA scenarios + evidence, durable `/tmp` notepad lifecycle, obsessive atomic todos, scenario-driven execution loop, and a ChatGPT-compatible xhigh verification gate with no "false positive" escape hatch.
|
|
25
25
|
- Directive size: 5,775 chars across 143 lines.
|
|
@@ -13,7 +13,7 @@ Bundled Codex agent role TOMLs in `agents/` are installed into `CODEX_HOME/agent
|
|
|
13
13
|
| Surface + paired cleanup | Execution loop step 4 (**SURFACE-AS-SCENARIO**) runs the chosen channel scenario end-to-end. Step 5 (**CLEANUP, PAIRED**) tears down every QA-spawned process / tmux session / browser context / container / port / temp dir, with a one-line receipt appended to the notepad. Leftover state → NOT done. |
|
|
14
14
|
| Durable /tmp notepad | `mktemp -t ulw-$(date +%Y%m%d-%H%M%S).XXXXXX.md` with sections `Plan`, `Success criteria + QA scenarios`, `Now`, `Todo`, `Findings`, `Learnings`. **Append**, never rewrite. |
|
|
15
15
|
| Obsessive atomic todos | Every action — even one-line edits, `ls`, single test runs — becomes a todo. Format: `path: <action> for <criterion> — verify by <check>`. One in_progress at a time, mark completed immediately. |
|
|
16
|
-
|
|
|
16
|
+
| ChatGPT-compatible xhigh verification gate | Triggered automatically on user-requested rigor, 3+ files, 20+ turns, 30+ minutes, or refactor/migration/perf/security work. Use the bundled `codex-ultrawork-reviewer` agent role when available. Reviewer verdict is **binding**: no "false positive", no minimising, no arguing. Loop until **unconditional** approval. "Looks good but..." = REJECTION. |
|
|
17
17
|
|
|
18
18
|
The directive is currently 10,951 chars / 231 lines and follows the GPT-5.5 prompting structure (Role / Goal / Manual-QA channels / Bootstrap / Execution loop / Verification gate / Commits / Constraints / Output / Stop rules).
|
|
19
19
|
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
name = "codex-ultrawork-reviewer"
|
|
2
2
|
description = "Strict ultrawork verification reviewer. Use after full QA evidence to audit the diff, goal, and scenario evidence before declaring done."
|
|
3
3
|
nickname_candidates = ["Verifier"]
|
|
4
|
-
model = "gpt-5.
|
|
4
|
+
model = "gpt-5.5"
|
|
5
5
|
model_reasoning_effort = "xhigh"
|
|
6
6
|
developer_instructions = """You are the ultrawork verification reviewer.
|
|
7
7
|
|
|
8
8
|
Review only. Do not implement.
|
|
9
9
|
|
|
10
|
+
The default model intentionally uses a ChatGPT account compatible frontier model. If a caller supplies a different supported reviewer model, follow the caller's assignment while preserving this review contract.
|
|
11
|
+
|
|
10
12
|
Input should include the goal, success criteria, full diff, QA evidence, and notepad path.
|
|
11
13
|
If Codex delivers parent review context as inter-agent commentary, treat the latest parent message with goal/diff/evidence as your active review assignment, not passive context.
|
|
12
14
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
name = "plan"
|
|
2
|
-
description = "Strategic planning consultant. Produces a single executable work plan from a vague or large request. Planner only - never implements. Writes the plan to plans/<slug>.md."
|
|
2
|
+
description = "Strategic planning consultant. Produces a single executable work plan from a vague or large request. Planner only - never implements. Writes the plan to .omo/plans/<slug>.md."
|
|
3
3
|
nickname_candidates = ["Planner"]
|
|
4
4
|
model = "gpt-5.5"
|
|
5
5
|
model_reasoning_effort = "xhigh"
|
|
@@ -36,7 +36,7 @@ Wait for context to converge before drafting. Rushed plans fail.
|
|
|
36
36
|
|
|
37
37
|
# Phase 2 - Plan output (single markdown file, single plan)
|
|
38
38
|
|
|
39
|
-
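Write the plan to `plans/<slug>.md` in the working tree (create the `plans/` directory if absent). One plan per request - no "Phase 1 plan / Phase 2 plan" splits. 50+ tasks is fine if the work demands it.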
|
|
39
|
+
Write the plan to `.omo/plans/<slug>.md` in the working tree (create the `.omo/plans/` directory if absent). One plan per request - no "Phase 1 plan / Phase 2 plan" splits. 50+ tasks is fine if the work demands it.
|
|
40
40
|
|
|
41
41
|
Use this template verbatim (fill the placeholders):
|
|
42
42
|
|
|
@@ -60,7 +60,7 @@ Use this template verbatim (fill the placeholders):
|
|
|
60
60
|
> Zero human intervention - all verification is agent-executed.
|
|
61
61
|
- Test decision: <TDD | tests-after | none> + framework
|
|
62
62
|
- QA policy: every task has agent-executed scenarios
|
|
63
|
-
- Evidence: `evidence/task-<N>-<slug>.<ext>`
|
|
63
|
+
- Evidence: `.omo/evidence/task-<N>-<slug>.<ext>`
|
|
64
64
|
|
|
65
65
|
## Execution strategy
|
|
66
66
|
### Parallel execution waves
|
|
@@ -114,13 +114,13 @@ Critical path: Task 1 -> Task 2 -> Task 6
|
|
|
114
114
|
Tool: <bash | curl | tmux | playwright(real Chrome) | agent-browser | computer-use>
|
|
115
115
|
Steps: <exact command / API call / page action with concrete inputs - URL, payload, keystrokes, selectors>
|
|
116
116
|
Expected: <concrete, binary pass/fail observable>
|
|
117
|
-
Evidence: evidence/task-<N>-<slug>.<ext>
|
|
117
|
+
Evidence: .omo/evidence/task-<N>-<slug>.<ext>
|
|
118
118
|
|
|
119
119
|
Scenario: <failure / edge case>
|
|
120
120
|
Tool: <same, with exact invocation>
|
|
121
121
|
Steps: <trigger the error with specific inputs>
|
|
122
122
|
Expected: <graceful failure with the exact error message/code>
|
|
123
|
-
Evidence: evidence/task-<N>-<slug>-error.<ext>
|
|
123
|
+
Evidence: .omo/evidence/task-<N>-<slug>-error.<ext>
|
|
124
124
|
```
|
|
125
125
|
|
|
126
126
|
Commit: <YES|NO> | Message: `<type>(<scope>): <imperative summary>` | Files: [<paths>]
|
|
@@ -136,14 +136,14 @@ Critical path: Task 1 -> Task 2 -> Task 6
|
|
|
136
136
|
- One logical change per commit. Conventional Commits (`<type>(<scope>): <subject>` body + footer).
|
|
137
137
|
- Atomic: every commit builds and passes tests on its own.
|
|
138
138
|
- No "WIP" / "fix typo squash later" commits on the final branch - clean up before merge.
|
|
139
|
-
- Reference the plan file path in the final commit footer: `Plan: plans/<slug>.md`.
|
|
139
|
+
- Reference the plan file path in the final commit footer: `Plan: .omo/plans/<slug>.md`.
|
|
140
140
|
|
|
141
141
|
## Success criteria
|
|
142
142
|
- All Must-Have shipped; all QA scenarios pass with captured evidence; F1-F4 approved; commit history clean.
|
|
143
143
|
```
|
|
144
144
|
|
|
145
145
|
# Constraints
|
|
146
|
-
- READ + plan-file write only. Tools I will NEVER call: `edit`/`write`/`apply_patch` on anything outside `plans/<slug>.md`, anything that mutates non-plan files.
|
|
146
|
+
- READ + plan-file write only. Tools I will NEVER call: `edit`/`write`/`apply_patch` on anything outside `.omo/plans/<slug>.md`, anything that mutates non-plan files.
|
|
147
147
|
- DO NOT split work into multiple plans. ONE plan per request.
|
|
148
148
|
- DO NOT skip context gathering. NEVER plan blind.
|
|
149
149
|
- DO NOT include "user manually tests" as an acceptance criterion. Every check must be agent-executable.
|
|
@@ -241,7 +241,7 @@ Atomic, Conventional Commits (`<type>(<scope>): <imperative>` — feat /
|
|
|
241
241
|
fix / refactor / test / docs / chore / build / ci / perf). One logical
|
|
242
242
|
change per commit; each commit builds + tests green on its own. No WIP
|
|
243
243
|
on the final branch. If a plan file exists, final commit footer:
|
|
244
|
-
`Plan: plans/<slug>.md`. Do NOT auto-`git commit` unless the user
|
|
244
|
+
`Plan: .omo/plans/<slug>.md`. Do NOT auto-`git commit` unless the user
|
|
245
245
|
requested or preauthorised this session — default is stage + draft
|
|
246
246
|
message + present for approval.
|
|
247
247
|
|
|
@@ -25,6 +25,7 @@ This Codex skill is intentionally compact to avoid adding a large operating manu
|
|
|
25
25
|
- Delegate code edits, test writes, fixes, and QA execution to right-sized Codex subagents when the workflow requires it.
|
|
26
26
|
- Every `spawn_agent` message starts with `TASK:`, then names `DELIVERABLE`, `SCOPE`, and `VERIFY`; role selection requires `agent_type`, while `model` + `reasoning_effort` alone creates a default agent, not a reviewer or worker; prefer `fork_turns: "none"` unless full history is truly required.
|
|
27
27
|
- Plan and reviewer agents may run for a long time; spawn them in the background, keep doing independent root work, and poll with short wait_agent cycles. Never use a single long blocking wait for them.
|
|
28
|
+
- While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates.
|
|
28
29
|
- Avoid `list_agents` as a polling or status tool in large runs; it can replay large agent status and latest-message payloads. Track spawned agent names locally, use `wait_agent` for completion signals, targeted followups only when needed, and `close_agent` after integrating each result.
|
|
29
30
|
- Treat `wait_agent` as a mailbox signal, not proof of completion, content, or errors. After two waits with no substantive result, send one targeted followup, then record inconclusive and respawn a smaller `fork_turns: "none"` task if the child stays silent or ack-only.
|
|
30
31
|
|
|
@@ -34,10 +35,10 @@ The full workflow may mention OpenCode-style orchestration examples. In Codex, t
|
|
|
34
35
|
|
|
35
36
|
| Workflow intent | Codex tool |
|
|
36
37
|
| --- | --- |
|
|
37
|
-
| Plan agent | `spawn_agent(agent_type="plan", ...)` |
|
|
38
|
-
| Search/read-only worker | `spawn_agent(agent_type="explorer", ...)` |
|
|
39
|
-
| Implementation or QA worker | `spawn_agent(agent_type="worker", ...)` |
|
|
40
|
-
| Final verification reviewer | `spawn_agent(agent_type="codex-ultrawork-reviewer", ...)` |
|
|
38
|
+
| Plan agent | `spawn_agent(agent_type="plan", fork_turns="none", ...)` |
|
|
39
|
+
| Search/read-only worker | `spawn_agent(agent_type="explorer", fork_turns="none", ...)` |
|
|
40
|
+
| Implementation or QA worker | `spawn_agent(agent_type="worker", fork_turns="none", ...)` |
|
|
41
|
+
| Final verification reviewer | `spawn_agent(agent_type="codex-ultrawork-reviewer", fork_turns="none", ...)` |
|
|
41
42
|
| Wait for background result | `wait_agent(...)` |
|
|
42
43
|
| Clean up finished worker | `close_agent(...)` |
|
|
43
44
|
|
package/packages/omo-codex/plugin/components/ulw-loop/skills/ulw-loop/references/full-workflow.md
CHANGED
|
@@ -33,9 +33,9 @@ Size each worker to the task — never spend `xhigh` on a one-liner, never send
|
|
|
33
33
|
| Task shape | agent_type | model | reasoning_effort |
|
|
34
34
|
|---|---|---|---|
|
|
35
35
|
| Trivial / mechanical (rename, move, obvious one-liner, config edit) | `worker` | `gpt-5.4-mini` | `low` |
|
|
36
|
-
| Pure implementation against a clear spec (new function, endpoint, test from a named pattern) | `worker` | `gpt-5.
|
|
36
|
+
| Pure implementation against a clear spec (new function, endpoint, test from a named pattern) | `worker` | `gpt-5.4` | `high` |
|
|
37
37
|
| Deep debugging / race / perf / subtle cross-module reasoning | `worker` | `gpt-5.5` | `xhigh` |
|
|
38
|
-
| QA execution (drive a channel, capture evidence) | `worker` | `gpt-5.
|
|
38
|
+
| QA execution (drive a channel, capture evidence) | `worker` | `gpt-5.4` | `high` |
|
|
39
39
|
| Read-only codebase search | `explorer` | role default | role default |
|
|
40
40
|
| External library / docs research | `librarian` | role default | role default |
|
|
41
41
|
| Final verification audit | `codex-ultrawork-reviewer` | role default | role default |
|
|
@@ -48,6 +48,7 @@ Codex subagent reliability:
|
|
|
48
48
|
- Start every `spawn_agent` message with `TASK: <imperative assignment>`, then name `DELIVERABLE`, `SCOPE`, and `VERIFY`. State that it is an executable assignment, not a context handoff.
|
|
49
49
|
- Prefer `fork_turns: "none"` unless full history is truly required; paste only the context the child needs. Full-history forks can make the child continue old parent context instead of the delegated task.
|
|
50
50
|
- Plan and reviewer agents may run for a long time; spawn them in the background, keep doing independent root work, and poll with short wait_agent cycles. Never use a single long blocking wait for them.
|
|
51
|
+
- While any child is active, keep the parent visibly alive with brief status updates that include active subagent count, agent names, last heartbeat, and whether the parent is waiting for mailbox updates.
|
|
51
52
|
- Do not use `list_agents` as a polling or status tool in long or high-context runs; it can replay large agent status and latest-message payloads. Track spawned agent names locally, use `wait_agent` for completion signals, targeted followups only when needed, and `close_agent` after integrating each result.
|
|
52
53
|
- Treat `wait_agent` as a mailbox signal, not proof of completion, content, or errors. After two waits with no substantive result, send one targeted followup: `TASK STILL ACTIVE: return <deliverable> or BLOCKED: <reason>`. If still silent or ack-only, record inconclusive, do not count it as pass/review approval, close if safe, and respawn a smaller `fork_turns: "none"` task with the missing deliverable.
|
|
53
54
|
|
|
@@ -147,7 +148,7 @@ Loop per goal. Cap at 5 cycles per goal. Cap identical same-criterion failures a
|
|
|
147
148
|
2. Register atomic todos: `path: <action> for <criterion> - verify by <check>`.
|
|
148
149
|
3. DELEGATE-IN-PARALLEL: dispatch every independent task in the wave at once via right-sized `spawn_agent` workers (Delegation table). Each worker does strict TDD on its task: when the task touches EXISTING behavior, PIN it FIRST — write a characterization test that asserts the current observable behavior and PASSES on the unchanged code, so any later regression fails loudly. Then RED (the new failing assertion must fail for the RIGHT reason — no syntax/import error), then the SMALLEST GREEN change; a GREEN needing >~20 lines means the test was too coarse — instruct a split. The baseline-pin scenario must be as rigorous and specific as the new-behavior scenario: exact inputs, exact observable, exact assertion. Serialize only on a NAMED dependency.
|
|
149
150
|
4. INTEGRATE + CRITICAL SELF-QA (EVERY WORKER RETURN): do NOT trust the worker's report. Read the diff yourself, re-run its tests, and run LSP diagnostics on the changed files. Treat "done" as a claim to disprove. If the diff drifts, the test is hollow, or evidence is missing, RESPAWN the worker with the specific failure context. Forward every finding/learning to subsequent workers.
|
|
150
|
-
5. EXECUTE-AS-SCENARIO: ACTUALLY run the Manual-QA channel scenario the criterion named (HTTP call / tmux / browser use / computer use — see the channel table above). Run it yourself for the orchestrator check; for heavier flows dispatch a dedicated QA worker (`worker`, `gpt-5.
|
|
151
|
+
5. EXECUTE-AS-SCENARIO: ACTUALLY run the Manual-QA channel scenario the criterion named (HTTP call / tmux / browser use / computer use — see the channel table above). Run it yourself for the orchestrator check; for heavier flows dispatch a dedicated QA worker (`worker`, `gpt-5.4`, `high`) whose ONLY job is to drive the channel and write the artifact to the named evidence path. The unit suite being green is NEVER a substitute. If the scenario FAILS, respawn the implementing worker with the captured failure — do not hand-patch around it.
|
|
151
152
|
6. CAPTURE: collect the observable artifact path: transcript, stdout, screenshot, assertion, status+body, diff, or parsed dump. No artifact written at the evidence path — not done; record BLOCKED and respawn QA.
|
|
152
153
|
7. CLEAN (PAIRED, NEVER SKIP): tear down every runtime artifact step 5 spawned BEFORE recording — server PIDs (`kill`, verify `kill -0` fails), `tmux` sessions (`tmux kill-session -t ulw-qa-<criterion>`; confirm `tmux ls`), browser / Playwright contexts (`.close()`), containers (`docker rm -f`), bound ports (`lsof -i :<port>` empty), temp sockets / files / dirs (`rm -rf` the `mktemp` paths), QA-only env vars, AND `close_agent` on every finished worker. Register each teardown as its own todo the moment the QA spawns the resource (scripts, tmux assets, browsers / agent-browser sessions, PIDs, ports) so none is forgotten. Embed a one-line cleanup receipt in the evidence string, e.g. `cleanup: killed 12345; tmux kill-session ulw-qa-foo; rm -rf /tmp/ulw.aB12cD; close_agent w-3`. Missing receipt → record BLOCKED, not PASS.
|
|
153
154
|
8. RECORD exactly one result:
|
|
@@ -54,6 +54,14 @@ async function canReconcileCompletedTaskScopedAggregateSnapshot(repoRoot: string
|
|
|
54
54
|
return snapshotObjectiveMapsToUlwLoopPlan(repoRoot, snapshotObjective, scope);
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
async function canReconcileActiveFinalTaskScopedAggregateSnapshot(repoRoot: string, plan: UlwLoopPlan, goal: UlwLoopItem, snapshotObjective: string, evidence: string, scope?: UlwLoopScope): Promise<boolean> {
|
|
58
|
+
if (codexGoalMode(plan) !== "aggregate") return false;
|
|
59
|
+
if (goal.status !== "in_progress" || plan.activeGoalId !== goal.id) return false;
|
|
60
|
+
if (!isFinalRunCompletionCandidate(plan, goal)) return false;
|
|
61
|
+
if (!textHasCompletionValidationEvidence(evidence)) return false;
|
|
62
|
+
return snapshotObjectiveMapsToUlwLoopPlan(repoRoot, snapshotObjective, scope);
|
|
63
|
+
}
|
|
64
|
+
|
|
57
65
|
function buildCompletedLegacyGoalRemediation(goal: UlwLoopItem): string {
|
|
58
66
|
return [
|
|
59
67
|
"If get_goal returns a different completed legacy/thread objective, do not repeat --status complete in this thread.",
|
|
@@ -130,7 +138,10 @@ export async function checkpointUlwLoop(repoRoot: string, args: CheckpointUlwLoo
|
|
|
130
138
|
codexGoal = reconciliation.snapshot.raw;
|
|
131
139
|
if (!reconciliation.ok) {
|
|
132
140
|
const objective = snapshot?.objective;
|
|
133
|
-
const
|
|
141
|
+
const mismatchedTaskObjective = snapshot?.available === true && objective !== undefined && normalizeObjective(objective) !== normalizeObjective(expectedCodexObjective(plan, goal));
|
|
142
|
+
const completedTaskScoped = mismatchedTaskObjective && snapshot.status === "complete" && await canReconcileCompletedTaskScopedAggregateSnapshot(repoRoot, plan, goal, objective, evidence, scope);
|
|
143
|
+
const activeFinalTaskScoped = mismatchedTaskObjective && snapshot.status === "active" && await canReconcileActiveFinalTaskScopedAggregateSnapshot(repoRoot, plan, goal, objective, evidence, scope);
|
|
144
|
+
const taskScoped = completedTaskScoped || activeFinalTaskScoped;
|
|
134
145
|
if (!taskScoped) throw new UlwLoopError(`${formatCodexGoalReconciliation(reconciliation)}${aggregate && snapshot?.status === "complete" && objective !== undefined ? buildTaskScopedAggregateReconciliationHint(goal, final) : ""}`, "ulw_loop_codex_snapshot_mismatch");
|
|
135
146
|
aggregateCompletion = makeAggregateCompletion(now, evidence, codexGoal);
|
|
136
147
|
}
|