orcastrator 0.2.14 → 0.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,7 +22,33 @@ Start with a plain-language goal:
22
22
  orca "add auth to the app"
23
23
  ```
24
24
 
25
- Orca will create a run, plan tasks, execute them, and persist run state.
25
+ Orca will create a run, plan tasks, run a pre-execution review/improvement pass on the task graph, execute the reviewed graph, and persist run state.
26
+
27
+ ### Pre-execution review-improvement stage
28
+
29
+ After planning, Orca runs a structured review pass that can edit the task graph before execution starts. The review output is schema-validated and supports concrete graph operations:
30
+
31
+ - update task fields (`name`, `description`, `acceptance_criteria`)
32
+ - add/remove task
33
+ - add/remove dependency
34
+
35
+ The edited graph is re-validated as a DAG. If the review output is invalid, Orca fails with an actionable error by default. Set `review.plan.onInvalid: "warn_skip"` to log a warning and continue with the original planner graph instead.
36
+
37
+ ### Post-execution review / fix cycles
38
+
39
+ After task execution, Orca can run deterministic validation commands, then ask Codex to review findings and optionally auto-fix issues in bounded cycles.
40
+
41
+ - `review.execution.enabled` (default `true`; when unset, falls back to `review.enabled`)
42
+ - `review.execution.maxCycles` (default `2`)
43
+ - `review.execution.onFindings`:
44
+ - `auto_fix` (default): apply fixes and continue until clean or max cycles
45
+ - `report_only`: report findings and stop
46
+ - `fail`: mark run failed when findings exist
47
+ - `review.execution.validator.auto` (default `true`): auto-detect validator commands from `package.json`
48
+ - `review.execution.validator.commands` (optional explicit command list)
49
+ - `review.execution.prompt` (optional custom reviewer instruction)
50
+
51
+ When using the Codex executor, Orca prints a final post-execution review summary.
26
52
 
27
53
  ## Spec And Plan Files
28
54
 
@@ -79,19 +105,47 @@ Orca auto-discovers config in this order:
79
105
 
80
106
  Later entries override earlier ones.
81
107
 
82
- ```js
83
- // orca.config.js
108
+ ```ts
109
+ // orca.config.ts
84
110
  export default {
85
111
  runsDir: "./.orca/runs",
86
112
  sessionLogs: "./session-logs",
113
+
114
+ // Function hooks are first-class and strongly typed per hook.
115
+ hooks: {
116
+ onTaskComplete: async (event, context) => {
117
+ console.log(`task done: ${event.taskId} (${event.taskName}) from pid ${context.pid}`);
118
+ },
119
+ onError: async (event) => {
120
+ console.error(event.error);
121
+ }
122
+ },
123
+
124
+ // Command hooks remain supported; payload is sent as stdin JSON.
87
125
  hookCommands: {
88
- onTaskComplete: "echo task done: $ORCA_TASK_NAME",
126
+ onTaskComplete: "node -e 'let s=\"\";process.stdin.on(\"data\",d=>s+=d);process.stdin.on(\"end\",()=>{const p=JSON.parse(s);console.log(`task done: ${p.taskId}`);})'",
89
127
  onComplete: "echo run complete",
90
128
  onError: "echo run failed"
91
129
  },
92
130
  codex: {
93
131
  model: "gpt-5.3-codex", // override the codex model
94
132
  multiAgent: true, // enable codex multi-agent (see below)
133
+ },
134
+ review: {
135
+ plan: {
136
+ enabled: true, // default true
137
+ onInvalid: "fail" // or "warn_skip"
138
+ },
139
+ execution: {
140
+ enabled: true, // default true
141
+ maxCycles: 2, // default 2
142
+ onFindings: "auto_fix", // "auto_fix" | "report_only" | "fail"
143
+ validator: {
144
+ auto: true, // default true
145
+ // commands: ["npm run validate"]
146
+ },
147
+ // prompt: "Prefer minimal safe fixes"
148
+ }
95
149
  }
96
150
  };
97
151
  ```
@@ -135,6 +189,8 @@ Global:
135
189
  - `--on-milestone <cmd>`
136
190
  - `--on-task-complete <cmd>`
137
191
  - `--on-task-fail <cmd>`
192
+ - `--on-invalid-plan <cmd>`
193
+ - `--on-findings <cmd>`
138
194
  - `--on-complete <cmd>`
139
195
  - `--on-error <cmd>`
140
196
 
@@ -191,6 +247,8 @@ Global:
191
247
  - `--check` (API key lookup order: CLI flag → process env → `~/.openclaw/openclaw.json` `env.vars` → `~/.claude/.env` → `~/.config/claude/.env`)
192
248
  - `--global`
193
249
  - `--project`
250
+ - `--project-config-template`
251
+ - `--skip-project-config`
194
252
 
195
253
  `orca help`:
196
254
 
@@ -204,10 +262,24 @@ Hook names:
204
262
  - `onMilestone`
205
263
  - `onTaskComplete`
206
264
  - `onTaskFail`
265
+ - `onInvalidPlan`
266
+ - `onFindings`
207
267
  - `onComplete`
208
268
  - `onError`
209
269
 
210
- Run hooks from CLI with `--on-...` flags or from config via `hookCommands` / `hooks`.
270
+ Run hooks from CLI with `--on-...` flags or from config via `hooks` / `hookCommands`.
271
+ Unknown hook keys in config are rejected at load time with an explicit allowed-hook list.
272
+
273
+ Hook contract:
274
+ - Function hooks (`config.hooks`) are the primary path and are strongly typed per hook event.
275
+ - Every function hook receives `(event, context)`, where `context` always has the same shape: `{ cwd, pid, invokedAt }`.
276
+ - Command hooks (`--on-...` and `config.hookCommands`) receive the full event payload as JSON over stdin.
277
+ - Orca no longer injects hook payload via `ORCA_*` env vars.
278
+
279
+ Migration note:
280
+ - If your hook commands previously read any `ORCA_*` hook env payload (`ORCA_HOOK_PAYLOAD_JSON`, `ORCA_MSG`, `ORCA_RUN_ID`, etc.), switch them to parse stdin JSON instead.
281
+ - Existing CLI hook flags are preserved (`--on-milestone`, `--on-error`, etc.); only payload transport changed.
282
+ - Smoke-test the hook contract (function + command + concurrency): `npm run smoke:hooks`.
211
283
 
212
284
  ### Run ID Format
213
285
 
@@ -222,6 +294,15 @@ Run IDs are generated as:
222
294
  - Project: `./orca.config.js` or `./orca.config.ts`
223
295
  - Explicit: `--config <path>`
224
296
 
297
+ ### Project Instruction Files
298
+
299
+ During planning, Orca automatically injects project instruction files when present:
300
+
301
+ 1. `AGENTS.md`
302
+ 2. `CLAUDE.md`
303
+
304
+ Files are discovered from the project root (nearest `.git` from the spec/task context) and injected in that order.
305
+
225
306
  ### Run State Locations
226
307
 
227
308
  - Run status: `<runsDir>/<run-id>/status.json`
@@ -230,8 +311,54 @@ Run IDs are generated as:
230
311
 
231
312
  ## Development
232
313
 
314
+ Install dependencies with npm (primary lockfile):
315
+
316
+ ```bash
317
+ npm install
318
+ ```
319
+
320
+ Run local development and tests with Bun (faster runtime for this project):
321
+
233
322
  ```bash
234
- bun install
235
- bun test
236
323
  bun run src/cli/index.ts "your goal here"
324
+ bun test src
325
+ ```
326
+
327
+ ## Validation pipeline
328
+
329
+ Use the full validation gate before opening/publishing changes:
330
+
331
+ ```bash
332
+ npm run validate
333
+ ```
334
+
335
+ This runs, in order:
336
+
337
+ 1. `npm run lint` (Oxlint syntax/style/static rules)
338
+ 2. `npm run lint:type-aware` (Oxlint + tsgolint alpha type-aware + type-check diagnostics)
339
+ 3. `npm run typecheck` (TypeScript Native Preview via `tsgo --noEmit`, with environment fallback to `tsc --noEmit`)
340
+ 4. `npm run test`
341
+ 5. `npm run build`
342
+
343
+ `npm run build` remains `tsc` because the native preview compiler is used here as a fast typecheck gate; production JS emission stays on stable `typescript` for predictable package output.
344
+
345
+ ## Package manager + lockfile policy
346
+
347
+ Orca uses a mixed runtime/tooling model on purpose:
348
+
349
+ - **npm is canonical for dependency resolution, release builds, and deterministic installs**.
350
+ - **Bun is used as a runtime/test runner in local workflows** (`dev`, `start`, `test`).
351
+
352
+ Commit both lockfiles:
353
+
354
+ - `package-lock.json` — canonical dependency graph for npm/CI/publish
355
+ - `bun.lock` — Bun runtime resolution parity for local Bun commands
356
+
357
+ When dependencies change, update both lockfiles in the same PR:
358
+
359
+ ```bash
360
+ npm install
361
+ bun install
237
362
  ```
363
+
364
+ This keeps npm and Bun behavior aligned without forcing a disruptive full migration.
@@ -1,5 +1,6 @@
1
1
  import { query } from "@anthropic-ai/claude-agent-sdk";
2
2
  import { z } from "zod";
3
+ import { TaskGraphReviewPayloadSchema } from "../../core/task-graph-review.js";
3
4
  import { parseAgentJson } from "../../utils/agent-json.js";
4
5
  const PlannedTaskSchema = z.object({
5
6
  id: z.string().min(1),
@@ -101,6 +102,23 @@ const EXECUTION_OUTPUT_FORMAT = {
101
102
  type: "json_schema",
102
103
  schema: EXECUTION_OUTPUT_SCHEMA,
103
104
  };
105
// JSON Schema for the structured output of the task-graph review query.
// It only requires a top-level `changes` array of objects; the individual
// operations are validated afterwards with TaskGraphReviewPayloadSchema.
const REVIEW_OUTPUT_SCHEMA = {
    type: "object",
    additionalProperties: false,
    required: ["changes"],
    properties: {
        changes: { type: "array", items: { type: "object" } },
    },
};
// Output-format descriptor passed to buildClaudeQueryOptions for review queries.
const REVIEW_OUTPUT_FORMAT = { type: "json_schema", schema: REVIEW_OUTPUT_SCHEMA };
104
122
  function buildPlanningPrompt(spec, systemContext) {
105
123
  return [
106
124
  systemContext,
@@ -127,6 +145,27 @@ function buildTaskExecutionPrompt(task, runId, cwd, systemContext) {
127
145
  "If you cannot complete the task, set outcome=failed and provide a concise error.",
128
146
  ].join("\n\n");
129
147
  }
148
/**
 * Build the prompt for the pre-execution task-graph review.
 *
 * @param {unknown} tasks - Task graph to review; embedded as pretty-printed JSON.
 * @param {string} systemContext - Context text placed at the top of the prompt.
 * @returns {string} Prompt sections separated by blank lines.
 */
function buildTaskGraphReviewPrompt(tasks, systemContext) {
    const serializedGraph = JSON.stringify(tasks, null, 2);
    const instructions = [
        "You are Orca's pre-execution task-graph reviewer.",
        "Return only structured review operations in the configured schema.",
        "Allowed operations: update_task (name/description/acceptance_criteria), add_task, remove_task, add_dependency, remove_dependency.",
        "Return an empty changes array if no edits are needed.",
    ];
    const sections = [systemContext, ...instructions, "Current task graph JSON:", serializedGraph];
    return sections.join("\n\n");
}
159
/**
 * Validate a structured review payload against TaskGraphReviewPayloadSchema.
 *
 * @param {unknown} payload - Structured output returned by the review query.
 * @param {string} [rawResponse=""] - Raw response text, passed through unchanged.
 * @returns {{ changes: unknown[], rawResponse: string }} The validated changes plus the raw text.
 * @throws The error produced by formatSchemaError when validation fails.
 */
function parseStructuredTaskGraphReviewPayload(payload, rawResponse = "") {
    const validation = TaskGraphReviewPayloadSchema.safeParse(payload);
    if (validation.success) {
        const { changes } = validation.data;
        return { changes, rawResponse };
    }
    throw formatSchemaError("Claude structured review payload failed schema validation", validation.error);
}
130
169
  function extractAssistantText(message) {
131
170
  if (!message || typeof message !== "object") {
132
171
  return null;
@@ -277,6 +316,22 @@ export async function planSpec(spec, systemContext, config) {
277
316
  claudeQuery.close();
278
317
  }
279
318
  }
319
/**
 * Run the pre-execution task-graph review as a Claude query.
 *
 * @param {unknown} tasks - Task graph to review.
 * @param {string} systemContext - Context text placed at the top of the prompt.
 * @param {unknown} config - Passed to buildClaudeQueryOptions.
 * @returns {Promise<{ changes: unknown[], rawResponse: string }>} The validated review payload.
 */
export async function reviewTaskGraph(tasks, systemContext, config) {
    const prompt = buildTaskGraphReviewPrompt(tasks, systemContext);
    const options = buildClaudeQueryOptions(config, REVIEW_OUTPUT_FORMAT);
    const claudeQuery = query({ prompt, options });
    try {
        const session = await collectSessionResult(claudeQuery);
        if (session.structuredOutput === undefined) {
            throwMissingStructuredOutput("review");
        }
        return parseStructuredTaskGraphReviewPayload(session.structuredOutput, session.rawResponse);
    }
    finally {
        // Close the query whether the review succeeded or threw.
        claudeQuery.close();
    }
}
280
335
  export async function executeTask(task, runId, config, systemContext) {
281
336
  const claudeQuery = query({
282
337
  prompt: buildTaskExecutionPrompt(task, runId, process.cwd(), systemContext),
@@ -1,4 +1,5 @@
1
1
  import { CodexClient } from "@ratley/codex-client";
2
+ import { TaskGraphReviewPayloadSchema } from "../../core/task-graph-review.js";
2
3
  function buildPlanningPrompt(spec, systemContext) {
3
4
  return [
4
5
  systemContext,
@@ -33,6 +34,36 @@ function buildTaskExecutionPrompt(task, runId, cwd, systemContext) {
33
34
  "Do not wrap it in markdown fences. Do not add any text after the JSON line. The JSON line is required.",
34
35
  ].join("\n\n");
35
36
  }
37
/**
 * Build the prompt for the pre-execution task-graph review, spelling out the
 * JSON shape of every allowed operation in the prompt text itself.
 *
 * @param {unknown} tasks - Task graph to review; embedded as pretty-printed JSON.
 * @param {string} systemContext - Context text placed at the top of the prompt.
 * @returns {string} Prompt sections separated by blank lines.
 */
function buildTaskGraphReviewPrompt(tasks, systemContext) {
    const operationShapes = [
        '- {"op":"update_task","taskId":"...","fields":{"name"?:string,"description"?:string,"acceptance_criteria"?:string[]}}',
        '- {"op":"add_task","task":<full task object>}',
        '- {"op":"remove_task","taskId":"..."}',
        '- {"op":"add_dependency","taskId":"...","dependsOn":"..."}',
        '- {"op":"remove_dependency","taskId":"...","dependsOn":"..."}',
    ];
    const sections = [
        systemContext,
        "You are Orca's pre-execution task-graph reviewer.",
        'Return JSON matching this shape exactly: {"changes":[...operations...]}',
        "Allowed operation shapes:",
        ...operationShapes,
        "Return ONLY JSON. No markdown.",
        "Current task graph:",
    ];
    sections.push(JSON.stringify(tasks, null, 2));
    return sections.join("\n\n");
}
53
/**
 * Parse and validate the Codex reply to a task-graph review prompt.
 *
 * The JSON text is taken from the reply with extractJson, parsed, and then
 * validated against TaskGraphReviewPayloadSchema.
 *
 * @param {string} raw - Agent reply text.
 * @returns {{ changes: unknown[], rawResponse: string }} Validated changes plus the untouched reply.
 * @throws {SyntaxError} When the extracted text is not valid JSON. The message says
 *   it came from the Codex review response, and the parser error is kept on `cause`.
 * @throws {Error} When the parsed value does not match the review payload schema.
 */
function parseTaskGraphReview(raw) {
    const jsonText = extractJson(raw);
    let parsed;
    try {
        parsed = JSON.parse(jsonText);
    }
    catch (error) {
        // A bare "Unexpected token ..." gives no hint of which response was malformed,
        // so add context. The SyntaxError type is kept for callers that check it.
        const reason = error instanceof Error ? error.message : String(error);
        const wrapped = new SyntaxError(`Codex review response is not valid JSON. ${reason}`);
        wrapped.cause = error;
        throw wrapped;
    }
    const result = TaskGraphReviewPayloadSchema.safeParse(parsed);
    if (!result.success) {
        // One "path: message" entry per schema issue; an empty path means the root value.
        const details = result.error.issues
            .map((issue) => `${issue.path.length > 0 ? issue.path.join(".") : "<root>"}: ${issue.message}`)
            .join("; ");
        throw new Error(`Codex review response failed schema validation. ${details}`);
    }
    return {
        changes: result.data.changes,
        rawResponse: raw,
    };
}
36
67
  function extractAgentText(result) {
37
68
  if (result.agentMessage.length > 0) {
38
69
  return result.agentMessage;
@@ -192,6 +223,21 @@ export async function createCodexSession(cwd, config) {
192
223
  rawResponse,
193
224
  };
194
225
  },
226
+ async reviewTaskGraph(tasks, systemContext) {
227
+ const effort = getEffort(config);
228
+ const result = effort
229
+ ? await client.runTurn({
230
+ threadId,
231
+ effort,
232
+ input: [{ type: "text", text: buildTaskGraphReviewPrompt(tasks, systemContext) }],
233
+ })
234
+ : await client.runTurn({
235
+ threadId,
236
+ input: [{ type: "text", text: buildTaskGraphReviewPrompt(tasks, systemContext) }],
237
+ });
238
+ const rawResponse = extractAgentText(result);
239
+ return parseTaskGraphReview(rawResponse);
240
+ },
195
241
  async executeTask(task, runId, systemContext) {
196
242
  const effort = getEffort(config);
197
243
  const result = effort
@@ -280,6 +326,20 @@ export async function createCodexSession(cwd, config) {
280
326
  });
281
327
  return result.reviewText;
282
328
  },
329
+ async runPrompt(prompt) {
330
+ const effort = getEffort(config);
331
+ const result = effort
332
+ ? await client.runTurn({
333
+ threadId,
334
+ effort,
335
+ input: [{ type: "text", text: prompt }],
336
+ })
337
+ : await client.runTurn({
338
+ threadId,
339
+ input: [{ type: "text", text: prompt }],
340
+ });
341
+ return extractAgentText(result);
342
+ },
283
343
  async disconnect() {
284
344
  await client.disconnect();
285
345
  },
@@ -299,6 +359,15 @@ export async function planSpec(spec, systemContext, config) {
299
359
  await session.disconnect();
300
360
  }
301
361
  }
362
/**
 * Review a task graph in a one-off Codex session created for the current
 * working directory.
 *
 * @param {unknown} tasks - Task graph to review.
 * @param {string} systemContext - Context text placed at the top of the prompt.
 * @param {unknown} config - Passed to createCodexSession.
 * @returns {Promise<{ changes: unknown[], rawResponse: string }>} The session's review result.
 */
export async function reviewTaskGraph(tasks, systemContext, config) {
    const codexSession = await createCodexSession(process.cwd(), config);
    try {
        const review = await codexSession.reviewTaskGraph(tasks, systemContext);
        return review;
    }
    finally {
        // Disconnect whether the review succeeded or threw.
        await codexSession.disconnect();
    }
}
302
371
  export async function executeTask(task, runId, config, systemContext) {
303
372
  const session = await createCodexSession(process.cwd(), config);
304
373
  try {
@@ -37,7 +37,7 @@ export async function cancelCommandHandler(options) {
37
37
  return;
38
38
  }
39
39
  const cancelledAt = new Date().toISOString();
40
- let cancelledTaskId = null;
40
+ let cancelledTaskId;
41
41
  const tasks = run.tasks.map((task) => {
42
42
  if (task.status === "in_progress") {
43
43
  cancelledTaskId = task.id;
@@ -1,13 +1,15 @@
1
1
  import { constants as fsConstants } from "node:fs";
2
- import { access, unlink, writeFile } from "node:fs/promises";
2
+ import { exec as execCallback } from "node:child_process";
3
+ import { access, readFile, unlink, writeFile } from "node:fs/promises";
3
4
  import os from "node:os";
4
5
  import path from "node:path";
5
6
  import { randomUUID } from "node:crypto";
7
+ import { promisify } from "node:util";
6
8
  import { InvalidArgumentError } from "commander";
7
9
  import { createCodexSession } from "../../agents/codex/session.js";
8
10
  import { ensureCodexMultiAgent } from "../../core/codex-config.js";
9
11
  import { resolveConfig } from "../../core/config-loader.js";
10
- import { runPlanner } from "../../core/planner.js";
12
+ import { InvalidPlanError, runPlanner } from "../../core/planner.js";
11
13
  import { runTaskRunner } from "../../core/task-runner.js";
12
14
  import { createOpenclawHookHandler, detectOpenclawAvailability } from "../../hooks/adapters/openclaw.js";
13
15
  import { createStdoutHookHandler } from "../../hooks/adapters/stdout.js";
@@ -15,10 +17,13 @@ import { HookDispatcher } from "../../hooks/dispatcher.js";
15
17
  import { RunStore } from "../../state/store.js";
16
18
  import { parseClaudeEffort, parseCodexEffort } from "../../types/effort.js";
17
19
  import { generateRunId } from "../../utils/ids.js";
20
+ const exec = promisify(execCallback);
18
21
  const ALL_HOOKS = [
19
22
  "onMilestone",
20
23
  "onTaskComplete",
21
24
  "onTaskFail",
25
+ "onInvalidPlan",
26
+ "onFindings",
22
27
  "onComplete",
23
28
  "onError"
24
29
  ];
@@ -26,6 +31,8 @@ const VALID_HOOK_NAMES = new Set([
26
31
  "onMilestone",
27
32
  "onTaskComplete",
28
33
  "onTaskFail",
34
+ "onInvalidPlan",
35
+ "onFindings",
29
36
  "onComplete",
30
37
  "onError"
31
38
  ]);
@@ -56,6 +63,9 @@ function computeFinalStatus(overallStatus, allTasksDone) {
56
63
  if (overallStatus === "cancelled") {
57
64
  return "cancelled";
58
65
  }
66
+ if (overallStatus === "failed") {
67
+ return "failed";
68
+ }
59
69
  return allTasksDone ? "completed" : "failed";
60
70
  }
61
71
  function buildCliCommandHooks(options) {
@@ -63,6 +73,8 @@ function buildCliCommandHooks(options) {
63
73
  ...(options.onMilestone ? { onMilestone: options.onMilestone } : {}),
64
74
  ...(options.onTaskComplete ? { onTaskComplete: options.onTaskComplete } : {}),
65
75
  ...(options.onTaskFail ? { onTaskFail: options.onTaskFail } : {}),
76
+ ...(options.onInvalidPlan ? { onInvalidPlan: options.onInvalidPlan } : {}),
77
+ ...(options.onFindings ? { onFindings: options.onFindings } : {}),
66
78
  ...(options.onComplete ? { onComplete: options.onComplete } : {}),
67
79
  ...(options.onError ? { onError: options.onError } : {})
68
80
  };
@@ -83,6 +95,87 @@ function applyExecutorOverrideForRun(config, options) {
83
95
  }
84
96
  return nextConfig;
85
97
  }
98
/**
 * Resolve the post-execution review settings, applying defaults.
 *
 * Defaults: enabled=true (falling back to `review.enabled` when
 * `review.execution.enabled` is unset), maxCycles=2, onFindings="auto_fix",
 * validatorAuto=true. Setting the ORCA_SKIP_VALIDATORS environment variable to
 * "1" forces validatorAuto to false. `validatorCommands` and `prompt` are only
 * present on the result when they are defined in the config.
 *
 * @param {object} [config] - Effective Orca config; may be undefined.
 * @returns {object} The resolved review settings.
 */
function getExecutionReviewConfig(config) {
    const reviewSettings = config?.review ?? {};
    const { execution } = reviewSettings;
    const validator = execution?.validator;
    const validatorsSkipped = process.env.ORCA_SKIP_VALIDATORS === "1";
    const resolved = {
        enabled: execution?.enabled ?? reviewSettings.enabled ?? true,
        maxCycles: execution?.maxCycles ?? 2,
        onFindings: execution?.onFindings ?? "auto_fix",
        validatorAuto: validatorsSkipped ? false : (validator?.auto ?? true),
    };
    if (validator?.commands !== undefined) {
        resolved.validatorCommands = validator.commands;
    }
    if (execution?.prompt !== undefined) {
        resolved.prompt = execution.prompt;
    }
    return resolved;
}
111
/**
 * Derive validator commands from the `scripts` of the package.json in the
 * current working directory.
 *
 * A `validate` script wins outright. Otherwise every present script among
 * lint, typecheck, test and build is returned, in that order. Best-effort:
 * a missing or unreadable manifest yields an empty list.
 *
 * @returns {Promise<string[]>} `npm run <script>` commands; possibly empty.
 */
async function detectValidatorCommands() {
    let scripts;
    try {
        const manifestPath = path.join(process.cwd(), "package.json");
        const manifest = JSON.parse(await readFile(manifestPath, "utf8"));
        scripts = manifest.scripts ?? {};
    }
    catch {
        return [];
    }
    if (typeof scripts.validate === "string") {
        return ["npm run validate"];
    }
    const commands = [];
    for (const name of ["lint", "typecheck", "test", "build"]) {
        if (typeof scripts[name] === "string") {
            commands.push(`npm run ${name}`);
        }
    }
    return commands;
}
125
// Validator output (test runners, linters) can easily exceed exec's default
// 1 MiB maxBuffer. When that happens the child is terminated and the command is
// reported as failed with truncated output, so give it more room.
const VALIDATOR_MAX_BUFFER_BYTES = 16 * 1024 * 1024;
/**
 * Run validator shell commands one after another in the current working
 * directory and collect their outcomes. A failing command never throws; it is
 * recorded and the remaining commands still run.
 *
 * @param {string[]} commands - Shell commands to execute, in order.
 * @returns {Promise<Array<{ command: string, exitCode: number, output: string }>>}
 *   One entry per command. `output` is the trimmed stdout followed by stderr.
 *   For a failure that captured no output, it is the error message instead.
 *   `exitCode` is 1 when the failure carries no numeric exit code.
 */
async function runValidatorCommands(commands) {
    const results = [];
    for (const command of commands) {
        try {
            const { stdout, stderr } = await exec(command, {
                cwd: process.cwd(),
                maxBuffer: VALIDATOR_MAX_BUFFER_BYTES,
            });
            results.push({ command, exitCode: 0, output: `${stdout}${stderr}`.trim() });
        }
        catch (error) {
            const failed = error;
            const captured = `${failed.stdout ?? ""}${failed.stderr ?? ""}`.trim();
            results.push({
                command,
                exitCode: typeof failed.code === "number" ? failed.code : 1,
                // Without any captured output, the error message is the only
                // diagnostic available to the reviewer.
                output: captured.length > 0 ? captured : String(failed.message ?? "").trim(),
            });
        }
    }
    return results;
}
143
/**
 * Build the prompt for one post-execution review cycle.
 *
 * @param {number} cycleIndex - Cycle number shown in the prompt.
 * @param {unknown} validationResults - Validator results; embedded as pretty-printed JSON.
 * @param {string} [extraPrompt] - Optional extra reviewer instructions, appended when truthy.
 * @returns {string} Prompt sections separated by blank lines.
 */
function buildPostExecutionReviewPrompt(cycleIndex, validationResults, extraPrompt) {
    const sections = [
        "You are Orca's post-execution reviewer.",
        "Inspect uncommitted repository changes and validation command output.",
        "If there are fixable findings, apply fixes directly in the workspace before responding.",
        "Respond with JSON only using this exact shape:",
        '{"summary":"...","findings":["..."],"fixed":true|false}',
        `Cycle: ${cycleIndex}`,
        "Validation output:",
        JSON.stringify(validationResults, null, 2),
    ];
    if (extraPrompt) {
        sections.push("Additional reviewer instructions:", extraPrompt);
    }
    return sections.join("\n\n");
}
156
/**
 * Parse the post-execution reviewer's reply into a normalized result.
 *
 * The reply is expected to be a JSON object shaped like
 * `{"summary": string, "findings": string[], "fixed": boolean}`, optionally
 * inside a Markdown code fence. If the reply (or fence body) does not parse on
 * its own, the text between the first `{` and the last `}` is tried as well, so
 * a JSON object surrounded by prose is still accepted.
 *
 * Anything that is not a JSON object (invalid JSON, `null`, arrays, primitives)
 * is reported as a single `review-response-parse-error` finding rather than
 * being read as a clean review.
 *
 * @param {string} raw - Raw reviewer reply.
 * @returns {{ findings: string[], summary: string, fixed: boolean, rawResponse: string }}
 *   Non-string findings are dropped. `summary` falls back to the joined
 *   findings, or "No findings." when there are none. `fixed` is true only for a
 *   literal `true`.
 */
function parseExecutionReviewResult(raw) {
    const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)```/);
    const candidate = (fenced?.[1] ?? raw).trim();
    const candidates = [candidate];
    const firstBrace = candidate.indexOf("{");
    const lastBrace = candidate.lastIndexOf("}");
    if (firstBrace !== -1 && lastBrace > firstBrace) {
        const embedded = candidate.slice(firstBrace, lastBrace + 1);
        if (embedded !== candidate) {
            candidates.push(embedded);
        }
    }
    let firstError;
    for (const text of candidates) {
        try {
            const parsed = JSON.parse(text);
            // A bare number, string, array or null has no findings field. Reading it
            // as "no findings" would end the review loop as if the review were clean.
            if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
                throw new TypeError("expected a JSON object shaped like {summary, findings, fixed}");
            }
            const findings = Array.isArray(parsed.findings)
                ? parsed.findings.filter((item) => typeof item === "string")
                : [];
            return {
                findings,
                summary: typeof parsed.summary === "string" ? parsed.summary : (findings.length > 0 ? findings.join("; ") : "No findings."),
                fixed: parsed.fixed === true,
                rawResponse: raw
            };
        }
        catch (error) {
            // Report the failure of the primary candidate, not of the fallback.
            if (firstError === undefined) {
                firstError = error;
            }
        }
    }
    const message = firstError instanceof Error ? firstError.message : String(firstError);
    return {
        findings: [`review-response-parse-error: ${message}`],
        summary: `Post-execution reviewer returned invalid JSON (${message})`,
        fixed: false,
        rawResponse: raw
    };
}
86
179
  export async function runCommandHandler(options) {
87
180
  if (options.codexOnly && options.claudeOnly) {
88
181
  throw new Error("--codex-only and --claude-only are mutually exclusive; choose only one executor override.");
@@ -118,11 +211,6 @@ export async function runCommandHandler(options) {
118
211
  console.log(`Run ID: ${runId}`);
119
212
  const store = createStore();
120
213
  await store.createRun(runId, specPath);
121
- await runPlanner(specPath, store, runId, effectiveConfig);
122
- await store.updateRun(runId, {
123
- mode: "run",
124
- overallStatus: "running"
125
- });
126
214
  const cliCommandHooks = buildCliCommandHooks(options);
127
215
  const dispatcher = new HookDispatcher({
128
216
  commandHooks: {
@@ -147,21 +235,44 @@ export async function runCommandHandler(options) {
147
235
  }
148
236
  }
149
237
  if (orcaConfig?.hooks) {
150
- for (const [hookName, handler] of Object.entries(orcaConfig.hooks)) {
151
- if (!isHookName(hookName)) {
152
- console.error(`Warning: ignoring unknown hook name in config: ${hookName}`);
238
+ for (const [hookNameRaw, handler] of Object.entries(orcaConfig.hooks)) {
239
+ if (!isHookName(hookNameRaw)) {
240
+ console.error(`Warning: ignoring unknown hook name in config: ${hookNameRaw}`);
153
241
  continue;
154
242
  }
155
243
  if (typeof handler !== "function") {
156
- console.error(`Warning: ignoring invalid hook handler for ${hookName}; expected function, got ${typeof handler}`);
244
+ console.error(`Warning: ignoring invalid hook handler for ${hookNameRaw}; expected function, got ${typeof handler}`);
157
245
  continue;
158
246
  }
247
+ const hookName = hookNameRaw;
159
248
  dispatcher.on(hookName, handler);
160
249
  }
161
250
  }
162
251
  const emitHook = async (event) => {
163
252
  await dispatcher.dispatch(event);
164
253
  };
254
+ try {
255
+ await runPlanner(specPath, store, runId, effectiveConfig);
256
+ }
257
+ catch (error) {
258
+ if (error instanceof InvalidPlanError) {
259
+ await emitHook({
260
+ runId: runId,
261
+ hook: "onInvalidPlan",
262
+ message: `invalid-plan:${error.stage}`,
263
+ timestamp: new Date().toISOString(),
264
+ error: error.message,
265
+ metadata: {
266
+ stage: error.stage
267
+ }
268
+ });
269
+ }
270
+ throw error;
271
+ }
272
+ await store.updateRun(runId, {
273
+ mode: "run",
274
+ overallStatus: "running"
275
+ });
165
276
  const executor = effectiveConfig?.executor ?? "codex";
166
277
  if (executor === "codex") {
167
278
  const cwd = process.cwd();
@@ -197,9 +308,62 @@ export async function runCommandHandler(options) {
197
308
  emitHook,
198
309
  executeTask: (task, taskRunId, _config, systemContext) => codexSession.executeTask(task, taskRunId, systemContext),
199
310
  });
200
- const reviewText = await codexSession.reviewChanges();
201
- console.log("Codex post-execution review:");
202
- console.log(reviewText);
311
+ const reviewConfig = getExecutionReviewConfig(effectiveConfig);
312
+ const finalSummaries = [];
313
+ const runAfterExecution = await store.getRun(runId);
314
+ if (reviewConfig.enabled && (runAfterExecution?.tasks.length ?? 0) > 0) {
315
+ const configured = reviewConfig.validatorCommands?.filter((item) => item.trim().length > 0) ?? [];
316
+ const validatorCommands = configured.length > 0
317
+ ? configured
318
+ : (reviewConfig.validatorAuto ? await detectValidatorCommands() : []);
319
+ for (let cycleIndex = 1; cycleIndex <= reviewConfig.maxCycles; cycleIndex += 1) {
320
+ const validationResults = await runValidatorCommands(validatorCommands);
321
+ const prompt = buildPostExecutionReviewPrompt(cycleIndex, validationResults, reviewConfig.prompt);
322
+ const rawReview = await codexSession.runPrompt(prompt);
323
+ const reviewResult = parseExecutionReviewResult(rawReview);
324
+ finalSummaries.push(`cycle ${cycleIndex}: ${reviewResult.summary}`);
325
+ if (reviewResult.findings.length === 0) {
326
+ break;
327
+ }
328
+ await emitHook({
329
+ runId: runId,
330
+ hook: "onFindings",
331
+ message: reviewResult.summary,
332
+ timestamp: new Date().toISOString(),
333
+ metadata: {
334
+ findingsCount: reviewResult.findings.length,
335
+ findingsSummary: reviewResult.summary,
336
+ cycleIndex
337
+ }
338
+ });
339
+ if (reviewConfig.onFindings === "report_only") {
340
+ break;
341
+ }
342
+ if (reviewConfig.onFindings === "fail") {
343
+ await store.updateRun(runId, { overallStatus: "failed" });
344
+ break;
345
+ }
346
+ if (!reviewResult.fixed) {
347
+ break;
348
+ }
349
+ }
350
+ }
351
+ let fallbackReview = "";
352
+ if (reviewConfig.enabled) {
353
+ fallbackReview = await codexSession.reviewChanges();
354
+ }
355
+ console.log("Codex post-execution final review summary:");
356
+ if (finalSummaries.length > 0) {
357
+ for (const summary of finalSummaries) {
358
+ console.log(`- ${summary}`);
359
+ }
360
+ }
361
+ else {
362
+ console.log("- Post-execution review loop disabled.");
363
+ }
364
+ if (fallbackReview.length > 0) {
365
+ console.log(fallbackReview);
366
+ }
203
367
  }
204
368
  finally {
205
369
  await codexSession.disconnect();
@@ -253,6 +417,8 @@ export function registerRunCommand(program) {
253
417
  .option("--on-milestone <cmd>", "Shell hook command for onMilestone")
254
418
  .option("--on-task-complete <cmd>", "Shell hook command for onTaskComplete")
255
419
  .option("--on-task-fail <cmd>", "Shell hook command for onTaskFail")
420
+ .option("--on-invalid-plan <cmd>", "Shell hook command for onInvalidPlan")
421
+ .option("--on-findings <cmd>", "Shell hook command for onFindings")
256
422
  .option("--on-complete <cmd>", "Shell hook command for onComplete")
257
423
  .option("--on-error <cmd>", "Shell hook command for onError")
258
424
  .action(async (goal, commandOptions) => {