npm - orcastrator - Versions diffs - 0.2.14 → 0.2.16 - Mend

orcastrator 0.2.14 → 0.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/README.md +134 -7
package/dist/agents/claude/session.js +55 -0
package/dist/agents/codex/session.js +69 -0
package/dist/cli/commands/cancel.js +1 -1
package/dist/cli/commands/run.js +180 -14
package/dist/cli/commands/setup.js +146 -0
package/dist/core/config-loader.js +119 -4
package/dist/core/planner.js +89 -6
package/dist/core/task-graph-review.js +132 -0
package/dist/hooks/dispatcher.js +103 -58
package/dist/types/config-typing.typecheck.js +32 -0
package/dist/types/index.js +3 -0
package/dist/utils/agent-json.js +3 -3
package/package.json +12 -5

package/README.md CHANGED

@@ -22,7 +22,33 @@ Start with a plain-language goal:
 orca "add auth to the app"
 ```
-Orca will create a run, plan tasks, execute them, and persist run state.
+Orca will create a run, plan tasks, run a pre-execution review/improvement pass on the task graph, execute the reviewed graph, and persist run state.
+### Pre-execution review-improvement stage
+After planning, Orca runs a structured review pass that can edit the task graph before execution starts. The review output is schema-validated and supports concrete graph operations:
+- update task fields (`name`, `description`, `acceptance_criteria`)
+- add/remove task
+- add/remove dependency
+The edited graph is re-validated as a DAG. If review output is invalid, Orca fails with an actionable error by default. You can configure `review.plan.onInvalid: "warn_skip"` to log a warning and continue with the original planner graph.
+### Post-execution review / fix cycles
+After task execution, Orca can run deterministic validation commands, then ask Codex to review findings and optionally auto-fix issues in bounded cycles.
+- `review.execution.enabled` (default `true`)
+- `review.execution.maxCycles` (default `2`)
+- `review.execution.onFindings`:
+  - `auto_fix` (default): apply fixes and continue until clean or max cycles
+  - `report_only`: report findings and stop
+  - `fail`: mark run failed when findings exist
+- `review.execution.validator.auto` (default `true`): auto-detect validator commands from `package.json`
+- `review.execution.validator.commands` (optional explicit command list)
+- `review.execution.prompt` (optional custom reviewer instruction)
+When using the Codex executor, Orca prints a final post-execution review summary.
 ## Spec And Plan Files
@@ -79,19 +105,47 @@ Orca auto-discovers config in this order:
 Later entries override earlier ones.
-```js
-// orca.config.js
+```ts
+// orca.config.ts
 export default {
   runsDir: "./.orca/runs",
   sessionLogs: "./session-logs",
+  // Function hooks are first-class and strongly typed per hook.
+  hooks: {
+    onTaskComplete: async (event, context) => {
+      console.log(`task done: ${event.taskId} (${event.taskName}) from pid ${context.pid}`);
+    },
+    onError: async (event) => {
+      console.error(event.error);
+    }
+  },
+  // Command hooks remain supported; payload is sent as stdin JSON.
   hookCommands: {
-    onTaskComplete: "echo task done: $ORCA_TASK_NAME",
+    onTaskComplete: "node -e 'let s=\"\";process.stdin.on(\"data\",d=>s+=d);process.stdin.on(\"end\",()=>{const p=JSON.parse(s);console.log(`task done: ${p.taskId}`);})'",
     onComplete: "echo run complete",
     onError: "echo run failed"
   },
   codex: {
     model: "gpt-5.3-codex",       // override the codex model
     multiAgent: true,              // enable codex multi-agent (see below)
+  },
+  review: {
+    plan: {
+      enabled: true,               // default true
+      onInvalid: "fail"           // or "warn_skip"
+    },
+    execution: {
+      enabled: true,               // default true
+      maxCycles: 2,                // default 2
+      onFindings: "auto_fix",     // "auto_fix" | "report_only" | "fail"
+      validator: {
+        auto: true,                // default true
+        // commands: ["npm run validate"]
+      },
+      // prompt: "Prefer minimal safe fixes"
+    }
   }
 };
 ```
@@ -135,6 +189,8 @@ Global:
 - `--on-milestone <cmd>`
 - `--on-task-complete <cmd>`
 - `--on-task-fail <cmd>`
+- `--on-invalid-plan <cmd>`
+- `--on-findings <cmd>`
 - `--on-complete <cmd>`
 - `--on-error <cmd>`
@@ -191,6 +247,8 @@ Global:
 - `--check` (API key lookup order: CLI flag → process env → `~/.openclaw/openclaw.json` `env.vars` → `~/.claude/.env` → `~/.config/claude/.env`)
 - `--global`
 - `--project`
+- `--project-config-template`
+- `--skip-project-config`
 `orca help`:
@@ -204,10 +262,24 @@ Hook names:
 - `onMilestone`
 - `onTaskComplete`
 - `onTaskFail`
+- `onInvalidPlan`
+- `onFindings`
 - `onComplete`
 - `onError`
-Run hooks from CLI with `--on-...` flags or from config via `hookCommands` / `hooks`.
+Run hooks from CLI with `--on-...` flags or from config via `hooks` / `hookCommands`.
+Unknown hook keys in config are rejected at load time with an explicit allowed-hook list.
+Hook contract:
+- Function hooks (`config.hooks`) are the primary path and are strongly typed per hook event.
+- Every function hook receives `(event, context)` where `context` is deterministic: `{ cwd, pid, invokedAt }`.
+- Command hooks (`--on-...` and `config.hookCommands`) receive the full event payload as JSON over stdin.
+- Orca no longer injects hook payload via `ORCA_*` env vars.
+Migration note:
+- If your hook commands previously read any `ORCA_*` hook env payload (`ORCA_HOOK_PAYLOAD_JSON`, `ORCA_MSG`, `ORCA_RUN_ID`, etc.), switch them to parse stdin JSON instead.
+- Existing CLI hook flags are preserved (`--on-milestone`, `--on-error`, etc.); only payload transport changed.
+- Smoke-test the hook contract (function + command + concurrency): `npm run smoke:hooks`.
 ### Run ID Format
@@ -222,6 +294,15 @@ Run IDs are generated as:
 - Project: `./orca.config.js` or `./orca.config.ts`
 - Explicit: `--config <path>`
+### Project Instruction Files
+During planning, Orca automatically injects project instruction files when present:
+1. `AGENTS.md`
+2. `CLAUDE.md`
+Files are discovered from the project root (nearest `.git` from the spec/task context) and injected in that order.
 ### Run State Locations
 - Run status: `<runsDir>/<run-id>/status.json`
@@ -230,8 +311,54 @@ Run IDs are generated as:
 ## Development
+Install dependencies with npm (primary lockfile):
+```bash
+npm install
+```
+Run local development and tests with Bun (faster runtime for this project):
 ```bash
-bun install
-bun test
 bun run src/cli/index.ts "your goal here"
+bun test src
+```
+## Validation pipeline
+Use the full validation gate before opening/publishing changes:
+```bash
+npm run validate
+```
+This runs, in order:
+1. `npm run lint` (Oxlint syntax/style/static rules)
+2. `npm run lint:type-aware` (Oxlint + tsgolint alpha type-aware + type-check diagnostics)
+3. `npm run typecheck` (TypeScript Native Preview via `tsgo --noEmit`, with environment fallback to `tsc --noEmit`)
+4. `npm run test`
+5. `npm run build`
+`npm run build` remains `tsc` because the native preview compiler is used here as a fast typecheck gate; production JS emission stays on stable `typescript` for predictable package output.
+## Package manager + lockfile policy
+Orca uses a mixed runtime/tooling model on purpose:
+- **npm is canonical for dependency resolution, release builds, and deterministic installs**.
+- **Bun is used as a runtime/test runner in local workflows** (`dev`, `start`, `test`).
+Commit both lockfiles:
+- `package-lock.json` — canonical dependency graph for npm/CI/publish
+- `bun.lock` — Bun runtime resolution parity for local Bun commands
+When dependencies change, update both lockfiles in the same PR:
+```bash
+npm install
+bun install
 ```
+This keeps npm and Bun behavior aligned without forcing a disruptive full migration.

package/dist/agents/claude/session.js CHANGED

@@ -1,5 +1,6 @@
 import { query } from "@anthropic-ai/claude-agent-sdk";
 import { z } from "zod";
+import { TaskGraphReviewPayloadSchema } from "../../core/task-graph-review.js";
 import { parseAgentJson } from "../../utils/agent-json.js";
 const PlannedTaskSchema = z.object({
     id: z.string().min(1),
@@ -101,6 +102,23 @@ const EXECUTION_OUTPUT_FORMAT = {
     type: "json_schema",
     schema: EXECUTION_OUTPUT_SCHEMA,
 };
+const REVIEW_OUTPUT_SCHEMA = {
+    type: "object",
+    additionalProperties: false,
+    required: ["changes"],
+    properties: {
+        changes: {
+            type: "array",
+            items: {
+                type: "object"
+            }
+        }
+    }
+};
+const REVIEW_OUTPUT_FORMAT = {
+    type: "json_schema",
+    schema: REVIEW_OUTPUT_SCHEMA,
+};
 function buildPlanningPrompt(spec, systemContext) {
     return [
         systemContext,
@@ -127,6 +145,27 @@ function buildTaskExecutionPrompt(task, runId, cwd, systemContext) {
         "If you cannot complete the task, set outcome=failed and provide a concise error.",
     ].join("\n\n");
 }
+function buildTaskGraphReviewPrompt(tasks, systemContext) {
+    return [
+        systemContext,
+        "You are Orca's pre-execution task-graph reviewer.",
+        "Return only structured review operations in the configured schema.",
+        "Allowed operations: update_task (name/description/acceptance_criteria), add_task, remove_task, add_dependency, remove_dependency.",
+        "Return an empty changes array if no edits are needed.",
+        "Current task graph JSON:",
+        JSON.stringify(tasks, null, 2)
+    ].join("\n\n");
+}
+function parseStructuredTaskGraphReviewPayload(payload, rawResponse = "") {
+    const result = TaskGraphReviewPayloadSchema.safeParse(payload);
+    if (!result.success) {
+        throw formatSchemaError("Claude structured review payload failed schema validation", result.error);
+    }
+    return {
+        changes: result.data.changes,
+        rawResponse
+    };
+}
 function extractAssistantText(message) {
     if (!message || typeof message !== "object") {
         return null;
@@ -277,6 +316,22 @@ export async function planSpec(spec, systemContext, config) {
         claudeQuery.close();
     }
 }
+export async function reviewTaskGraph(tasks, systemContext, config) {
+    const claudeQuery = query({
+        prompt: buildTaskGraphReviewPrompt(tasks, systemContext),
+        options: buildClaudeQueryOptions(config, REVIEW_OUTPUT_FORMAT),
+    });
+    try {
+        const { rawResponse, structuredOutput } = await collectSessionResult(claudeQuery);
+        if (structuredOutput === undefined) {
+            throwMissingStructuredOutput("review");
+        }
+        return parseStructuredTaskGraphReviewPayload(structuredOutput, rawResponse);
+    }
+    finally {
+        claudeQuery.close();
+    }
+}
 export async function executeTask(task, runId, config, systemContext) {
     const claudeQuery = query({
         prompt: buildTaskExecutionPrompt(task, runId, process.cwd(), systemContext),

package/dist/agents/codex/session.js CHANGED

@@ -1,4 +1,5 @@
 import { CodexClient } from "@ratley/codex-client";
+import { TaskGraphReviewPayloadSchema } from "../../core/task-graph-review.js";
 function buildPlanningPrompt(spec, systemContext) {
     return [
         systemContext,
@@ -33,6 +34,36 @@ function buildTaskExecutionPrompt(task, runId, cwd, systemContext) {
         "Do not wrap it in markdown fences. Do not add any text after the JSON line. The JSON line is required.",
     ].join("\n\n");
 }
+function buildTaskGraphReviewPrompt(tasks, systemContext) {
+    return [
+        systemContext,
+        "You are Orca's pre-execution task-graph reviewer.",
+        "Return JSON matching this shape exactly: {\"changes\":[...operations...]}",
+        "Allowed operation shapes:",
+        "- {\"op\":\"update_task\",\"taskId\":\"...\",\"fields\":{\"name\"?:string,\"description\"?:string,\"acceptance_criteria\"?:string[]}}",
+        "- {\"op\":\"add_task\",\"task\":<full task object>}",
+        "- {\"op\":\"remove_task\",\"taskId\":\"...\"}",
+        "- {\"op\":\"add_dependency\",\"taskId\":\"...\",\"dependsOn\":\"...\"}",
+        "- {\"op\":\"remove_dependency\",\"taskId\":\"...\",\"dependsOn\":\"...\"}",
+        "Return ONLY JSON. No markdown.",
+        "Current task graph:",
+        JSON.stringify(tasks, null, 2),
+    ].join("\n\n");
+}
+function parseTaskGraphReview(raw) {
+    const parsed = JSON.parse(extractJson(raw));
+    const result = TaskGraphReviewPayloadSchema.safeParse(parsed);
+    if (!result.success) {
+        const details = result.error.issues
+            .map((issue) => `${issue.path.length > 0 ? issue.path.join(".") : "<root>"}: ${issue.message}`)
+            .join("; ");
+        throw new Error(`Codex review response failed schema validation. ${details}`);
+    }
+    return {
+        changes: result.data.changes,
+        rawResponse: raw,
+    };
+}
 function extractAgentText(result) {
     if (result.agentMessage.length > 0) {
         return result.agentMessage;
@@ -192,6 +223,21 @@ export async function createCodexSession(cwd, config) {
                 rawResponse,
             };
         },
+        async reviewTaskGraph(tasks, systemContext) {
+            const effort = getEffort(config);
+            const result = effort
+                ? await client.runTurn({
+                    threadId,
+                    effort,
+                    input: [{ type: "text", text: buildTaskGraphReviewPrompt(tasks, systemContext) }],
+                })
+                : await client.runTurn({
+                    threadId,
+                    input: [{ type: "text", text: buildTaskGraphReviewPrompt(tasks, systemContext) }],
+                });
+            const rawResponse = extractAgentText(result);
+            return parseTaskGraphReview(rawResponse);
+        },
         async executeTask(task, runId, systemContext) {
             const effort = getEffort(config);
             const result = effort
@@ -280,6 +326,20 @@ export async function createCodexSession(cwd, config) {
             });
             return result.reviewText;
         },
+        async runPrompt(prompt) {
+            const effort = getEffort(config);
+            const result = effort
+                ? await client.runTurn({
+                    threadId,
+                    effort,
+                    input: [{ type: "text", text: prompt }],
+                })
+                : await client.runTurn({
+                    threadId,
+                    input: [{ type: "text", text: prompt }],
+                });
+            return extractAgentText(result);
+        },
         async disconnect() {
             await client.disconnect();
         },
@@ -299,6 +359,15 @@ export async function planSpec(spec, systemContext, config) {
         await session.disconnect();
     }
 }
+export async function reviewTaskGraph(tasks, systemContext, config) {
+    const session = await createCodexSession(process.cwd(), config);
+    try {
+        return await session.reviewTaskGraph(tasks, systemContext);
+    }
+    finally {
+        await session.disconnect();
+    }
+}
 export async function executeTask(task, runId, config, systemContext) {
     const session = await createCodexSession(process.cwd(), config);
     try {

package/dist/cli/commands/cancel.js CHANGED

@@ -37,7 +37,7 @@ export async function cancelCommandHandler(options) {
         return;
     }
     const cancelledAt = new Date().toISOString();
-    let cancelledTaskId = null;
+    let cancelledTaskId;
     const tasks = run.tasks.map((task) => {
         if (task.status === "in_progress") {
             cancelledTaskId = task.id;

package/dist/cli/commands/run.js CHANGED

@@ -1,13 +1,15 @@
 import { constants as fsConstants } from "node:fs";
-import { access, unlink, writeFile } from "node:fs/promises";
+import { exec as execCallback } from "node:child_process";
+import { access, readFile, unlink, writeFile } from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
 import { randomUUID } from "node:crypto";
+import { promisify } from "node:util";
 import { InvalidArgumentError } from "commander";
 import { createCodexSession } from "../../agents/codex/session.js";
 import { ensureCodexMultiAgent } from "../../core/codex-config.js";
 import { resolveConfig } from "../../core/config-loader.js";
-import { runPlanner } from "../../core/planner.js";
+import { InvalidPlanError, runPlanner } from "../../core/planner.js";
 import { runTaskRunner } from "../../core/task-runner.js";
 import { createOpenclawHookHandler, detectOpenclawAvailability } from "../../hooks/adapters/openclaw.js";
 import { createStdoutHookHandler } from "../../hooks/adapters/stdout.js";
@@ -15,10 +17,13 @@ import { HookDispatcher } from "../../hooks/dispatcher.js";
 import { RunStore } from "../../state/store.js";
 import { parseClaudeEffort, parseCodexEffort } from "../../types/effort.js";
 import { generateRunId } from "../../utils/ids.js";
+const exec = promisify(execCallback);
 const ALL_HOOKS = [
     "onMilestone",
     "onTaskComplete",
     "onTaskFail",
+    "onInvalidPlan",
+    "onFindings",
     "onComplete",
     "onError"
 ];
@@ -26,6 +31,8 @@ const VALID_HOOK_NAMES = new Set([
     "onMilestone",
     "onTaskComplete",
     "onTaskFail",
+    "onInvalidPlan",
+    "onFindings",
     "onComplete",
     "onError"
 ]);
@@ -56,6 +63,9 @@ function computeFinalStatus(overallStatus, allTasksDone) {
     if (overallStatus === "cancelled") {
         return "cancelled";
     }
+    if (overallStatus === "failed") {
+        return "failed";
+    }
     return allTasksDone ? "completed" : "failed";
 }
 function buildCliCommandHooks(options) {
@@ -63,6 +73,8 @@ function buildCliCommandHooks(options) {
         ...(options.onMilestone ? { onMilestone: options.onMilestone } : {}),
         ...(options.onTaskComplete ? { onTaskComplete: options.onTaskComplete } : {}),
         ...(options.onTaskFail ? { onTaskFail: options.onTaskFail } : {}),
+        ...(options.onInvalidPlan ? { onInvalidPlan: options.onInvalidPlan } : {}),
+        ...(options.onFindings ? { onFindings: options.onFindings } : {}),
         ...(options.onComplete ? { onComplete: options.onComplete } : {}),
         ...(options.onError ? { onError: options.onError } : {})
     };
@@ -83,6 +95,87 @@ function applyExecutorOverrideForRun(config, options) {
     }
     return nextConfig;
 }
+function getExecutionReviewConfig(config) {
+    const review = (config?.review ?? {});
+    const executionConfig = review.execution;
+    const skipValidators = process.env.ORCA_SKIP_VALIDATORS === "1";
+    return {
+        enabled: executionConfig?.enabled ?? review.enabled ?? true,
+        maxCycles: executionConfig?.maxCycles ?? 2,
+        onFindings: executionConfig?.onFindings ?? "auto_fix",
+        validatorAuto: skipValidators ? false : (executionConfig?.validator?.auto ?? true),
+        ...(executionConfig?.validator?.commands !== undefined ? { validatorCommands: executionConfig.validator.commands } : {}),
+        ...(executionConfig?.prompt !== undefined ? { prompt: executionConfig.prompt } : {})
+    };
+}
+async function detectValidatorCommands() {
+    try {
+        const packageJson = JSON.parse(await readFile(path.join(process.cwd(), "package.json"), "utf8"));
+        const scripts = packageJson.scripts ?? {};
+        if (typeof scripts.validate === "string") {
+            return ["npm run validate"];
+        }
+        const fallbacks = ["lint", "typecheck", "test", "build"].filter((name) => typeof scripts[name] === "string");
+        return fallbacks.map((name) => `npm run ${name}`);
+    }
+    catch {
+        return [];
+    }
+}
+async function runValidatorCommands(commands) {
+    const results = [];
+    for (const command of commands) {
+        try {
+            const { stdout, stderr } = await exec(command, { cwd: process.cwd() });
+            results.push({ command, exitCode: 0, output: `${stdout}${stderr}`.trim() });
+        }
+        catch (error) {
+            const failed = error;
+            results.push({
+                command,
+                exitCode: typeof failed.code === "number" ? failed.code : 1,
+                output: `${failed.stdout ?? ""}${failed.stderr ?? ""}`.trim()
+            });
+        }
+    }
+    return results;
+}
+function buildPostExecutionReviewPrompt(cycleIndex, validationResults, extraPrompt) {
+    return [
+        "You are Orca's post-execution reviewer.",
+        "Inspect uncommitted repository changes and validation command output.",
+        "If there are fixable findings, apply fixes directly in the workspace before responding.",
+        "Respond with JSON only using this exact shape:",
+        '{"summary":"...","findings":["..."],"fixed":true|false}',
+        `Cycle: ${cycleIndex}`,
+        "Validation output:",
+        JSON.stringify(validationResults, null, 2),
+        ...(extraPrompt ? ["Additional reviewer instructions:", extraPrompt] : [])
+    ].join("\n\n");
+}
+function parseExecutionReviewResult(raw) {
+    const match = raw.match(/```(?:json)?\s*([\s\S]*?)```/);
+    const candidate = (match?.[1] ?? raw).trim();
+    try {
+        const parsed = JSON.parse(candidate);
+        const findings = Array.isArray(parsed.findings) ? parsed.findings.filter((item) => typeof item === "string") : [];
+        return {
+            findings,
+            summary: typeof parsed.summary === "string" ? parsed.summary : (findings.length > 0 ? findings.join("; ") : "No findings."),
+            fixed: parsed.fixed === true,
+            rawResponse: raw
+        };
+    }
+    catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        return {
+            findings: [`review-response-parse-error: ${message}`],
+            summary: `Post-execution reviewer returned invalid JSON (${message})`,
+            fixed: false,
+            rawResponse: raw
+        };
+    }
+}
 export async function runCommandHandler(options) {
     if (options.codexOnly && options.claudeOnly) {
         throw new Error("--codex-only and --claude-only are mutually exclusive; choose only one executor override.");
@@ -118,11 +211,6 @@ export async function runCommandHandler(options) {
         console.log(`Run ID: ${runId}`);
         const store = createStore();
         await store.createRun(runId, specPath);
-        await runPlanner(specPath, store, runId, effectiveConfig);
-        await store.updateRun(runId, {
-            mode: "run",
-            overallStatus: "running"
-        });
         const cliCommandHooks = buildCliCommandHooks(options);
         const dispatcher = new HookDispatcher({
             commandHooks: {
@@ -147,21 +235,44 @@ export async function runCommandHandler(options) {
             }
         }
         if (orcaConfig?.hooks) {
-            for (const [hookName, handler] of Object.entries(orcaConfig.hooks)) {
-                if (!isHookName(hookName)) {
-                    console.error(`Warning: ignoring unknown hook name in config: ${hookName}`);
+            for (const [hookNameRaw, handler] of Object.entries(orcaConfig.hooks)) {
+                if (!isHookName(hookNameRaw)) {
+                    console.error(`Warning: ignoring unknown hook name in config: ${hookNameRaw}`);
                     continue;
                 }
                 if (typeof handler !== "function") {
-                    console.error(`Warning: ignoring invalid hook handler for ${hookName}; expected function, got ${typeof handler}`);
+                    console.error(`Warning: ignoring invalid hook handler for ${hookNameRaw}; expected function, got ${typeof handler}`);
                     continue;
                 }
+                const hookName = hookNameRaw;
                 dispatcher.on(hookName, handler);
             }
         }
         const emitHook = async (event) => {
             await dispatcher.dispatch(event);
         };
+        try {
+            await runPlanner(specPath, store, runId, effectiveConfig);
+        }
+        catch (error) {
+            if (error instanceof InvalidPlanError) {
+                await emitHook({
+                    runId: runId,
+                    hook: "onInvalidPlan",
+                    message: `invalid-plan:${error.stage}`,
+                    timestamp: new Date().toISOString(),
+                    error: error.message,
+                    metadata: {
+                        stage: error.stage
+                    }
+                });
+            }
+            throw error;
+        }
+        await store.updateRun(runId, {
+            mode: "run",
+            overallStatus: "running"
+        });
         const executor = effectiveConfig?.executor ?? "codex";
         if (executor === "codex") {
             const cwd = process.cwd();
@@ -197,9 +308,62 @@ export async function runCommandHandler(options) {
                     emitHook,
                     executeTask: (task, taskRunId, _config, systemContext) => codexSession.executeTask(task, taskRunId, systemContext),
                 });
-                const reviewText = await codexSession.reviewChanges();
-                console.log("Codex post-execution review:");
-                console.log(reviewText);
+                const reviewConfig = getExecutionReviewConfig(effectiveConfig);
+                const finalSummaries = [];
+                const runAfterExecution = await store.getRun(runId);
+                if (reviewConfig.enabled && (runAfterExecution?.tasks.length ?? 0) > 0) {
+                    const configured = reviewConfig.validatorCommands?.filter((item) => item.trim().length > 0) ?? [];
+                    const validatorCommands = configured.length > 0
+                        ? configured
+                        : (reviewConfig.validatorAuto ? await detectValidatorCommands() : []);
+                    for (let cycleIndex = 1; cycleIndex <= reviewConfig.maxCycles; cycleIndex += 1) {
+                        const validationResults = await runValidatorCommands(validatorCommands);
+                        const prompt = buildPostExecutionReviewPrompt(cycleIndex, validationResults, reviewConfig.prompt);
+                        const rawReview = await codexSession.runPrompt(prompt);
+                        const reviewResult = parseExecutionReviewResult(rawReview);
+                        finalSummaries.push(`cycle ${cycleIndex}: ${reviewResult.summary}`);
+                        if (reviewResult.findings.length === 0) {
+                            break;
+                        }
+                        await emitHook({
+                            runId: runId,
+                            hook: "onFindings",
+                            message: reviewResult.summary,
+                            timestamp: new Date().toISOString(),
+                            metadata: {
+                                findingsCount: reviewResult.findings.length,
+                                findingsSummary: reviewResult.summary,
+                                cycleIndex
+                            }
+                        });
+                        if (reviewConfig.onFindings === "report_only") {
+                            break;
+                        }
+                        if (reviewConfig.onFindings === "fail") {
+                            await store.updateRun(runId, { overallStatus: "failed" });
+                            break;
+                        }
+                        if (!reviewResult.fixed) {
+                            break;
+                        }
+                    }
+                }
+                let fallbackReview = "";
+                if (reviewConfig.enabled) {
+                    fallbackReview = await codexSession.reviewChanges();
+                }
+                console.log("Codex post-execution final review summary:");
+                if (finalSummaries.length > 0) {
+                    for (const summary of finalSummaries) {
+                        console.log(`- ${summary}`);
+                    }
+                }
+                else {
+                    console.log("- Post-execution review loop disabled.");
+                }
+                if (fallbackReview.length > 0) {
+                    console.log(fallbackReview);
+                }
             }
             finally {
                 await codexSession.disconnect();
@@ -253,6 +417,8 @@ export function registerRunCommand(program) {
         .option("--on-milestone <cmd>", "Shell hook command for onMilestone")
         .option("--on-task-complete <cmd>", "Shell hook command for onTaskComplete")
         .option("--on-task-fail <cmd>", "Shell hook command for onTaskFail")
+        .option("--on-invalid-plan <cmd>", "Shell hook command for onInvalidPlan")
+        .option("--on-findings <cmd>", "Shell hook command for onFindings")
         .option("--on-complete <cmd>", "Shell hook command for onComplete")
         .option("--on-error <cmd>", "Shell hook command for onError")
         .action(async (goal, commandOptions) => {