npm - @kbediako/codex-orchestrator - Versions diffs - 0.1.1 → 0.1.3 - Mend

@kbediako/codex-orchestrator 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

package/README.md CHANGED

@@ -80,6 +80,7 @@ Use `npx codex-orchestrator resume --run <run-id>` to continue interrupted runs;
 - `codex-orchestrator mcp serve [--repo <path>] [--dry-run] [-- <extra args>]`: launch the MCP stdio server (delegates to `codex mcp-server`; stdout guard keeps protocol-only output, logs to stderr).
 - `codex-orchestrator init codex [--cwd <path>] [--force]`: copy starter templates into a repo (no overwrite unless `--force`).
 - `codex-orchestrator doctor [--format json]`: check optional tooling dependencies and print install commands.
+- `codex-orchestrator devtools setup [--yes]`: print DevTools MCP setup instructions (`--yes` applies `codex mcp add ...`).
 - `codex-orchestrator self-check --format json`: emit a safe JSON health payload for smoke tests.
 - `codex-orchestrator --version`: print the package version.
@@ -121,7 +122,7 @@ Notes:
 - These prompts are consumed by the Codex CLI UI only; the orchestrator does not read them. Keep updates synced across machines during onboarding.
 - To install or refresh the prompts (repo-only), run `scripts/setup-codex-prompts.sh` (use `--force` to overwrite existing files).
 - `/prompts:diagnostics` takes `TASK=<task-id> MANIFEST=<path> [NOTES=<free text>]`, exports `MCP_RUNNER_TASK_ID=$TASK`, runs `npx codex-orchestrator start diagnostics --format json`, tails `.runs/$TASK/cli/<run-id>/manifest.json` (or `npx codex-orchestrator status --watch`), and records evidence to `/tasks`, `docs/TASKS.md`, `.agent/task/...`, `.runs/$TASK/metrics.json`, and `out/$TASK/state.json` using `$MANIFEST`.
-- `/prompts:review-handoff` takes `TASK=<task-id> MANIFEST=<path> NOTES=<goal + summary + risks + optional questions>`, re-exports `MCP_RUNNER_TASK_ID`, and (repo-only) runs `node scripts/spec-guard.mjs --dry-run`, `npm run lint`, `npm run test`, optional `npm run eval:test`, plus `npm run review` (wraps `codex review` against the current diff and includes the latest run manifest path as evidence). It also reminds you to log approvals in `$MANIFEST` and mirror the evidence to the same docs/metrics/state targets.
+- `/prompts:review-handoff` takes `TASK=<task-id> MANIFEST=<path> NOTES=<goal + summary + risks + optional questions>`, re-exports `MCP_RUNNER_TASK_ID`, and (repo-only) runs `node scripts/delegation-guard.mjs`, `node scripts/spec-guard.mjs --dry-run`, `npm run lint`, `npm run test`, optional `npm run eval:test`, plus `npm run review` (wraps `codex review` against the current diff and includes the latest run manifest path as evidence). It also reminds you to log approvals in `$MANIFEST` and mirror the evidence to the same docs/metrics/state targets.
 - In CI / `--no-interactive` pipelines (or when stdin is not a TTY), `npm run review` prints the review handoff prompt (including evidence paths) and exits successfully instead of invoking `codex review`. Set `FORCE_CODEX_REVIEW=1` to run `codex review` in those environments.
 - Always trigger diagnostics and review workflows through these prompts whenever you run the orchestrator so contributors consistently execute the required command sequences and capture auditable manifests.
@@ -133,7 +134,7 @@ Notes:
 - Default pipelines live in `codex.orchestrator.json` (repository-specific) and `orchestrator/src/cli/pipelines/` (built-in defaults). Each stage is either a command (shell execution) or a nested pipeline.
 - The `CommandPlanner` inspects the selected pipeline and target stage; you can pass `--target <stage-id>` (alias: `--target-stage`) or set `CODEX_ORCHESTRATOR_TARGET_STAGE` to focus on a specific step (e.g., rerun tests only).
 - Stage execution records stdout/stderr logs, exit codes, optional summaries, and failure data directly into the manifest (`commands[]` array).
-- Guardrails (repo-only): before review, run `node scripts/spec-guard.mjs --dry-run` to ensure specs touched in the PR are current; the orchestrator tracks guardrail outcomes in the manifest (`guardrail_status`).
+- Guardrails (repo-only): before review, run `node scripts/delegation-guard.mjs` and `node scripts/spec-guard.mjs --dry-run` to ensure delegation and spec freshness; the orchestrator tracks guardrail outcomes in the manifest (`guardrail_status`).
 ## Approval & Sandbox Model
 - Approval policies (`never`, `on-request`, `auto`, or custom strings) flow through `packages/orchestrator`. Tool invocations can require approval before sandbox elevation, and all prompts/decisions are persisted.
@@ -165,6 +166,8 @@ Note: the commands below assume a source checkout; `scripts/` helpers are not in
 | `npm run test` | Vitest suite covering orchestration core, CLI services, and patterns. |
 | `npm run eval:test` | Optional evaluation harness (enable when `evaluation/fixtures/**` is populated). |
 | `npm run docs:check` | Deterministically validates scripts/pipelines/paths referenced in agent-facing docs. |
+| `npm run docs:freshness` | Validates docs registry coverage + review recency; writes `out/<task-id>/docs-freshness.json`. |
+| `node scripts/delegation-guard.mjs` | Enforces subagent delegation evidence before review (repo-only). |
 | `node scripts/spec-guard.mjs --dry-run` | Validates spec freshness; required before review (repo-only). |
 | `node scripts/diff-budget.mjs` | Guards against oversized diffs before review (repo-only; defaults: 25 files / 800 lines; supports explicit overrides). |
 | `npm run review` | Runs `codex review` with the latest run manifest path as evidence (repo-only; CI disables stdin; set `CODEX_REVIEW_NON_INTERACTIVE=1` to enforce locally). |
@@ -197,18 +200,18 @@ Use an explicit handoff note for reviewers. `NOTES` is required for review runs;
 Template: `Goal: ... | Summary: ... | Risks: ... | Questions (optional): ...`
 To enable Chrome DevTools for review runs, set `CODEX_REVIEW_DEVTOOLS=1` (uses a codex config override; no repo scripts required).
-Default to the standard `implementation-gate` for general reviews; use `implementation-gate-devtools` only when the review needs Chrome DevTools capabilities (visual/layout checks, network/perf diagnostics). After fixing review feedback, rerun the same gate and include any follow-up questions in `NOTES`.
-To run the full implementation gate with DevTools-enabled review, use `npx codex-orchestrator start implementation-gate-devtools --format json --no-interactive --task <task-id>`.
+Default to the standard `implementation-gate` for general reviews; enable DevTools only when the review needs Chrome DevTools capabilities (visual/layout checks, network/perf diagnostics). After fixing review feedback, rerun the same gate and include any follow-up questions in `NOTES`.
+To run the full implementation gate with DevTools-enabled review, use `CODEX_REVIEW_DEVTOOLS=1 npx codex-orchestrator start implementation-gate --format json --no-interactive --task <task-id>`.
 ## Frontend Testing
 Frontend testing is a first-class pipeline with DevTools off by default. The shipped pipelines already set `CODEX_NON_INTERACTIVE=1`; add it explicitly for custom automation or when you want the `frontend-test` shortcut to suppress Codex prompts:
 - `CODEX_NON_INTERACTIVE=1 npx codex-orchestrator start frontend-testing --format json --no-interactive --task <task-id>`
-- `CODEX_NON_INTERACTIVE=1 npx codex-orchestrator start frontend-testing-devtools --format json --no-interactive --task <task-id>` (DevTools enabled)
+- `CODEX_NON_INTERACTIVE=1 CODEX_REVIEW_DEVTOOLS=1 npx codex-orchestrator start frontend-testing --format json --no-interactive --task <task-id>` (DevTools enabled)
 - `CODEX_NON_INTERACTIVE=1 codex-orchestrator frontend-test` (shortcut; add `--devtools` to enable DevTools)
 If you run the pipelines from this repo, run `npm run build` first so `dist/` stays current (the pipeline executes the compiled runner).
-Note: the frontend-testing pipelines toggle the shared `CODEX_REVIEW_DEVTOOLS` flag under the hood; prefer `--devtools` or the devtools pipeline instead of setting it manually.
+Note: the frontend-testing pipeline reads the shared `CODEX_REVIEW_DEVTOOLS` flag; prefer `--devtools` or `CODEX_REVIEW_DEVTOOLS=1` for explicit enablement.
 Optional prompt overrides:
 - `CODEX_FRONTEND_TEST_PROMPT` (inline prompt)
@@ -216,7 +219,7 @@ Optional prompt overrides:
 `--no-interactive` disables the HUD only; set `CODEX_NON_INTERACTIVE=1` when you need to suppress Codex prompts (e.g., shortcut runs or custom automation).
-Check readiness with `codex-orchestrator doctor --format json` (reports DevTools skill availability).
+Check readiness with `codex-orchestrator doctor --format json` (reports DevTools skill + MCP config availability). Use `codex-orchestrator devtools setup` to print setup steps.
 ## Mirror Workflows
 - `npm run mirror:fetch -- --project <name> [--dry-run] [--force]`: reads `packages/<project>/mirror.config.json` (origin, routes, asset roots, rewrite/block/allow lists), caches downloads **per project** under `.runs/<task>/mirror/<project>/cache`, strips tracker patterns, rewrites externals to `/external/<host>/...`, localizes OG/twitter preview images, rewrites share links off tracker-heavy hosts, and stages into `.runs/<task>/mirror/<project>/<timestamp>/staging/public` before promoting to `packages/<project>/public`. Non-origin assets fall back to Web Archive when the primary host is down; promotion is skipped if errors are detected unless `--force` is set. Manifests live at `.runs/<task>/mirror/<project>/<timestamp>/manifest.json` (warns when `MCP_RUNNER_TASK_ID` is unset; honors `compliance/permit.json` when present).
@@ -253,4 +256,4 @@ Use the hi-fi pipeline to snapshot complex marketing sites (motion, interactions
 ---
-When preparing a review (repo-only), always capture the latest manifest path, run `node scripts/spec-guard.mjs --dry-run`, and ensure checklist mirrors (`/tasks`, `docs/`, `.agent/`) point at the evidence generated by Codex Orchestrator. That keeps the automation trustworthy and auditable across projects.
+When preparing a review (repo-only), always capture the latest manifest path, run `node scripts/delegation-guard.mjs` and `node scripts/spec-guard.mjs --dry-run`, and ensure checklist mirrors (`/tasks`, `docs/`, `.agent/`) point at the evidence generated by Codex Orchestrator. That keeps the automation trustworthy and auditable across projects.

package/dist/bin/codex-orchestrator.js CHANGED

@@ -1,15 +1,20 @@
 #!/usr/bin/env node
+import { readFile } from 'node:fs/promises';
+import { basename, join } from 'node:path';
 import process from 'node:process';
 import { CodexOrchestrator } from '../orchestrator/src/cli/orchestrator.js';
 import { formatPlanPreview } from '../orchestrator/src/cli/utils/planFormatter.js';
 import { executeExecCommand } from '../orchestrator/src/cli/exec/command.js';
-import { resolveEnvironment, sanitizeTaskId } from '../orchestrator/src/cli/run/environment.js';
+import { resolveEnvironmentPaths } from '../scripts/lib/run-manifests.js';
+import { normalizeEnvironmentPaths, sanitizeTaskId } from '../orchestrator/src/cli/run/environment.js';
 import { RunEventEmitter } from '../orchestrator/src/cli/events/runEvents.js';
 import { evaluateInteractiveGate } from '../orchestrator/src/cli/utils/interactive.js';
 import { buildSelfCheckResult } from '../orchestrator/src/cli/selfCheck.js';
 import { initCodexTemplates, formatInitSummary } from '../orchestrator/src/cli/init.js';
 import { runDoctor, formatDoctorSummary } from '../orchestrator/src/cli/doctor.js';
+import { formatDevtoolsSetupSummary, runDevtoolsSetup } from '../orchestrator/src/cli/devtoolsSetup.js';
 import { loadPackageInfo } from '../orchestrator/src/cli/utils/packageInfo.js';
+import { slugify } from '../orchestrator/src/cli/utils/strings.js';
 import { serveMcp } from '../orchestrator/src/cli/mcp.js';
 async function main() {
     const args = process.argv.slice(2);
@@ -34,6 +39,9 @@ async function main() {
             case 'plan':
                 await handlePlan(orchestrator, args);
                 break;
+            case 'rlm':
+                await handleRlm(orchestrator, args);
+                break;
             case 'resume':
                 await handleResume(orchestrator, args);
                 break;
@@ -52,6 +60,9 @@ async function main() {
             case 'doctor':
                 await handleDoctor(args);
                 break;
+            case 'devtools':
+                await handleDevtools(args);
+                break;
             case 'mcp':
                 await handleMcp(args);
                 break;
@@ -107,110 +118,133 @@ function resolveTargetStageId(flags) {
     }
     return undefined;
 }
+function readStringFlag(flags, key) {
+    const value = flags[key];
+    if (typeof value !== 'string') {
+        return undefined;
+    }
+    const trimmed = value.trim();
+    return trimmed.length > 0 ? trimmed : undefined;
+}
+function applyRlmEnvOverrides(flags, goal) {
+    if (goal) {
+        process.env.RLM_GOAL = goal;
+    }
+    const validator = readStringFlag(flags, 'validator');
+    if (validator) {
+        process.env.RLM_VALIDATOR = validator;
+    }
+    const maxIterations = readStringFlag(flags, 'max-iterations');
+    if (maxIterations) {
+        process.env.RLM_MAX_ITERATIONS = maxIterations;
+    }
+    const maxMinutes = readStringFlag(flags, 'max-minutes');
+    if (maxMinutes) {
+        process.env.RLM_MAX_MINUTES = maxMinutes;
+    }
+    const roles = readStringFlag(flags, 'roles');
+    if (roles) {
+        process.env.RLM_ROLES = roles;
+    }
+}
+function resolveRlmTaskId(taskFlag) {
+    if (taskFlag) {
+        return sanitizeTaskId(taskFlag);
+    }
+    const envTask = process.env.MCP_RUNNER_TASK_ID?.trim();
+    if (envTask) {
+        return sanitizeTaskId(envTask);
+    }
+    const { repoRoot } = resolveEnvironmentPaths();
+    const repoName = basename(repoRoot);
+    const slug = slugify(repoName, 'adhoc');
+    return sanitizeTaskId(`rlm-${slug}`);
+}
+async function waitForManifestCompletion(manifestPath, intervalMs = 2000) {
+    const terminal = new Set(['succeeded', 'failed', 'cancelled']);
+    while (true) {
+        const raw = await readFile(manifestPath, 'utf8');
+        const manifest = JSON.parse(raw);
+        if (terminal.has(manifest.status)) {
+            return manifest;
+        }
+        await new Promise((resolve) => setTimeout(resolve, intervalMs));
+    }
+}
+async function readRlmState(statePath) {
+    try {
+        const raw = await readFile(statePath, 'utf8');
+        const parsed = JSON.parse(raw);
+        if (!parsed?.final) {
+            return null;
+        }
+        return { exitCode: parsed.final.exitCode, status: parsed.final.status };
+    }
+    catch {
+        return null;
+    }
+}
 async function handleStart(orchestrator, rawArgs) {
     const { positionals, flags } = parseArgs(rawArgs);
     const pipelineId = positionals[0];
     const format = flags['format'] === 'json' ? 'json' : 'text';
-    const interactiveRequested = Boolean(flags['interactive'] || flags['ui']);
-    const interactiveDisabled = Boolean(flags['no-interactive']);
-    const runEvents = new RunEventEmitter();
-    const gate = evaluateInteractiveGate({
-        requested: interactiveRequested,
-        disabled: interactiveDisabled,
-        format,
-        stdoutIsTTY: process.stdout.isTTY === true,
-        stderrIsTTY: process.stderr.isTTY === true,
-        term: process.env.TERM ?? null
-    });
-    const hud = await maybeStartHud(gate, runEvents);
-    if (!gate.enabled && interactiveRequested && !interactiveDisabled && gate.reason) {
-        console.error(`[HUD disabled] ${gate.reason}`);
-    }
-    try {
+    if (pipelineId === 'rlm') {
+        const goal = readStringFlag(flags, 'goal');
+        applyRlmEnvOverrides(flags, goal);
+    }
+    await withRunUi(flags, format, async (runEvents) => {
+        let taskIdOverride = typeof flags['task'] === 'string' ? flags['task'] : undefined;
+        if (pipelineId === 'rlm') {
+            taskIdOverride = resolveRlmTaskId(taskIdOverride);
+            process.env.MCP_RUNNER_TASK_ID = taskIdOverride;
+            if (format !== 'json') {
+                console.log(`Task: ${taskIdOverride}`);
+            }
+        }
         const result = await orchestrator.start({
             pipelineId,
-            taskId: typeof flags['task'] === 'string' ? flags['task'] : undefined,
+            taskId: taskIdOverride,
             parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
             approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
             targetStageId: resolveTargetStageId(flags),
             runEvents
         });
-        hud?.stop();
-        const payload = {
-            run_id: result.manifest.run_id,
-            status: result.manifest.status,
-            artifact_root: result.manifest.artifact_root,
-            manifest: `${result.manifest.artifact_root}/manifest.json`,
-            log_path: result.manifest.log_path
-        };
-        if (format === 'json') {
-            console.log(JSON.stringify(payload, null, 2));
-        }
-        else {
-            console.log(`Run started: ${payload.run_id}`);
-            console.log(`Status: ${payload.status}`);
-            console.log(`Manifest: ${payload.manifest}`);
-            console.log(`Log: ${payload.log_path}`);
-        }
-    }
-    finally {
-        hud?.stop();
-        runEvents.dispose();
-    }
+        emitRunOutput(result, format, 'Run started');
+    });
 }
 async function handleFrontendTest(orchestrator, rawArgs) {
     const { positionals, flags } = parseArgs(rawArgs);
     const format = flags['format'] === 'json' ? 'json' : 'text';
     const devtools = Boolean(flags['devtools']);
-    const interactiveRequested = Boolean(flags['interactive'] || flags['ui']);
-    const interactiveDisabled = Boolean(flags['no-interactive']);
-    const runEvents = new RunEventEmitter();
-    const gate = evaluateInteractiveGate({
-        requested: interactiveRequested,
-        disabled: interactiveDisabled,
-        format,
-        stdoutIsTTY: process.stdout.isTTY === true,
-        stderrIsTTY: process.stderr.isTTY === true,
-        term: process.env.TERM ?? null
-    });
-    const hud = await maybeStartHud(gate, runEvents);
-    if (!gate.enabled && interactiveRequested && !interactiveDisabled && gate.reason) {
-        console.error(`[HUD disabled] ${gate.reason}`);
-    }
     if (positionals.length > 0) {
         console.error(`[frontend-test] ignoring extra arguments: ${positionals.join(' ')}`);
     }
+    const originalDevtools = process.env.CODEX_REVIEW_DEVTOOLS;
+    if (devtools) {
+        process.env.CODEX_REVIEW_DEVTOOLS = '1';
+    }
     try {
-        const pipelineId = devtools ? 'frontend-testing-devtools' : 'frontend-testing';
-        const result = await orchestrator.start({
-            pipelineId,
-            taskId: typeof flags['task'] === 'string' ? flags['task'] : undefined,
-            parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
-            approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
-            targetStageId: resolveTargetStageId(flags),
-            runEvents
+        await withRunUi(flags, format, async (runEvents) => {
+            const result = await orchestrator.start({
+                pipelineId: 'frontend-testing',
+                taskId: typeof flags['task'] === 'string' ? flags['task'] : undefined,
+                parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
+                approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
+                targetStageId: resolveTargetStageId(flags),
+                runEvents
+            });
+            emitRunOutput(result, format, 'Run started');
         });
-        hud?.stop();
-        const payload = {
-            run_id: result.manifest.run_id,
-            status: result.manifest.status,
-            artifact_root: result.manifest.artifact_root,
-            manifest: `${result.manifest.artifact_root}/manifest.json`,
-            log_path: result.manifest.log_path
-        };
-        if (format === 'json') {
-            console.log(JSON.stringify(payload, null, 2));
-        }
-        else {
-            console.log(`Run started: ${payload.run_id}`);
-            console.log(`Status: ${payload.status}`);
-            console.log(`Manifest: ${payload.manifest}`);
-            console.log(`Log: ${payload.log_path}`);
-        }
     }
     finally {
-        hud?.stop();
-        runEvents.dispose();
+        if (devtools) {
+            if (originalDevtools === undefined) {
+                delete process.env.CODEX_REVIEW_DEVTOOLS;
+            }
+            else {
+                process.env.CODEX_REVIEW_DEVTOOLS = originalDevtools;
+            }
+        }
     }
 }
 async function handlePlan(orchestrator, rawArgs) {
@@ -228,6 +262,47 @@ async function handlePlan(orchestrator, rawArgs) {
     }
     process.stdout.write(`${formatPlanPreview(result)}\n`);
 }
+async function handleRlm(orchestrator, rawArgs) {
+    const { positionals, flags } = parseArgs(rawArgs);
+    const goalFromArgs = positionals.length > 0 ? positionals.join(' ') : undefined;
+    const goal = goalFromArgs ?? readStringFlag(flags, 'goal') ?? process.env.RLM_GOAL?.trim();
+    if (!goal) {
+        throw new Error('rlm requires a goal. Use: codex-orchestrator rlm \"<goal>\".');
+    }
+    const taskFlag = typeof flags['task'] === 'string' ? flags['task'] : undefined;
+    const taskId = resolveRlmTaskId(taskFlag);
+    process.env.MCP_RUNNER_TASK_ID = taskId;
+    applyRlmEnvOverrides(flags, goal);
+    console.log(`Task: ${taskId}`);
+    let startResult = null;
+    await withRunUi(flags, 'text', async (runEvents) => {
+        startResult = await orchestrator.start({
+            pipelineId: 'rlm',
+            taskId,
+            parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
+            approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
+            runEvents
+        });
+        emitRunOutput(startResult, 'text', 'Run started');
+    });
+    if (!startResult) {
+        throw new Error('rlm run failed to start.');
+    }
+    const resolvedStart = startResult;
+    const { repoRoot } = resolveEnvironmentPaths();
+    const manifestPath = join(repoRoot, resolvedStart.manifest.artifact_root, 'manifest.json');
+    const manifest = await waitForManifestCompletion(manifestPath);
+    const statePath = join(repoRoot, resolvedStart.manifest.artifact_root, 'rlm', 'state.json');
+    const rlmState = await readRlmState(statePath);
+    if (rlmState) {
+        console.log(`RLM status: ${rlmState.status}`);
+        process.exitCode = rlmState.exitCode;
+        return;
+    }
+    console.log(`RLM status: ${manifest.status}`);
+    console.error('RLM state file missing; treating as internal error.');
+    process.exitCode = 10;
+}
 async function handleResume(orchestrator, rawArgs) {
     const { positionals, flags } = parseArgs(rawArgs);
     const runId = (flags['run'] ?? positionals[0]);
@@ -235,22 +310,7 @@ async function handleResume(orchestrator, rawArgs) {
         throw new Error('resume requires --run <run-id>.');
     }
     const format = flags['format'] === 'json' ? 'json' : 'text';
-    const interactiveRequested = Boolean(flags['interactive'] || flags['ui']);
-    const interactiveDisabled = Boolean(flags['no-interactive']);
-    const runEvents = new RunEventEmitter();
-    const gate = evaluateInteractiveGate({
-        requested: interactiveRequested,
-        disabled: interactiveDisabled,
-        format,
-        stdoutIsTTY: process.stdout.isTTY === true,
-        stderrIsTTY: process.stderr.isTTY === true,
-        term: process.env.TERM ?? null
-    });
-    const hud = await maybeStartHud(gate, runEvents);
-    if (!gate.enabled && interactiveRequested && !interactiveDisabled && gate.reason) {
-        console.error(`[HUD disabled] ${gate.reason}`);
-    }
-    try {
+    await withRunUi(flags, format, async (runEvents) => {
         const result = await orchestrator.resume({
             runId,
             resumeToken: typeof flags['token'] === 'string' ? flags['token'] : undefined,
@@ -259,28 +319,8 @@ async function handleResume(orchestrator, rawArgs) {
             targetStageId: resolveTargetStageId(flags),
             runEvents
         });
-        hud?.stop();
-        const payload = {
-            run_id: result.manifest.run_id,
-            status: result.manifest.status,
-            artifact_root: result.manifest.artifact_root,
-            manifest: `${result.manifest.artifact_root}/manifest.json`,
-            log_path: result.manifest.log_path
-        };
-        if (format === 'json') {
-            console.log(JSON.stringify(payload, null, 2));
-        }
-        else {
-            console.log(`Run resumed: ${payload.run_id}`);
-            console.log(`Status: ${payload.status}`);
-            console.log(`Manifest: ${payload.manifest}`);
-            console.log(`Log: ${payload.log_path}`);
-        }
-    }
-    finally {
-        hud?.stop();
-        runEvents.dispose();
-    }
+        emitRunOutput(result, format, 'Run resumed');
+    });
 }
 async function handleStatus(orchestrator, rawArgs) {
     const { positionals, flags } = parseArgs(rawArgs);
@@ -311,6 +351,47 @@ async function maybeStartHud(gate, emitter) {
     const { startHud } = await import('../orchestrator/src/cli/ui/controller.js');
     return startHud({ emitter, footerNote: 'interactive HUD (read-only)' });
 }
+async function withRunUi(flags, format, action) {
+    const interactiveRequested = Boolean(flags['interactive'] || flags['ui']);
+    const interactiveDisabled = Boolean(flags['no-interactive']);
+    const runEvents = new RunEventEmitter();
+    const gate = evaluateInteractiveGate({
+        requested: interactiveRequested,
+        disabled: interactiveDisabled,
+        format,
+        stdoutIsTTY: process.stdout.isTTY === true,
+        stderrIsTTY: process.stderr.isTTY === true,
+        term: process.env.TERM ?? null
+    });
+    const hud = await maybeStartHud(gate, runEvents);
+    if (!gate.enabled && interactiveRequested && !interactiveDisabled && gate.reason) {
+        console.error(`[HUD disabled] ${gate.reason}`);
+    }
+    try {
+        await action(runEvents);
+    }
+    finally {
+        hud?.stop();
+        runEvents.dispose();
+    }
+}
+function emitRunOutput(result, format, label) {
+    const payload = {
+        run_id: result.manifest.run_id,
+        status: result.manifest.status,
+        artifact_root: result.manifest.artifact_root,
+        manifest: `${result.manifest.artifact_root}/manifest.json`,
+        log_path: result.manifest.log_path
+    };
+    if (format === 'json') {
+        console.log(JSON.stringify(payload, null, 2));
+        return;
+    }
+    console.log(`${label}: ${payload.run_id}`);
+    console.log(`Status: ${payload.status}`);
+    console.log(`Manifest: ${payload.manifest}`);
+    console.log(`Log: ${payload.log_path}`);
+}
 async function handleExec(rawArgs) {
     const parsed = parseExecArgs(rawArgs);
     if (parsed.commandTokens.length === 0) {
@@ -318,7 +399,7 @@ async function handleExec(rawArgs) {
     }
     const isInteractive = process.stdout.isTTY === true && process.stderr.isTTY === true;
     const outputMode = parsed.requestedMode ?? (isInteractive ? 'interactive' : 'jsonl');
-    const env = resolveEnvironment();
+    const env = normalizeEnvironmentPaths(resolveEnvironmentPaths());
     if (parsed.taskId) {
         env.taskId = sanitizeTaskId(parsed.taskId);
     }
@@ -388,6 +469,30 @@ async function handleDoctor(rawArgs) {
         console.log(line);
     }
 }
+async function handleDevtools(rawArgs) {
+    const { positionals, flags } = parseArgs(rawArgs);
+    const subcommand = positionals.shift();
+    if (!subcommand) {
+        throw new Error('devtools requires a subcommand (setup).');
+    }
+    if (subcommand !== 'setup') {
+        throw new Error(`Unknown devtools subcommand: ${subcommand}`);
+    }
+    const format = flags['format'] === 'json' ? 'json' : 'text';
+    const apply = Boolean(flags['yes']);
+    if (format === 'json' && apply) {
+        throw new Error('devtools setup does not support --format json with --yes.');
+    }
+    const result = await runDevtoolsSetup({ apply });
+    if (format === 'json') {
+        console.log(JSON.stringify(result, null, 2));
+        return;
+    }
+    const summary = formatDevtoolsSetupSummary(result);
+    for (const line of summary) {
+        console.log(line);
+    }
+}
 async function handleMcp(rawArgs) {
     const { positionals, flags } = parseArgs(rawArgs);
     const subcommand = positionals.shift();
@@ -522,6 +627,22 @@ Commands:
     --approval-policy <p>   Record approval policy metadata.
     --format json           Emit machine-readable output.
     --target <stage-id>     Focus plan/build metadata on a specific stage (alias: --target-stage).
+    --goal "<goal>"         When pipeline is rlm, set the RLM goal.
+    --validator <cmd|none>  When pipeline is rlm, set the validator command.
+    --max-iterations <n>    When pipeline is rlm, override max iterations.
+    --max-minutes <n>       When pipeline is rlm, override max minutes.
+    --roles <single|triad>  When pipeline is rlm, set role split.
+    --interactive | --ui    Enable read-only HUD when running in a TTY.
+    --no-interactive        Force disable HUD (default is off unless requested).
+  rlm "<goal>"              Run RLM loop until validator passes.
+    --task <id>             Override task identifier.
+    --validator <cmd|none>  Set validator command or disable validation.
+    --max-iterations <n>    Override max iterations (0 = unlimited with validator).
+    --max-minutes <n>       Optional time-based guardrail in minutes.
+    --roles <single|triad>  Choose single or triad role split.
+    --parent-run <id>       Link run to parent run id.
+    --approval-policy <p>   Record approval policy metadata.
     --interactive | --ui    Enable read-only HUD when running in a TTY.
     --no-interactive        Force disable HUD (default is off unless requested).
@@ -562,6 +683,9 @@ Commands:
   self-check [--format json]
   init codex [--cwd <path>] [--force]
   doctor [--format json]
+  devtools setup          Print DevTools MCP setup instructions.
+    --yes                 Apply setup by running "codex mcp add ...".
+    --format json         Emit machine-readable output (dry-run only).
   mcp serve [--repo <path>] [--dry-run] [-- <extra args>]
   version | --version

package/dist/orchestrator/src/cli/config/userConfig.js CHANGED

@@ -1,28 +1,102 @@
 import { readFile } from 'node:fs/promises';
 import { join } from 'node:path';
 import { logger } from '../../logger.js';
+import { findPackageRoot } from '../utils/packageInfo.js';
+export async function loadRepoConfig(env) {
+    const repoConfigPath = join(env.repoRoot, 'codex.orchestrator.json');
+    const repoConfig = await readConfig(repoConfigPath);
+    if (repoConfig) {
+        logger.info(`[codex-config] Loaded user config from ${repoConfigPath}`);
+        return normalizeUserConfig(repoConfig, 'repo');
+    }
+    logger.warn(`[codex-config] Missing codex.orchestrator.json at ${repoConfigPath}`);
+    return null;
+}
+export async function loadPackageConfig(env) {
+    const repoConfigPath = join(env.repoRoot, 'codex.orchestrator.json');
+    const packageRoot = findPackageRoot();
+    const packageConfigPath = join(packageRoot, 'codex.orchestrator.json');
+    if (packageConfigPath === repoConfigPath) {
+        return null;
+    }
+    const packageConfig = await readConfig(packageConfigPath);
+    if (packageConfig) {
+        logger.info(`[codex-config] Loaded user config from ${packageConfigPath}`);
+        return normalizeUserConfig(packageConfig, 'package');
+    }
+    logger.warn(`[codex-config] Missing codex.orchestrator.json at ${packageConfigPath}`);
+    return null;
+}
 export async function loadUserConfig(env) {
-    const configPath = join(env.repoRoot, 'codex.orchestrator.json');
+    const repoConfig = await loadRepoConfig(env);
+    if (repoConfig) {
+        return repoConfig;
+    }
+    return await loadPackageConfig(env);
+}
+export function findPipeline(config, id) {
+    if (!config?.pipelines) {
+        return null;
+    }
+    return config.pipelines.find((pipeline) => pipeline.id === id) ?? null;
+}
+function normalizeUserConfig(config, source) {
+    if (!config) {
+        return null;
+    }
+    const stageSets = normalizeStageSets(config.stageSets);
+    const pipelines = Array.isArray(config.pipelines)
+        ? config.pipelines.map((pipeline) => expandPipelineStages(pipeline, stageSets))
+        : config.pipelines;
+    return { pipelines, defaultPipeline: config.defaultPipeline, source };
+}
+async function readConfig(configPath) {
     try {
         const raw = await readFile(configPath, 'utf8');
-        const parsed = JSON.parse(raw);
-        logger.info(`[codex-config] Loaded user config from ${configPath}`);
-        if (parsed && Array.isArray(parsed.pipelines)) {
-            return parsed;
-        }
-        return parsed ?? null;
+        return JSON.parse(raw);
     }
     catch (error) {
         if (error.code === 'ENOENT') {
-            logger.warn(`[codex-config] Missing codex.orchestrator.json at ${configPath}`);
             return null;
         }
         throw error;
     }
 }
-export function findPipeline(config, id) {
-    if (!config?.pipelines) {
-        return null;
+function normalizeStageSets(stageSets) {
+    if (!stageSets) {
+        return {};
     }
-    return config.pipelines.find((pipeline) => pipeline.id === id) ?? null;
+    if (typeof stageSets !== 'object' || Array.isArray(stageSets)) {
+        throw new Error('codex.orchestrator.json stageSets must be an object of stage arrays.');
+    }
+    const normalized = {};
+    for (const [key, value] of Object.entries(stageSets)) {
+        if (!Array.isArray(value)) {
+            throw new Error(`Stage set "${key}" must be an array.`);
+        }
+        if (value.some((stage) => isStageSetRef(stage))) {
+            throw new Error(`Stage set "${key}" cannot include stage-set references.`);
+        }
+        normalized[key] = value;
+    }
+    return normalized;
+}
+function expandPipelineStages(pipeline, stageSets) {
+    const expanded = [];
+    for (const stage of pipeline.stages ?? []) {
+        if (isStageSetRef(stage)) {
+            const sharedStages = stageSets[stage.ref];
+            if (!sharedStages) {
+                throw new Error(`Pipeline "${pipeline.id}" references unknown stage set "${stage.ref}".`);
+            }
+            expanded.push(...sharedStages);
+        }
+        else {
+            expanded.push(stage);
+        }
+    }
+    return { ...pipeline, stages: expanded };
+}
+function isStageSetRef(stage) {
+    return stage.kind === 'stage-set';
 }