@kbediako/codex-orchestrator 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +11 -8
  2. package/dist/bin/codex-orchestrator.js +245 -121
  3. package/dist/orchestrator/src/cli/config/userConfig.js +86 -12
  4. package/dist/orchestrator/src/cli/devtoolsSetup.js +66 -0
  5. package/dist/orchestrator/src/cli/doctor.js +46 -21
  6. package/dist/orchestrator/src/cli/exec/context.js +5 -2
  7. package/dist/orchestrator/src/cli/exec/learning.js +5 -3
  8. package/dist/orchestrator/src/cli/exec/stageRunner.js +1 -1
  9. package/dist/orchestrator/src/cli/exec/summary.js +1 -1
  10. package/dist/orchestrator/src/cli/orchestrator.js +16 -7
  11. package/dist/orchestrator/src/cli/pipelines/index.js +13 -24
  12. package/dist/orchestrator/src/cli/rlm/prompt.js +31 -0
  13. package/dist/orchestrator/src/cli/rlm/runner.js +177 -0
  14. package/dist/orchestrator/src/cli/rlm/types.js +1 -0
  15. package/dist/orchestrator/src/cli/rlm/validator.js +159 -0
  16. package/dist/orchestrator/src/cli/rlmRunner.js +417 -0
  17. package/dist/orchestrator/src/cli/run/environment.js +4 -11
  18. package/dist/orchestrator/src/cli/run/manifest.js +7 -1
  19. package/dist/orchestrator/src/cli/services/commandRunner.js +1 -1
  20. package/dist/orchestrator/src/cli/services/controlPlaneService.js +3 -1
  21. package/dist/orchestrator/src/cli/services/execRuntime.js +1 -2
  22. package/dist/orchestrator/src/cli/services/pipelineResolver.js +33 -2
  23. package/dist/orchestrator/src/cli/services/runPreparation.js +7 -1
  24. package/dist/orchestrator/src/cli/services/schedulerService.js +1 -1
  25. package/dist/orchestrator/src/cli/utils/devtools.js +178 -0
  26. package/dist/orchestrator/src/cli/utils/specGuardRunner.js +3 -1
  27. package/dist/orchestrator/src/cli/utils/strings.js +8 -6
  28. package/dist/orchestrator/src/persistence/ExperienceStore.js +6 -16
  29. package/dist/orchestrator/src/persistence/TaskStateStore.js +1 -1
  30. package/dist/orchestrator/src/persistence/sanitizeIdentifier.js +1 -1
  31. package/dist/packages/orchestrator/src/exec/stdio.js +112 -0
  32. package/dist/packages/orchestrator/src/exec/unified-exec.js +1 -1
  33. package/dist/packages/orchestrator/src/index.js +1 -0
  34. package/dist/packages/shared/design-artifacts/writer.js +4 -14
  35. package/dist/packages/shared/streams/stdio.js +2 -112
  36. package/dist/packages/shared/utils/strings.js +17 -0
  37. package/dist/scripts/design/pipeline/advanced-assets.js +1 -1
  38. package/dist/scripts/design/pipeline/context.js +5 -5
  39. package/dist/scripts/design/pipeline/extract.js +9 -6
  40. package/dist/scripts/design/pipeline/{optionalDeps.js → optional-deps.js} +49 -38
  41. package/dist/scripts/design/pipeline/permit.js +59 -0
  42. package/dist/scripts/design/pipeline/toolkit/common.js +18 -32
  43. package/dist/scripts/design/pipeline/toolkit/reference.js +1 -1
  44. package/dist/scripts/design/pipeline/toolkit/snapshot.js +1 -1
  45. package/dist/scripts/design/pipeline/visual-regression.js +2 -11
  46. package/dist/scripts/lib/cli-args.js +53 -0
  47. package/dist/scripts/lib/docs-helpers.js +111 -0
  48. package/dist/scripts/lib/npm-pack.js +20 -0
  49. package/dist/scripts/lib/run-manifests.js +160 -0
  50. package/package.json +17 -6
  51. package/dist/orchestrator/src/cli/pipelines/defaultDiagnostics.js +0 -32
  52. package/dist/orchestrator/src/cli/pipelines/designReference.js +0 -72
  53. package/dist/orchestrator/src/cli/pipelines/hiFiDesignToolkit.js +0 -71
  54. package/dist/orchestrator/src/cli/utils/jsonlWriter.js +0 -10
  55. package/dist/orchestrator/src/control-plane/index.js +0 -3
  56. package/dist/orchestrator/src/persistence/identifierGuards.js +0 -1
  57. package/dist/orchestrator/src/persistence/writeAtomicFile.js +0 -4
  58. package/dist/orchestrator/src/scheduler/index.js +0 -1
package/README.md CHANGED
@@ -80,6 +80,7 @@ Use `npx codex-orchestrator resume --run <run-id>` to continue interrupted runs;
80
80
  - `codex-orchestrator mcp serve [--repo <path>] [--dry-run] [-- <extra args>]`: launch the MCP stdio server (delegates to `codex mcp-server`; stdout guard keeps protocol-only output, logs to stderr).
81
81
  - `codex-orchestrator init codex [--cwd <path>] [--force]`: copy starter templates into a repo (no overwrite unless `--force`).
82
82
  - `codex-orchestrator doctor [--format json]`: check optional tooling dependencies and print install commands.
83
+ - `codex-orchestrator devtools setup [--yes]`: print DevTools MCP setup instructions (`--yes` applies `codex mcp add ...`).
83
84
  - `codex-orchestrator self-check --format json`: emit a safe JSON health payload for smoke tests.
84
85
  - `codex-orchestrator --version`: print the package version.
85
86
 
@@ -121,7 +122,7 @@ Notes:
121
122
  - These prompts are consumed by the Codex CLI UI only; the orchestrator does not read them. Keep updates synced across machines during onboarding.
122
123
  - To install or refresh the prompts (repo-only), run `scripts/setup-codex-prompts.sh` (use `--force` to overwrite existing files).
123
124
  - `/prompts:diagnostics` takes `TASK=<task-id> MANIFEST=<path> [NOTES=<free text>]`, exports `MCP_RUNNER_TASK_ID=$TASK`, runs `npx codex-orchestrator start diagnostics --format json`, tails `.runs/$TASK/cli/<run-id>/manifest.json` (or `npx codex-orchestrator status --watch`), and records evidence to `/tasks`, `docs/TASKS.md`, `.agent/task/...`, `.runs/$TASK/metrics.json`, and `out/$TASK/state.json` using `$MANIFEST`.
124
- - `/prompts:review-handoff` takes `TASK=<task-id> MANIFEST=<path> NOTES=<goal + summary + risks + optional questions>`, re-exports `MCP_RUNNER_TASK_ID`, and (repo-only) runs `node scripts/spec-guard.mjs --dry-run`, `npm run lint`, `npm run test`, optional `npm run eval:test`, plus `npm run review` (wraps `codex review` against the current diff and includes the latest run manifest path as evidence). It also reminds you to log approvals in `$MANIFEST` and mirror the evidence to the same docs/metrics/state targets.
125
+ - `/prompts:review-handoff` takes `TASK=<task-id> MANIFEST=<path> NOTES=<goal + summary + risks + optional questions>`, re-exports `MCP_RUNNER_TASK_ID`, and (repo-only) runs `node scripts/delegation-guard.mjs`, `node scripts/spec-guard.mjs --dry-run`, `npm run lint`, `npm run test`, optional `npm run eval:test`, plus `npm run review` (wraps `codex review` against the current diff and includes the latest run manifest path as evidence). It also reminds you to log approvals in `$MANIFEST` and mirror the evidence to the same docs/metrics/state targets.
125
126
  - In CI / `--no-interactive` pipelines (or when stdin is not a TTY), `npm run review` prints the review handoff prompt (including evidence paths) and exits successfully instead of invoking `codex review`. Set `FORCE_CODEX_REVIEW=1` to run `codex review` in those environments.
126
127
  - Always trigger diagnostics and review workflows through these prompts whenever you run the orchestrator so contributors consistently execute the required command sequences and capture auditable manifests.
127
128
 
@@ -133,7 +134,7 @@ Notes:
133
134
  - Default pipelines live in `codex.orchestrator.json` (repository-specific) and `orchestrator/src/cli/pipelines/` (built-in defaults). Each stage is either a command (shell execution) or a nested pipeline.
134
135
  - The `CommandPlanner` inspects the selected pipeline and target stage; you can pass `--target <stage-id>` (alias: `--target-stage`) or set `CODEX_ORCHESTRATOR_TARGET_STAGE` to focus on a specific step (e.g., rerun tests only).
135
136
  - Stage execution records stdout/stderr logs, exit codes, optional summaries, and failure data directly into the manifest (`commands[]` array).
136
- - Guardrails (repo-only): before review, run `node scripts/spec-guard.mjs --dry-run` to ensure specs touched in the PR are current; the orchestrator tracks guardrail outcomes in the manifest (`guardrail_status`).
137
+ - Guardrails (repo-only): before review, run `node scripts/delegation-guard.mjs` and `node scripts/spec-guard.mjs --dry-run` to ensure delegation and spec freshness; the orchestrator tracks guardrail outcomes in the manifest (`guardrail_status`).
137
138
 
138
139
  ## Approval & Sandbox Model
139
140
  - Approval policies (`never`, `on-request`, `auto`, or custom strings) flow through `packages/orchestrator`. Tool invocations can require approval before sandbox elevation, and all prompts/decisions are persisted.
@@ -165,6 +166,8 @@ Note: the commands below assume a source checkout; `scripts/` helpers are not in
165
166
  | `npm run test` | Vitest suite covering orchestration core, CLI services, and patterns. |
166
167
  | `npm run eval:test` | Optional evaluation harness (enable when `evaluation/fixtures/**` is populated). |
167
168
  | `npm run docs:check` | Deterministically validates scripts/pipelines/paths referenced in agent-facing docs. |
169
+ | `npm run docs:freshness` | Validates docs registry coverage + review recency; writes `out/<task-id>/docs-freshness.json`. |
170
+ | `node scripts/delegation-guard.mjs` | Enforces subagent delegation evidence before review (repo-only). |
168
171
  | `node scripts/spec-guard.mjs --dry-run` | Validates spec freshness; required before review (repo-only). |
169
172
  | `node scripts/diff-budget.mjs` | Guards against oversized diffs before review (repo-only; defaults: 25 files / 800 lines; supports explicit overrides). |
170
173
  | `npm run review` | Runs `codex review` with the latest run manifest path as evidence (repo-only; CI disables stdin; set `CODEX_REVIEW_NON_INTERACTIVE=1` to enforce locally). |
@@ -197,18 +200,18 @@ Use an explicit handoff note for reviewers. `NOTES` is required for review runs;
197
200
  Template: `Goal: ... | Summary: ... | Risks: ... | Questions (optional): ...`
198
201
 
199
202
  To enable Chrome DevTools for review runs, set `CODEX_REVIEW_DEVTOOLS=1` (uses a codex config override; no repo scripts required).
200
- Default to the standard `implementation-gate` for general reviews; use `implementation-gate-devtools` only when the review needs Chrome DevTools capabilities (visual/layout checks, network/perf diagnostics). After fixing review feedback, rerun the same gate and include any follow-up questions in `NOTES`.
201
- To run the full implementation gate with DevTools-enabled review, use `npx codex-orchestrator start implementation-gate-devtools --format json --no-interactive --task <task-id>`.
203
+ Default to the standard `implementation-gate` for general reviews; enable DevTools only when the review needs Chrome DevTools capabilities (visual/layout checks, network/perf diagnostics). After fixing review feedback, rerun the same gate and include any follow-up questions in `NOTES`.
204
+ To run the full implementation gate with DevTools-enabled review, use `CODEX_REVIEW_DEVTOOLS=1 npx codex-orchestrator start implementation-gate --format json --no-interactive --task <task-id>`.
202
205
 
203
206
  ## Frontend Testing
204
207
  Frontend testing is a first-class pipeline with DevTools off by default. The shipped pipelines already set `CODEX_NON_INTERACTIVE=1`; add it explicitly for custom automation or when you want the `frontend-test` shortcut to suppress Codex prompts:
205
208
  - `CODEX_NON_INTERACTIVE=1 npx codex-orchestrator start frontend-testing --format json --no-interactive --task <task-id>`
206
- - `CODEX_NON_INTERACTIVE=1 npx codex-orchestrator start frontend-testing-devtools --format json --no-interactive --task <task-id>` (DevTools enabled)
209
+ - `CODEX_NON_INTERACTIVE=1 CODEX_REVIEW_DEVTOOLS=1 npx codex-orchestrator start frontend-testing --format json --no-interactive --task <task-id>` (DevTools enabled)
207
210
  - `CODEX_NON_INTERACTIVE=1 codex-orchestrator frontend-test` (shortcut; add `--devtools` to enable DevTools)
208
211
 
209
212
  If you run the pipelines from this repo, run `npm run build` first so `dist/` stays current (the pipeline executes the compiled runner).
210
213
 
211
- Note: the frontend-testing pipelines toggle the shared `CODEX_REVIEW_DEVTOOLS` flag under the hood; prefer `--devtools` or the devtools pipeline instead of setting it manually.
214
+ Note: the frontend-testing pipeline reads the shared `CODEX_REVIEW_DEVTOOLS` flag; prefer `--devtools` or `CODEX_REVIEW_DEVTOOLS=1` for explicit enablement.
212
215
 
213
216
  Optional prompt overrides:
214
217
  - `CODEX_FRONTEND_TEST_PROMPT` (inline prompt)
@@ -216,7 +219,7 @@ Optional prompt overrides:
216
219
 
217
220
  `--no-interactive` disables the HUD only; set `CODEX_NON_INTERACTIVE=1` when you need to suppress Codex prompts (e.g., shortcut runs or custom automation).
218
221
 
219
- Check readiness with `codex-orchestrator doctor --format json` (reports DevTools skill availability).
222
+ Check readiness with `codex-orchestrator doctor --format json` (reports DevTools skill + MCP config availability). Use `codex-orchestrator devtools setup` to print setup steps.
220
223
 
221
224
  ## Mirror Workflows
222
225
  - `npm run mirror:fetch -- --project <name> [--dry-run] [--force]`: reads `packages/<project>/mirror.config.json` (origin, routes, asset roots, rewrite/block/allow lists), caches downloads **per project** under `.runs/<task>/mirror/<project>/cache`, strips tracker patterns, rewrites externals to `/external/<host>/...`, localizes OG/twitter preview images, rewrites share links off tracker-heavy hosts, and stages into `.runs/<task>/mirror/<project>/<timestamp>/staging/public` before promoting to `packages/<project>/public`. Non-origin assets fall back to Web Archive when the primary host is down; promotion is skipped if errors are detected unless `--force` is set. Manifests live at `.runs/<task>/mirror/<project>/<timestamp>/manifest.json` (warns when `MCP_RUNNER_TASK_ID` is unset; honors `compliance/permit.json` when present).
@@ -253,4 +256,4 @@ Use the hi-fi pipeline to snapshot complex marketing sites (motion, interactions
253
256
 
254
257
  ---
255
258
 
256
- When preparing a review (repo-only), always capture the latest manifest path, run `node scripts/spec-guard.mjs --dry-run`, and ensure checklist mirrors (`/tasks`, `docs/`, `.agent/`) point at the evidence generated by Codex Orchestrator. That keeps the automation trustworthy and auditable across projects.
259
+ When preparing a review (repo-only), always capture the latest manifest path, run `node scripts/delegation-guard.mjs` and `node scripts/spec-guard.mjs --dry-run`, and ensure checklist mirrors (`/tasks`, `docs/`, `.agent/`) point at the evidence generated by Codex Orchestrator. That keeps the automation trustworthy and auditable across projects.
package/dist/bin/codex-orchestrator.js CHANGED
@@ -1,15 +1,20 @@
1
1
  #!/usr/bin/env node
2
+ import { readFile } from 'node:fs/promises';
3
+ import { basename, join } from 'node:path';
2
4
  import process from 'node:process';
3
5
  import { CodexOrchestrator } from '../orchestrator/src/cli/orchestrator.js';
4
6
  import { formatPlanPreview } from '../orchestrator/src/cli/utils/planFormatter.js';
5
7
  import { executeExecCommand } from '../orchestrator/src/cli/exec/command.js';
6
- import { resolveEnvironment, sanitizeTaskId } from '../orchestrator/src/cli/run/environment.js';
8
+ import { resolveEnvironmentPaths } from '../scripts/lib/run-manifests.js';
9
+ import { normalizeEnvironmentPaths, sanitizeTaskId } from '../orchestrator/src/cli/run/environment.js';
7
10
  import { RunEventEmitter } from '../orchestrator/src/cli/events/runEvents.js';
8
11
  import { evaluateInteractiveGate } from '../orchestrator/src/cli/utils/interactive.js';
9
12
  import { buildSelfCheckResult } from '../orchestrator/src/cli/selfCheck.js';
10
13
  import { initCodexTemplates, formatInitSummary } from '../orchestrator/src/cli/init.js';
11
14
  import { runDoctor, formatDoctorSummary } from '../orchestrator/src/cli/doctor.js';
15
+ import { formatDevtoolsSetupSummary, runDevtoolsSetup } from '../orchestrator/src/cli/devtoolsSetup.js';
12
16
  import { loadPackageInfo } from '../orchestrator/src/cli/utils/packageInfo.js';
17
+ import { slugify } from '../orchestrator/src/cli/utils/strings.js';
13
18
  import { serveMcp } from '../orchestrator/src/cli/mcp.js';
14
19
  async function main() {
15
20
  const args = process.argv.slice(2);
@@ -34,6 +39,9 @@ async function main() {
34
39
  case 'plan':
35
40
  await handlePlan(orchestrator, args);
36
41
  break;
42
+ case 'rlm':
43
+ await handleRlm(orchestrator, args);
44
+ break;
37
45
  case 'resume':
38
46
  await handleResume(orchestrator, args);
39
47
  break;
@@ -52,6 +60,9 @@ async function main() {
52
60
  case 'doctor':
53
61
  await handleDoctor(args);
54
62
  break;
63
+ case 'devtools':
64
+ await handleDevtools(args);
65
+ break;
55
66
  case 'mcp':
56
67
  await handleMcp(args);
57
68
  break;
@@ -107,110 +118,133 @@ function resolveTargetStageId(flags) {
107
118
  }
108
119
  return undefined;
109
120
  }
121
+ function readStringFlag(flags, key) {
122
+ const value = flags[key];
123
+ if (typeof value !== 'string') {
124
+ return undefined;
125
+ }
126
+ const trimmed = value.trim();
127
+ return trimmed.length > 0 ? trimmed : undefined;
128
+ }
129
+ function applyRlmEnvOverrides(flags, goal) {
130
+ if (goal) {
131
+ process.env.RLM_GOAL = goal;
132
+ }
133
+ const validator = readStringFlag(flags, 'validator');
134
+ if (validator) {
135
+ process.env.RLM_VALIDATOR = validator;
136
+ }
137
+ const maxIterations = readStringFlag(flags, 'max-iterations');
138
+ if (maxIterations) {
139
+ process.env.RLM_MAX_ITERATIONS = maxIterations;
140
+ }
141
+ const maxMinutes = readStringFlag(flags, 'max-minutes');
142
+ if (maxMinutes) {
143
+ process.env.RLM_MAX_MINUTES = maxMinutes;
144
+ }
145
+ const roles = readStringFlag(flags, 'roles');
146
+ if (roles) {
147
+ process.env.RLM_ROLES = roles;
148
+ }
149
+ }
150
+ function resolveRlmTaskId(taskFlag) {
151
+ if (taskFlag) {
152
+ return sanitizeTaskId(taskFlag);
153
+ }
154
+ const envTask = process.env.MCP_RUNNER_TASK_ID?.trim();
155
+ if (envTask) {
156
+ return sanitizeTaskId(envTask);
157
+ }
158
+ const { repoRoot } = resolveEnvironmentPaths();
159
+ const repoName = basename(repoRoot);
160
+ const slug = slugify(repoName, 'adhoc');
161
+ return sanitizeTaskId(`rlm-${slug}`);
162
+ }
163
+ async function waitForManifestCompletion(manifestPath, intervalMs = 2000) {
164
+ const terminal = new Set(['succeeded', 'failed', 'cancelled']);
165
+ while (true) {
166
+ const raw = await readFile(manifestPath, 'utf8');
167
+ const manifest = JSON.parse(raw);
168
+ if (terminal.has(manifest.status)) {
169
+ return manifest;
170
+ }
171
+ await new Promise((resolve) => setTimeout(resolve, intervalMs));
172
+ }
173
+ }
174
+ async function readRlmState(statePath) {
175
+ try {
176
+ const raw = await readFile(statePath, 'utf8');
177
+ const parsed = JSON.parse(raw);
178
+ if (!parsed?.final) {
179
+ return null;
180
+ }
181
+ return { exitCode: parsed.final.exitCode, status: parsed.final.status };
182
+ }
183
+ catch {
184
+ return null;
185
+ }
186
+ }
110
187
  async function handleStart(orchestrator, rawArgs) {
111
188
  const { positionals, flags } = parseArgs(rawArgs);
112
189
  const pipelineId = positionals[0];
113
190
  const format = flags['format'] === 'json' ? 'json' : 'text';
114
- const interactiveRequested = Boolean(flags['interactive'] || flags['ui']);
115
- const interactiveDisabled = Boolean(flags['no-interactive']);
116
- const runEvents = new RunEventEmitter();
117
- const gate = evaluateInteractiveGate({
118
- requested: interactiveRequested,
119
- disabled: interactiveDisabled,
120
- format,
121
- stdoutIsTTY: process.stdout.isTTY === true,
122
- stderrIsTTY: process.stderr.isTTY === true,
123
- term: process.env.TERM ?? null
124
- });
125
- const hud = await maybeStartHud(gate, runEvents);
126
- if (!gate.enabled && interactiveRequested && !interactiveDisabled && gate.reason) {
127
- console.error(`[HUD disabled] ${gate.reason}`);
128
- }
129
- try {
191
+ if (pipelineId === 'rlm') {
192
+ const goal = readStringFlag(flags, 'goal');
193
+ applyRlmEnvOverrides(flags, goal);
194
+ }
195
+ await withRunUi(flags, format, async (runEvents) => {
196
+ let taskIdOverride = typeof flags['task'] === 'string' ? flags['task'] : undefined;
197
+ if (pipelineId === 'rlm') {
198
+ taskIdOverride = resolveRlmTaskId(taskIdOverride);
199
+ process.env.MCP_RUNNER_TASK_ID = taskIdOverride;
200
+ if (format !== 'json') {
201
+ console.log(`Task: ${taskIdOverride}`);
202
+ }
203
+ }
130
204
  const result = await orchestrator.start({
131
205
  pipelineId,
132
- taskId: typeof flags['task'] === 'string' ? flags['task'] : undefined,
206
+ taskId: taskIdOverride,
133
207
  parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
134
208
  approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
135
209
  targetStageId: resolveTargetStageId(flags),
136
210
  runEvents
137
211
  });
138
- hud?.stop();
139
- const payload = {
140
- run_id: result.manifest.run_id,
141
- status: result.manifest.status,
142
- artifact_root: result.manifest.artifact_root,
143
- manifest: `${result.manifest.artifact_root}/manifest.json`,
144
- log_path: result.manifest.log_path
145
- };
146
- if (format === 'json') {
147
- console.log(JSON.stringify(payload, null, 2));
148
- }
149
- else {
150
- console.log(`Run started: ${payload.run_id}`);
151
- console.log(`Status: ${payload.status}`);
152
- console.log(`Manifest: ${payload.manifest}`);
153
- console.log(`Log: ${payload.log_path}`);
154
- }
155
- }
156
- finally {
157
- hud?.stop();
158
- runEvents.dispose();
159
- }
212
+ emitRunOutput(result, format, 'Run started');
213
+ });
160
214
  }
161
215
  async function handleFrontendTest(orchestrator, rawArgs) {
162
216
  const { positionals, flags } = parseArgs(rawArgs);
163
217
  const format = flags['format'] === 'json' ? 'json' : 'text';
164
218
  const devtools = Boolean(flags['devtools']);
165
- const interactiveRequested = Boolean(flags['interactive'] || flags['ui']);
166
- const interactiveDisabled = Boolean(flags['no-interactive']);
167
- const runEvents = new RunEventEmitter();
168
- const gate = evaluateInteractiveGate({
169
- requested: interactiveRequested,
170
- disabled: interactiveDisabled,
171
- format,
172
- stdoutIsTTY: process.stdout.isTTY === true,
173
- stderrIsTTY: process.stderr.isTTY === true,
174
- term: process.env.TERM ?? null
175
- });
176
- const hud = await maybeStartHud(gate, runEvents);
177
- if (!gate.enabled && interactiveRequested && !interactiveDisabled && gate.reason) {
178
- console.error(`[HUD disabled] ${gate.reason}`);
179
- }
180
219
  if (positionals.length > 0) {
181
220
  console.error(`[frontend-test] ignoring extra arguments: ${positionals.join(' ')}`);
182
221
  }
222
+ const originalDevtools = process.env.CODEX_REVIEW_DEVTOOLS;
223
+ if (devtools) {
224
+ process.env.CODEX_REVIEW_DEVTOOLS = '1';
225
+ }
183
226
  try {
184
- const pipelineId = devtools ? 'frontend-testing-devtools' : 'frontend-testing';
185
- const result = await orchestrator.start({
186
- pipelineId,
187
- taskId: typeof flags['task'] === 'string' ? flags['task'] : undefined,
188
- parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
189
- approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
190
- targetStageId: resolveTargetStageId(flags),
191
- runEvents
227
+ await withRunUi(flags, format, async (runEvents) => {
228
+ const result = await orchestrator.start({
229
+ pipelineId: 'frontend-testing',
230
+ taskId: typeof flags['task'] === 'string' ? flags['task'] : undefined,
231
+ parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
232
+ approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
233
+ targetStageId: resolveTargetStageId(flags),
234
+ runEvents
235
+ });
236
+ emitRunOutput(result, format, 'Run started');
192
237
  });
193
- hud?.stop();
194
- const payload = {
195
- run_id: result.manifest.run_id,
196
- status: result.manifest.status,
197
- artifact_root: result.manifest.artifact_root,
198
- manifest: `${result.manifest.artifact_root}/manifest.json`,
199
- log_path: result.manifest.log_path
200
- };
201
- if (format === 'json') {
202
- console.log(JSON.stringify(payload, null, 2));
203
- }
204
- else {
205
- console.log(`Run started: ${payload.run_id}`);
206
- console.log(`Status: ${payload.status}`);
207
- console.log(`Manifest: ${payload.manifest}`);
208
- console.log(`Log: ${payload.log_path}`);
209
- }
210
238
  }
211
239
  finally {
212
- hud?.stop();
213
- runEvents.dispose();
240
+ if (devtools) {
241
+ if (originalDevtools === undefined) {
242
+ delete process.env.CODEX_REVIEW_DEVTOOLS;
243
+ }
244
+ else {
245
+ process.env.CODEX_REVIEW_DEVTOOLS = originalDevtools;
246
+ }
247
+ }
214
248
  }
215
249
  }
216
250
  async function handlePlan(orchestrator, rawArgs) {
@@ -228,6 +262,47 @@ async function handlePlan(orchestrator, rawArgs) {
228
262
  }
229
263
  process.stdout.write(`${formatPlanPreview(result)}\n`);
230
264
  }
265
+ async function handleRlm(orchestrator, rawArgs) {
266
+ const { positionals, flags } = parseArgs(rawArgs);
267
+ const goalFromArgs = positionals.length > 0 ? positionals.join(' ') : undefined;
268
+ const goal = goalFromArgs ?? readStringFlag(flags, 'goal') ?? process.env.RLM_GOAL?.trim();
269
+ if (!goal) {
270
+ throw new Error('rlm requires a goal. Use: codex-orchestrator rlm \"<goal>\".');
271
+ }
272
+ const taskFlag = typeof flags['task'] === 'string' ? flags['task'] : undefined;
273
+ const taskId = resolveRlmTaskId(taskFlag);
274
+ process.env.MCP_RUNNER_TASK_ID = taskId;
275
+ applyRlmEnvOverrides(flags, goal);
276
+ console.log(`Task: ${taskId}`);
277
+ let startResult = null;
278
+ await withRunUi(flags, 'text', async (runEvents) => {
279
+ startResult = await orchestrator.start({
280
+ pipelineId: 'rlm',
281
+ taskId,
282
+ parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
283
+ approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
284
+ runEvents
285
+ });
286
+ emitRunOutput(startResult, 'text', 'Run started');
287
+ });
288
+ if (!startResult) {
289
+ throw new Error('rlm run failed to start.');
290
+ }
291
+ const resolvedStart = startResult;
292
+ const { repoRoot } = resolveEnvironmentPaths();
293
+ const manifestPath = join(repoRoot, resolvedStart.manifest.artifact_root, 'manifest.json');
294
+ const manifest = await waitForManifestCompletion(manifestPath);
295
+ const statePath = join(repoRoot, resolvedStart.manifest.artifact_root, 'rlm', 'state.json');
296
+ const rlmState = await readRlmState(statePath);
297
+ if (rlmState) {
298
+ console.log(`RLM status: ${rlmState.status}`);
299
+ process.exitCode = rlmState.exitCode;
300
+ return;
301
+ }
302
+ console.log(`RLM status: ${manifest.status}`);
303
+ console.error('RLM state file missing; treating as internal error.');
304
+ process.exitCode = 10;
305
+ }
231
306
  async function handleResume(orchestrator, rawArgs) {
232
307
  const { positionals, flags } = parseArgs(rawArgs);
233
308
  const runId = (flags['run'] ?? positionals[0]);
@@ -235,22 +310,7 @@ async function handleResume(orchestrator, rawArgs) {
235
310
  throw new Error('resume requires --run <run-id>.');
236
311
  }
237
312
  const format = flags['format'] === 'json' ? 'json' : 'text';
238
- const interactiveRequested = Boolean(flags['interactive'] || flags['ui']);
239
- const interactiveDisabled = Boolean(flags['no-interactive']);
240
- const runEvents = new RunEventEmitter();
241
- const gate = evaluateInteractiveGate({
242
- requested: interactiveRequested,
243
- disabled: interactiveDisabled,
244
- format,
245
- stdoutIsTTY: process.stdout.isTTY === true,
246
- stderrIsTTY: process.stderr.isTTY === true,
247
- term: process.env.TERM ?? null
248
- });
249
- const hud = await maybeStartHud(gate, runEvents);
250
- if (!gate.enabled && interactiveRequested && !interactiveDisabled && gate.reason) {
251
- console.error(`[HUD disabled] ${gate.reason}`);
252
- }
253
- try {
313
+ await withRunUi(flags, format, async (runEvents) => {
254
314
  const result = await orchestrator.resume({
255
315
  runId,
256
316
  resumeToken: typeof flags['token'] === 'string' ? flags['token'] : undefined,
@@ -259,28 +319,8 @@ async function handleResume(orchestrator, rawArgs) {
259
319
  targetStageId: resolveTargetStageId(flags),
260
320
  runEvents
261
321
  });
262
- hud?.stop();
263
- const payload = {
264
- run_id: result.manifest.run_id,
265
- status: result.manifest.status,
266
- artifact_root: result.manifest.artifact_root,
267
- manifest: `${result.manifest.artifact_root}/manifest.json`,
268
- log_path: result.manifest.log_path
269
- };
270
- if (format === 'json') {
271
- console.log(JSON.stringify(payload, null, 2));
272
- }
273
- else {
274
- console.log(`Run resumed: ${payload.run_id}`);
275
- console.log(`Status: ${payload.status}`);
276
- console.log(`Manifest: ${payload.manifest}`);
277
- console.log(`Log: ${payload.log_path}`);
278
- }
279
- }
280
- finally {
281
- hud?.stop();
282
- runEvents.dispose();
283
- }
322
+ emitRunOutput(result, format, 'Run resumed');
323
+ });
284
324
  }
285
325
  async function handleStatus(orchestrator, rawArgs) {
286
326
  const { positionals, flags } = parseArgs(rawArgs);
@@ -311,6 +351,47 @@ async function maybeStartHud(gate, emitter) {
311
351
  const { startHud } = await import('../orchestrator/src/cli/ui/controller.js');
312
352
  return startHud({ emitter, footerNote: 'interactive HUD (read-only)' });
313
353
  }
354
+ async function withRunUi(flags, format, action) {
355
+ const interactiveRequested = Boolean(flags['interactive'] || flags['ui']);
356
+ const interactiveDisabled = Boolean(flags['no-interactive']);
357
+ const runEvents = new RunEventEmitter();
358
+ const gate = evaluateInteractiveGate({
359
+ requested: interactiveRequested,
360
+ disabled: interactiveDisabled,
361
+ format,
362
+ stdoutIsTTY: process.stdout.isTTY === true,
363
+ stderrIsTTY: process.stderr.isTTY === true,
364
+ term: process.env.TERM ?? null
365
+ });
366
+ const hud = await maybeStartHud(gate, runEvents);
367
+ if (!gate.enabled && interactiveRequested && !interactiveDisabled && gate.reason) {
368
+ console.error(`[HUD disabled] ${gate.reason}`);
369
+ }
370
+ try {
371
+ await action(runEvents);
372
+ }
373
+ finally {
374
+ hud?.stop();
375
+ runEvents.dispose();
376
+ }
377
+ }
378
+ function emitRunOutput(result, format, label) {
379
+ const payload = {
380
+ run_id: result.manifest.run_id,
381
+ status: result.manifest.status,
382
+ artifact_root: result.manifest.artifact_root,
383
+ manifest: `${result.manifest.artifact_root}/manifest.json`,
384
+ log_path: result.manifest.log_path
385
+ };
386
+ if (format === 'json') {
387
+ console.log(JSON.stringify(payload, null, 2));
388
+ return;
389
+ }
390
+ console.log(`${label}: ${payload.run_id}`);
391
+ console.log(`Status: ${payload.status}`);
392
+ console.log(`Manifest: ${payload.manifest}`);
393
+ console.log(`Log: ${payload.log_path}`);
394
+ }
314
395
  async function handleExec(rawArgs) {
315
396
  const parsed = parseExecArgs(rawArgs);
316
397
  if (parsed.commandTokens.length === 0) {
@@ -318,7 +399,7 @@ async function handleExec(rawArgs) {
318
399
  }
319
400
  const isInteractive = process.stdout.isTTY === true && process.stderr.isTTY === true;
320
401
  const outputMode = parsed.requestedMode ?? (isInteractive ? 'interactive' : 'jsonl');
321
- const env = resolveEnvironment();
402
+ const env = normalizeEnvironmentPaths(resolveEnvironmentPaths());
322
403
  if (parsed.taskId) {
323
404
  env.taskId = sanitizeTaskId(parsed.taskId);
324
405
  }
@@ -388,6 +469,30 @@ async function handleDoctor(rawArgs) {
388
469
  console.log(line);
389
470
  }
390
471
  }
472
/**
 * Handle the `devtools` CLI command.
 *
 * Only the `setup` subcommand is accepted. `--format json` selects JSON
 * output and `--yes` requests that the setup be applied; the two flags are
 * rejected when combined.
 *
 * @param {string[]} rawArgs - Arguments following the `devtools` command.
 * @returns {Promise<void>}
 * @throws {Error} When the subcommand is missing or unknown, or when
 *   `--format json` is combined with `--yes`.
 */
async function handleDevtools(rawArgs) {
    const parsedArgs = parseArgs(rawArgs);
    const { flags } = parsedArgs;
    const subcommand = parsedArgs.positionals.shift();
    if (!subcommand) {
        throw new Error('devtools requires a subcommand (setup).');
    }
    if (subcommand !== 'setup') {
        throw new Error(`Unknown devtools subcommand: ${subcommand}`);
    }
    const wantsJson = flags['format'] === 'json';
    const apply = Boolean(flags['yes']);
    if (wantsJson && apply) {
        throw new Error('devtools setup does not support --format json with --yes.');
    }
    const result = await runDevtoolsSetup({ apply });
    if (wantsJson) {
        console.log(JSON.stringify(result, null, 2));
        return;
    }
    // Text mode: print each summary line produced by the formatter.
    for (const summaryLine of formatDevtoolsSetupSummary(result)) {
        console.log(summaryLine);
    }
}
391
496
  async function handleMcp(rawArgs) {
392
497
  const { positionals, flags } = parseArgs(rawArgs);
393
498
  const subcommand = positionals.shift();
@@ -522,6 +627,22 @@ Commands:
522
627
  --approval-policy <p> Record approval policy metadata.
523
628
  --format json Emit machine-readable output.
524
629
  --target <stage-id> Focus plan/build metadata on a specific stage (alias: --target-stage).
630
+ --goal "<goal>" When pipeline is rlm, set the RLM goal.
631
+ --validator <cmd|none> When pipeline is rlm, set the validator command.
632
+ --max-iterations <n> When pipeline is rlm, override max iterations.
633
+ --max-minutes <n> When pipeline is rlm, override max minutes.
634
+ --roles <single|triad> When pipeline is rlm, set role split.
635
+ --interactive | --ui Enable read-only HUD when running in a TTY.
636
+ --no-interactive Force disable HUD (default is off unless requested).
637
+
638
+ rlm "<goal>" Run RLM loop until validator passes.
639
+ --task <id> Override task identifier.
640
+ --validator <cmd|none> Set validator command or disable validation.
641
+ --max-iterations <n> Override max iterations (0 = unlimited with validator).
642
+ --max-minutes <n> Optional time-based guardrail in minutes.
643
+ --roles <single|triad> Choose single or triad role split.
644
+ --parent-run <id> Link run to parent run id.
645
+ --approval-policy <p> Record approval policy metadata.
525
646
  --interactive | --ui Enable read-only HUD when running in a TTY.
526
647
  --no-interactive Force disable HUD (default is off unless requested).
527
648
 
@@ -562,6 +683,9 @@ Commands:
562
683
  self-check [--format json]
563
684
  init codex [--cwd <path>] [--force]
564
685
  doctor [--format json]
686
+ devtools setup Print DevTools MCP setup instructions.
687
+ --yes Apply setup by running "codex mcp add ...".
688
+ --format json Emit machine-readable output (dry-run only).
565
689
  mcp serve [--repo <path>] [--dry-run] [-- <extra args>]
566
690
  version | --version
567
691
 
@@ -1,28 +1,102 @@
1
1
  import { readFile } from 'node:fs/promises';
2
2
  import { join } from 'node:path';
3
3
  import { logger } from '../../logger.js';
4
+ import { findPackageRoot } from '../utils/packageInfo.js';
5
/**
 * Load `codex.orchestrator.json` from the repository root.
 *
 * @param {{ repoRoot: string }} env - Environment; only `repoRoot` is read.
 * @returns {Promise<object|null>} The normalized config tagged with source
 *   `'repo'`, or `null` (after logging a warning) when no config was read.
 */
export async function loadRepoConfig(env) {
    const configFile = join(env.repoRoot, 'codex.orchestrator.json');
    const contents = await readConfig(configFile);
    if (!contents) {
        logger.warn(`[codex-config] Missing codex.orchestrator.json at ${configFile}`);
        return null;
    }
    logger.info(`[codex-config] Loaded user config from ${configFile}`);
    return normalizeUserConfig(contents, 'repo');
}
15
/**
 * Load the `codex.orchestrator.json` located at the package root.
 *
 * @param {{ repoRoot: string }} env - Environment; only `repoRoot` is read.
 * @returns {Promise<object|null>} The normalized config tagged with source
 *   `'package'`; `null` when the package config path equals the repo config
 *   path, or (after logging a warning) when no config was read.
 */
export async function loadPackageConfig(env) {
    const repoFile = join(env.repoRoot, 'codex.orchestrator.json');
    const packageFile = join(findPackageRoot(), 'codex.orchestrator.json');
    // Same file as the repo config: nothing package-specific to load.
    if (packageFile === repoFile) {
        return null;
    }
    const contents = await readConfig(packageFile);
    if (!contents) {
        logger.warn(`[codex-config] Missing codex.orchestrator.json at ${packageFile}`);
        return null;
    }
    logger.info(`[codex-config] Loaded user config from ${packageFile}`);
    return normalizeUserConfig(contents, 'package');
}
4
30
  export async function loadUserConfig(env) {
5
- const configPath = join(env.repoRoot, 'codex.orchestrator.json');
31
+ const repoConfig = await loadRepoConfig(env);
32
+ if (repoConfig) {
33
+ return repoConfig;
34
+ }
35
+ return await loadPackageConfig(env);
36
+ }
37
/**
 * Look up a pipeline by id in a loaded config.
 *
 * @param {{ pipelines?: unknown }|null|undefined} config - Loaded config, if any.
 * @param {string} id - Pipeline id to match with strict equality.
 * @returns {object|null} The first pipeline whose `id` equals `id`, or `null`
 *   when there is no config, `pipelines` is not an array, or nothing matches.
 */
export function findPipeline(config, id) {
    const pipelines = config?.pipelines;
    // `pipelines` comes from user JSON and may be a truthy non-array value;
    // treat that as "no pipelines" instead of failing on `.find`.
    if (!Array.isArray(pipelines)) {
        return null;
    }
    return pipelines.find((pipeline) => pipeline?.id === id) ?? null;
}
43
/**
 * Build the in-memory config from a parsed `codex.orchestrator.json`.
 *
 * Stage sets are validated first; when `pipelines` is an array, each entry is
 * passed through `expandPipelineStages`. A non-array `pipelines` value is
 * returned untouched.
 *
 * @param {object|null|undefined} config - Parsed config contents.
 * @param {string} source - Tag recorded on the result (callers pass `'repo'` or `'package'`).
 * @returns {{ pipelines: unknown, defaultPipeline: unknown, source: string }|null}
 *   The normalized config, or `null` when `config` is falsy.
 */
function normalizeUserConfig(config, source) {
    if (!config) {
        return null;
    }
    const sharedStageSets = normalizeStageSets(config.stageSets);
    let pipelines = config.pipelines;
    if (Array.isArray(pipelines)) {
        pipelines = pipelines.map((entry) => expandPipelineStages(entry, sharedStageSets));
    }
    return { pipelines, defaultPipeline: config.defaultPipeline, source };
}
53
/**
 * Read and parse a JSON config file.
 *
 * @param {string} configPath - Path of the JSON file to read.
 * @returns {Promise<unknown|null>} The parsed JSON value, or `null` when the
 *   file does not exist (`ENOENT`).
 * @throws {SyntaxError} When the file is not valid JSON; the message names
 *   the file and the original parse error is attached as `cause`.
 * @throws {Error} Any other read failure is re-thrown unchanged.
 */
async function readConfig(configPath) {
    let raw;
    try {
        raw = await readFile(configPath, 'utf8');
    }
    catch (error) {
        if (error?.code === 'ENOENT') {
            return null;
        }
        throw error;
    }
    try {
        return JSON.parse(raw);
    }
    catch (error) {
        // More than one config file can be read, so say which one is malformed.
        // The error stays a SyntaxError so existing handlers keep matching.
        throw new SyntaxError(`Invalid JSON in ${configPath}: ${error.message}`, { cause: error });
    }
}
23
- export function findPipeline(config, id) {
24
- if (!config?.pipelines) {
25
- return null;
65
/**
 * Validate the `stageSets` section of `codex.orchestrator.json`.
 *
 * @param {unknown} stageSets - Raw `stageSets` value from the parsed config.
 * @returns {Record<string, unknown[]>} A new object mapping each stage-set
 *   name to its stage array; an empty object when `stageSets` is falsy.
 * @throws {Error} When `stageSets` is not a plain (non-array) object, when a
 *   stage set is not an array, or when a stage set contains a stage-set
 *   reference.
 */
function normalizeStageSets(stageSets) {
    if (!stageSets) {
        return {};
    }
    if (typeof stageSets !== 'object' || Array.isArray(stageSets)) {
        throw new Error('codex.orchestrator.json stageSets must be an object of stage arrays.');
    }
    const entries = Object.entries(stageSets);
    for (const [key, value] of entries) {
        if (!Array.isArray(value)) {
            throw new Error(`Stage set "${key}" must be an array.`);
        }
        if (value.some((stage) => isStageSetRef(stage))) {
            throw new Error(`Stage set "${key}" cannot include stage-set references.`);
        }
    }
    // Object.fromEntries defines own data properties, so a key such as
    // "__proto__" from parsed JSON is stored as a regular entry instead of
    // invoking the prototype setter the way `obj[key] = value` would.
    return Object.fromEntries(entries);
}
84
/**
 * Replace stage-set references in a pipeline with the referenced stages.
 *
 * @param {{ id?: string, stages?: unknown[] }} pipeline - Pipeline definition;
 *   a missing `stages` is treated as an empty list.
 * @param {Record<string, unknown[]>} stageSets - Stage arrays keyed by name.
 * @returns {object} A shallow copy of `pipeline` whose `stages` is the
 *   flattened list, in original order.
 * @throws {Error} When a reference names a stage set that is not defined.
 */
function expandPipelineStages(pipeline, stageSets) {
    const expanded = [];
    for (const stage of pipeline.stages ?? []) {
        if (!isStageSetRef(stage)) {
            expanded.push(stage);
            continue;
        }
        // Only own keys count as stage sets: a ref such as "constructor" must
        // not resolve to a member inherited from Object.prototype.
        const sharedStages = Object.prototype.hasOwnProperty.call(stageSets, stage.ref)
            ? stageSets[stage.ref]
            : undefined;
        if (!sharedStages) {
            throw new Error(`Pipeline "${pipeline.id}" references unknown stage set "${stage.ref}".`);
        }
        expanded.push(...sharedStages);
    }
    return { ...pipeline, stages: expanded };
}
100
/**
 * Tell whether a stage entry is a reference to a shared stage set.
 *
 * @param {unknown} stage - Stage entry taken from parsed config JSON.
 * @returns {boolean} `true` only for objects whose `kind` is `'stage-set'`.
 */
function isStageSetRef(stage) {
    // Entries come from user JSON, where `null` is a legal array element;
    // report "not a reference" rather than throwing on `null.kind`.
    return typeof stage === 'object' && stage !== null && stage.kind === 'stage-set';
}