@kbediako/codex-orchestrator 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -7
- package/dist/bin/codex-orchestrator.js +214 -121
- package/dist/orchestrator/src/cli/config/userConfig.js +86 -12
- package/dist/orchestrator/src/cli/exec/context.js +5 -2
- package/dist/orchestrator/src/cli/exec/learning.js +5 -3
- package/dist/orchestrator/src/cli/exec/stageRunner.js +1 -1
- package/dist/orchestrator/src/cli/exec/summary.js +1 -1
- package/dist/orchestrator/src/cli/orchestrator.js +16 -7
- package/dist/orchestrator/src/cli/pipelines/index.js +13 -24
- package/dist/orchestrator/src/cli/rlm/prompt.js +31 -0
- package/dist/orchestrator/src/cli/rlm/runner.js +177 -0
- package/dist/orchestrator/src/cli/rlm/types.js +1 -0
- package/dist/orchestrator/src/cli/rlm/validator.js +159 -0
- package/dist/orchestrator/src/cli/rlmRunner.js +417 -0
- package/dist/orchestrator/src/cli/run/environment.js +4 -11
- package/dist/orchestrator/src/cli/run/manifest.js +7 -1
- package/dist/orchestrator/src/cli/services/commandRunner.js +1 -1
- package/dist/orchestrator/src/cli/services/controlPlaneService.js +3 -1
- package/dist/orchestrator/src/cli/services/execRuntime.js +1 -2
- package/dist/orchestrator/src/cli/services/pipelineResolver.js +33 -2
- package/dist/orchestrator/src/cli/services/runPreparation.js +7 -1
- package/dist/orchestrator/src/cli/services/schedulerService.js +1 -1
- package/dist/orchestrator/src/cli/utils/specGuardRunner.js +3 -1
- package/dist/orchestrator/src/cli/utils/strings.js +8 -6
- package/dist/orchestrator/src/persistence/ExperienceStore.js +6 -16
- package/dist/orchestrator/src/persistence/TaskStateStore.js +1 -1
- package/dist/orchestrator/src/persistence/sanitizeIdentifier.js +1 -1
- package/dist/packages/orchestrator/src/exec/stdio.js +112 -0
- package/dist/packages/orchestrator/src/exec/unified-exec.js +1 -1
- package/dist/packages/orchestrator/src/index.js +1 -0
- package/dist/packages/shared/design-artifacts/writer.js +4 -14
- package/dist/packages/shared/streams/stdio.js +2 -112
- package/dist/packages/shared/utils/strings.js +17 -0
- package/dist/scripts/design/pipeline/advanced-assets.js +1 -1
- package/dist/scripts/design/pipeline/context.js +5 -5
- package/dist/scripts/design/pipeline/extract.js +9 -6
- package/dist/scripts/design/pipeline/{optionalDeps.js → optional-deps.js} +49 -38
- package/dist/scripts/design/pipeline/permit.js +59 -0
- package/dist/scripts/design/pipeline/toolkit/common.js +18 -32
- package/dist/scripts/design/pipeline/toolkit/reference.js +1 -1
- package/dist/scripts/design/pipeline/toolkit/snapshot.js +1 -1
- package/dist/scripts/design/pipeline/visual-regression.js +2 -11
- package/dist/scripts/lib/cli-args.js +53 -0
- package/dist/scripts/lib/docs-helpers.js +111 -0
- package/dist/scripts/lib/npm-pack.js +20 -0
- package/dist/scripts/lib/run-manifests.js +160 -0
- package/package.json +5 -2
- package/dist/orchestrator/src/cli/pipelines/defaultDiagnostics.js +0 -32
- package/dist/orchestrator/src/cli/pipelines/designReference.js +0 -72
- package/dist/orchestrator/src/cli/pipelines/hiFiDesignToolkit.js +0 -71
- package/dist/orchestrator/src/cli/utils/jsonlWriter.js +0 -10
- package/dist/orchestrator/src/control-plane/index.js +0 -3
- package/dist/orchestrator/src/persistence/identifierGuards.js +0 -1
- package/dist/orchestrator/src/persistence/writeAtomicFile.js +0 -4
- package/dist/orchestrator/src/scheduler/index.js +0 -1
package/README.md
CHANGED
|
@@ -122,7 +122,7 @@ Notes:
|
|
|
122
122
|
- These prompts are consumed by the Codex CLI UI only; the orchestrator does not read them. Keep updates synced across machines during onboarding.
|
|
123
123
|
- To install or refresh the prompts (repo-only), run `scripts/setup-codex-prompts.sh` (use `--force` to overwrite existing files).
|
|
124
124
|
- `/prompts:diagnostics` takes `TASK=<task-id> MANIFEST=<path> [NOTES=<free text>]`, exports `MCP_RUNNER_TASK_ID=$TASK`, runs `npx codex-orchestrator start diagnostics --format json`, tails `.runs/$TASK/cli/<run-id>/manifest.json` (or `npx codex-orchestrator status --watch`), and records evidence to `/tasks`, `docs/TASKS.md`, `.agent/task/...`, `.runs/$TASK/metrics.json`, and `out/$TASK/state.json` using `$MANIFEST`.
|
|
125
|
-
- `/prompts:review-handoff` takes `TASK=<task-id> MANIFEST=<path> NOTES=<goal + summary + risks + optional questions>`, re-exports `MCP_RUNNER_TASK_ID`, and (repo-only) runs `node scripts/spec-guard.mjs --dry-run`, `npm run lint`, `npm run test`, optional `npm run eval:test`, plus `npm run review` (wraps `codex review` against the current diff and includes the latest run manifest path as evidence). It also reminds you to log approvals in `$MANIFEST` and mirror the evidence to the same docs/metrics/state targets.
|
|
125
|
+
- `/prompts:review-handoff` takes `TASK=<task-id> MANIFEST=<path> NOTES=<goal + summary + risks + optional questions>`, re-exports `MCP_RUNNER_TASK_ID`, and (repo-only) runs `node scripts/delegation-guard.mjs`, `node scripts/spec-guard.mjs --dry-run`, `npm run lint`, `npm run test`, optional `npm run eval:test`, plus `npm run review` (wraps `codex review` against the current diff and includes the latest run manifest path as evidence). It also reminds you to log approvals in `$MANIFEST` and mirror the evidence to the same docs/metrics/state targets.
|
|
126
126
|
- In CI / `--no-interactive` pipelines (or when stdin is not a TTY), `npm run review` prints the review handoff prompt (including evidence paths) and exits successfully instead of invoking `codex review`. Set `FORCE_CODEX_REVIEW=1` to run `codex review` in those environments.
|
|
127
127
|
- Always trigger diagnostics and review workflows through these prompts whenever you run the orchestrator so contributors consistently execute the required command sequences and capture auditable manifests.
|
|
128
128
|
|
|
@@ -134,7 +134,7 @@ Notes:
|
|
|
134
134
|
- Default pipelines live in `codex.orchestrator.json` (repository-specific) and `orchestrator/src/cli/pipelines/` (built-in defaults). Each stage is either a command (shell execution) or a nested pipeline.
|
|
135
135
|
- The `CommandPlanner` inspects the selected pipeline and target stage; you can pass `--target <stage-id>` (alias: `--target-stage`) or set `CODEX_ORCHESTRATOR_TARGET_STAGE` to focus on a specific step (e.g., rerun tests only).
|
|
136
136
|
- Stage execution records stdout/stderr logs, exit codes, optional summaries, and failure data directly into the manifest (`commands[]` array).
|
|
137
|
-
- Guardrails (repo-only): before review, run `node scripts/spec-guard.mjs --dry-run` to ensure
|
|
137
|
+
- Guardrails (repo-only): before review, run `node scripts/delegation-guard.mjs` and `node scripts/spec-guard.mjs --dry-run` to ensure delegation and spec freshness; the orchestrator tracks guardrail outcomes in the manifest (`guardrail_status`).
|
|
138
138
|
|
|
139
139
|
## Approval & Sandbox Model
|
|
140
140
|
- Approval policies (`never`, `on-request`, `auto`, or custom strings) flow through `packages/orchestrator`. Tool invocations can require approval before sandbox elevation, and all prompts/decisions are persisted.
|
|
@@ -166,6 +166,8 @@ Note: the commands below assume a source checkout; `scripts/` helpers are not in
|
|
|
166
166
|
| `npm run test` | Vitest suite covering orchestration core, CLI services, and patterns. |
|
|
167
167
|
| `npm run eval:test` | Optional evaluation harness (enable when `evaluation/fixtures/**` is populated). |
|
|
168
168
|
| `npm run docs:check` | Deterministically validates scripts/pipelines/paths referenced in agent-facing docs. |
|
|
169
|
+
| `npm run docs:freshness` | Validates docs registry coverage + review recency; writes `out/<task-id>/docs-freshness.json`. |
|
|
170
|
+
| `node scripts/delegation-guard.mjs` | Enforces subagent delegation evidence before review (repo-only). |
|
|
169
171
|
| `node scripts/spec-guard.mjs --dry-run` | Validates spec freshness; required before review (repo-only). |
|
|
170
172
|
| `node scripts/diff-budget.mjs` | Guards against oversized diffs before review (repo-only; defaults: 25 files / 800 lines; supports explicit overrides). |
|
|
171
173
|
| `npm run review` | Runs `codex review` with the latest run manifest path as evidence (repo-only; CI disables stdin; set `CODEX_REVIEW_NON_INTERACTIVE=1` to enforce locally). |
|
|
@@ -198,18 +200,18 @@ Use an explicit handoff note for reviewers. `NOTES` is required for review runs;
|
|
|
198
200
|
Template: `Goal: ... | Summary: ... | Risks: ... | Questions (optional): ...`
|
|
199
201
|
|
|
200
202
|
To enable Chrome DevTools for review runs, set `CODEX_REVIEW_DEVTOOLS=1` (uses a codex config override; no repo scripts required).
|
|
201
|
-
Default to the standard `implementation-gate` for general reviews;
|
|
202
|
-
To run the full implementation gate with DevTools-enabled review, use `npx codex-orchestrator start implementation-gate
|
|
203
|
+
Default to the standard `implementation-gate` for general reviews; enable DevTools only when the review needs Chrome DevTools capabilities (visual/layout checks, network/perf diagnostics). After fixing review feedback, rerun the same gate and include any follow-up questions in `NOTES`.
|
|
204
|
+
To run the full implementation gate with DevTools-enabled review, use `CODEX_REVIEW_DEVTOOLS=1 npx codex-orchestrator start implementation-gate --format json --no-interactive --task <task-id>`.
|
|
203
205
|
|
|
204
206
|
## Frontend Testing
|
|
205
207
|
Frontend testing is a first-class pipeline with DevTools off by default. The shipped pipelines already set `CODEX_NON_INTERACTIVE=1`; add it explicitly for custom automation or when you want the `frontend-test` shortcut to suppress Codex prompts:
|
|
206
208
|
- `CODEX_NON_INTERACTIVE=1 npx codex-orchestrator start frontend-testing --format json --no-interactive --task <task-id>`
|
|
207
|
-
- `CODEX_NON_INTERACTIVE=1 npx codex-orchestrator start frontend-testing
|
|
209
|
+
- `CODEX_NON_INTERACTIVE=1 CODEX_REVIEW_DEVTOOLS=1 npx codex-orchestrator start frontend-testing --format json --no-interactive --task <task-id>` (DevTools enabled)
|
|
208
210
|
- `CODEX_NON_INTERACTIVE=1 codex-orchestrator frontend-test` (shortcut; add `--devtools` to enable DevTools)
|
|
209
211
|
|
|
210
212
|
If you run the pipelines from this repo, run `npm run build` first so `dist/` stays current (the pipeline executes the compiled runner).
|
|
211
213
|
|
|
212
|
-
Note: the frontend-testing
|
|
214
|
+
Note: the frontend-testing pipeline reads the shared `CODEX_REVIEW_DEVTOOLS` flag; prefer `--devtools` or `CODEX_REVIEW_DEVTOOLS=1` for explicit enablement.
|
|
213
215
|
|
|
214
216
|
Optional prompt overrides:
|
|
215
217
|
- `CODEX_FRONTEND_TEST_PROMPT` (inline prompt)
|
|
@@ -254,4 +256,4 @@ Use the hi-fi pipeline to snapshot complex marketing sites (motion, interactions
|
|
|
254
256
|
|
|
255
257
|
---
|
|
256
258
|
|
|
257
|
-
When preparing a review (repo-only), always capture the latest manifest path, run `node scripts/spec-guard.mjs --dry-run`, and ensure checklist mirrors (`/tasks`, `docs/`, `.agent/`) point at the evidence generated by Codex Orchestrator. That keeps the automation trustworthy and auditable across projects.
|
|
259
|
+
When preparing a review (repo-only), always capture the latest manifest path, run `node scripts/delegation-guard.mjs` and `node scripts/spec-guard.mjs --dry-run`, and ensure checklist mirrors (`/tasks`, `docs/`, `.agent/`) point at the evidence generated by Codex Orchestrator. That keeps the automation trustworthy and auditable across projects.
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import { readFile } from 'node:fs/promises';
|
|
3
|
+
import { basename, join } from 'node:path';
|
|
2
4
|
import process from 'node:process';
|
|
3
5
|
import { CodexOrchestrator } from '../orchestrator/src/cli/orchestrator.js';
|
|
4
6
|
import { formatPlanPreview } from '../orchestrator/src/cli/utils/planFormatter.js';
|
|
5
7
|
import { executeExecCommand } from '../orchestrator/src/cli/exec/command.js';
|
|
6
|
-
import {
|
|
8
|
+
import { resolveEnvironmentPaths } from '../scripts/lib/run-manifests.js';
|
|
9
|
+
import { normalizeEnvironmentPaths, sanitizeTaskId } from '../orchestrator/src/cli/run/environment.js';
|
|
7
10
|
import { RunEventEmitter } from '../orchestrator/src/cli/events/runEvents.js';
|
|
8
11
|
import { evaluateInteractiveGate } from '../orchestrator/src/cli/utils/interactive.js';
|
|
9
12
|
import { buildSelfCheckResult } from '../orchestrator/src/cli/selfCheck.js';
|
|
@@ -11,6 +14,7 @@ import { initCodexTemplates, formatInitSummary } from '../orchestrator/src/cli/i
|
|
|
11
14
|
import { runDoctor, formatDoctorSummary } from '../orchestrator/src/cli/doctor.js';
|
|
12
15
|
import { formatDevtoolsSetupSummary, runDevtoolsSetup } from '../orchestrator/src/cli/devtoolsSetup.js';
|
|
13
16
|
import { loadPackageInfo } from '../orchestrator/src/cli/utils/packageInfo.js';
|
|
17
|
+
import { slugify } from '../orchestrator/src/cli/utils/strings.js';
|
|
14
18
|
import { serveMcp } from '../orchestrator/src/cli/mcp.js';
|
|
15
19
|
async function main() {
|
|
16
20
|
const args = process.argv.slice(2);
|
|
@@ -35,6 +39,9 @@ async function main() {
|
|
|
35
39
|
case 'plan':
|
|
36
40
|
await handlePlan(orchestrator, args);
|
|
37
41
|
break;
|
|
42
|
+
case 'rlm':
|
|
43
|
+
await handleRlm(orchestrator, args);
|
|
44
|
+
break;
|
|
38
45
|
case 'resume':
|
|
39
46
|
await handleResume(orchestrator, args);
|
|
40
47
|
break;
|
|
@@ -111,110 +118,133 @@ function resolveTargetStageId(flags) {
|
|
|
111
118
|
}
|
|
112
119
|
return undefined;
|
|
113
120
|
}
|
|
121
|
+
/**
 * Read a CLI flag as a trimmed, non-empty string.
 *
 * @param {object} flags - Parsed flag map keyed by flag name.
 * @param {string} key - Name of the flag to look up.
 * @returns {string|undefined} The trimmed value, or `undefined` when the flag
 *   is absent, is not a string (e.g. a boolean switch), or is blank after
 *   trimming.
 */
function readStringFlag(flags, key) {
    const candidate = flags[key];
    if (typeof candidate === 'string') {
        const text = candidate.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return undefined;
}
|
|
129
|
+
/**
 * Copy RLM-related CLI overrides into `process.env`.
 *
 * `RLM_GOAL` is set from `goal` when it is truthy. Each of the flags
 * `validator`, `max-iterations`, `max-minutes` and `roles` is read through
 * `readStringFlag` and, when it yields a value, written to the matching
 * `RLM_*` environment variable. Variables without an override are left
 * untouched.
 *
 * @param {object} flags - Parsed flag map.
 * @param {string|undefined} goal - Goal text already resolved by the caller.
 * @returns {void}
 */
function applyRlmEnvOverrides(flags, goal) {
    if (goal) {
        process.env.RLM_GOAL = goal;
    }
    // Flag name -> environment variable, applied in this order.
    const flagToEnv = [
        ['validator', 'RLM_VALIDATOR'],
        ['max-iterations', 'RLM_MAX_ITERATIONS'],
        ['max-minutes', 'RLM_MAX_MINUTES'],
        ['roles', 'RLM_ROLES']
    ];
    for (const [flagName, envName] of flagToEnv) {
        const override = readStringFlag(flags, flagName);
        if (override) {
            process.env[envName] = override;
        }
    }
}
|
|
150
|
+
/**
 * Decide which task identifier an RLM run should use.
 *
 * Precedence: the explicit `taskFlag`, then a non-blank
 * `MCP_RUNNER_TASK_ID` environment variable, then a generated
 * `rlm-<slug>` id where the slug is derived from the repository directory
 * name (falling back to `adhoc` via `slugify`). Every result is passed
 * through `sanitizeTaskId`.
 *
 * @param {string|undefined} taskFlag - Value of the `--task` flag, if any.
 * @returns {string} The sanitized task identifier.
 */
function resolveRlmTaskId(taskFlag) {
    const requested = taskFlag || process.env.MCP_RUNNER_TASK_ID?.trim();
    if (requested) {
        return sanitizeTaskId(requested);
    }
    // Nothing supplied: derive a stable id from the repository folder name.
    const { repoRoot } = resolveEnvironmentPaths();
    return sanitizeTaskId(`rlm-${slugify(basename(repoRoot), 'adhoc')}`);
}
|
|
163
|
+
/**
 * Poll a run manifest on disk until it reports a terminal status.
 *
 * The manifest is re-read every `intervalMs` milliseconds until its `status`
 * is `succeeded`, `failed` or `cancelled`. There is no overall timeout; the
 * loop runs for as long as the manifest stays non-terminal.
 *
 * A read that lands while the manifest is being rewritten can return
 * truncated JSON. Instead of aborting on the first such read, unparsable
 * content is retried on the next tick, and the parse error is rethrown only
 * after several consecutive failures so that a genuinely corrupt manifest
 * still surfaces. NOTE(review): whether the manifest writer is atomic is not
 * visible from this file — the retry is defensive.
 *
 * @param {string} manifestPath - Path of the manifest JSON file.
 * @param {number} [intervalMs=2000] - Delay between polls, in milliseconds.
 * @returns {Promise<object>} The parsed manifest once its status is terminal.
 * @throws {SyntaxError} When the manifest is unparsable on 5 consecutive reads.
 * @throws {Error} Any error raised by `readFile` (e.g. a missing file)
 *   propagates immediately.
 */
async function waitForManifestCompletion(manifestPath, intervalMs = 2000) {
    const terminalStatuses = new Set(['succeeded', 'failed', 'cancelled']);
    const maxConsecutiveParseFailures = 5;
    let consecutiveParseFailures = 0;
    while (true) {
        const raw = await readFile(manifestPath, 'utf8');
        let manifest;
        let parsed = false;
        try {
            manifest = JSON.parse(raw);
            parsed = true;
            consecutiveParseFailures = 0;
        }
        catch (error) {
            // Likely a partially written file; give the writer a chance to finish.
            consecutiveParseFailures += 1;
            if (consecutiveParseFailures >= maxConsecutiveParseFailures) {
                throw error;
            }
        }
        if (parsed && terminalStatuses.has(manifest.status)) {
            return manifest;
        }
        await new Promise((resolve) => setTimeout(resolve, intervalMs));
    }
}
|
|
174
|
+
/**
 * Load the final outcome recorded in an RLM state file.
 *
 * This is a best-effort read: a missing file, unreadable file or invalid JSON
 * all yield `null`, as does a state document without a truthy `final` entry.
 *
 * @param {string} statePath - Path of the RLM `state.json` file.
 * @returns {Promise<{exitCode: *, status: *}|null>} The `exitCode` and
 *   `status` taken from `final`, or `null` when no final outcome is available.
 */
async function readRlmState(statePath) {
    let state;
    try {
        state = JSON.parse(await readFile(statePath, 'utf8'));
    }
    catch {
        // Deliberately swallowed: callers treat null as "state unavailable".
        return null;
    }
    const outcome = state?.final;
    if (!outcome) {
        return null;
    }
    return { exitCode: outcome.exitCode, status: outcome.status };
}
|
|
114
187
|
async function handleStart(orchestrator, rawArgs) {
|
|
115
188
|
const { positionals, flags } = parseArgs(rawArgs);
|
|
116
189
|
const pipelineId = positionals[0];
|
|
117
190
|
const format = flags['format'] === 'json' ? 'json' : 'text';
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
console.error(`[HUD disabled] ${gate.reason}`);
|
|
132
|
-
}
|
|
133
|
-
try {
|
|
191
|
+
if (pipelineId === 'rlm') {
|
|
192
|
+
const goal = readStringFlag(flags, 'goal');
|
|
193
|
+
applyRlmEnvOverrides(flags, goal);
|
|
194
|
+
}
|
|
195
|
+
await withRunUi(flags, format, async (runEvents) => {
|
|
196
|
+
let taskIdOverride = typeof flags['task'] === 'string' ? flags['task'] : undefined;
|
|
197
|
+
if (pipelineId === 'rlm') {
|
|
198
|
+
taskIdOverride = resolveRlmTaskId(taskIdOverride);
|
|
199
|
+
process.env.MCP_RUNNER_TASK_ID = taskIdOverride;
|
|
200
|
+
if (format !== 'json') {
|
|
201
|
+
console.log(`Task: ${taskIdOverride}`);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
134
204
|
const result = await orchestrator.start({
|
|
135
205
|
pipelineId,
|
|
136
|
-
taskId:
|
|
206
|
+
taskId: taskIdOverride,
|
|
137
207
|
parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
|
|
138
208
|
approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
|
|
139
209
|
targetStageId: resolveTargetStageId(flags),
|
|
140
210
|
runEvents
|
|
141
211
|
});
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
run_id: result.manifest.run_id,
|
|
145
|
-
status: result.manifest.status,
|
|
146
|
-
artifact_root: result.manifest.artifact_root,
|
|
147
|
-
manifest: `${result.manifest.artifact_root}/manifest.json`,
|
|
148
|
-
log_path: result.manifest.log_path
|
|
149
|
-
};
|
|
150
|
-
if (format === 'json') {
|
|
151
|
-
console.log(JSON.stringify(payload, null, 2));
|
|
152
|
-
}
|
|
153
|
-
else {
|
|
154
|
-
console.log(`Run started: ${payload.run_id}`);
|
|
155
|
-
console.log(`Status: ${payload.status}`);
|
|
156
|
-
console.log(`Manifest: ${payload.manifest}`);
|
|
157
|
-
console.log(`Log: ${payload.log_path}`);
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
finally {
|
|
161
|
-
hud?.stop();
|
|
162
|
-
runEvents.dispose();
|
|
163
|
-
}
|
|
212
|
+
emitRunOutput(result, format, 'Run started');
|
|
213
|
+
});
|
|
164
214
|
}
|
|
165
215
|
async function handleFrontendTest(orchestrator, rawArgs) {
|
|
166
216
|
const { positionals, flags } = parseArgs(rawArgs);
|
|
167
217
|
const format = flags['format'] === 'json' ? 'json' : 'text';
|
|
168
218
|
const devtools = Boolean(flags['devtools']);
|
|
169
|
-
const interactiveRequested = Boolean(flags['interactive'] || flags['ui']);
|
|
170
|
-
const interactiveDisabled = Boolean(flags['no-interactive']);
|
|
171
|
-
const runEvents = new RunEventEmitter();
|
|
172
|
-
const gate = evaluateInteractiveGate({
|
|
173
|
-
requested: interactiveRequested,
|
|
174
|
-
disabled: interactiveDisabled,
|
|
175
|
-
format,
|
|
176
|
-
stdoutIsTTY: process.stdout.isTTY === true,
|
|
177
|
-
stderrIsTTY: process.stderr.isTTY === true,
|
|
178
|
-
term: process.env.TERM ?? null
|
|
179
|
-
});
|
|
180
|
-
const hud = await maybeStartHud(gate, runEvents);
|
|
181
|
-
if (!gate.enabled && interactiveRequested && !interactiveDisabled && gate.reason) {
|
|
182
|
-
console.error(`[HUD disabled] ${gate.reason}`);
|
|
183
|
-
}
|
|
184
219
|
if (positionals.length > 0) {
|
|
185
220
|
console.error(`[frontend-test] ignoring extra arguments: ${positionals.join(' ')}`);
|
|
186
221
|
}
|
|
222
|
+
const originalDevtools = process.env.CODEX_REVIEW_DEVTOOLS;
|
|
223
|
+
if (devtools) {
|
|
224
|
+
process.env.CODEX_REVIEW_DEVTOOLS = '1';
|
|
225
|
+
}
|
|
187
226
|
try {
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
227
|
+
await withRunUi(flags, format, async (runEvents) => {
|
|
228
|
+
const result = await orchestrator.start({
|
|
229
|
+
pipelineId: 'frontend-testing',
|
|
230
|
+
taskId: typeof flags['task'] === 'string' ? flags['task'] : undefined,
|
|
231
|
+
parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
|
|
232
|
+
approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
|
|
233
|
+
targetStageId: resolveTargetStageId(flags),
|
|
234
|
+
runEvents
|
|
235
|
+
});
|
|
236
|
+
emitRunOutput(result, format, 'Run started');
|
|
196
237
|
});
|
|
197
|
-
hud?.stop();
|
|
198
|
-
const payload = {
|
|
199
|
-
run_id: result.manifest.run_id,
|
|
200
|
-
status: result.manifest.status,
|
|
201
|
-
artifact_root: result.manifest.artifact_root,
|
|
202
|
-
manifest: `${result.manifest.artifact_root}/manifest.json`,
|
|
203
|
-
log_path: result.manifest.log_path
|
|
204
|
-
};
|
|
205
|
-
if (format === 'json') {
|
|
206
|
-
console.log(JSON.stringify(payload, null, 2));
|
|
207
|
-
}
|
|
208
|
-
else {
|
|
209
|
-
console.log(`Run started: ${payload.run_id}`);
|
|
210
|
-
console.log(`Status: ${payload.status}`);
|
|
211
|
-
console.log(`Manifest: ${payload.manifest}`);
|
|
212
|
-
console.log(`Log: ${payload.log_path}`);
|
|
213
|
-
}
|
|
214
238
|
}
|
|
215
239
|
finally {
|
|
216
|
-
|
|
217
|
-
|
|
240
|
+
if (devtools) {
|
|
241
|
+
if (originalDevtools === undefined) {
|
|
242
|
+
delete process.env.CODEX_REVIEW_DEVTOOLS;
|
|
243
|
+
}
|
|
244
|
+
else {
|
|
245
|
+
process.env.CODEX_REVIEW_DEVTOOLS = originalDevtools;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
218
248
|
}
|
|
219
249
|
}
|
|
220
250
|
async function handlePlan(orchestrator, rawArgs) {
|
|
@@ -232,6 +262,47 @@ async function handlePlan(orchestrator, rawArgs) {
|
|
|
232
262
|
}
|
|
233
263
|
process.stdout.write(`${formatPlanPreview(result)}\n`);
|
|
234
264
|
}
|
|
265
|
+
/**
 * Handle the `rlm` CLI command: start the `rlm` pipeline, wait for its
 * manifest to reach a terminal status, and set the process exit code.
 *
 * The goal is taken from the positional arguments (joined with spaces), then
 * the `--goal` flag, then the `RLM_GOAL` environment variable. The resolved
 * task id is exported as `MCP_RUNNER_TASK_ID` and the remaining RLM flags are
 * exported through `applyRlmEnvOverrides` before the run starts.
 *
 * After the run, the exit code comes from the `final` entry of
 * `<artifact_root>/rlm/state.json`; when that state cannot be read the
 * command reports the manifest status and exits with code 10.
 *
 * @param {object} orchestrator - Orchestrator instance exposing `start()`.
 * @param {string[]} rawArgs - Command-line arguments following `rlm`.
 * @returns {Promise<void>}
 * @throws {Error} When no goal is supplied or the run does not start.
 */
async function handleRlm(orchestrator, rawArgs) {
    const { positionals, flags } = parseArgs(rawArgs);
    const stringFlag = (name) => (typeof flags[name] === 'string' ? flags[name] : undefined);

    const goal = (positionals.length > 0 ? positionals.join(' ') : undefined)
        ?? readStringFlag(flags, 'goal')
        ?? process.env.RLM_GOAL?.trim();
    if (!goal) {
        throw new Error('rlm requires a goal. Use: codex-orchestrator rlm \"<goal>\".');
    }

    const taskId = resolveRlmTaskId(stringFlag('task'));
    process.env.MCP_RUNNER_TASK_ID = taskId;
    applyRlmEnvOverrides(flags, goal);
    console.log(`Task: ${taskId}`);

    // withRunUi's result is not used; capture the start result via closure.
    let startResult = null;
    await withRunUi(flags, 'text', async (runEvents) => {
        startResult = await orchestrator.start({
            pipelineId: 'rlm',
            taskId,
            parentRunId: stringFlag('parent-run'),
            approvalPolicy: stringFlag('approval-policy'),
            runEvents
        });
        emitRunOutput(startResult, 'text', 'Run started');
    });
    if (!startResult) {
        throw new Error('rlm run failed to start.');
    }

    const { repoRoot } = resolveEnvironmentPaths();
    const artifactRoot = startResult.manifest.artifact_root;
    const manifest = await waitForManifestCompletion(join(repoRoot, artifactRoot, 'manifest.json'));
    const rlmState = await readRlmState(join(repoRoot, artifactRoot, 'rlm', 'state.json'));

    if (rlmState) {
        console.log(`RLM status: ${rlmState.status}`);
        process.exitCode = rlmState.exitCode;
    }
    else {
        console.log(`RLM status: ${manifest.status}`);
        console.error('RLM state file missing; treating as internal error.');
        process.exitCode = 10;
    }
}
|
|
235
306
|
async function handleResume(orchestrator, rawArgs) {
|
|
236
307
|
const { positionals, flags } = parseArgs(rawArgs);
|
|
237
308
|
const runId = (flags['run'] ?? positionals[0]);
|
|
@@ -239,22 +310,7 @@ async function handleResume(orchestrator, rawArgs) {
|
|
|
239
310
|
throw new Error('resume requires --run <run-id>.');
|
|
240
311
|
}
|
|
241
312
|
const format = flags['format'] === 'json' ? 'json' : 'text';
|
|
242
|
-
|
|
243
|
-
const interactiveDisabled = Boolean(flags['no-interactive']);
|
|
244
|
-
const runEvents = new RunEventEmitter();
|
|
245
|
-
const gate = evaluateInteractiveGate({
|
|
246
|
-
requested: interactiveRequested,
|
|
247
|
-
disabled: interactiveDisabled,
|
|
248
|
-
format,
|
|
249
|
-
stdoutIsTTY: process.stdout.isTTY === true,
|
|
250
|
-
stderrIsTTY: process.stderr.isTTY === true,
|
|
251
|
-
term: process.env.TERM ?? null
|
|
252
|
-
});
|
|
253
|
-
const hud = await maybeStartHud(gate, runEvents);
|
|
254
|
-
if (!gate.enabled && interactiveRequested && !interactiveDisabled && gate.reason) {
|
|
255
|
-
console.error(`[HUD disabled] ${gate.reason}`);
|
|
256
|
-
}
|
|
257
|
-
try {
|
|
313
|
+
await withRunUi(flags, format, async (runEvents) => {
|
|
258
314
|
const result = await orchestrator.resume({
|
|
259
315
|
runId,
|
|
260
316
|
resumeToken: typeof flags['token'] === 'string' ? flags['token'] : undefined,
|
|
@@ -263,28 +319,8 @@ async function handleResume(orchestrator, rawArgs) {
|
|
|
263
319
|
targetStageId: resolveTargetStageId(flags),
|
|
264
320
|
runEvents
|
|
265
321
|
});
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
run_id: result.manifest.run_id,
|
|
269
|
-
status: result.manifest.status,
|
|
270
|
-
artifact_root: result.manifest.artifact_root,
|
|
271
|
-
manifest: `${result.manifest.artifact_root}/manifest.json`,
|
|
272
|
-
log_path: result.manifest.log_path
|
|
273
|
-
};
|
|
274
|
-
if (format === 'json') {
|
|
275
|
-
console.log(JSON.stringify(payload, null, 2));
|
|
276
|
-
}
|
|
277
|
-
else {
|
|
278
|
-
console.log(`Run resumed: ${payload.run_id}`);
|
|
279
|
-
console.log(`Status: ${payload.status}`);
|
|
280
|
-
console.log(`Manifest: ${payload.manifest}`);
|
|
281
|
-
console.log(`Log: ${payload.log_path}`);
|
|
282
|
-
}
|
|
283
|
-
}
|
|
284
|
-
finally {
|
|
285
|
-
hud?.stop();
|
|
286
|
-
runEvents.dispose();
|
|
287
|
-
}
|
|
322
|
+
emitRunOutput(result, format, 'Run resumed');
|
|
323
|
+
});
|
|
288
324
|
}
|
|
289
325
|
async function handleStatus(orchestrator, rawArgs) {
|
|
290
326
|
const { positionals, flags } = parseArgs(rawArgs);
|
|
@@ -315,6 +351,47 @@ async function maybeStartHud(gate, emitter) {
|
|
|
315
351
|
const { startHud } = await import('../orchestrator/src/cli/ui/controller.js');
|
|
316
352
|
return startHud({ emitter, footerNote: 'interactive HUD (read-only)' });
|
|
317
353
|
}
|
|
354
|
+
/**
 * Run `action` with a fresh run-event emitter and, when the interactive gate
 * allows it, a HUD attached to that emitter.
 *
 * The HUD is requested with `--interactive` / `--ui` and suppressed with
 * `--no-interactive`. When it was requested but the gate refuses it, the
 * gate's reason is printed to stderr.
 *
 * Gate evaluation and HUD start-up run inside the `try` block, so the emitter
 * is disposed (and a started HUD stopped) even when HUD start-up itself
 * fails, not only when `action` fails.
 *
 * @param {object} flags - Parsed flag map.
 * @param {string} format - Output format (`'json'` or `'text'`), forwarded to
 *   the interactive gate.
 * @param {(runEvents: object) => Promise<*>} action - Work to perform; it
 *   receives the run-event emitter.
 * @returns {Promise<*>} Whatever `action` resolves to.
 * @throws Rethrows any error raised by the gate, the HUD start-up or `action`.
 */
async function withRunUi(flags, format, action) {
    const interactiveRequested = Boolean(flags['interactive'] || flags['ui']);
    const interactiveDisabled = Boolean(flags['no-interactive']);
    const runEvents = new RunEventEmitter();
    let hud;
    try {
        const gate = evaluateInteractiveGate({
            requested: interactiveRequested,
            disabled: interactiveDisabled,
            format,
            stdoutIsTTY: process.stdout.isTTY === true,
            stderrIsTTY: process.stderr.isTTY === true,
            term: process.env.TERM ?? null
        });
        hud = await maybeStartHud(gate, runEvents);
        if (!gate.enabled && interactiveRequested && !interactiveDisabled && gate.reason) {
            console.error(`[HUD disabled] ${gate.reason}`);
        }
        return await action(runEvents);
    }
    finally {
        hud?.stop();
        runEvents.dispose();
    }
}
|
|
378
|
+
/**
 * Print a summary of a started or resumed run to stdout.
 *
 * With `format === 'json'` a single pretty-printed JSON document is written
 * containing `run_id`, `status`, `artifact_root`, `manifest` and `log_path`.
 * Otherwise four human-readable lines are written, the first prefixed with
 * `label`.
 *
 * @param {{manifest: object}} result - Orchestrator result carrying the run manifest.
 * @param {string} format - `'json'` for machine-readable output; anything
 *   else produces text.
 * @param {string} label - Prefix for the first text line (e.g. `Run started`).
 * @returns {void}
 */
function emitRunOutput(result, format, label) {
    const { run_id, status, artifact_root, log_path } = result.manifest;
    const payload = {
        run_id,
        status,
        artifact_root,
        manifest: `${artifact_root}/manifest.json`,
        log_path
    };
    if (format === 'json') {
        console.log(JSON.stringify(payload, null, 2));
        return;
    }
    const lines = [
        `${label}: ${payload.run_id}`,
        `Status: ${payload.status}`,
        `Manifest: ${payload.manifest}`,
        `Log: ${payload.log_path}`
    ];
    for (const line of lines) {
        console.log(line);
    }
}
|
|
318
395
|
async function handleExec(rawArgs) {
|
|
319
396
|
const parsed = parseExecArgs(rawArgs);
|
|
320
397
|
if (parsed.commandTokens.length === 0) {
|
|
@@ -322,7 +399,7 @@ async function handleExec(rawArgs) {
|
|
|
322
399
|
}
|
|
323
400
|
const isInteractive = process.stdout.isTTY === true && process.stderr.isTTY === true;
|
|
324
401
|
const outputMode = parsed.requestedMode ?? (isInteractive ? 'interactive' : 'jsonl');
|
|
325
|
-
const env =
|
|
402
|
+
const env = normalizeEnvironmentPaths(resolveEnvironmentPaths());
|
|
326
403
|
if (parsed.taskId) {
|
|
327
404
|
env.taskId = sanitizeTaskId(parsed.taskId);
|
|
328
405
|
}
|
|
@@ -550,6 +627,22 @@ Commands:
|
|
|
550
627
|
--approval-policy <p> Record approval policy metadata.
|
|
551
628
|
--format json Emit machine-readable output.
|
|
552
629
|
--target <stage-id> Focus plan/build metadata on a specific stage (alias: --target-stage).
|
|
630
|
+
--goal "<goal>" When pipeline is rlm, set the RLM goal.
|
|
631
|
+
--validator <cmd|none> When pipeline is rlm, set the validator command.
|
|
632
|
+
--max-iterations <n> When pipeline is rlm, override max iterations.
|
|
633
|
+
--max-minutes <n> When pipeline is rlm, override max minutes.
|
|
634
|
+
--roles <single|triad> When pipeline is rlm, set role split.
|
|
635
|
+
--interactive | --ui Enable read-only HUD when running in a TTY.
|
|
636
|
+
--no-interactive Force disable HUD (default is off unless requested).
|
|
637
|
+
|
|
638
|
+
rlm "<goal>" Run RLM loop until validator passes.
|
|
639
|
+
--task <id> Override task identifier.
|
|
640
|
+
--validator <cmd|none> Set validator command or disable validation.
|
|
641
|
+
--max-iterations <n> Override max iterations (0 = unlimited with validator).
|
|
642
|
+
--max-minutes <n> Optional time-based guardrail in minutes.
|
|
643
|
+
--roles <single|triad> Choose single or triad role split.
|
|
644
|
+
--parent-run <id> Link run to parent run id.
|
|
645
|
+
--approval-policy <p> Record approval policy metadata.
|
|
553
646
|
--interactive | --ui Enable read-only HUD when running in a TTY.
|
|
554
647
|
--no-interactive Force disable HUD (default is off unless requested).
|
|
555
648
|
|
|
@@ -1,28 +1,102 @@
|
|
|
1
1
|
import { readFile } from 'node:fs/promises';
|
|
2
2
|
import { join } from 'node:path';
|
|
3
3
|
import { logger } from '../../logger.js';
|
|
4
|
+
import { findPackageRoot } from '../utils/packageInfo.js';
|
|
5
|
+
/**
 * Load `codex.orchestrator.json` from the repository root.
 *
 * @param {{repoRoot: string}} env - Environment paths; only `repoRoot` is read.
 * @returns {Promise<object|null>} The config normalized with source `'repo'`,
 *   or `null` (after logging a warning) when `readConfig` yields nothing for
 *   that path.
 */
export async function loadRepoConfig(env) {
    const configFile = join(env.repoRoot, 'codex.orchestrator.json');
    const loaded = await readConfig(configFile);
    if (!loaded) {
        logger.warn(`[codex-config] Missing codex.orchestrator.json at ${configFile}`);
        return null;
    }
    logger.info(`[codex-config] Loaded user config from ${configFile}`);
    return normalizeUserConfig(loaded, 'repo');
}
|
|
15
|
+
export async function loadPackageConfig(env) {
|
|
16
|
+
const repoConfigPath = join(env.repoRoot, 'codex.orchestrator.json');
|
|
17
|
+
const packageRoot = findPackageRoot();
|
|
18
|
+
const packageConfigPath = join(packageRoot, 'codex.orchestrator.json');
|
|
19
|
+
if (packageConfigPath === repoConfigPath) {
|
|
20
|
+
return null;
|
|
21
|
+
}
|
|
22
|
+
const packageConfig = await readConfig(packageConfigPath);
|
|
23
|
+
if (packageConfig) {
|
|
24
|
+
logger.info(`[codex-config] Loaded user config from ${packageConfigPath}`);
|
|
25
|
+
return normalizeUserConfig(packageConfig, 'package');
|
|
26
|
+
}
|
|
27
|
+
logger.warn(`[codex-config] Missing codex.orchestrator.json at ${packageConfigPath}`);
|
|
28
|
+
return null;
|
|
29
|
+
}
|
|
4
30
|
export async function loadUserConfig(env) {
|
|
5
|
-
const
|
|
31
|
+
const repoConfig = await loadRepoConfig(env);
|
|
32
|
+
if (repoConfig) {
|
|
33
|
+
return repoConfig;
|
|
34
|
+
}
|
|
35
|
+
return await loadPackageConfig(env);
|
|
36
|
+
}
|
|
37
|
+
export function findPipeline(config, id) {
|
|
38
|
+
if (!config?.pipelines) {
|
|
39
|
+
return null;
|
|
40
|
+
}
|
|
41
|
+
return config.pipelines.find((pipeline) => pipeline.id === id) ?? null;
|
|
42
|
+
}
|
|
43
|
+
function normalizeUserConfig(config, source) {
|
|
44
|
+
if (!config) {
|
|
45
|
+
return null;
|
|
46
|
+
}
|
|
47
|
+
const stageSets = normalizeStageSets(config.stageSets);
|
|
48
|
+
const pipelines = Array.isArray(config.pipelines)
|
|
49
|
+
? config.pipelines.map((pipeline) => expandPipelineStages(pipeline, stageSets))
|
|
50
|
+
: config.pipelines;
|
|
51
|
+
return { pipelines, defaultPipeline: config.defaultPipeline, source };
|
|
52
|
+
}
|
|
53
|
+
async function readConfig(configPath) {
|
|
6
54
|
try {
|
|
7
55
|
const raw = await readFile(configPath, 'utf8');
|
|
8
|
-
|
|
9
|
-
logger.info(`[codex-config] Loaded user config from ${configPath}`);
|
|
10
|
-
if (parsed && Array.isArray(parsed.pipelines)) {
|
|
11
|
-
return parsed;
|
|
12
|
-
}
|
|
13
|
-
return parsed ?? null;
|
|
56
|
+
return JSON.parse(raw);
|
|
14
57
|
}
|
|
15
58
|
catch (error) {
|
|
16
59
|
if (error.code === 'ENOENT') {
|
|
17
|
-
logger.warn(`[codex-config] Missing codex.orchestrator.json at ${configPath}`);
|
|
18
60
|
return null;
|
|
19
61
|
}
|
|
20
62
|
throw error;
|
|
21
63
|
}
|
|
22
64
|
}
|
|
23
|
-
|
|
24
|
-
if (!
|
|
25
|
-
return
|
|
65
|
+
function normalizeStageSets(stageSets) {
|
|
66
|
+
if (!stageSets) {
|
|
67
|
+
return {};
|
|
26
68
|
}
|
|
27
|
-
|
|
69
|
+
if (typeof stageSets !== 'object' || Array.isArray(stageSets)) {
|
|
70
|
+
throw new Error('codex.orchestrator.json stageSets must be an object of stage arrays.');
|
|
71
|
+
}
|
|
72
|
+
const normalized = {};
|
|
73
|
+
for (const [key, value] of Object.entries(stageSets)) {
|
|
74
|
+
if (!Array.isArray(value)) {
|
|
75
|
+
throw new Error(`Stage set "${key}" must be an array.`);
|
|
76
|
+
}
|
|
77
|
+
if (value.some((stage) => isStageSetRef(stage))) {
|
|
78
|
+
throw new Error(`Stage set "${key}" cannot include stage-set references.`);
|
|
79
|
+
}
|
|
80
|
+
normalized[key] = value;
|
|
81
|
+
}
|
|
82
|
+
return normalized;
|
|
83
|
+
}
|
|
84
|
+
function expandPipelineStages(pipeline, stageSets) {
|
|
85
|
+
const expanded = [];
|
|
86
|
+
for (const stage of pipeline.stages ?? []) {
|
|
87
|
+
if (isStageSetRef(stage)) {
|
|
88
|
+
const sharedStages = stageSets[stage.ref];
|
|
89
|
+
if (!sharedStages) {
|
|
90
|
+
throw new Error(`Pipeline "${pipeline.id}" references unknown stage set "${stage.ref}".`);
|
|
91
|
+
}
|
|
92
|
+
expanded.push(...sharedStages);
|
|
93
|
+
}
|
|
94
|
+
else {
|
|
95
|
+
expanded.push(stage);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
return { ...pipeline, stages: expanded };
|
|
99
|
+
}
|
|
100
|
+
function isStageSetRef(stage) {
|
|
101
|
+
return stage.kind === 'stage-set';
|
|
28
102
|
}
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import process from 'node:process';
|
|
2
2
|
import { bootstrapManifest } from '../run/manifest.js';
|
|
3
3
|
import { generateRunId } from '../utils/runId.js';
|
|
4
|
-
import { JsonlWriter } from '../utils/jsonlWriter.js';
|
|
5
4
|
import { ExperienceStore } from '../../persistence/ExperienceStore.js';
|
|
6
5
|
import { createTelemetrySink } from '../../../../packages/orchestrator/src/telemetry/otel-exporter.js';
|
|
7
6
|
import { createNotificationSink } from '../../../../packages/orchestrator/src/notifications/index.js';
|
|
@@ -47,7 +46,11 @@ export async function bootstrapExecContext(context, invocation) {
|
|
|
47
46
|
targets: invocation.notifyTargets,
|
|
48
47
|
envTargets: envNotifications
|
|
49
48
|
});
|
|
50
|
-
const jsonlWriter = outputMode === 'jsonl'
|
|
49
|
+
const jsonlWriter = outputMode === 'jsonl'
|
|
50
|
+
? (event) => {
|
|
51
|
+
stdout.write(`${JSON.stringify(event)}\n`);
|
|
52
|
+
}
|
|
53
|
+
: null;
|
|
51
54
|
return {
|
|
52
55
|
env,
|
|
53
56
|
invocation,
|
|
@@ -2,9 +2,6 @@ import process from 'node:process';
|
|
|
2
2
|
import { persistManifest } from '../run/manifestPersister.js';
|
|
3
3
|
import { isoTimestamp } from '../utils/time.js';
|
|
4
4
|
import { logger } from '../../logger.js';
|
|
5
|
-
import { runLearningHarvester } from '../../learning/harvester.js';
|
|
6
|
-
import { synthesizeScenario } from '../../learning/runner.js';
|
|
7
|
-
import { runScenarioValidation } from '../../learning/validator.js';
|
|
8
5
|
export async function maybeTriggerLearning(runContext, runStatus) {
|
|
9
6
|
const enabled = process.env.LEARNING_PIPELINE_ENABLED === '1';
|
|
10
7
|
if (!enabled) {
|
|
@@ -15,6 +12,11 @@ export async function maybeTriggerLearning(runContext, runStatus) {
|
|
|
15
12
|
return;
|
|
16
13
|
}
|
|
17
14
|
try {
|
|
15
|
+
const [{ runLearningHarvester }, { synthesizeScenario }, { runScenarioValidation }] = await Promise.all([
|
|
16
|
+
import('../../learning/harvester.js'),
|
|
17
|
+
import('../../learning/runner.js'),
|
|
18
|
+
import('../../learning/validator.js')
|
|
19
|
+
]);
|
|
18
20
|
const harvester = await runLearningHarvester(runContext.manifest, {
|
|
19
21
|
repoRoot: runContext.env.repoRoot,
|
|
20
22
|
runsRoot: runContext.env.runsRoot,
|
|
@@ -10,7 +10,7 @@ export async function runExecStage(context) {
|
|
|
10
10
|
const serialized = serializeExecEvent(event);
|
|
11
11
|
context.telemetryTasks.push(Promise.resolve(context.telemetrySink.record(serialized)).then(() => undefined));
|
|
12
12
|
if (context.outputMode === 'jsonl' && context.jsonlWriter) {
|
|
13
|
-
context.jsonlWriter
|
|
13
|
+
context.jsonlWriter(serialized);
|
|
14
14
|
}
|
|
15
15
|
else if (context.outputMode === 'interactive') {
|
|
16
16
|
streamInteractive(context.stdout, context.stderr, event);
|
|
@@ -61,7 +61,7 @@ export function createRunSummaryPayload(params) {
|
|
|
61
61
|
}
|
|
62
62
|
export function renderRunOutput(context, summaryPayload, summaryEvent) {
|
|
63
63
|
if (context.outputMode === 'jsonl' && context.jsonlWriter) {
|
|
64
|
-
context.jsonlWriter
|
|
64
|
+
context.jsonlWriter(summaryEvent);
|
|
65
65
|
return;
|
|
66
66
|
}
|
|
67
67
|
if (context.outputMode === 'json') {
|