@kbediako/codex-orchestrator 0.1.36 → 0.1.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -3
- package/dist/bin/codex-orchestrator.js +56 -0
- package/dist/orchestrator/src/cli/config/userConfig.js +17 -1
- package/dist/orchestrator/src/cli/doctorUsage.js +17 -1
- package/dist/orchestrator/src/cli/frontendTestingRunner.js +24 -6
- package/dist/orchestrator/src/cli/orchestrator.js +119 -16
- package/dist/orchestrator/src/cli/rlmRunner.js +27 -3
- package/dist/orchestrator/src/cli/run/manifest.js +19 -0
- package/dist/orchestrator/src/cli/runtime/codexCommand.js +39 -0
- package/dist/orchestrator/src/cli/runtime/index.js +3 -0
- package/dist/orchestrator/src/cli/runtime/mode.js +53 -0
- package/dist/orchestrator/src/cli/runtime/provider.js +205 -0
- package/dist/orchestrator/src/cli/runtime/types.js +1 -0
- package/dist/orchestrator/src/cli/services/commandRunner.js +20 -5
- package/dist/orchestrator/src/cli/services/runPreparation.js +2 -0
- package/dist/orchestrator/src/cli/services/runSummaryWriter.js +12 -0
- package/dist/scripts/run-review.js +55 -9
- package/docs/README.md +7 -4
- package/package.json +13 -3
- package/schemas/manifest.json +21 -0
- package/skills/chrome-devtools/SKILL.md +1 -1
- package/skills/codex-orchestrator/SKILL.md +83 -0
- package/skills/collab-subagents-first/SKILL.md +1 -0
- package/skills/delegation-usage/SKILL.md +1 -0
- package/templates/codex/.codex/agents/explorer-fast.toml +1 -0
package/README.md
CHANGED
|
@@ -39,6 +39,16 @@ Node.js >= 20 is required.
|
|
|
39
39
|
> Tip: if you prefer `npx`, replace `codex-orch` with `npx @kbediako/codex-orchestrator`.
|
|
40
40
|
> Tip: for multiple commands, you can also `export MCP_RUNNER_TASK_ID=<task-id>` once.
|
|
41
41
|
|
|
42
|
+
## Runtime + Execution Modes
|
|
43
|
+
|
|
44
|
+
- Mode semantics are orthogonal:
|
|
45
|
+
- `executionMode=mcp|cloud` controls where stages execute.
|
|
46
|
+
- `runtimeMode=cli|appserver` controls local runtime provider selection.
|
|
47
|
+
- The local default runtime is `appserver`; `--runtime-mode cli` remains available as a break-glass override.
|
|
48
|
+
- `--execution-mode cloud --runtime-mode appserver` is intentionally unsupported and fails fast with actionable errors.
|
|
49
|
+
- `js_repl` is enabled by default globally. For deterministic cloud contracts, run explicit feature lanes (`CODEX_CLOUD_ENABLE_FEATURES=js_repl` and separate `CODEX_CLOUD_DISABLE_FEATURES=js_repl` runs). Use `CODEX_CLOUD_DISABLE_FEATURES=js_repl` for task-scoped cloud break-glass; reserve `codex features disable js_repl` for global emergency toggles and re-enable with `codex features enable js_repl`.
|
|
50
|
+
- `memories` remains scoped to explicit eval lanes (legacy alias `memory_tool` is compatibility-only).
|
|
51
|
+
|
|
42
52
|
## Downstream init (recommended)
|
|
43
53
|
|
|
44
54
|
Use this when you want Codex to drive work inside another repo with the CO defaults.
|
|
@@ -97,6 +107,7 @@ codex -c 'mcp_servers.delegation.enabled=true' ...
|
|
|
97
107
|
Codex built-ins are `default`, `explorer`, `worker`, and `awaiter`. `researcher` is user-defined.
|
|
98
108
|
- `spawn_agent` defaults to `default` when `agent_type` is omitted, so always set `agent_type` explicitly.
|
|
99
109
|
- Multi-turn loops are supported (`spawn_agent` -> `send_input` -> `wait`/`resume_agent` -> `close_agent`), so subagents can iterate before parent synthesis.
|
|
110
|
+
- Keep `fork_context` off by default for bounded subagent streams; set `fork_context=true` only when the subagent must inherit prior thread history.
|
|
100
111
|
|
|
101
112
|
In Codex CLI `0.105.0`, built-in `explorer` no longer pins an older model profile; it inherits top-level defaults unless you attach a role `config_file`.
|
|
102
113
|
CO now ships this downstream starter config via `init codex` (source template: `templates/codex/.codex/config.toml`; installed as `.codex/config.toml` in target repos):
|
|
@@ -155,7 +166,7 @@ Delegation guard profile:
|
|
|
155
166
|
|
|
156
167
|
RLM (Recursive Language Model) is the long-horizon loop used by the `rlm` pipeline (`codex-orchestrator rlm "<goal>"` or `codex-orchestrator start rlm --goal "<goal>"`). Delegated runs only enter RLM when the child is launched with the `rlm` pipeline (or the rlm runner directly). In auto mode it resolves to symbolic only when context is large (`RLM_SYMBOLIC_MIN_BYTES`) and an explicit context signal is present (`RLM_CONTEXT_PATH` or delegated run); otherwise it stays iterative. The runner writes state to `.runs/<task-id>/cli/<run-id>/rlm/state.json` and stops when the validator passes or budgets are exhausted.
|
|
157
168
|
For symbolic mode, the Option 2 alignment checker is enabled by default (`RLM_ALIGNMENT_CHECKER=1`) and writes append-only alignment artifacts under `.runs/<task-id>/cli/<run-id>/rlm/alignment/` (ledger + projection). Rollback toggle: set `RLM_ALIGNMENT_CHECKER=0`. Enforcement is opt-in via `RLM_ALIGNMENT_CHECKER_ENFORCE=1`.
|
|
158
|
-
Symbolic subcalls can optionally use collab tools. Fast path: `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; sets `RLM_SYMBOLIC_MULTI_AGENT=1` plus legacy `RLM_SYMBOLIC_COLLAB=1` for compatibility, and implies symbolic mode). Collab requires `multi_agent=true` in `codex features list` (`collab` remains a legacy alias). Collab tool calls parsed from `codex exec --json --enable multi_agent` are stored in `manifest.collab_tool_calls` (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, set to `0` to disable). For auditable role routing, prefix spawned prompts with `[agent_type:<role>]` and set `spawn_agent.agent_type` when supported; lifecycle validation enforces prompt-role evidence and validates `agent_type` when present (`RLM_SYMBOLIC_MULTI_AGENT_ROLE_POLICY=warn|off`, legacy alias `RLM_COLLAB_ROLE_POLICY`; `RLM_SYMBOLIC_MULTI_AGENT_ALLOW_DEFAULT_ROLE=1`, legacy alias `RLM_COLLAB_ALLOW_DEFAULT_ROLE`). `codex-orchestrator codex setup` remains available when you want a managed/pinned CLI path (opt-in via `CODEX_CLI_USE_MANAGED=1`).
|
|
169
|
+
Symbolic subcalls can optionally use collab tools. Fast path: `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; sets `RLM_SYMBOLIC_MULTI_AGENT=1` plus legacy `RLM_SYMBOLIC_COLLAB=1` for compatibility, and implies symbolic mode). Collab requires `multi_agent=true` in `codex features list` (`collab` remains a legacy alias). Collab tool calls parsed from `codex exec --json --enable multi_agent` are stored in `manifest.collab_tool_calls` (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, set to `0` to disable); when present in events, `spawn_agent.fork_context` is captured for observability and surfaced in `codex-orchestrator doctor --usage` fork-context counters. For auditable role routing, prefix spawned prompts with `[agent_type:<role>]` and set `spawn_agent.agent_type` when supported; lifecycle validation enforces prompt-role evidence and validates `agent_type` when present (`RLM_SYMBOLIC_MULTI_AGENT_ROLE_POLICY=warn|off`, legacy alias `RLM_COLLAB_ROLE_POLICY`; `RLM_SYMBOLIC_MULTI_AGENT_ALLOW_DEFAULT_ROLE=1`, legacy alias `RLM_COLLAB_ALLOW_DEFAULT_ROLE`). `codex-orchestrator codex setup` remains available when you want a managed/pinned CLI path (opt-in via `CODEX_CLI_USE_MANAGED=1`).
|
|
159
170
|
For batch fan-out jobs, prefer native `spawn_agents_on_csv` before building custom orchestration wrappers.
|
|
160
171
|
|
|
161
172
|
### Delegation flow
|
|
@@ -215,10 +226,12 @@ Options:
|
|
|
215
226
|
- `--codex-home <path>` targets a different Codex home directory.
|
|
216
227
|
|
|
217
228
|
Bundled skills (may vary by release):
|
|
229
|
+
- `codex-orchestrator`
|
|
218
230
|
- `collab-subagents-first`
|
|
219
231
|
- `chrome-devtools`
|
|
220
232
|
- `delegation-usage`
|
|
221
233
|
- `standalone-review`
|
|
234
|
+
- `elegance-review`
|
|
222
235
|
- `docs-first`
|
|
223
236
|
- `collab-evals`
|
|
224
237
|
- `collab-deliberation`
|
|
@@ -276,7 +289,7 @@ codex-orchestrator doctor --cloud-preflight
|
|
|
276
289
|
- Active PR watch-resolve-merge loop: `codex-orchestrator pr resolve-merge --pr <number> --quiet-minutes <window>` (add `--auto-merge` when approved; exits early when author action is required).
|
|
277
290
|
- Passive PR monitor loop: `codex-orchestrator pr watch-merge --pr <number> --quiet-minutes <window>` (monitor-only behavior; keeps waiting unless terminal/timeout).
|
|
278
291
|
- Review checkpoints (npm-only safe): `NOTES="Goal: ... | Summary: ... | Risks: ..." codex-orchestrator review --task <task-id>` for manifest-backed standalone review wrapper behavior (auto-skips repo-only diff-budget script when unavailable in downstream installs); use `codex review "<focus>"` for quick prompt-only checks; use `codex-orchestrator start implementation-gate --task <task-id> --format json` when you want a full gate run.
|
|
279
|
-
- Downstream simulation before shipping wrapper/skill changes: `npm run pack:smoke` (packaged CLI in temp mock repo; validates `review` artifacts and `long-poll-wait` install path).
|
|
292
|
+
- Downstream simulation before shipping wrapper/skill changes: `npm run pack:smoke` (packaged CLI in temp mock repo; validates `review` artifacts and `long-poll-wait` install path; spot-check gate). Use `npm run pack:audit` for full tarball inventory validation.
|
|
280
293
|
- Delegation: `codex-orchestrator doctor --apply --yes`, then enable for a Codex run with: `codex -c 'mcp_servers.delegation.enabled=true' ...`
|
|
281
294
|
- Collab (symbolic RLM subagents): `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; requires Codex `features.multi_agent=true`)
|
|
282
295
|
- Cloud: set `CODEX_CLOUD_ENV_ID` (and optional `CODEX_CLOUD_BRANCH`), then run: `codex-orchestrator start <pipeline> --cloud --target <stage-id>`
|
|
@@ -307,7 +320,7 @@ codex-orchestrator devtools setup
|
|
|
307
320
|
- `codex-orchestrator mcp enable --servers <csv> --yes` — enable specific disabled MCP servers from existing Codex config entries.
|
|
308
321
|
- `codex-orchestrator self-check --format json` — JSON health payload.
|
|
309
322
|
- `codex-orchestrator mcp serve` — Codex MCP stdio server.
|
|
310
|
-
- `npm run pack:smoke` — maintainer smoke gate for packaged downstream behavior (tarball install + review/skill checks).
|
|
323
|
+
- `npm run pack:smoke` — maintainer smoke gate for packaged downstream behavior (tarball install + review/skill checks). Core lane runs it on downstream-facing diffs; `.github/workflows/pack-smoke-backstop.yml` runs a weekly `main` backstop.
|
|
311
324
|
|
|
312
325
|
## What ships in the npm release
|
|
313
326
|
|
|
@@ -393,6 +393,23 @@ function resolveExecutionModeFlag(flags) {
|
|
|
393
393
|
}
|
|
394
394
|
return normalized;
|
|
395
395
|
}
|
|
396
|
+
/**
 * Reads and validates the `--runtime-mode` CLI flag.
 *
 * @param {Record<string, unknown>} flags Parsed CLI flags keyed by flag name.
 * @returns {'cli' | 'appserver' | undefined} The lower-cased mode, or
 *   `undefined` when the flag key is absent and `readStringFlag` yields nothing.
 * @throws {Error} When the flag is `true` (no value), is present but resolves
 *   to an empty value, or names a mode other than `cli` / `appserver`.
 */
function resolveRuntimeModeFlag(flags) {
    // NOTE(review): a value of `true` presumably means the flag was passed bare — confirm against parseArgs.
    if (flags['runtime-mode'] === true) {
        throw new Error('--runtime-mode requires a value. Expected one of: cli, appserver.');
    }
    const requested = readStringFlag(flags, 'runtime-mode');
    if (!requested) {
        // Distinguish "flag present but unusable" from "flag not supplied at all".
        if (flags['runtime-mode'] !== undefined) {
            throw new Error('--runtime-mode requires a non-empty value. Expected one of: cli, appserver.');
        }
        return undefined;
    }
    const mode = requested.toLowerCase();
    switch (mode) {
        case 'cli':
        case 'appserver':
            return mode;
        default:
            throw new Error('Invalid --runtime-mode value. Expected one of: cli, appserver.');
    }
}
|
|
396
413
|
function normalizeRlmMultiAgentValue(raw) {
|
|
397
414
|
if (raw === true) {
|
|
398
415
|
return 'enabled';
|
|
@@ -580,6 +597,7 @@ async function handleStart(orchestrator, rawArgs) {
|
|
|
580
597
|
const pipelineId = positionals[0];
|
|
581
598
|
const format = flags['format'] === 'json' ? 'json' : 'text';
|
|
582
599
|
const executionMode = resolveExecutionModeFlag(flags);
|
|
600
|
+
const runtimeMode = resolveRuntimeModeFlag(flags);
|
|
583
601
|
applyRepoConfigRequiredPolicy(flags);
|
|
584
602
|
const autoIssueLogEnabled = resolveAutoIssueLogEnabled(flags);
|
|
585
603
|
if (pipelineId === 'rlm') {
|
|
@@ -607,6 +625,7 @@ async function handleStart(orchestrator, rawArgs) {
|
|
|
607
625
|
approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
|
|
608
626
|
targetStageId: resolveTargetStageId(flags),
|
|
609
627
|
executionMode,
|
|
628
|
+
runtimeMode,
|
|
610
629
|
runEvents
|
|
611
630
|
});
|
|
612
631
|
const issueLogCapture = result.manifest.status !== 'succeeded'
|
|
@@ -618,6 +637,9 @@ async function handleStart(orchestrator, rawArgs) {
|
|
|
618
637
|
})
|
|
619
638
|
: { issueLog: null, issueLogError: null };
|
|
620
639
|
emitRunOutput(result, format, 'Run started', issueLogCapture);
|
|
640
|
+
if (result.manifest.status === 'failed' || result.manifest.status === 'cancelled') {
|
|
641
|
+
process.exitCode = 1;
|
|
642
|
+
}
|
|
621
643
|
if (result.manifest.status === 'succeeded' && result.manifest.pipeline_id !== 'rlm') {
|
|
622
644
|
await maybeEmitRunAdoptionHint({
|
|
623
645
|
format,
|
|
@@ -640,6 +662,7 @@ async function handleFrontendTest(orchestrator, rawArgs) {
|
|
|
640
662
|
const { positionals, flags } = parseArgs(rawArgs);
|
|
641
663
|
const format = flags['format'] === 'json' ? 'json' : 'text';
|
|
642
664
|
const devtools = Boolean(flags['devtools']);
|
|
665
|
+
const runtimeMode = resolveRuntimeModeFlag(flags);
|
|
643
666
|
applyRepoConfigRequiredPolicy(flags);
|
|
644
667
|
if (positionals.length > 0) {
|
|
645
668
|
console.error(`[frontend-test] ignoring extra arguments: ${positionals.join(' ')}`);
|
|
@@ -656,9 +679,13 @@ async function handleFrontendTest(orchestrator, rawArgs) {
|
|
|
656
679
|
parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
|
|
657
680
|
approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
|
|
658
681
|
targetStageId: resolveTargetStageId(flags),
|
|
682
|
+
runtimeMode,
|
|
659
683
|
runEvents
|
|
660
684
|
});
|
|
661
685
|
emitRunOutput(result, format, 'Run started');
|
|
686
|
+
if (result.manifest.status === 'failed' || result.manifest.status === 'cancelled') {
|
|
687
|
+
process.exitCode = 1;
|
|
688
|
+
}
|
|
662
689
|
});
|
|
663
690
|
}
|
|
664
691
|
finally {
|
|
@@ -683,6 +710,7 @@ async function handleFlow(orchestrator, rawArgs) {
|
|
|
683
710
|
}
|
|
684
711
|
const format = flags['format'] === 'json' ? 'json' : 'text';
|
|
685
712
|
const executionMode = resolveExecutionModeFlag(flags);
|
|
713
|
+
const runtimeMode = resolveRuntimeModeFlag(flags);
|
|
686
714
|
applyRepoConfigRequiredPolicy(flags);
|
|
687
715
|
const autoIssueLogEnabled = resolveAutoIssueLogEnabled(flags);
|
|
688
716
|
const taskId = typeof flags['task'] === 'string' ? flags['task'] : undefined;
|
|
@@ -699,6 +727,7 @@ async function handleFlow(orchestrator, rawArgs) {
|
|
|
699
727
|
approvalPolicy,
|
|
700
728
|
targetStageId: docsReviewTargetStageId,
|
|
701
729
|
executionMode,
|
|
730
|
+
runtimeMode,
|
|
702
731
|
runEvents
|
|
703
732
|
});
|
|
704
733
|
const docsPayload = toRunOutputPayload(docsReviewResult);
|
|
@@ -744,6 +773,7 @@ async function handleFlow(orchestrator, rawArgs) {
|
|
|
744
773
|
approvalPolicy,
|
|
745
774
|
targetStageId: implementationGateTargetStageId,
|
|
746
775
|
executionMode,
|
|
776
|
+
runtimeMode,
|
|
747
777
|
runEvents
|
|
748
778
|
});
|
|
749
779
|
const implementationPayload = toRunOutputPayload(implementationGateResult);
|
|
@@ -886,6 +916,7 @@ async function handleRlm(orchestrator, rawArgs) {
|
|
|
886
916
|
printRlmHelp();
|
|
887
917
|
return;
|
|
888
918
|
}
|
|
919
|
+
const runtimeMode = resolveRuntimeModeFlag(flags);
|
|
889
920
|
applyRepoConfigRequiredPolicy(flags);
|
|
890
921
|
const goalFromArgs = positionals.length > 0 ? positionals.join(' ') : undefined;
|
|
891
922
|
const goal = goalFromArgs ?? readStringFlag(flags, 'goal') ?? process.env.RLM_GOAL?.trim();
|
|
@@ -921,6 +952,7 @@ async function handleRlm(orchestrator, rawArgs) {
|
|
|
921
952
|
taskId,
|
|
922
953
|
parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
|
|
923
954
|
approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
|
|
955
|
+
runtimeMode,
|
|
924
956
|
runEvents
|
|
925
957
|
});
|
|
926
958
|
emitRunOutput(startResult, 'text', 'Run started');
|
|
@@ -949,6 +981,7 @@ async function handleResume(orchestrator, rawArgs) {
|
|
|
949
981
|
printResumeHelp();
|
|
950
982
|
return;
|
|
951
983
|
}
|
|
984
|
+
const runtimeMode = resolveRuntimeModeFlag(flags);
|
|
952
985
|
applyRepoConfigRequiredPolicy(flags);
|
|
953
986
|
const runId = (flags['run'] ?? positionals[0]);
|
|
954
987
|
if (!runId) {
|
|
@@ -962,6 +995,7 @@ async function handleResume(orchestrator, rawArgs) {
|
|
|
962
995
|
actor: typeof flags['actor'] === 'string' ? flags['actor'] : undefined,
|
|
963
996
|
reason: typeof flags['reason'] === 'string' ? flags['reason'] : undefined,
|
|
964
997
|
targetStageId: resolveTargetStageId(flags),
|
|
998
|
+
runtimeMode,
|
|
965
999
|
runEvents
|
|
966
1000
|
});
|
|
967
1001
|
emitRunOutput(result, format, 'Run resumed');
|
|
@@ -1034,6 +1068,13 @@ function emitRunOutput(result, format, label, issueLogCapture = { issueLog: null
|
|
|
1034
1068
|
console.log(`Status: ${payload.status}`);
|
|
1035
1069
|
console.log(`Manifest: ${payload.manifest}`);
|
|
1036
1070
|
console.log(`Log: ${payload.log_path}`);
|
|
1071
|
+
if (payload.runtime_mode) {
|
|
1072
|
+
console.log(`Runtime: ${payload.runtime_mode}${payload.runtime_mode_requested ? ` (requested ${payload.runtime_mode_requested})` : ''}` +
|
|
1073
|
+
(payload.runtime_provider ? ` via ${payload.runtime_provider}` : ''));
|
|
1074
|
+
if (payload.runtime_fallback?.occurred) {
|
|
1075
|
+
console.log(`Runtime fallback: ${payload.runtime_fallback.code ?? 'runtime-fallback'} (${payload.runtime_fallback.reason ?? 'n/a'})`);
|
|
1076
|
+
}
|
|
1077
|
+
}
|
|
1037
1078
|
if (payload.cloud_fallback_reason) {
|
|
1038
1079
|
console.log(`Cloud fallback: ${payload.cloud_fallback_reason}`);
|
|
1039
1080
|
}
|
|
@@ -1060,6 +1101,10 @@ function toRunOutputPayload(result, issueLogCapture = { issueLog: null, issueLog
|
|
|
1060
1101
|
manifest: `${result.manifest.artifact_root}/manifest.json`,
|
|
1061
1102
|
log_path: result.manifest.log_path,
|
|
1062
1103
|
summary: result.manifest.summary ?? null,
|
|
1104
|
+
runtime_mode_requested: result.manifest.runtime_mode_requested ?? null,
|
|
1105
|
+
runtime_mode: result.manifest.runtime_mode ?? null,
|
|
1106
|
+
runtime_provider: result.manifest.runtime_provider ?? null,
|
|
1107
|
+
runtime_fallback: result.manifest.runtime_fallback ?? null,
|
|
1063
1108
|
cloud_fallback_reason: result.manifest.cloud_fallback?.reason ?? null,
|
|
1064
1109
|
issue_log: issueLogCapture.issueLog,
|
|
1065
1110
|
issue_log_error: issueLogCapture.issueLogError
|
|
@@ -1998,6 +2043,7 @@ Commands:
|
|
|
1998
2043
|
--format json Emit machine-readable output.
|
|
1999
2044
|
--execution-mode <mcp|cloud> Force execution mode for this run and child subpipelines.
|
|
2000
2045
|
--cloud Shortcut for --execution-mode cloud.
|
|
2046
|
+
--runtime-mode <cli|appserver> Force runtime mode for this run and child subpipelines.
|
|
2001
2047
|
--target <stage-id> Focus plan/build metadata on a specific stage (alias: --target-stage).
|
|
2002
2048
|
--auto-issue-log [true|false] On failure, auto-write doctor issue bundle/log entry.
|
|
2003
2049
|
--repo-config-required [true|false] Require repo-local codex.orchestrator.json (no package fallback).
|
|
@@ -2013,6 +2059,7 @@ Commands:
|
|
|
2013
2059
|
|
|
2014
2060
|
rlm "<goal>" Run RLM loop until validator passes.
|
|
2015
2061
|
--task <id> Override task identifier.
|
|
2062
|
+
--runtime-mode <cli|appserver> Force runtime mode for this run.
|
|
2016
2063
|
--repo-config-required [true|false] Require repo-local codex.orchestrator.json (no package fallback).
|
|
2017
2064
|
--multi-agent [auto|true|false] Preferred alias for multi-agent collab subagents (implies symbolic mode).
|
|
2018
2065
|
--collab [auto|true|false] Legacy alias for --multi-agent.
|
|
@@ -2028,6 +2075,7 @@ Commands:
|
|
|
2028
2075
|
frontend-test Run frontend testing pipeline.
|
|
2029
2076
|
--devtools Enable Chrome DevTools MCP for this run.
|
|
2030
2077
|
--task <id> Override task identifier (defaults to MCP_RUNNER_TASK_ID).
|
|
2078
|
+
--runtime-mode <cli|appserver> Force runtime mode for this run.
|
|
2031
2079
|
--repo-config-required [true|false] Require repo-local codex.orchestrator.json (no package fallback).
|
|
2032
2080
|
--parent-run <id> Link run to parent run id.
|
|
2033
2081
|
--approval-policy <p> Record approval policy metadata.
|
|
@@ -2043,6 +2091,7 @@ Commands:
|
|
|
2043
2091
|
--format json Emit machine-readable output summary for both runs.
|
|
2044
2092
|
--execution-mode <mcp|cloud> Force execution mode for both runs.
|
|
2045
2093
|
--cloud Shortcut for --execution-mode cloud.
|
|
2094
|
+
--runtime-mode <cli|appserver> Force runtime mode for both runs.
|
|
2046
2095
|
--target <stage-id> Focus plan/build metadata on a specific stage (alias: --target-stage).
|
|
2047
2096
|
--auto-issue-log [true|false] On failure, auto-write doctor issue bundle/log entry.
|
|
2048
2097
|
--repo-config-required [true|false] Require repo-local codex.orchestrator.json (no package fallback).
|
|
@@ -2058,6 +2107,7 @@ Commands:
|
|
|
2058
2107
|
--base <branch> Review against base branch.
|
|
2059
2108
|
--commit <sha> Review specific commit.
|
|
2060
2109
|
--non-interactive Force non-interactive review behavior.
|
|
2110
|
+
--runtime-mode <cli|appserver> Force runtime mode for the underlying codex review call.
|
|
2061
2111
|
--auto-issue-log [true|false] Auto-capture issue bundle on review failure.
|
|
2062
2112
|
--disable-delegation-mcp [true|false] Disable delegation MCP for this review.
|
|
2063
2113
|
|
|
@@ -2080,6 +2130,7 @@ Commands:
|
|
|
2080
2130
|
--actor <name> Record who resumed the run.
|
|
2081
2131
|
--reason <text> Record why the run was resumed.
|
|
2082
2132
|
--target <stage-id> Override stage selection before resuming (alias: --target-stage).
|
|
2133
|
+
--runtime-mode <cli|appserver> Force runtime mode before resuming.
|
|
2083
2134
|
--repo-config-required [true|false] Require repo-local codex.orchestrator.json (no package fallback).
|
|
2084
2135
|
--format json Emit machine-readable output.
|
|
2085
2136
|
--interactive | --ui Enable read-only HUD when running in a TTY.
|
|
@@ -2226,6 +2277,7 @@ Options:
|
|
|
2226
2277
|
--actor <name> Record who resumed the run.
|
|
2227
2278
|
--reason <text> Record why the run was resumed.
|
|
2228
2279
|
--target <stage-id> Override stage selection before resuming.
|
|
2280
|
+
--runtime-mode <cli|appserver> Force runtime mode before resuming.
|
|
2229
2281
|
--repo-config-required [true|false] Require repo-local codex.orchestrator.json (no package fallback).
|
|
2230
2282
|
--format json Emit machine-readable output.
|
|
2231
2283
|
--interactive | --ui Enable read-only HUD when running in a TTY.
|
|
@@ -2281,6 +2333,7 @@ function printRlmHelp() {
|
|
|
2281
2333
|
Options:
|
|
2282
2334
|
--goal "<goal>" Alternate way to set the goal (positional is preferred).
|
|
2283
2335
|
--task <id> Override task identifier (defaults to MCP_RUNNER_TASK_ID).
|
|
2336
|
+
--runtime-mode <cli|appserver> Force runtime mode for this run.
|
|
2284
2337
|
--repo-config-required [true|false] Require repo-local codex.orchestrator.json (no package fallback).
|
|
2285
2338
|
--multi-agent [auto|true|false] Preferred alias for multi-agent collab subagents (implies symbolic mode).
|
|
2286
2339
|
--collab [auto|true|false] Legacy alias for --multi-agent.
|
|
@@ -2315,6 +2368,7 @@ Options:
|
|
|
2315
2368
|
--format json Emit machine-readable output for both runs.
|
|
2316
2369
|
--execution-mode <mcp|cloud> Force execution mode for both runs.
|
|
2317
2370
|
--cloud Shortcut for --execution-mode cloud.
|
|
2371
|
+
--runtime-mode <cli|appserver> Force runtime mode for both runs.
|
|
2318
2372
|
--target <stage-id> Focus plan/build metadata (applies where the stage exists).
|
|
2319
2373
|
--auto-issue-log [true|false] On failure, auto-write doctor issue bundle/log entry.
|
|
2320
2374
|
--repo-config-required [true|false] Require repo-local codex.orchestrator.json (no package fallback).
|
|
@@ -2344,6 +2398,7 @@ Common options:
|
|
|
2344
2398
|
--commit <sha> Review a specific commit.
|
|
2345
2399
|
--title "<text>" Optional review title in the prompt.
|
|
2346
2400
|
--non-interactive Force non-interactive behavior.
|
|
2401
|
+
--runtime-mode <cli|appserver> Force runtime mode for the underlying codex review call.
|
|
2347
2402
|
--auto-issue-log [true|false] Auto-capture issue bundle on review failure.
|
|
2348
2403
|
--disable-delegation-mcp [true|false] Disable delegation MCP for this review.
|
|
2349
2404
|
--enable-delegation-mcp [true|false] Legacy delegation MCP toggle (disable via false).
|
|
@@ -2373,6 +2428,7 @@ Options:
|
|
|
2373
2428
|
--format json Emit machine-readable output.
|
|
2374
2429
|
--execution-mode <mcp|cloud> Force execution mode for this run.
|
|
2375
2430
|
--cloud Shortcut for --execution-mode cloud.
|
|
2431
|
+
--runtime-mode <cli|appserver> Force runtime mode for this run.
|
|
2376
2432
|
--target <stage-id> Focus plan/build metadata on a specific stage.
|
|
2377
2433
|
--auto-issue-log [true|false] On failure, auto-write doctor issue bundle/log entry.
|
|
2378
2434
|
--repo-config-required [true|false] Require repo-local codex.orchestrator.json (no package fallback).
|
|
@@ -55,11 +55,17 @@ function normalizeUserConfig(config, source) {
|
|
|
55
55
|
if (!config) {
|
|
56
56
|
return null;
|
|
57
57
|
}
|
|
58
|
+
const runtimeMode = normalizeRuntimeMode(config.runtimeMode);
|
|
58
59
|
const stageSets = normalizeStageSets(config.stageSets);
|
|
59
60
|
const pipelines = Array.isArray(config.pipelines)
|
|
60
61
|
? config.pipelines.map((pipeline) => expandPipelineStages(pipeline, stageSets))
|
|
61
62
|
: config.pipelines;
|
|
62
|
-
return {
|
|
63
|
+
return {
|
|
64
|
+
pipelines,
|
|
65
|
+
defaultPipeline: config.defaultPipeline,
|
|
66
|
+
runtimeMode,
|
|
67
|
+
source
|
|
68
|
+
};
|
|
63
69
|
}
|
|
64
70
|
async function readConfig(configPath) {
|
|
65
71
|
try {
|
|
@@ -111,3 +117,13 @@ function expandPipelineStages(pipeline, stageSets) {
|
|
|
111
117
|
/**
 * Tells whether a stage entry is a stage-set reference.
 *
 * @param {{ kind?: unknown }} stage Stage entry to inspect.
 * @returns {boolean} `true` only when `stage.kind` is exactly `'stage-set'`.
 */
function isStageSetRef(stage) {
    const { kind } = stage;
    return kind === 'stage-set';
}
|
|
120
|
+
/**
 * Validates the optional `runtimeMode` entry of codex.orchestrator.json.
 *
 * @param {unknown} value Raw `runtimeMode` value taken from the parsed config.
 * @returns {'cli' | 'appserver' | undefined} The trimmed, lower-cased mode, or
 *   `undefined` when the entry is absent (`undefined` or `null`).
 * @throws {Error} When the entry is present but is not one of `cli` or
 *   `appserver` (case-insensitive), including non-string values.
 */
function normalizeRuntimeMode(value) {
    // Only a missing entry counts as "not configured".
    if (value === undefined || value === null) {
        return undefined;
    }
    if (typeof value !== 'string') {
        // Non-string values were previously ignored silently, so a mistyped
        // config (e.g. `"runtimeMode": ["cli"]`) fell back to the default with
        // no diagnostic. Reject them the same way invalid strings are rejected.
        const actualType = Array.isArray(value) ? 'array' : typeof value;
        throw new Error(`Invalid codex.orchestrator.json runtimeMode (expected a string, got ${actualType}). Expected one of: cli, appserver.`);
    }
    const normalized = value.trim().toLowerCase();
    if (normalized === 'cli' || normalized === 'appserver') {
        return normalized;
    }
    throw new Error(`Invalid codex.orchestrator.json runtimeMode "${value}". Expected one of: cli, appserver.`);
}
|
|
@@ -31,6 +31,9 @@ export async function runDoctorUsage(options = {}) {
|
|
|
31
31
|
const collabByEventType = {};
|
|
32
32
|
const collabTools = new Map();
|
|
33
33
|
const collabCaptureDisabled = String(process.env.CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS ?? '').trim() === '0';
|
|
34
|
+
let collabSpawnForkContextTrue = 0;
|
|
35
|
+
let collabSpawnForkContextFalse = 0;
|
|
36
|
+
let collabSpawnForkContextUnknown = 0;
|
|
34
37
|
let collabRunsWithUnclosedSpawnAgents = 0;
|
|
35
38
|
let collabUnclosedSpawnAgents = 0;
|
|
36
39
|
let collabRunsWithSpawnThreadLimitFailures = 0;
|
|
@@ -160,6 +163,15 @@ export async function runDoctorUsage(options = {}) {
|
|
|
160
163
|
continue;
|
|
161
164
|
}
|
|
162
165
|
if (tool === 'spawn_agent') {
|
|
166
|
+
if (entry?.fork_context === true) {
|
|
167
|
+
collabSpawnForkContextTrue += 1;
|
|
168
|
+
}
|
|
169
|
+
else if (entry?.fork_context === false) {
|
|
170
|
+
collabSpawnForkContextFalse += 1;
|
|
171
|
+
}
|
|
172
|
+
else {
|
|
173
|
+
collabSpawnForkContextUnknown += 1;
|
|
174
|
+
}
|
|
163
175
|
if (isFailed) {
|
|
164
176
|
const rawFailedSpawnId = typeof entry?.item_id === 'string' ? entry.item_id.trim() : '';
|
|
165
177
|
const failedSpawnId = rawFailedSpawnId.length > 0 && rawFailedSpawnId !== 'unknown'
|
|
@@ -290,6 +302,9 @@ export async function runDoctorUsage(options = {}) {
|
|
|
290
302
|
by_event_type: collabByEventType,
|
|
291
303
|
top_tools: collabTopTools,
|
|
292
304
|
capture_disabled: collabCaptureDisabled,
|
|
305
|
+
spawn_agent_fork_context_true: collabSpawnForkContextTrue,
|
|
306
|
+
spawn_agent_fork_context_false: collabSpawnForkContextFalse,
|
|
307
|
+
spawn_agent_fork_context_unknown: collabSpawnForkContextUnknown,
|
|
293
308
|
runs_with_unclosed_spawn_agents: collabRunsWithUnclosedSpawnAgents,
|
|
294
309
|
unclosed_spawn_agents: collabUnclosedSpawnAgents,
|
|
295
310
|
runs_with_spawn_thread_limit_failures: collabRunsWithSpawnThreadLimitFailures,
|
|
@@ -357,9 +372,10 @@ export function formatDoctorUsageSummary(result) {
|
|
|
357
372
|
const collabLifecycleUnknownSignal = collabLifecycleUnknownRuns > 0
|
|
358
373
|
? `, lifecycle_unknown_runs=${collabLifecycleUnknownRuns}`
|
|
359
374
|
: '';
|
|
375
|
+
const collabForkContextSignal = `, fork_context=${result.collab.spawn_agent_fork_context_true}/${result.collab.spawn_agent_fork_context_false}/${result.collab.spawn_agent_fork_context_unknown}`;
|
|
360
376
|
const collabToolList = formatTopList(result.collab.top_tools.map((entry) => ({ key: entry.tool, value: entry.calls })), 3, 'tools');
|
|
361
377
|
lines.push(` - collab: ${result.collab.runs_with_tool_calls} (${formatPercent(result.collab.runs_with_tool_calls, result.runs.total)})${collabSuffix}`
|
|
362
|
-
+ `${collabTaskSuffix}, events=${result.collab.total_tool_calls}${collabAvg} (ok=${collabOk}, failed=${collabFailed}${collabLeakSignal}${collabThreadLimitSignal}${collabLifecycleUnknownSignal})${collabToolList}`);
|
|
378
|
+
+ `${collabTaskSuffix}, events=${result.collab.total_tool_calls}${collabAvg} (ok=${collabOk}, failed=${collabFailed}${collabLeakSignal}${collabThreadLimitSignal}${collabLifecycleUnknownSignal}${collabForkContextSignal})${collabToolList}`);
|
|
363
379
|
if (result.delegation.active_top_level_tasks > 0) {
|
|
364
380
|
lines.push(` - delegation: ${result.delegation.active_with_subagents}/${result.delegation.active_top_level_tasks} top-level tasks have subagent manifests (${result.delegation.total_subagent_manifests} total); child_runs=${result.delegation.total_child_runs} over ${result.delegation.tasks_with_child_runs} tasks`);
|
|
365
381
|
}
|
|
@@ -4,7 +4,7 @@ import { resolve } from 'node:path';
|
|
|
4
4
|
import process from 'node:process';
|
|
5
5
|
import { fileURLToPath } from 'node:url';
|
|
6
6
|
import { logger } from '../logger.js';
|
|
7
|
-
import {
|
|
7
|
+
import { createRuntimeCodexCommandContext, formatRuntimeSelectionSummary, parseRuntimeMode, resolveRuntimeCodexCommand } from './runtime/index.js';
|
|
8
8
|
const DEFAULT_PROMPT = [
|
|
9
9
|
'You are running frontend testing for the current project.',
|
|
10
10
|
'',
|
|
@@ -37,9 +37,9 @@ export async function loadFrontendTestingPrompt(env = process.env) {
|
|
|
37
37
|
}
|
|
38
38
|
return DEFAULT_PROMPT;
|
|
39
39
|
}
|
|
40
|
-
export function resolveFrontendTestingCommand(prompt,
|
|
40
|
+
/**
 * Builds the command used for the frontend testing run.
 *
 * The argument list is `['exec', prompt]`; resolution is delegated to
 * `resolveRuntimeCodexCommand` together with the supplied runtime context.
 *
 * @param {string} prompt Prompt text passed as the `exec` argument.
 * @param {unknown} context Runtime command context forwarded unchanged.
 * @returns Whatever `resolveRuntimeCodexCommand` returns; the caller in this
 *   file destructures `command` and `args` from it.
 */
export function resolveFrontendTestingCommand(prompt, context) {
    return resolveRuntimeCodexCommand(['exec', prompt], context);
}
|
|
44
44
|
function envFlagEnabled(value) {
|
|
45
45
|
if (!value) {
|
|
@@ -59,16 +59,21 @@ function shouldForceNonInteractive(env) {
|
|
|
59
59
|
}
|
|
60
60
|
export async function runFrontendTesting(env = process.env) {
|
|
61
61
|
const prompt = await loadFrontendTestingPrompt(env);
|
|
62
|
-
const
|
|
62
|
+
const repoRoot = typeof env.CODEX_ORCHESTRATOR_ROOT === 'string' && env.CODEX_ORCHESTRATOR_ROOT.trim().length > 0
|
|
63
|
+
? env.CODEX_ORCHESTRATOR_ROOT.trim()
|
|
64
|
+
: process.cwd();
|
|
65
|
+
const runtimeContext = await resolveFrontendTestingRuntimeContext(env, repoRoot);
|
|
66
|
+
logger.info(`[frontend-testing-runtime] ${formatRuntimeSelectionSummary(runtimeContext.runtime)}`);
|
|
67
|
+
const { command, args } = resolveFrontendTestingCommand(prompt, runtimeContext);
|
|
63
68
|
const nonInteractive = shouldForceNonInteractive(env);
|
|
64
|
-
const childEnv = { ...process.env, ...env };
|
|
69
|
+
const childEnv = { ...process.env, ...env, ...runtimeContext.env };
|
|
65
70
|
if (nonInteractive) {
|
|
66
71
|
childEnv.CODEX_NON_INTERACTIVE = childEnv.CODEX_NON_INTERACTIVE ?? '1';
|
|
67
72
|
childEnv.CODEX_NO_INTERACTIVE = childEnv.CODEX_NO_INTERACTIVE ?? '1';
|
|
68
73
|
childEnv.CODEX_INTERACTIVE = childEnv.CODEX_INTERACTIVE ?? '0';
|
|
69
74
|
}
|
|
70
75
|
const stdio = nonInteractive ? ['ignore', 'inherit', 'inherit'] : 'inherit';
|
|
71
|
-
const child = spawn(command, args, { stdio, env: childEnv });
|
|
76
|
+
const child = spawn(command, args, { stdio, env: childEnv, cwd: repoRoot });
|
|
72
77
|
await new Promise((resolvePromise, reject) => {
|
|
73
78
|
child.once('error', (error) => reject(error instanceof Error ? error : new Error(String(error))));
|
|
74
79
|
child.once('exit', (code) => {
|
|
@@ -81,6 +86,19 @@ export async function runFrontendTesting(env = process.env) {
|
|
|
81
86
|
});
|
|
82
87
|
});
|
|
83
88
|
}
|
|
89
|
+
/**
 * Creates the runtime command context for a frontend testing run.
 *
 * The requested mode is read from `CODEX_ORCHESTRATOR_RUNTIME_MODE_ACTIVE`,
 * falling back to `CODEX_ORCHESTRATOR_RUNTIME_MODE`, then `null`. The run id
 * is the trimmed `CODEX_ORCHESTRATOR_RUN_ID` when it is a non-blank string,
 * otherwise a `frontend-testing-<timestamp>` id.
 *
 * @param {Record<string, string | undefined>} env Environment overrides, layered over `process.env`.
 * @param {string} repoRoot Repository root passed through to the context factory.
 * @returns {Promise<unknown>} The value resolved by `createRuntimeCodexCommandContext`.
 */
async function resolveFrontendTestingRuntimeContext(env, repoRoot) {
    const rawMode = env.CODEX_ORCHESTRATOR_RUNTIME_MODE_ACTIVE ?? env.CODEX_ORCHESTRATOR_RUNTIME_MODE ?? null;
    const requestedMode = parseRuntimeMode(rawMode);
    const configuredRunId = env.CODEX_ORCHESTRATOR_RUN_ID;
    let runId;
    if (typeof configuredRunId === 'string' && configuredRunId.trim().length > 0) {
        runId = configuredRunId.trim();
    }
    else {
        runId = `frontend-testing-${Date.now()}`;
    }
    const runtimeContext = await createRuntimeCodexCommandContext({
        requestedMode,
        executionMode: 'mcp',
        repoRoot,
        env: { ...process.env, ...env },
        runId
    });
    return runtimeContext;
}
|
|
84
102
|
// Script entry point: runs the frontend testing flow with no explicit env
// argument, so runFrontendTesting falls back to its default (process.env).
async function main() {
|
|
85
103
|
await runFrontendTesting();
|
|
86
104
|
}
|