@kbediako/codex-orchestrator 0.1.12 → 0.1.14-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +19 -5
- package/README.md +47 -2
- package/dist/bin/codex-orchestrator.js +93 -0
- package/dist/orchestrator/src/cli/adapters/CommandBuilder.js +27 -3
- package/dist/orchestrator/src/cli/adapters/CommandPlanner.js +17 -1
- package/dist/orchestrator/src/cli/adapters/CommandReviewer.js +36 -1
- package/dist/orchestrator/src/cli/adapters/CommandTester.js +28 -0
- package/dist/orchestrator/src/cli/adapters/cloudFailureDiagnostics.js +45 -0
- package/dist/orchestrator/src/cli/codexCliSetup.js +294 -0
- package/dist/orchestrator/src/cli/init.js +3 -0
- package/dist/orchestrator/src/cli/mcp.js +4 -2
- package/dist/orchestrator/src/cli/orchestrator.js +298 -28
- package/dist/orchestrator/src/cli/rlm/context.js +31 -3
- package/dist/orchestrator/src/cli/rlm/symbolic.js +152 -15
- package/dist/orchestrator/src/cli/rlmRunner.js +59 -5
- package/dist/orchestrator/src/cli/run/manifest.js +3 -0
- package/dist/orchestrator/src/cli/services/commandRunner.js +87 -0
- package/dist/orchestrator/src/cli/services/runSummaryWriter.js +24 -0
- package/dist/orchestrator/src/cli/skills.js +1 -1
- package/dist/orchestrator/src/cli/utils/codexCli.js +94 -0
- package/dist/orchestrator/src/cli/utils/codexPaths.js +13 -0
- package/dist/orchestrator/src/cli/utils/devtools.js +9 -12
- package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js +255 -0
- package/dist/orchestrator/src/learning/crystalizer.js +2 -1
- package/dist/orchestrator/src/manager.js +1 -0
- package/dist/orchestrator/src/sync/CloudSyncWorker.js +37 -7
- package/dist/scripts/design/pipeline/context.js +3 -2
- package/dist/scripts/lib/run-manifests.js +14 -0
- package/docs/README.md +22 -2
- package/package.json +6 -2
- package/schemas/manifest.json +83 -0
- package/skills/collab-deliberation/SKILL.md +21 -0
- package/skills/collab-evals/SKILL.md +32 -0
- package/skills/delegate-early/SKILL.md +47 -0
- package/skills/delegation-usage/DELEGATION_GUIDE.md +5 -4
- package/skills/delegation-usage/SKILL.md +11 -5
- package/skills/docs-first/SKILL.md +2 -1
- package/templates/README.md +4 -0
package/LICENSE
CHANGED
|
@@ -1,7 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
1
3
|
Copyright (c) 2025 Kbediako
|
|
2
|
-
All rights reserved.
|
|
3
4
|
|
|
4
|
-
|
|
5
|
-
and
|
|
6
|
-
Software
|
|
7
|
-
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -39,6 +39,32 @@ Node.js >= 20 is required.
|
|
|
39
39
|
> Tip: if you prefer `npx`, replace `codex-orch` with `npx @kbediako/codex-orchestrator`.
|
|
40
40
|
> Tip: for multiple commands, you can also `export MCP_RUNNER_TASK_ID=<task-id>` once.
|
|
41
41
|
|
|
42
|
+
## Downstream init (recommended)
|
|
43
|
+
|
|
44
|
+
Use this when you want Codex to drive work inside another repo with the Codex Orchestrator (CO) defaults.
|
|
45
|
+
|
|
46
|
+
1. Install templates:
|
|
47
|
+
```bash
|
|
48
|
+
codex-orchestrator init codex --cwd /path/to/repo
|
|
49
|
+
```
|
|
50
|
+
One-shot (templates + CO-managed Codex CLI):
|
|
51
|
+
```bash
|
|
52
|
+
codex-orchestrator init codex --codex-cli --yes
|
|
53
|
+
```
|
|
54
|
+
2. Register the delegation MCP server (one-time per machine):
|
|
55
|
+
```bash
|
|
56
|
+
codex mcp add delegation -- codex-orchestrator delegate-server --repo /path/to/repo
|
|
57
|
+
```
|
|
58
|
+
3. Optional (collab JSONL parity): set up a CO-managed Codex CLI:
|
|
59
|
+
```bash
|
|
60
|
+
codex-orchestrator codex setup
|
|
61
|
+
```
|
|
62
|
+
4. Optional (fast refresh helper for downstream users):
|
|
63
|
+
```bash
|
|
64
|
+
scripts/codex-cli-refresh.sh --repo /path/to/codex
|
|
65
|
+
```
|
|
66
|
+
Repo-only helper (not included in npm package). Set `CODEX_REPO` or `CODEX_CLI_SOURCE` to avoid passing `--repo` each time.
|
|
67
|
+
|
|
42
68
|
## Delegation MCP server
|
|
43
69
|
|
|
44
70
|
Run the delegation MCP server over stdio:
|
|
@@ -57,6 +83,7 @@ codex -c 'mcp_servers.delegation.enabled=true' ...
|
|
|
57
83
|
## Delegation + RLM flow
|
|
58
84
|
|
|
59
85
|
RLM (Recursive Language Model) is the long-horizon loop used by the `rlm` pipeline (`codex-orchestrator rlm "<goal>"` or `codex-orchestrator start rlm --goal "<goal>"`). Delegated runs only enter RLM when the child is launched with the `rlm` pipeline (or the rlm runner directly). In auto mode it resolves to symbolic when delegated, when `RLM_CONTEXT_PATH` is set, or when the context exceeds `RLM_SYMBOLIC_MIN_BYTES`; otherwise it stays iterative. The runner writes state to `.runs/<task-id>/cli/<run-id>/rlm/state.json` and stops when the validator passes or budgets are exhausted.
|
|
86
|
+
Symbolic subcalls can optionally use collab tools when `RLM_SYMBOLIC_COLLAB=1` (requires a collab-enabled Codex CLI via `codex-orchestrator codex setup`). Collab tool calls parsed from `codex exec --json --enable collab` are stored in `manifest.collab_tool_calls` (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, set to `0` to disable).
|
|
60
87
|
|
|
61
88
|
### Delegation flow
|
|
62
89
|
```mermaid
|
|
@@ -82,7 +109,7 @@ flowchart TB
|
|
|
82
109
|
G{Symbolic?}
|
|
83
110
|
H["Context store<br/>(chunk + search)"]
|
|
84
111
|
I["Planner JSON<br/>(select subcalls)"]
|
|
85
|
-
J["Subcalls<br/>(tool + edits)"]
|
|
112
|
+
J["Subcalls<br/>(tool + edits, collab optional)"]
|
|
86
113
|
K["Validator<br/>(test command)"]
|
|
87
114
|
L["State + artifacts<br/>.runs/<task-id>/cli/<run-id>/rlm/state.json"]
|
|
88
115
|
M["Exit status"]
|
|
@@ -110,6 +137,9 @@ Bundled skills (may vary by release):
|
|
|
110
137
|
- `delegation-usage`
|
|
111
138
|
- `standalone-review`
|
|
112
139
|
- `docs-first`
|
|
140
|
+
- `collab-evals`
|
|
141
|
+
- `collab-deliberation`
|
|
142
|
+
- `delegate-early`
|
|
113
143
|
|
|
114
144
|
## DevTools readiness
|
|
115
145
|
|
|
@@ -129,6 +159,9 @@ codex-orchestrator devtools setup
|
|
|
129
159
|
- `codex-orchestrator plan <pipeline>` — preview pipeline stages.
|
|
130
160
|
- `codex-orchestrator exec <cmd>` — run a one-off command with the exec runtime.
|
|
131
161
|
- `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`) into a repo.
|
|
162
|
+
- `codex-orchestrator init codex --codex-cli --yes --codex-source <path>` — also provision a CO-managed Codex CLI binary (build-from-source default; set `CODEX_CLI_SOURCE` to avoid passing `--codex-source` every time).
|
|
163
|
+
- `codex-orchestrator init codex --codex-cli --yes --codex-download-url <url> --codex-download-sha256 <sha>` — opt-in to a prebuilt Codex CLI download.
|
|
164
|
+
- `codex-orchestrator codex setup` — plan/apply a CO-managed Codex CLI install (for collab JSONL parity; use `--download-url` + `--download-sha256` for prebuilts).
|
|
132
165
|
- `codex-orchestrator self-check --format json` — JSON health payload.
|
|
133
166
|
- `codex-orchestrator mcp serve` — Codex MCP stdio server.
|
|
134
167
|
|
|
@@ -141,6 +174,18 @@ codex-orchestrator devtools setup
|
|
|
141
174
|
|
|
142
175
|
## Repository + contributor guide
|
|
143
176
|
|
|
144
|
-
Repo internals, development workflows, and deeper architecture notes live in the GitHub repository:
|
|
177
|
+
Repo internals, development workflows, and deeper architecture notes (contributor/internal) live in the GitHub repository:
|
|
145
178
|
- `docs/README.md`
|
|
146
179
|
- `docs/diagnostics-prompt-guide.md` (first-run diagnostics prompt + expected outputs)
|
|
180
|
+
- `docs/guides/collab-vs-mcp.md` (agent-first decision guide)
|
|
181
|
+
|
|
182
|
+
## RLM benchmark graphs
|
|
183
|
+
|
|
184
|
+
Seeded OOLONG accuracy curves (Wilson 95% CI, runs=5). In these runs, the baseline accuracy degrades as context length grows, while RLM stays near the ceiling across the tested lengths.
|
|
185
|
+
|
|
186
|
+
<table>
|
|
187
|
+
<tr>
|
|
188
|
+
<td><img src="docs/assets/oolong-baseline-seeded-wilson95-runs5.png" alt="Baseline OOLONG seeded Wilson 95% CI" width="420"></td>
|
|
189
|
+
<td><img src="docs/assets/oolong-rlm-seeded-wilson95-runs5.png" alt="RLM OOLONG seeded Wilson 95% CI" width="420"></td>
|
|
190
|
+
</tr>
|
|
191
|
+
</table>
|
|
@@ -13,6 +13,7 @@ import { buildSelfCheckResult } from '../orchestrator/src/cli/selfCheck.js';
|
|
|
13
13
|
import { initCodexTemplates, formatInitSummary } from '../orchestrator/src/cli/init.js';
|
|
14
14
|
import { runDoctor, formatDoctorSummary } from '../orchestrator/src/cli/doctor.js';
|
|
15
15
|
import { formatDevtoolsSetupSummary, runDevtoolsSetup } from '../orchestrator/src/cli/devtoolsSetup.js';
|
|
16
|
+
import { formatCodexCliSetupSummary, runCodexCliSetup } from '../orchestrator/src/cli/codexCliSetup.js';
|
|
16
17
|
import { formatSkillsInstallSummary, installSkills } from '../orchestrator/src/cli/skills.js';
|
|
17
18
|
import { loadPackageInfo } from '../orchestrator/src/cli/utils/packageInfo.js';
|
|
18
19
|
import { slugify } from '../orchestrator/src/cli/utils/strings.js';
|
|
@@ -63,6 +64,9 @@ async function main() {
|
|
|
63
64
|
case 'doctor':
|
|
64
65
|
await handleDoctor(args);
|
|
65
66
|
break;
|
|
67
|
+
case 'codex':
|
|
68
|
+
await handleCodex(args);
|
|
69
|
+
break;
|
|
66
70
|
case 'devtools':
|
|
67
71
|
await handleDevtools(args);
|
|
68
72
|
break;
|
|
@@ -136,6 +140,24 @@ function readStringFlag(flags, key) {
|
|
|
136
140
|
const trimmed = value.trim();
|
|
137
141
|
return trimmed.length > 0 ? trimmed : undefined;
|
|
138
142
|
}
|
|
143
|
+
/**
 * Work out which execution mode the caller asked for on the command line.
 *
 * Two flags are consulted: the boolean `--cloud` shortcut and the string
 * `--execution-mode` flag (read through `readStringFlag`). The value is
 * compared case-insensitively and returned in lower case.
 *
 * @param {object} flags - Parsed CLI flags keyed by flag name.
 * @returns {'mcp' | 'cloud' | undefined} The requested mode, or `undefined`
 *   when neither flag selects one.
 * @throws {Error} When `--cloud` is combined with a non-cloud
 *   `--execution-mode`, or when `--execution-mode` is not `mcp` or `cloud`.
 */
function resolveExecutionModeFlag(flags) {
    const wantsCloud = flags['cloud'] === true;
    const requested = readStringFlag(flags, 'execution-mode');
    const mode = requested ? requested.toLowerCase() : undefined;

    if (wantsCloud) {
        // The shortcut wins, but only if an explicit mode does not contradict it.
        const contradicts = mode !== undefined && mode !== 'cloud';
        if (contradicts) {
            throw new Error('Cannot combine --cloud with --execution-mode values other than cloud.');
        }
        return 'cloud';
    }

    if (mode === undefined) {
        return undefined;
    }

    switch (mode) {
        case 'mcp':
        case 'cloud':
            return mode;
        default:
            throw new Error('Invalid --execution-mode value. Expected one of: mcp, cloud.');
    }
}
|
|
139
161
|
function applyRlmEnvOverrides(flags, goal) {
|
|
140
162
|
if (goal) {
|
|
141
163
|
process.env.RLM_GOAL = goal;
|
|
@@ -198,6 +220,7 @@ async function handleStart(orchestrator, rawArgs) {
|
|
|
198
220
|
const { positionals, flags } = parseArgs(rawArgs);
|
|
199
221
|
const pipelineId = positionals[0];
|
|
200
222
|
const format = flags['format'] === 'json' ? 'json' : 'text';
|
|
223
|
+
const executionMode = resolveExecutionModeFlag(flags);
|
|
201
224
|
if (pipelineId === 'rlm') {
|
|
202
225
|
const goal = readStringFlag(flags, 'goal');
|
|
203
226
|
applyRlmEnvOverrides(flags, goal);
|
|
@@ -217,6 +240,7 @@ async function handleStart(orchestrator, rawArgs) {
|
|
|
217
240
|
parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
|
|
218
241
|
approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
|
|
219
242
|
targetStageId: resolveTargetStageId(flags),
|
|
243
|
+
executionMode,
|
|
220
244
|
runEvents
|
|
221
245
|
});
|
|
222
246
|
emitRunOutput(result, format, 'Run started');
|
|
@@ -465,6 +489,25 @@ async function handleInit(rawArgs) {
|
|
|
465
489
|
for (const line of summary) {
|
|
466
490
|
console.log(line);
|
|
467
491
|
}
|
|
492
|
+
if (flags['codex-cli'] === true) {
|
|
493
|
+
const apply = Boolean(flags['yes']);
|
|
494
|
+
const source = readStringFlag(flags, 'codex-source');
|
|
495
|
+
const ref = readStringFlag(flags, 'codex-ref');
|
|
496
|
+
const downloadUrl = readStringFlag(flags, 'codex-download-url');
|
|
497
|
+
const downloadSha256 = readStringFlag(flags, 'codex-download-sha256');
|
|
498
|
+
const cliForce = Boolean(flags['codex-force']);
|
|
499
|
+
const setupResult = await runCodexCliSetup({
|
|
500
|
+
apply,
|
|
501
|
+
force: cliForce,
|
|
502
|
+
source,
|
|
503
|
+
ref,
|
|
504
|
+
downloadUrl,
|
|
505
|
+
downloadSha256
|
|
506
|
+
});
|
|
507
|
+
for (const line of formatCodexCliSetupSummary(setupResult)) {
|
|
508
|
+
console.log(line);
|
|
509
|
+
}
|
|
510
|
+
}
|
|
468
511
|
}
|
|
469
512
|
async function handleDoctor(rawArgs) {
|
|
470
513
|
const { flags } = parseArgs(rawArgs);
|
|
@@ -503,6 +546,39 @@ async function handleDevtools(rawArgs) {
|
|
|
503
546
|
console.log(line);
|
|
504
547
|
}
|
|
505
548
|
}
|
|
549
|
+
/**
 * Entry point for the `codex` CLI command. Only the `setup` subcommand is
 * accepted; it forwards the parsed flags to `runCodexCliSetup` and prints the
 * outcome either as pretty-printed JSON (`--format json`) or as the text
 * summary lines produced by `formatCodexCliSetupSummary`.
 *
 * @param {string[]} rawArgs - Arguments following the `codex` command.
 * @returns {Promise<void>}
 * @throws {Error} When the subcommand is missing or is not `setup`.
 */
async function handleCodex(rawArgs) {
    const parsed = parseArgs(rawArgs);
    const flags = parsed.flags;
    const subcommand = parsed.positionals.shift();

    if (!subcommand) {
        throw new Error('codex requires a subcommand (setup).');
    }
    if (subcommand !== 'setup') {
        throw new Error(`Unknown codex subcommand: ${subcommand}`);
    }

    const wantsJson = flags['format'] === 'json';

    // `--yes` is forwarded as `apply`; the CLI help describes a run without it as plan-only.
    const setupOptions = {
        apply: Boolean(flags['yes']),
        force: Boolean(flags['force']),
        source: readStringFlag(flags, 'source'),
        ref: readStringFlag(flags, 'ref'),
        downloadUrl: readStringFlag(flags, 'download-url'),
        downloadSha256: readStringFlag(flags, 'download-sha256')
    };
    const outcome = await runCodexCliSetup(setupOptions);

    if (wantsJson) {
        console.log(JSON.stringify(outcome, null, 2));
        return;
    }

    for (const summaryLine of formatCodexCliSetupSummary(outcome)) {
        console.log(summaryLine);
    }
}
|
|
506
582
|
async function handleSkills(rawArgs) {
|
|
507
583
|
const { positionals, flags } = parseArgs(rawArgs);
|
|
508
584
|
const subcommand = positionals[0];
|
|
@@ -690,6 +766,8 @@ Commands:
|
|
|
690
766
|
--parent-run <id> Link run to parent run id.
|
|
691
767
|
--approval-policy <p> Record approval policy metadata.
|
|
692
768
|
--format json Emit machine-readable output.
|
|
769
|
+
--execution-mode <mcp|cloud> Force execution mode for this run and child subpipelines.
|
|
770
|
+
--cloud Shortcut for --execution-mode cloud.
|
|
693
771
|
--target <stage-id> Focus plan/build metadata on a specific stage (alias: --target-stage).
|
|
694
772
|
--goal "<goal>" When pipeline is rlm, set the RLM goal.
|
|
695
773
|
--validator <cmd|none> When pipeline is rlm, set the validator command.
|
|
@@ -746,7 +824,22 @@ Commands:
|
|
|
746
824
|
|
|
747
825
|
self-check [--format json]
|
|
748
826
|
init codex [--cwd <path>] [--force]
|
|
827
|
+
--codex-cli Also run CO-managed Codex CLI setup (plan unless --yes).
|
|
828
|
+
--codex-source <path> Build from local Codex repo (or git URL).
|
|
829
|
+
--codex-ref <ref> Git ref (branch/tag/sha) when building from repo.
|
|
830
|
+
--codex-download-url <url> Download a prebuilt codex binary.
|
|
831
|
+
--codex-download-sha256 <sha> Expected SHA256 for the prebuilt download.
|
|
832
|
+
--codex-force Overwrite existing CO-managed codex binary.
|
|
833
|
+
--yes Apply codex CLI setup (otherwise plan only).
|
|
749
834
|
doctor [--format json]
|
|
835
|
+
codex setup
|
|
836
|
+
--source <path> Build from local Codex repo (or git URL).
|
|
837
|
+
--ref <ref> Git ref (branch/tag/sha) when building from repo.
|
|
838
|
+
--download-url <url> Download a prebuilt codex binary.
|
|
839
|
+
--download-sha256 <sha> Expected SHA256 for the prebuilt download.
|
|
840
|
+
--force Overwrite existing CO-managed codex binary.
|
|
841
|
+
--yes Apply setup (otherwise plan only).
|
|
842
|
+
--format json Emit machine-readable output.
|
|
750
843
|
devtools setup Print DevTools MCP setup instructions.
|
|
751
844
|
--yes Apply setup by running "codex mcp add ...".
|
|
752
845
|
--format json Emit machine-readable output (dry-run only).
|
|
@@ -4,17 +4,41 @@ export class CommandBuilder {
|
|
|
4
4
|
this.executePipeline = executePipeline;
|
|
5
5
|
}
|
|
6
6
|
async build(input) {
|
|
7
|
-
const result = await this.executePipeline();
|
|
7
|
+
const result = await this.executePipeline(input);
|
|
8
8
|
return {
|
|
9
9
|
subtaskId: input.target.id,
|
|
10
10
|
artifacts: [
|
|
11
11
|
{ path: result.manifestPath, description: 'CLI run manifest' },
|
|
12
|
-
{ path: result.logPath, description: 'Runner log (ndjson)' }
|
|
12
|
+
{ path: result.logPath, description: 'Runner log (ndjson)' },
|
|
13
|
+
...(result.manifest.cloud_execution?.diff_path
|
|
14
|
+
? [{ path: result.manifest.cloud_execution.diff_path, description: 'Cloud diff artifact' }]
|
|
15
|
+
: [])
|
|
13
16
|
],
|
|
14
17
|
mode: input.mode,
|
|
15
18
|
runId: input.runId,
|
|
16
19
|
success: result.success,
|
|
17
|
-
notes: result.notes.join('\n') || undefined
|
|
20
|
+
notes: result.notes.join('\n') || undefined,
|
|
21
|
+
cloudExecution: result.manifest.cloud_execution
|
|
22
|
+
? {
|
|
23
|
+
taskId: result.manifest.cloud_execution.task_id,
|
|
24
|
+
environmentId: result.manifest.cloud_execution.environment_id,
|
|
25
|
+
status: result.manifest.cloud_execution.status,
|
|
26
|
+
statusUrl: result.manifest.cloud_execution.status_url,
|
|
27
|
+
submittedAt: result.manifest.cloud_execution.submitted_at,
|
|
28
|
+
completedAt: result.manifest.cloud_execution.completed_at,
|
|
29
|
+
lastPolledAt: result.manifest.cloud_execution.last_polled_at,
|
|
30
|
+
pollCount: result.manifest.cloud_execution.poll_count,
|
|
31
|
+
pollIntervalSeconds: result.manifest.cloud_execution.poll_interval_seconds,
|
|
32
|
+
timeoutSeconds: result.manifest.cloud_execution.timeout_seconds,
|
|
33
|
+
attempts: result.manifest.cloud_execution.attempts,
|
|
34
|
+
diffPath: result.manifest.cloud_execution.diff_path,
|
|
35
|
+
diffUrl: result.manifest.cloud_execution.diff_url,
|
|
36
|
+
diffStatus: result.manifest.cloud_execution.diff_status,
|
|
37
|
+
applyStatus: result.manifest.cloud_execution.apply_status,
|
|
38
|
+
logPath: result.manifest.cloud_execution.log_path,
|
|
39
|
+
error: result.manifest.cloud_execution.error
|
|
40
|
+
}
|
|
41
|
+
: null
|
|
18
42
|
};
|
|
19
43
|
}
|
|
20
44
|
}
|
|
@@ -43,6 +43,9 @@ export class CommandPlanner {
|
|
|
43
43
|
if (stagePlanHints.executionMode) {
|
|
44
44
|
metadata.executionMode = stagePlanHints.executionMode;
|
|
45
45
|
}
|
|
46
|
+
if (stagePlanHints.cloudEnvId) {
|
|
47
|
+
metadata.cloudEnvId = stagePlanHints.cloudEnvId;
|
|
48
|
+
}
|
|
46
49
|
metadata.requiresCloud = requiresCloud;
|
|
47
50
|
return {
|
|
48
51
|
id: `${this.pipeline.id}:${stage.id}`,
|
|
@@ -117,12 +120,25 @@ function extractStagePlanHints(stage) {
|
|
|
117
120
|
const executionMode = typeof rawExecutionMode === 'string'
|
|
118
121
|
? rawExecutionMode.trim().toLowerCase() || null
|
|
119
122
|
: null;
|
|
123
|
+
const rawCloudEnvId = typeof planConfig.cloudEnvId === 'string'
|
|
124
|
+
? planConfig.cloudEnvId
|
|
125
|
+
: typeof planConfig.cloud_env_id === 'string'
|
|
126
|
+
? planConfig.cloud_env_id
|
|
127
|
+
: typeof stageRecord.cloudEnvId === 'string'
|
|
128
|
+
? stageRecord.cloudEnvId
|
|
129
|
+
: typeof stageRecord.cloud_env_id === 'string'
|
|
130
|
+
? stageRecord.cloud_env_id
|
|
131
|
+
: undefined;
|
|
132
|
+
const cloudEnvId = typeof rawCloudEnvId === 'string'
|
|
133
|
+
? rawCloudEnvId.trim() || null
|
|
134
|
+
: null;
|
|
120
135
|
return {
|
|
121
136
|
runnable: planConfig.runnable,
|
|
122
137
|
defaultTarget,
|
|
123
138
|
aliases,
|
|
124
139
|
requiresCloud,
|
|
125
|
-
executionMode
|
|
140
|
+
executionMode,
|
|
141
|
+
cloudEnvId
|
|
126
142
|
};
|
|
127
143
|
}
|
|
128
144
|
function resolveStageRequiresCloud(stage, hints) {
|
|
@@ -1,11 +1,46 @@
|
|
|
1
|
+
import { diagnoseCloudFailure } from './cloudFailureDiagnostics.js';
|
|
1
2
|
export class CommandReviewer {
|
|
2
3
|
getResult;
|
|
3
4
|
constructor(getResult) {
|
|
4
5
|
this.getResult = getResult;
|
|
5
6
|
}
|
|
6
7
|
async review(input) {
|
|
7
|
-
void input;
|
|
8
8
|
const result = this.requireResult();
|
|
9
|
+
if (input.mode === 'cloud') {
|
|
10
|
+
const cloudExecution = result.manifest.cloud_execution;
|
|
11
|
+
const status = cloudExecution?.status ?? 'unknown';
|
|
12
|
+
const cloudTask = cloudExecution?.task_id ?? '<unknown>';
|
|
13
|
+
const approved = status === 'ready' && result.success;
|
|
14
|
+
const diagnosis = diagnoseCloudFailure({
|
|
15
|
+
status,
|
|
16
|
+
statusDetail: result.manifest.status_detail ?? null,
|
|
17
|
+
error: cloudExecution?.error ?? null
|
|
18
|
+
});
|
|
19
|
+
const summaryLines = [
|
|
20
|
+
approved
|
|
21
|
+
? `Cloud task ${cloudTask} completed successfully.`
|
|
22
|
+
: `Cloud task ${cloudTask} did not complete successfully (${status}).`,
|
|
23
|
+
`Manifest: ${result.manifestPath}`,
|
|
24
|
+
`Runner log: ${result.logPath}`,
|
|
25
|
+
...(cloudExecution?.status_url ? [`Cloud status URL: ${cloudExecution.status_url}`] : [])
|
|
26
|
+
];
|
|
27
|
+
if (!approved) {
|
|
28
|
+
summaryLines.push(`Failure class: ${diagnosis.category}`);
|
|
29
|
+
summaryLines.push(`Guidance: ${diagnosis.guidance}`);
|
|
30
|
+
}
|
|
31
|
+
const feedbackLines = [cloudExecution?.error ?? (result.notes.join('\n') || undefined)].filter((line) => Boolean(line && line.trim().length > 0));
|
|
32
|
+
if (!approved) {
|
|
33
|
+
feedbackLines.push(`Failure class: ${diagnosis.category}`);
|
|
34
|
+
feedbackLines.push(`Guidance: ${diagnosis.guidance}`);
|
|
35
|
+
}
|
|
36
|
+
return {
|
|
37
|
+
summary: summaryLines.join('\n'),
|
|
38
|
+
decision: {
|
|
39
|
+
approved,
|
|
40
|
+
feedback: feedbackLines.length > 0 ? feedbackLines.join('\n') : undefined
|
|
41
|
+
}
|
|
42
|
+
};
|
|
43
|
+
}
|
|
9
44
|
const summaryLines = [
|
|
10
45
|
result.success
|
|
11
46
|
? 'Diagnostics pipeline succeeded.'
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { ensureGuardrailStatus } from '../run/manifest.js';
|
|
2
|
+
import { diagnoseCloudFailure } from './cloudFailureDiagnostics.js';
|
|
2
3
|
export class CommandTester {
|
|
3
4
|
getResult;
|
|
4
5
|
constructor(getResult) {
|
|
@@ -6,6 +7,33 @@ export class CommandTester {
|
|
|
6
7
|
}
|
|
7
8
|
async test(input) {
|
|
8
9
|
const result = this.requireResult();
|
|
10
|
+
if (input.mode === 'cloud') {
|
|
11
|
+
const cloudExecution = result.manifest.cloud_execution;
|
|
12
|
+
const status = cloudExecution?.status ?? 'unknown';
|
|
13
|
+
const passed = status === 'ready' && result.success;
|
|
14
|
+
const diagnosis = diagnoseCloudFailure({
|
|
15
|
+
status,
|
|
16
|
+
statusDetail: result.manifest.status_detail ?? null,
|
|
17
|
+
error: cloudExecution?.error ?? null
|
|
18
|
+
});
|
|
19
|
+
const failureDetails = cloudExecution?.error ??
|
|
20
|
+
`Cloud task status: ${status}${cloudExecution?.task_id ? ` (${cloudExecution.task_id})` : ''}`;
|
|
21
|
+
const reports = [
|
|
22
|
+
{
|
|
23
|
+
name: 'cloud-task',
|
|
24
|
+
status: passed ? 'passed' : 'failed',
|
|
25
|
+
details: passed
|
|
26
|
+
? failureDetails
|
|
27
|
+
: `${failureDetails}\nFailure class: ${diagnosis.category}. ${diagnosis.guidance}`
|
|
28
|
+
}
|
|
29
|
+
];
|
|
30
|
+
return {
|
|
31
|
+
subtaskId: input.build.subtaskId,
|
|
32
|
+
success: passed,
|
|
33
|
+
reports,
|
|
34
|
+
runId: input.runId
|
|
35
|
+
};
|
|
36
|
+
}
|
|
9
37
|
const guardrailStatus = ensureGuardrailStatus(result.manifest);
|
|
10
38
|
const reports = [
|
|
11
39
|
{
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
// HTTP gateway errors must stand alone as a number: a bare substring match on
// "502" would also fire on task ids, ports or durations such as "15021".
const HTTP_GATEWAY_ERROR_PATTERN = /(?<!\d)50[234](?!\d)/;

/**
 * Ordered classification rules; the first rule with a matching pattern wins.
 * String patterns are matched as lower-case substrings, RegExp patterns are
 * tested against the lower-cased signal.
 */
const CLOUD_FAILURE_RULES = [
    {
        category: 'configuration',
        patterns: ['cloud-env-missing', 'codex_cloud_env_id', 'no environment id is configured', '--env'],
        guidance: 'Set CODEX_CLOUD_ENV_ID (or metadata.cloudEnvId) to a valid cloud environment id before re-running.'
    },
    {
        category: 'credentials',
        patterns: ['unauthorized', 'forbidden', 'not logged in', 'login', 'api key', 'credential', 'token'],
        guidance: 'Ensure Codex Cloud credentials are available to the runner and have access to the configured environment.'
    },
    {
        category: 'connectivity',
        patterns: ['enotfound', 'econn', 'timed out', 'timeout', 'network', HTTP_GATEWAY_ERROR_PATTERN],
        guidance: 'Cloud endpoint connectivity looks unstable; retry and inspect network/endpoint health.'
    }
];

/** Statuses that mark a finished, unsuccessful cloud task. */
const TERMINAL_FAILURE_STATUSES = new Set(['failed', 'error', 'cancelled']);

/** Report whether `text` (already lower-cased) satisfies a string or RegExp pattern. */
function matchesFailurePattern(text, pattern) {
    return typeof pattern === 'string' ? text.includes(pattern) : pattern.test(text);
}

/**
 * Classify a cloud run failure from its status and error text.
 *
 * The non-blank string values among `status`, `statusDetail` and `error` are
 * joined with newlines into a signal, which is matched case-insensitively
 * against `CLOUD_FAILURE_RULES`. If no rule matches, a terminal failure
 * status yields `execution`; anything else yields `unknown`.
 *
 * @param {object} [options]
 * @param {string | null} [options.status] - Cloud task status.
 * @param {string | null} [options.statusDetail] - Manifest status detail.
 * @param {string | null} [options.error] - Cloud execution error text.
 * @returns {{ category: string, guidance: string, signal: string }}
 *   The failure category, remediation guidance and the signal that was inspected.
 */
export function diagnoseCloudFailure(options = {}) {
    const { status, statusDetail, error } = options ?? {};
    const signal = [status, statusDetail, error]
        .filter((value) => typeof value === 'string' && value.trim().length > 0)
        .join('\n');
    const normalized = signal.toLowerCase();

    const matchedRule = CLOUD_FAILURE_RULES.find((rule) => rule.patterns.some((pattern) => matchesFailurePattern(normalized, pattern)));
    if (matchedRule) {
        return {
            category: matchedRule.category,
            guidance: matchedRule.guidance,
            signal
        };
    }

    // Only string statuses can be terminal; a non-string status must not throw
    // here, since the signal builder above already tolerates it.
    const normalizedStatus = typeof status === 'string' ? status.trim().toLowerCase() : '';
    if (TERMINAL_FAILURE_STATUSES.has(normalizedStatus)) {
        return {
            category: 'execution',
            guidance: 'Inspect manifest cloud_execution.error and cloud command logs for the terminal cloud failure.',
            signal
        };
    }

    return {
        category: 'unknown',
        guidance: 'Inspect manifest status_detail plus cloud command logs to classify this failure.',
        signal
    };
}
|