@kbediako/codex-orchestrator 0.1.12 → 0.1.14-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/LICENSE +19 -5
  2. package/README.md +47 -2
  3. package/dist/bin/codex-orchestrator.js +93 -0
  4. package/dist/orchestrator/src/cli/adapters/CommandBuilder.js +27 -3
  5. package/dist/orchestrator/src/cli/adapters/CommandPlanner.js +17 -1
  6. package/dist/orchestrator/src/cli/adapters/CommandReviewer.js +36 -1
  7. package/dist/orchestrator/src/cli/adapters/CommandTester.js +28 -0
  8. package/dist/orchestrator/src/cli/adapters/cloudFailureDiagnostics.js +45 -0
  9. package/dist/orchestrator/src/cli/codexCliSetup.js +294 -0
  10. package/dist/orchestrator/src/cli/init.js +3 -0
  11. package/dist/orchestrator/src/cli/mcp.js +4 -2
  12. package/dist/orchestrator/src/cli/orchestrator.js +298 -28
  13. package/dist/orchestrator/src/cli/rlm/context.js +31 -3
  14. package/dist/orchestrator/src/cli/rlm/symbolic.js +152 -15
  15. package/dist/orchestrator/src/cli/rlmRunner.js +59 -5
  16. package/dist/orchestrator/src/cli/run/manifest.js +3 -0
  17. package/dist/orchestrator/src/cli/services/commandRunner.js +87 -0
  18. package/dist/orchestrator/src/cli/services/runSummaryWriter.js +24 -0
  19. package/dist/orchestrator/src/cli/skills.js +1 -1
  20. package/dist/orchestrator/src/cli/utils/codexCli.js +94 -0
  21. package/dist/orchestrator/src/cli/utils/codexPaths.js +13 -0
  22. package/dist/orchestrator/src/cli/utils/devtools.js +9 -12
  23. package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js +255 -0
  24. package/dist/orchestrator/src/learning/crystalizer.js +2 -1
  25. package/dist/orchestrator/src/manager.js +1 -0
  26. package/dist/orchestrator/src/sync/CloudSyncWorker.js +37 -7
  27. package/dist/scripts/design/pipeline/context.js +3 -2
  28. package/dist/scripts/lib/run-manifests.js +14 -0
  29. package/docs/README.md +22 -2
  30. package/package.json +6 -2
  31. package/schemas/manifest.json +83 -0
  32. package/skills/collab-deliberation/SKILL.md +21 -0
  33. package/skills/collab-evals/SKILL.md +32 -0
  34. package/skills/delegate-early/SKILL.md +47 -0
  35. package/skills/delegation-usage/DELEGATION_GUIDE.md +5 -4
  36. package/skills/delegation-usage/SKILL.md +11 -5
  37. package/skills/docs-first/SKILL.md +2 -1
  38. package/templates/README.md +4 -0
package/LICENSE CHANGED
@@ -1,7 +1,21 @@
1
+ MIT License
2
+
1
3
  Copyright (c) 2025 Kbediako
2
- All rights reserved.
3
4
 
4
- This software and associated documentation files (the "Software") are proprietary
5
- and confidential. Unauthorized copying, modification, distribution, or use of the
6
- Software, via any medium, is strictly prohibited without prior written permission
7
- from the copyright holder.
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -39,6 +39,32 @@ Node.js >= 20 is required.
39
39
  > Tip: if you prefer `npx`, replace `codex-orch` with `npx @kbediako/codex-orchestrator`.
40
40
  > Tip: for multiple commands, you can also `export MCP_RUNNER_TASK_ID=<task-id>` once.
41
41
 
42
+ ## Downstream init (recommended)
43
+
44
+ Use this when you want Codex to drive work inside another repo with the CO defaults.
45
+
46
+ 1. Install templates:
47
+ ```bash
48
+ codex-orchestrator init codex --cwd /path/to/repo
49
+ ```
50
+ One-shot (templates + CO-managed Codex CLI):
51
+ ```bash
52
+ codex-orchestrator init codex --codex-cli --yes
53
+ ```
54
+ 2. Register the delegation MCP server (one-time per machine):
55
+ ```bash
56
+ codex mcp add delegation -- codex-orchestrator delegate-server --repo /path/to/repo
57
+ ```
58
+ 3. Optional (collab JSONL parity): set up a CO-managed Codex CLI:
59
+ ```bash
60
+ codex-orchestrator codex setup
61
+ ```
62
+ 4. Optional (fast refresh helper for downstream users):
63
+ ```bash
64
+ scripts/codex-cli-refresh.sh --repo /path/to/codex
65
+ ```
66
+ Repo-only helper (not included in npm package). Set `CODEX_REPO` or `CODEX_CLI_SOURCE` to avoid passing `--repo` each time.
67
+
42
68
  ## Delegation MCP server
43
69
 
44
70
  Run the delegation MCP server over stdio:
@@ -57,6 +83,7 @@ codex -c 'mcp_servers.delegation.enabled=true' ...
57
83
  ## Delegation + RLM flow
58
84
 
59
85
  RLM (Recursive Language Model) is the long-horizon loop used by the `rlm` pipeline (`codex-orchestrator rlm "<goal>"` or `codex-orchestrator start rlm --goal "<goal>"`). Delegated runs only enter RLM when the child is launched with the `rlm` pipeline (or the rlm runner directly). In auto mode it resolves to symbolic when delegated, when `RLM_CONTEXT_PATH` is set, or when the context exceeds `RLM_SYMBOLIC_MIN_BYTES`; otherwise it stays iterative. The runner writes state to `.runs/<task-id>/cli/<run-id>/rlm/state.json` and stops when the validator passes or budgets are exhausted.
86
+ Symbolic subcalls can optionally use collab tools when `RLM_SYMBOLIC_COLLAB=1` (requires a collab-enabled Codex CLI via `codex-orchestrator codex setup`). Collab tool calls parsed from `codex exec --json --enable collab` are stored in `manifest.collab_tool_calls` (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, set to `0` to disable).
60
87
 
61
88
  ### Delegation flow
62
89
  ```mermaid
@@ -82,7 +109,7 @@ flowchart TB
82
109
  G{Symbolic?}
83
110
  H["Context store<br/>(chunk + search)"]
84
111
  I["Planner JSON<br/>(select subcalls)"]
85
- J["Subcalls<br/>(tool + edits)"]
112
+ J["Subcalls<br/>(tool + edits, collab optional)"]
86
113
  K["Validator<br/>(test command)"]
87
114
  L["State + artifacts<br/>.runs/&lt;task-id&gt;/cli/&lt;run-id&gt;/rlm/state.json"]
88
115
  M["Exit status"]
@@ -110,6 +137,9 @@ Bundled skills (may vary by release):
110
137
  - `delegation-usage`
111
138
  - `standalone-review`
112
139
  - `docs-first`
140
+ - `collab-evals`
141
+ - `collab-deliberation`
142
+ - `delegate-early`
113
143
 
114
144
  ## DevTools readiness
115
145
 
@@ -129,6 +159,9 @@ codex-orchestrator devtools setup
129
159
  - `codex-orchestrator plan <pipeline>` — preview pipeline stages.
130
160
  - `codex-orchestrator exec <cmd>` — run a one-off command with the exec runtime.
131
161
  - `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`) into a repo.
162
+ - `codex-orchestrator init codex --codex-cli --yes --codex-source <path>` — also provision a CO-managed Codex CLI binary (build-from-source default; set `CODEX_CLI_SOURCE` to avoid passing `--codex-source` every time).
163
+ - `codex-orchestrator init codex --codex-cli --yes --codex-download-url <url> --codex-download-sha256 <sha>` — opt-in to a prebuilt Codex CLI download.
164
+ - `codex-orchestrator codex setup` — plan/apply a CO-managed Codex CLI install (for collab JSONL parity; use `--download-url` + `--download-sha256` for prebuilts).
132
165
  - `codex-orchestrator self-check --format json` — JSON health payload.
133
166
  - `codex-orchestrator mcp serve` — Codex MCP stdio server.
134
167
 
@@ -141,6 +174,18 @@ codex-orchestrator devtools setup
141
174
 
142
175
  ## Repository + contributor guide
143
176
 
144
- Repo internals, development workflows, and deeper architecture notes live in the GitHub repository:
177
+ Repo internals, development workflows, and deeper architecture notes (contributor/internal) live in the GitHub repository:
145
178
  - `docs/README.md`
146
179
  - `docs/diagnostics-prompt-guide.md` (first-run diagnostics prompt + expected outputs)
180
+ - `docs/guides/collab-vs-mcp.md` (agent-first decision guide)
181
+
182
+ ## RLM benchmark graphs
183
+
184
+ Seeded OOLONG accuracy curves (Wilson 95% CI, runs=5). In these runs, the baseline accuracy degrades as context length grows, while RLM stays near the ceiling across the tested lengths.
185
+
186
+ <table>
187
+ <tr>
188
+ <td><img src="docs/assets/oolong-baseline-seeded-wilson95-runs5.png" alt="Baseline OOLONG seeded Wilson 95% CI" width="420"></td>
189
+ <td><img src="docs/assets/oolong-rlm-seeded-wilson95-runs5.png" alt="RLM OOLONG seeded Wilson 95% CI" width="420"></td>
190
+ </tr>
191
+ </table>
package/dist/bin/codex-orchestrator.js CHANGED
@@ -13,6 +13,7 @@ import { buildSelfCheckResult } from '../orchestrator/src/cli/selfCheck.js';
13
13
  import { initCodexTemplates, formatInitSummary } from '../orchestrator/src/cli/init.js';
14
14
  import { runDoctor, formatDoctorSummary } from '../orchestrator/src/cli/doctor.js';
15
15
  import { formatDevtoolsSetupSummary, runDevtoolsSetup } from '../orchestrator/src/cli/devtoolsSetup.js';
16
+ import { formatCodexCliSetupSummary, runCodexCliSetup } from '../orchestrator/src/cli/codexCliSetup.js';
16
17
  import { formatSkillsInstallSummary, installSkills } from '../orchestrator/src/cli/skills.js';
17
18
  import { loadPackageInfo } from '../orchestrator/src/cli/utils/packageInfo.js';
18
19
  import { slugify } from '../orchestrator/src/cli/utils/strings.js';
@@ -63,6 +64,9 @@ async function main() {
63
64
  case 'doctor':
64
65
  await handleDoctor(args);
65
66
  break;
67
+ case 'codex':
68
+ await handleCodex(args);
69
+ break;
66
70
  case 'devtools':
67
71
  await handleDevtools(args);
68
72
  break;
@@ -136,6 +140,24 @@ function readStringFlag(flags, key) {
136
140
  const trimmed = value.trim();
137
141
  return trimmed.length > 0 ? trimmed : undefined;
138
142
  }
143
+ function resolveExecutionModeFlag(flags) {
144
+ const cloudShortcut = flags['cloud'] === true;
145
+ const rawMode = readStringFlag(flags, 'execution-mode');
146
+ if (cloudShortcut) {
147
+ if (rawMode && rawMode.toLowerCase() !== 'cloud') {
148
+ throw new Error('Cannot combine --cloud with --execution-mode values other than cloud.');
149
+ }
150
+ return 'cloud';
151
+ }
152
+ if (!rawMode) {
153
+ return undefined;
154
+ }
155
+ const normalized = rawMode.toLowerCase();
156
+ if (normalized !== 'mcp' && normalized !== 'cloud') {
157
+ throw new Error('Invalid --execution-mode value. Expected one of: mcp, cloud.');
158
+ }
159
+ return normalized;
160
+ }
139
161
  function applyRlmEnvOverrides(flags, goal) {
140
162
  if (goal) {
141
163
  process.env.RLM_GOAL = goal;
@@ -198,6 +220,7 @@ async function handleStart(orchestrator, rawArgs) {
198
220
  const { positionals, flags } = parseArgs(rawArgs);
199
221
  const pipelineId = positionals[0];
200
222
  const format = flags['format'] === 'json' ? 'json' : 'text';
223
+ const executionMode = resolveExecutionModeFlag(flags);
201
224
  if (pipelineId === 'rlm') {
202
225
  const goal = readStringFlag(flags, 'goal');
203
226
  applyRlmEnvOverrides(flags, goal);
@@ -217,6 +240,7 @@ async function handleStart(orchestrator, rawArgs) {
217
240
  parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
218
241
  approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
219
242
  targetStageId: resolveTargetStageId(flags),
243
+ executionMode,
220
244
  runEvents
221
245
  });
222
246
  emitRunOutput(result, format, 'Run started');
@@ -465,6 +489,25 @@ async function handleInit(rawArgs) {
465
489
  for (const line of summary) {
466
490
  console.log(line);
467
491
  }
492
+ if (flags['codex-cli'] === true) {
493
+ const apply = Boolean(flags['yes']);
494
+ const source = readStringFlag(flags, 'codex-source');
495
+ const ref = readStringFlag(flags, 'codex-ref');
496
+ const downloadUrl = readStringFlag(flags, 'codex-download-url');
497
+ const downloadSha256 = readStringFlag(flags, 'codex-download-sha256');
498
+ const cliForce = Boolean(flags['codex-force']);
499
+ const setupResult = await runCodexCliSetup({
500
+ apply,
501
+ force: cliForce,
502
+ source,
503
+ ref,
504
+ downloadUrl,
505
+ downloadSha256
506
+ });
507
+ for (const line of formatCodexCliSetupSummary(setupResult)) {
508
+ console.log(line);
509
+ }
510
+ }
468
511
  }
469
512
  async function handleDoctor(rawArgs) {
470
513
  const { flags } = parseArgs(rawArgs);
@@ -503,6 +546,39 @@ async function handleDevtools(rawArgs) {
503
546
  console.log(line);
504
547
  }
505
548
  }
549
+ async function handleCodex(rawArgs) {
550
+ const { positionals, flags } = parseArgs(rawArgs);
551
+ const subcommand = positionals.shift();
552
+ if (!subcommand) {
553
+ throw new Error('codex requires a subcommand (setup).');
554
+ }
555
+ if (subcommand !== 'setup') {
556
+ throw new Error(`Unknown codex subcommand: ${subcommand}`);
557
+ }
558
+ const format = flags['format'] === 'json' ? 'json' : 'text';
559
+ const apply = Boolean(flags['yes']);
560
+ const source = readStringFlag(flags, 'source');
561
+ const ref = readStringFlag(flags, 'ref');
562
+ const downloadUrl = readStringFlag(flags, 'download-url');
563
+ const downloadSha256 = readStringFlag(flags, 'download-sha256');
564
+ const force = Boolean(flags['force']);
565
+ const result = await runCodexCliSetup({
566
+ apply,
567
+ force,
568
+ source,
569
+ ref,
570
+ downloadUrl,
571
+ downloadSha256
572
+ });
573
+ if (format === 'json') {
574
+ console.log(JSON.stringify(result, null, 2));
575
+ return;
576
+ }
577
+ const summary = formatCodexCliSetupSummary(result);
578
+ for (const line of summary) {
579
+ console.log(line);
580
+ }
581
+ }
506
582
  async function handleSkills(rawArgs) {
507
583
  const { positionals, flags } = parseArgs(rawArgs);
508
584
  const subcommand = positionals[0];
@@ -690,6 +766,8 @@ Commands:
690
766
  --parent-run <id> Link run to parent run id.
691
767
  --approval-policy <p> Record approval policy metadata.
692
768
  --format json Emit machine-readable output.
769
+ --execution-mode <mcp|cloud> Force execution mode for this run and child subpipelines.
770
+ --cloud Shortcut for --execution-mode cloud.
693
771
  --target <stage-id> Focus plan/build metadata on a specific stage (alias: --target-stage).
694
772
  --goal "<goal>" When pipeline is rlm, set the RLM goal.
695
773
  --validator <cmd|none> When pipeline is rlm, set the validator command.
@@ -746,7 +824,22 @@ Commands:
746
824
 
747
825
  self-check [--format json]
748
826
  init codex [--cwd <path>] [--force]
827
+ --codex-cli Also run CO-managed Codex CLI setup (plan unless --yes).
828
+ --codex-source <path> Build from local Codex repo (or git URL).
829
+ --codex-ref <ref> Git ref (branch/tag/sha) when building from repo.
830
+ --codex-download-url <url> Download a prebuilt codex binary.
831
+ --codex-download-sha256 <sha> Expected SHA256 for the prebuilt download.
832
+ --codex-force Overwrite existing CO-managed codex binary.
833
+ --yes Apply codex CLI setup (otherwise plan only).
749
834
  doctor [--format json]
835
+ codex setup
836
+ --source <path> Build from local Codex repo (or git URL).
837
+ --ref <ref> Git ref (branch/tag/sha) when building from repo.
838
+ --download-url <url> Download a prebuilt codex binary.
839
+ --download-sha256 <sha> Expected SHA256 for the prebuilt download.
840
+ --force Overwrite existing CO-managed codex binary.
841
+ --yes Apply setup (otherwise plan only).
842
+ --format json Emit machine-readable output.
750
843
  devtools setup Print DevTools MCP setup instructions.
751
844
  --yes Apply setup by running "codex mcp add ...".
752
845
  --format json Emit machine-readable output (dry-run only).
package/dist/orchestrator/src/cli/adapters/CommandBuilder.js CHANGED
@@ -4,17 +4,41 @@ export class CommandBuilder {
4
4
  this.executePipeline = executePipeline;
5
5
  }
6
6
  async build(input) {
7
- const result = await this.executePipeline();
7
+ const result = await this.executePipeline(input);
8
8
  return {
9
9
  subtaskId: input.target.id,
10
10
  artifacts: [
11
11
  { path: result.manifestPath, description: 'CLI run manifest' },
12
- { path: result.logPath, description: 'Runner log (ndjson)' }
12
+ { path: result.logPath, description: 'Runner log (ndjson)' },
13
+ ...(result.manifest.cloud_execution?.diff_path
14
+ ? [{ path: result.manifest.cloud_execution.diff_path, description: 'Cloud diff artifact' }]
15
+ : [])
13
16
  ],
14
17
  mode: input.mode,
15
18
  runId: input.runId,
16
19
  success: result.success,
17
- notes: result.notes.join('\n') || undefined
20
+ notes: result.notes.join('\n') || undefined,
21
+ cloudExecution: result.manifest.cloud_execution
22
+ ? {
23
+ taskId: result.manifest.cloud_execution.task_id,
24
+ environmentId: result.manifest.cloud_execution.environment_id,
25
+ status: result.manifest.cloud_execution.status,
26
+ statusUrl: result.manifest.cloud_execution.status_url,
27
+ submittedAt: result.manifest.cloud_execution.submitted_at,
28
+ completedAt: result.manifest.cloud_execution.completed_at,
29
+ lastPolledAt: result.manifest.cloud_execution.last_polled_at,
30
+ pollCount: result.manifest.cloud_execution.poll_count,
31
+ pollIntervalSeconds: result.manifest.cloud_execution.poll_interval_seconds,
32
+ timeoutSeconds: result.manifest.cloud_execution.timeout_seconds,
33
+ attempts: result.manifest.cloud_execution.attempts,
34
+ diffPath: result.manifest.cloud_execution.diff_path,
35
+ diffUrl: result.manifest.cloud_execution.diff_url,
36
+ diffStatus: result.manifest.cloud_execution.diff_status,
37
+ applyStatus: result.manifest.cloud_execution.apply_status,
38
+ logPath: result.manifest.cloud_execution.log_path,
39
+ error: result.manifest.cloud_execution.error
40
+ }
41
+ : null
18
42
  };
19
43
  }
20
44
  }
package/dist/orchestrator/src/cli/adapters/CommandPlanner.js CHANGED
@@ -43,6 +43,9 @@ export class CommandPlanner {
43
43
  if (stagePlanHints.executionMode) {
44
44
  metadata.executionMode = stagePlanHints.executionMode;
45
45
  }
46
+ if (stagePlanHints.cloudEnvId) {
47
+ metadata.cloudEnvId = stagePlanHints.cloudEnvId;
48
+ }
46
49
  metadata.requiresCloud = requiresCloud;
47
50
  return {
48
51
  id: `${this.pipeline.id}:${stage.id}`,
@@ -117,12 +120,25 @@ function extractStagePlanHints(stage) {
117
120
  const executionMode = typeof rawExecutionMode === 'string'
118
121
  ? rawExecutionMode.trim().toLowerCase() || null
119
122
  : null;
123
+ const rawCloudEnvId = typeof planConfig.cloudEnvId === 'string'
124
+ ? planConfig.cloudEnvId
125
+ : typeof planConfig.cloud_env_id === 'string'
126
+ ? planConfig.cloud_env_id
127
+ : typeof stageRecord.cloudEnvId === 'string'
128
+ ? stageRecord.cloudEnvId
129
+ : typeof stageRecord.cloud_env_id === 'string'
130
+ ? stageRecord.cloud_env_id
131
+ : undefined;
132
+ const cloudEnvId = typeof rawCloudEnvId === 'string'
133
+ ? rawCloudEnvId.trim() || null
134
+ : null;
120
135
  return {
121
136
  runnable: planConfig.runnable,
122
137
  defaultTarget,
123
138
  aliases,
124
139
  requiresCloud,
125
- executionMode
140
+ executionMode,
141
+ cloudEnvId
126
142
  };
127
143
  }
128
144
  function resolveStageRequiresCloud(stage, hints) {
package/dist/orchestrator/src/cli/adapters/CommandReviewer.js CHANGED
@@ -1,11 +1,46 @@
1
+ import { diagnoseCloudFailure } from './cloudFailureDiagnostics.js';
1
2
  export class CommandReviewer {
2
3
  getResult;
3
4
  constructor(getResult) {
4
5
  this.getResult = getResult;
5
6
  }
6
7
  async review(input) {
7
- void input;
8
8
  const result = this.requireResult();
9
+ if (input.mode === 'cloud') {
10
+ const cloudExecution = result.manifest.cloud_execution;
11
+ const status = cloudExecution?.status ?? 'unknown';
12
+ const cloudTask = cloudExecution?.task_id ?? '<unknown>';
13
+ const approved = status === 'ready' && result.success;
14
+ const diagnosis = diagnoseCloudFailure({
15
+ status,
16
+ statusDetail: result.manifest.status_detail ?? null,
17
+ error: cloudExecution?.error ?? null
18
+ });
19
+ const summaryLines = [
20
+ approved
21
+ ? `Cloud task ${cloudTask} completed successfully.`
22
+ : `Cloud task ${cloudTask} did not complete successfully (${status}).`,
23
+ `Manifest: ${result.manifestPath}`,
24
+ `Runner log: ${result.logPath}`,
25
+ ...(cloudExecution?.status_url ? [`Cloud status URL: ${cloudExecution.status_url}`] : [])
26
+ ];
27
+ if (!approved) {
28
+ summaryLines.push(`Failure class: ${diagnosis.category}`);
29
+ summaryLines.push(`Guidance: ${diagnosis.guidance}`);
30
+ }
31
+ const feedbackLines = [cloudExecution?.error ?? (result.notes.join('\n') || undefined)].filter((line) => Boolean(line && line.trim().length > 0));
32
+ if (!approved) {
33
+ feedbackLines.push(`Failure class: ${diagnosis.category}`);
34
+ feedbackLines.push(`Guidance: ${diagnosis.guidance}`);
35
+ }
36
+ return {
37
+ summary: summaryLines.join('\n'),
38
+ decision: {
39
+ approved,
40
+ feedback: feedbackLines.length > 0 ? feedbackLines.join('\n') : undefined
41
+ }
42
+ };
43
+ }
9
44
  const summaryLines = [
10
45
  result.success
11
46
  ? 'Diagnostics pipeline succeeded.'
package/dist/orchestrator/src/cli/adapters/CommandTester.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { ensureGuardrailStatus } from '../run/manifest.js';
2
+ import { diagnoseCloudFailure } from './cloudFailureDiagnostics.js';
2
3
  export class CommandTester {
3
4
  getResult;
4
5
  constructor(getResult) {
@@ -6,6 +7,33 @@ export class CommandTester {
6
7
  }
7
8
  async test(input) {
8
9
  const result = this.requireResult();
10
+ if (input.mode === 'cloud') {
11
+ const cloudExecution = result.manifest.cloud_execution;
12
+ const status = cloudExecution?.status ?? 'unknown';
13
+ const passed = status === 'ready' && result.success;
14
+ const diagnosis = diagnoseCloudFailure({
15
+ status,
16
+ statusDetail: result.manifest.status_detail ?? null,
17
+ error: cloudExecution?.error ?? null
18
+ });
19
+ const failureDetails = cloudExecution?.error ??
20
+ `Cloud task status: ${status}${cloudExecution?.task_id ? ` (${cloudExecution.task_id})` : ''}`;
21
+ const reports = [
22
+ {
23
+ name: 'cloud-task',
24
+ status: passed ? 'passed' : 'failed',
25
+ details: passed
26
+ ? failureDetails
27
+ : `${failureDetails}\nFailure class: ${diagnosis.category}. ${diagnosis.guidance}`
28
+ }
29
+ ];
30
+ return {
31
+ subtaskId: input.build.subtaskId,
32
+ success: passed,
33
+ reports,
34
+ runId: input.runId
35
+ };
36
+ }
9
37
  const guardrailStatus = ensureGuardrailStatus(result.manifest);
10
38
  const reports = [
11
39
  {
package/dist/orchestrator/src/cli/adapters/cloudFailureDiagnostics.js ADDED
@@ -0,0 +1,45 @@
1
+ const CLOUD_FAILURE_RULES = [
2
+ {
3
+ category: 'configuration',
4
+ patterns: ['cloud-env-missing', 'codex_cloud_env_id', 'no environment id is configured', '--env'],
5
+ guidance: 'Set CODEX_CLOUD_ENV_ID (or metadata.cloudEnvId) to a valid cloud environment id before re-running.'
6
+ },
7
+ {
8
+ category: 'credentials',
9
+ patterns: ['unauthorized', 'forbidden', 'not logged in', 'login', 'api key', 'credential', 'token'],
10
+ guidance: 'Ensure Codex Cloud credentials are available to the runner and have access to the configured environment.'
11
+ },
12
+ {
13
+ category: 'connectivity',
14
+ patterns: ['enotfound', 'econn', 'timed out', 'timeout', 'network', '502', '503', '504'],
15
+ guidance: 'Cloud endpoint connectivity looks unstable; retry and inspect network/endpoint health.'
16
+ }
17
+ ];
18
+ const TERMINAL_FAILURE_STATUSES = new Set(['failed', 'error', 'cancelled']);
19
+ export function diagnoseCloudFailure(options) {
20
+ const signal = [options.status ?? null, options.statusDetail ?? null, options.error ?? null]
21
+ .filter((value) => typeof value === 'string' && value.trim().length > 0)
22
+ .join('\n');
23
+ const normalized = signal.toLowerCase();
24
+ for (const rule of CLOUD_FAILURE_RULES) {
25
+ if (rule.patterns.some((pattern) => normalized.includes(pattern))) {
26
+ return {
27
+ category: rule.category,
28
+ guidance: rule.guidance,
29
+ signal
30
+ };
31
+ }
32
+ }
33
+ if (options.status && TERMINAL_FAILURE_STATUSES.has(options.status.toLowerCase())) {
34
+ return {
35
+ category: 'execution',
36
+ guidance: 'Inspect manifest cloud_execution.error and cloud command logs for the terminal cloud failure.',
37
+ signal
38
+ };
39
+ }
40
+ return {
41
+ category: 'unknown',
42
+ guidance: 'Inspect manifest status_detail plus cloud command logs to classify this failure.',
43
+ signal
44
+ };
45
+ }