@kbediako/codex-orchestrator 0.1.13 → 0.1.14-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -59,6 +59,11 @@ Use this when you want Codex to drive work inside another repo with the CO defau
59
59
  ```bash
60
60
  codex-orchestrator codex setup
61
61
  ```
62
+ 4. Optional (fast refresh helper for downstream users):
63
+ ```bash
64
+ scripts/codex-cli-refresh.sh --repo /path/to/codex
65
+ ```
66
+ Repo-only helper (not included in npm package). Set `CODEX_REPO` or `CODEX_CLI_SOURCE` to avoid passing `--repo` each time.
62
67
 
63
68
  ## Delegation MCP server
64
69
 
@@ -140,6 +140,24 @@ function readStringFlag(flags, key) {
140
140
  const trimmed = value.trim();
141
141
  return trimmed.length > 0 ? trimmed : undefined;
142
142
  }
143
/**
 * Resolve the execution mode requested on the command line.
 *
 * `--cloud` (boolean flag) is a shortcut for `--execution-mode cloud`; pairing it
 * with any other explicit mode is rejected.
 *
 * @param {Record<string, unknown>} flags - Parsed CLI flags.
 * @returns {'mcp' | 'cloud' | undefined} Lower-cased mode, or `undefined` when none was requested.
 * @throws {Error} When `--cloud` conflicts with `--execution-mode`, or the mode is not `mcp`/`cloud`.
 */
function resolveExecutionModeFlag(flags) {
  const explicitMode = readStringFlag(flags, 'execution-mode');
  const mode = explicitMode ? explicitMode.toLowerCase() : undefined;

  if (flags['cloud'] === true) {
    if (mode !== undefined && mode !== 'cloud') {
      throw new Error('Cannot combine --cloud with --execution-mode values other than cloud.');
    }
    return 'cloud';
  }

  if (mode === undefined) {
    return undefined;
  }
  if (!['mcp', 'cloud'].includes(mode)) {
    throw new Error('Invalid --execution-mode value. Expected one of: mcp, cloud.');
  }
  return mode;
}
143
161
  function applyRlmEnvOverrides(flags, goal) {
144
162
  if (goal) {
145
163
  process.env.RLM_GOAL = goal;
@@ -202,6 +220,7 @@ async function handleStart(orchestrator, rawArgs) {
202
220
  const { positionals, flags } = parseArgs(rawArgs);
203
221
  const pipelineId = positionals[0];
204
222
  const format = flags['format'] === 'json' ? 'json' : 'text';
223
+ const executionMode = resolveExecutionModeFlag(flags);
205
224
  if (pipelineId === 'rlm') {
206
225
  const goal = readStringFlag(flags, 'goal');
207
226
  applyRlmEnvOverrides(flags, goal);
@@ -221,6 +240,7 @@ async function handleStart(orchestrator, rawArgs) {
221
240
  parentRunId: typeof flags['parent-run'] === 'string' ? flags['parent-run'] : undefined,
222
241
  approvalPolicy: typeof flags['approval-policy'] === 'string' ? flags['approval-policy'] : undefined,
223
242
  targetStageId: resolveTargetStageId(flags),
243
+ executionMode,
224
244
  runEvents
225
245
  });
226
246
  emitRunOutput(result, format, 'Run started');
@@ -746,6 +766,8 @@ Commands:
746
766
  --parent-run <id> Link run to parent run id.
747
767
  --approval-policy <p> Record approval policy metadata.
748
768
  --format json Emit machine-readable output.
769
+ --execution-mode <mcp|cloud> Force execution mode for this run and child subpipelines.
770
+ --cloud Shortcut for --execution-mode cloud.
749
771
  --target <stage-id> Focus plan/build metadata on a specific stage (alias: --target-stage).
750
772
  --goal "<goal>" When pipeline is rlm, set the RLM goal.
751
773
  --validator <cmd|none> When pipeline is rlm, set the validator command.
@@ -4,17 +4,41 @@ export class CommandBuilder {
4
4
  this.executePipeline = executePipeline;
5
5
  }
6
6
  async build(input) {
7
- const result = await this.executePipeline();
7
+ const result = await this.executePipeline(input);
8
8
  return {
9
9
  subtaskId: input.target.id,
10
10
  artifacts: [
11
11
  { path: result.manifestPath, description: 'CLI run manifest' },
12
- { path: result.logPath, description: 'Runner log (ndjson)' }
12
+ { path: result.logPath, description: 'Runner log (ndjson)' },
13
+ ...(result.manifest.cloud_execution?.diff_path
14
+ ? [{ path: result.manifest.cloud_execution.diff_path, description: 'Cloud diff artifact' }]
15
+ : [])
13
16
  ],
14
17
  mode: input.mode,
15
18
  runId: input.runId,
16
19
  success: result.success,
17
- notes: result.notes.join('\n') || undefined
20
+ notes: result.notes.join('\n') || undefined,
21
+ cloudExecution: result.manifest.cloud_execution
22
+ ? {
23
+ taskId: result.manifest.cloud_execution.task_id,
24
+ environmentId: result.manifest.cloud_execution.environment_id,
25
+ status: result.manifest.cloud_execution.status,
26
+ statusUrl: result.manifest.cloud_execution.status_url,
27
+ submittedAt: result.manifest.cloud_execution.submitted_at,
28
+ completedAt: result.manifest.cloud_execution.completed_at,
29
+ lastPolledAt: result.manifest.cloud_execution.last_polled_at,
30
+ pollCount: result.manifest.cloud_execution.poll_count,
31
+ pollIntervalSeconds: result.manifest.cloud_execution.poll_interval_seconds,
32
+ timeoutSeconds: result.manifest.cloud_execution.timeout_seconds,
33
+ attempts: result.manifest.cloud_execution.attempts,
34
+ diffPath: result.manifest.cloud_execution.diff_path,
35
+ diffUrl: result.manifest.cloud_execution.diff_url,
36
+ diffStatus: result.manifest.cloud_execution.diff_status,
37
+ applyStatus: result.manifest.cloud_execution.apply_status,
38
+ logPath: result.manifest.cloud_execution.log_path,
39
+ error: result.manifest.cloud_execution.error
40
+ }
41
+ : null
18
42
  };
19
43
  }
20
44
  }
@@ -43,6 +43,9 @@ export class CommandPlanner {
43
43
  if (stagePlanHints.executionMode) {
44
44
  metadata.executionMode = stagePlanHints.executionMode;
45
45
  }
46
+ if (stagePlanHints.cloudEnvId) {
47
+ metadata.cloudEnvId = stagePlanHints.cloudEnvId;
48
+ }
46
49
  metadata.requiresCloud = requiresCloud;
47
50
  return {
48
51
  id: `${this.pipeline.id}:${stage.id}`,
@@ -117,12 +120,25 @@ function extractStagePlanHints(stage) {
117
120
  const executionMode = typeof rawExecutionMode === 'string'
118
121
  ? rawExecutionMode.trim().toLowerCase() || null
119
122
  : null;
123
+ const rawCloudEnvId = typeof planConfig.cloudEnvId === 'string'
124
+ ? planConfig.cloudEnvId
125
+ : typeof planConfig.cloud_env_id === 'string'
126
+ ? planConfig.cloud_env_id
127
+ : typeof stageRecord.cloudEnvId === 'string'
128
+ ? stageRecord.cloudEnvId
129
+ : typeof stageRecord.cloud_env_id === 'string'
130
+ ? stageRecord.cloud_env_id
131
+ : undefined;
132
+ const cloudEnvId = typeof rawCloudEnvId === 'string'
133
+ ? rawCloudEnvId.trim() || null
134
+ : null;
120
135
  return {
121
136
  runnable: planConfig.runnable,
122
137
  defaultTarget,
123
138
  aliases,
124
139
  requiresCloud,
125
- executionMode
140
+ executionMode,
141
+ cloudEnvId
126
142
  };
127
143
  }
128
144
  function resolveStageRequiresCloud(stage, hints) {
@@ -1,11 +1,46 @@
1
+ import { diagnoseCloudFailure } from './cloudFailureDiagnostics.js';
1
2
  export class CommandReviewer {
2
3
  getResult;
3
4
  constructor(getResult) {
4
5
  this.getResult = getResult;
5
6
  }
6
7
  async review(input) {
7
- void input;
8
8
  const result = this.requireResult();
9
+ if (input.mode === 'cloud') {
10
+ const cloudExecution = result.manifest.cloud_execution;
11
+ const status = cloudExecution?.status ?? 'unknown';
12
+ const cloudTask = cloudExecution?.task_id ?? '<unknown>';
13
+ const approved = status === 'ready' && result.success;
14
+ const diagnosis = diagnoseCloudFailure({
15
+ status,
16
+ statusDetail: result.manifest.status_detail ?? null,
17
+ error: cloudExecution?.error ?? null
18
+ });
19
+ const summaryLines = [
20
+ approved
21
+ ? `Cloud task ${cloudTask} completed successfully.`
22
+ : `Cloud task ${cloudTask} did not complete successfully (${status}).`,
23
+ `Manifest: ${result.manifestPath}`,
24
+ `Runner log: ${result.logPath}`,
25
+ ...(cloudExecution?.status_url ? [`Cloud status URL: ${cloudExecution.status_url}`] : [])
26
+ ];
27
+ if (!approved) {
28
+ summaryLines.push(`Failure class: ${diagnosis.category}`);
29
+ summaryLines.push(`Guidance: ${diagnosis.guidance}`);
30
+ }
31
+ const feedbackLines = [cloudExecution?.error ?? (result.notes.join('\n') || undefined)].filter((line) => Boolean(line && line.trim().length > 0));
32
+ if (!approved) {
33
+ feedbackLines.push(`Failure class: ${diagnosis.category}`);
34
+ feedbackLines.push(`Guidance: ${diagnosis.guidance}`);
35
+ }
36
+ return {
37
+ summary: summaryLines.join('\n'),
38
+ decision: {
39
+ approved,
40
+ feedback: feedbackLines.length > 0 ? feedbackLines.join('\n') : undefined
41
+ }
42
+ };
43
+ }
9
44
  const summaryLines = [
10
45
  result.success
11
46
  ? 'Diagnostics pipeline succeeded.'
@@ -1,4 +1,5 @@
1
1
  import { ensureGuardrailStatus } from '../run/manifest.js';
2
+ import { diagnoseCloudFailure } from './cloudFailureDiagnostics.js';
2
3
  export class CommandTester {
3
4
  getResult;
4
5
  constructor(getResult) {
@@ -6,6 +7,33 @@ export class CommandTester {
6
7
  }
7
8
  async test(input) {
8
9
  const result = this.requireResult();
10
+ if (input.mode === 'cloud') {
11
+ const cloudExecution = result.manifest.cloud_execution;
12
+ const status = cloudExecution?.status ?? 'unknown';
13
+ const passed = status === 'ready' && result.success;
14
+ const diagnosis = diagnoseCloudFailure({
15
+ status,
16
+ statusDetail: result.manifest.status_detail ?? null,
17
+ error: cloudExecution?.error ?? null
18
+ });
19
+ const failureDetails = cloudExecution?.error ??
20
+ `Cloud task status: ${status}${cloudExecution?.task_id ? ` (${cloudExecution.task_id})` : ''}`;
21
+ const reports = [
22
+ {
23
+ name: 'cloud-task',
24
+ status: passed ? 'passed' : 'failed',
25
+ details: passed
26
+ ? failureDetails
27
+ : `${failureDetails}\nFailure class: ${diagnosis.category}. ${diagnosis.guidance}`
28
+ }
29
+ ];
30
+ return {
31
+ subtaskId: input.build.subtaskId,
32
+ success: passed,
33
+ reports,
34
+ runId: input.runId
35
+ };
36
+ }
9
37
  const guardrailStatus = ensureGuardrailStatus(result.manifest);
10
38
  const reports = [
11
39
  {
@@ -0,0 +1,45 @@
1
/**
 * Ordered classification rules for cloud failures. The first rule with a matching
 * pattern wins, so more specific categories come first.
 *
 * String patterns are matched as lower-case substrings. RegExp patterns are used
 * where a substring would be too loose: HTTP gateway codes must stand alone so they
 * are not found inside longer numbers or hexadecimal identifiers.
 */
const CLOUD_FAILURE_RULES = [
  {
    category: 'configuration',
    patterns: ['cloud-env-missing', 'codex_cloud_env_id', 'no environment id is configured', '--env'],
    guidance: 'Set CODEX_CLOUD_ENV_ID (or metadata.cloudEnvId) to a valid cloud environment id before re-running.'
  },
  {
    category: 'credentials',
    patterns: ['unauthorized', 'forbidden', 'not logged in', 'login', 'api key', 'credential', 'token'],
    guidance: 'Ensure Codex Cloud credentials are available to the runner and have access to the configured environment.'
  },
  {
    category: 'connectivity',
    patterns: ['enotfound', 'econn', 'timed out', 'timeout', 'network', /\b50[234]\b/],
    guidance: 'Cloud endpoint connectivity looks unstable; retry and inspect network/endpoint health.'
  }
];

/** Cloud statuses that mean the task ended unsuccessfully. */
const TERMINAL_FAILURE_STATUSES = new Set(['failed', 'error', 'cancelled']);

/** Test one rule pattern (substring or RegExp) against already lower-cased text. */
function matchesCloudFailurePattern(pattern, normalizedText) {
  return typeof pattern === 'string' ? normalizedText.includes(pattern) : pattern.test(normalizedText);
}

/**
 * Classify a cloud failure from whatever textual evidence is available.
 *
 * @param {{ status?: string | null, statusDetail?: string | null, error?: string | null }} [options]
 * @returns {{ category: string, guidance: string, signal: string }} `signal` is the
 *   newline-joined, non-blank evidence (status, status detail, error) that was inspected.
 */
export function diagnoseCloudFailure(options = {}) {
  const signal = [options.status ?? null, options.statusDetail ?? null, options.error ?? null]
    .filter((value) => typeof value === 'string' && value.trim().length > 0)
    .join('\n');
  const normalized = signal.toLowerCase();

  const matchedRule = CLOUD_FAILURE_RULES.find((rule) =>
    rule.patterns.some((pattern) => matchesCloudFailurePattern(pattern, normalized)));
  if (matchedRule) {
    return { category: matchedRule.category, guidance: matchedRule.guidance, signal };
  }

  // Only string statuses can be terminal; anything else falls through to "unknown".
  const status = typeof options.status === 'string' ? options.status.toLowerCase() : null;
  if (status !== null && TERMINAL_FAILURE_STATUSES.has(status)) {
    return {
      category: 'execution',
      guidance: 'Inspect manifest cloud_execution.error and cloud command logs for the terminal cloud failure.',
      signal
    };
  }

  return {
    category: 'unknown',
    guidance: 'Inspect manifest status_detail plus cloud command logs to classify this failure.',
    signal
  };
}
@@ -20,7 +20,7 @@ import { PipelineResolver } from './services/pipelineResolver.js';
20
20
  import { ControlPlaneService } from './services/controlPlaneService.js';
21
21
  import { ControlWatcher } from './control/controlWatcher.js';
22
22
  import { SchedulerService } from './services/schedulerService.js';
23
- import { applyHandlesToRunSummary, applyPrivacyToRunSummary, persistRunSummary } from './services/runSummaryWriter.js';
23
+ import { applyHandlesToRunSummary, applyPrivacyToRunSummary, applyCloudExecutionToRunSummary, persistRunSummary } from './services/runSummaryWriter.js';
24
24
  import { prepareRun, resolvePipelineForResume, overrideTaskEnvironment } from './services/runPreparation.js';
25
25
  import { loadPackageConfig, loadUserConfig } from './config/userConfig.js';
26
26
  import { loadDelegationConfigFiles, computeEffectiveDelegationConfig, parseDelegationConfigOverride, splitDelegationConfigOverrides } from './config/delegationConfig.js';
@@ -28,8 +28,13 @@ import { ControlServer } from './control/controlServer.js';
28
28
  import { RunEventEmitter, RunEventPublisher, snapshotStages } from './events/runEvents.js';
29
29
  import { RunEventStream, attachRunEventAdapter } from './events/runEventStream.js';
30
30
  import { CLI_EXECUTION_MODE_PARSER, resolveRequiresCloudPolicy } from '../utils/executionMode.js';
31
+ import { resolveCodexCliBin } from './utils/codexCli.js';
32
+ import { CodexCloudTaskExecutor } from '../cloud/CodexCloudTaskExecutor.js';
31
33
  const resolveBaseEnvironment = () => normalizeEnvironmentPaths(resolveEnvironmentPaths());
32
34
  const CONFIG_OVERRIDE_ENV_KEYS = ['CODEX_CONFIG_OVERRIDES', 'CODEX_MCP_CONFIG_OVERRIDES'];
35
+ const DEFAULT_CLOUD_POLL_INTERVAL_SECONDS = 10;
36
+ const DEFAULT_CLOUD_TIMEOUT_SECONDS = 1800;
37
+ const DEFAULT_CLOUD_ATTEMPTS = 1;
33
38
  function collectDelegationEnvOverrides(env = process.env) {
34
39
  const layers = [];
35
40
  for (const key of CONFIG_OVERRIDE_ENV_KEYS) {
@@ -52,6 +57,37 @@ function collectDelegationEnvOverrides(env = process.env) {
52
57
  }
53
58
  return layers;
54
59
  }
60
/**
 * Normalise an optional string setting.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string | null} The trimmed string, or `null` for non-strings and blank strings.
 */
function readCloudString(value) {
  if (typeof value !== 'string') {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length === 0 ? null : trimmed;
}
63
/**
 * Parse a positive whole-number setting (for example seconds or attempt counts).
 *
 * Only plain decimal integers are accepted. Values such as "1e3", "30s" or "12.9"
 * fall back to the default rather than being silently truncated by `parseInt`
 * (which would read them as 1, 30 and 12).
 *
 * @param {string | number | null | undefined} raw - Raw value, typically from an environment variable.
 * @param {number} fallback - Value returned when `raw` is missing or invalid.
 * @returns {number} The parsed positive integer, or `fallback`.
 */
function readCloudNumber(raw, fallback) {
  if (raw === undefined || raw === null) {
    return fallback;
  }
  const text = String(raw).trim();
  if (!/^\+?\d+$/.test(text)) {
    return fallback;
  }
  const parsed = Number.parseInt(text, 10);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
}
73
/**
 * Find the cloud environment id to use for a target.
 *
 * Sources are consulted in priority order: target metadata, then task metadata
 * (nested `cloud` object first), then the `CODEX_CLOUD_ENV_ID` override, then the
 * process environment. Blank and non-string values are ignored.
 *
 * @param {object} task - Task whose optional `metadata` may carry an environment id.
 * @param {object} target - Plan target whose optional `metadata` may carry an environment id.
 * @param {Record<string, string> | undefined} envOverrides - Per-run environment overrides.
 * @returns {string | null} The first usable id (trimmed), or `null` when none is configured.
 */
function resolveCloudEnvironmentId(task, target, envOverrides) {
  const normalize = (value) => (typeof value === 'string' && value.trim().length > 0 ? value.trim() : null);
  const targetMeta = target.metadata ?? {};
  const taskMeta = task.metadata ?? {};
  const taskCloud = taskMeta.cloud ?? null;

  const sources = [
    targetMeta.cloudEnvId,
    targetMeta.cloud_env_id,
    targetMeta.envId,
    targetMeta.environmentId,
    taskCloud?.envId,
    taskCloud?.environmentId,
    taskMeta.cloudEnvId,
    taskMeta.cloud_env_id,
    envOverrides?.CODEX_CLOUD_ENV_ID,
    process.env.CODEX_CLOUD_ENV_ID
  ];

  for (const source of sources) {
    const environmentId = normalize(source);
    if (environmentId !== null) {
      return environmentId;
    }
  }
  return null;
}
55
91
  export class CodexOrchestrator {
56
92
  baseEnv;
57
93
  controlPlane = new ControlPlaneService();
@@ -136,7 +172,8 @@ export class CodexOrchestrator {
136
172
  eventStream: stream,
137
173
  onEventEntry,
138
174
  persister,
139
- envOverrides: preparation.envOverrides
175
+ envOverrides: preparation.envOverrides,
176
+ executionModeOverride: options.executionMode
140
177
  });
141
178
  }
142
179
  finally {
@@ -360,7 +397,7 @@ export class CodexOrchestrator {
360
397
  logPath: params.paths.logPath
361
398
  });
362
399
  }
363
- createTaskManager(runId, pipeline, executePipeline, getResult, plannerInstance, env) {
400
+ createTaskManager(runId, pipeline, executePipeline, getResult, plannerInstance, env, modeOverride) {
364
401
  const planner = plannerInstance ?? new CommandPlanner(pipeline);
365
402
  const builder = new CommandBuilder(executePipeline);
366
403
  const tester = new CommandTester(getResult);
@@ -373,12 +410,15 @@ export class CodexOrchestrator {
373
410
  tester,
374
411
  reviewer,
375
412
  runIdFactory: () => runId,
376
- modePolicy: (task, subtask) => this.determineMode(task, subtask),
413
+ modePolicy: (task, subtask) => this.determineMode(task, subtask, modeOverride),
377
414
  persistence: { autoStart: true, stateStore, manifestWriter }
378
415
  };
379
416
  return new TaskManager(options);
380
417
  }
381
- determineMode(task, subtask) {
418
+ determineMode(task, subtask, overrideMode) {
419
+ if (overrideMode) {
420
+ return overrideMode;
421
+ }
382
422
  if (this.requiresCloudExecution(task, subtask)) {
383
423
  return 'cloud';
384
424
  }
@@ -402,6 +442,9 @@ export class CodexOrchestrator {
402
442
  return Boolean(task.metadata?.execution?.parallel);
403
443
  }
404
444
  async executePipeline(options) {
445
+ if (options.mode === 'cloud') {
446
+ return await this.executeCloudPipeline(options);
447
+ }
405
448
  const { env, pipeline, manifest, paths, runEvents, envOverrides } = options;
406
449
  const notes = [];
407
450
  let success = true;
@@ -513,7 +556,8 @@ export class CodexOrchestrator {
513
556
  taskId: env.taskId,
514
557
  pipelineId: stage.pipeline,
515
558
  parentRunId: manifest.run_id,
516
- format: 'json'
559
+ format: 'json',
560
+ executionMode: options.executionModeOverride
517
561
  });
518
562
  entry.completed_at = isoTimestamp();
519
563
  entry.sub_run_id = child.manifest.run_id;
@@ -607,31 +651,251 @@ export class CodexOrchestrator {
607
651
  logPath: relativeToRepo(env, paths.logPath)
608
652
  };
609
653
  }
654
/**
 * Execute a single target stage of the pipeline as a Codex Cloud task.
 *
 * Flow, as implemented below:
 *  1. Mark the manifest in progress, persist it, emit `runStarted`, start a heartbeat timer.
 *  2. Resolve the target stage and its manifest command entry.
 *  3. Honour pause/cancel via the control watcher before doing any work.
 *  4. Mark every non-target command entry as skipped.
 *  5. Resolve the cloud environment id; without one, record a failed `cloud_execution`.
 *  6. Otherwise run `CodexCloudTaskExecutor.execute` and copy its outcome into the
 *     manifest and the target command entry.
 *  7. Finalize the manifest status, persist, append metrics.
 *
 * There is no `catch`: anything thrown inside the `try` propagates to the caller
 * after the `finally` has stopped the heartbeat timer and forced a persist.
 *
 * @param {object} options - Run context; the fields used are destructured on the first line,
 *   plus `persister`, `eventStream` and `onEventEntry`.
 * @returns {Promise<{success: boolean, notes: string[], manifest: object, manifestPath: string, logPath: string}>}
 *   `manifestPath` and `logPath` are passed through `relativeToRepo`.
 */
+ async executeCloudPipeline(options) {
655
+ const { env, pipeline, manifest, paths, runEvents, target, task, envOverrides } = options;
656
+ const notes = [];
657
+ let success = true;
658
+ manifest.guardrail_status = undefined;
// Reuse the caller's persister when provided; otherwise create one whose interval is
// the heartbeat interval in milliseconds, with a floor of 1000 ms.
659
+ const persister = options.persister ??
660
+ new ManifestPersister({
661
+ manifest,
662
+ paths,
663
+ persistIntervalMs: Math.max(1000, manifest.heartbeat_interval_seconds * 1000)
664
+ });
665
+ const schedulePersist = (persistOptions = {}) => persister.schedule(persistOptions);
666
+ const pushHeartbeat = (forceManifest = false) => {
667
+ updateHeartbeat(manifest);
668
+ return schedulePersist({ manifest: forceManifest, heartbeat: true, force: forceManifest });
669
+ };
670
+ const controlWatcher = new ControlWatcher({
671
+ paths,
672
+ manifest,
673
+ eventStream: options.eventStream,
674
+ onEntry: options.onEventEntry,
675
+ persist: () => schedulePersist({ manifest: true, force: true })
676
+ });
677
+ manifest.status = 'in_progress';
678
+ updateHeartbeat(manifest);
679
+ await schedulePersist({ manifest: true, heartbeat: true, force: true });
680
+ runEvents?.runStarted(snapshotStages(manifest, pipeline), manifest.status);
// Periodic heartbeat; a failed heartbeat write is logged as a warning and does not abort the run.
681
+ const heartbeatInterval = setInterval(() => {
682
+ void pushHeartbeat(false).catch((error) => {
683
+ logger.warn(`Heartbeat update failed for run ${manifest.run_id}: ${error?.message ?? String(error)}`);
684
+ });
685
+ }, manifest.heartbeat_interval_seconds * 1000);
686
+ const targetStageId = this.resolveTargetStageId(target, pipeline);
687
+ const targetStage = targetStageId
688
+ ? pipeline.stages.find((stage) => stage.id === targetStageId)
689
+ : undefined;
690
+ const targetEntry = targetStageId
691
+ ? manifest.commands.find((command) => command.id === targetStageId)
692
+ : undefined;
693
+ try {
694
+ await controlWatcher.sync();
695
+ await controlWatcher.waitForResume();
696
+ if (controlWatcher.isCanceled()) {
697
+ manifest.status_detail = 'run-canceled';
698
+ success = false;
699
+ }
// Cloud mode needs a stage of kind 'command' that also has a manifest command entry.
700
+ else if (!targetStage || targetStage.kind !== 'command' || !targetEntry) {
701
+ success = false;
702
+ manifest.status_detail = 'cloud-target-missing';
703
+ const detail = targetStageId
704
+ ? `Cloud execution target "${targetStageId}" could not be resolved to a command stage.`
705
+ : `Cloud execution target "${target.id}" could not be resolved.`;
706
+ appendSummary(manifest, detail);
707
+ notes.push(detail);
708
+ }
709
+ else {
// Every command entry other than the target is recorded as skipped.
710
+ for (let i = 0; i < manifest.commands.length; i += 1) {
711
+ const entry = manifest.commands[i];
712
+ if (!entry || entry.id === targetStageId) {
713
+ continue;
714
+ }
715
+ entry.status = 'skipped';
716
+ entry.started_at = entry.started_at ?? isoTimestamp();
717
+ entry.completed_at = isoTimestamp();
718
+ entry.summary = `Skipped in cloud mode (target stage: ${targetStageId}).`;
719
+ }
720
+ const environmentId = resolveCloudEnvironmentId(task, target, envOverrides);
721
+ if (!environmentId) {
// No environment id: record a failed cloud_execution (default poll/timeout/attempt
// values, no task id) and fail the target entry with exit code 1.
722
+ success = false;
723
+ manifest.status_detail = 'cloud-env-missing';
724
+ const detail = 'Cloud execution requested but no environment id is configured. Set CODEX_CLOUD_ENV_ID or provide target metadata.cloudEnvId.';
725
+ manifest.cloud_execution = {
726
+ task_id: null,
727
+ environment_id: null,
728
+ status: 'failed',
729
+ status_url: null,
730
+ submitted_at: null,
731
+ completed_at: isoTimestamp(),
732
+ last_polled_at: null,
733
+ poll_count: 0,
734
+ poll_interval_seconds: DEFAULT_CLOUD_POLL_INTERVAL_SECONDS,
735
+ timeout_seconds: DEFAULT_CLOUD_TIMEOUT_SECONDS,
736
+ attempts: DEFAULT_CLOUD_ATTEMPTS,
737
+ diff_path: null,
738
+ diff_url: null,
739
+ diff_status: 'unavailable',
740
+ apply_status: 'not_requested',
741
+ log_path: null,
742
+ error: detail
743
+ };
744
+ appendSummary(manifest, detail);
745
+ notes.push(detail);
746
+ targetEntry.status = 'failed';
747
+ targetEntry.started_at = targetEntry.started_at ?? isoTimestamp();
748
+ targetEntry.completed_at = isoTimestamp();
749
+ targetEntry.exit_code = 1;
750
+ targetEntry.summary = detail;
751
+ }
752
+ else {
753
+ targetEntry.status = 'running';
754
+ targetEntry.started_at = isoTimestamp();
755
+ await schedulePersist({ manifest: true, force: true });
756
+ runEvents?.stageStarted({
757
+ stageId: targetStage.id,
758
+ stageIndex: targetEntry.index,
759
+ title: targetStage.title,
760
+ kind: 'command',
761
+ logPath: targetEntry.log_path,
762
+ status: targetEntry.status
763
+ });
764
+ const executor = new CodexCloudTaskExecutor();
765
+ const prompt = this.buildCloudPrompt(task, target, pipeline, targetStage);
// Tunables: a per-run override wins over process.env; missing or invalid values use the defaults.
766
+ const pollIntervalSeconds = readCloudNumber(envOverrides?.CODEX_CLOUD_POLL_INTERVAL_SECONDS ?? process.env.CODEX_CLOUD_POLL_INTERVAL_SECONDS, DEFAULT_CLOUD_POLL_INTERVAL_SECONDS);
767
+ const timeoutSeconds = readCloudNumber(envOverrides?.CODEX_CLOUD_TIMEOUT_SECONDS ?? process.env.CODEX_CLOUD_TIMEOUT_SECONDS, DEFAULT_CLOUD_TIMEOUT_SECONDS);
768
+ const attempts = readCloudNumber(envOverrides?.CODEX_CLOUD_EXEC_ATTEMPTS ?? process.env.CODEX_CLOUD_EXEC_ATTEMPTS, DEFAULT_CLOUD_ATTEMPTS);
769
+ const branch = readCloudString(envOverrides?.CODEX_CLOUD_BRANCH) ??
770
+ readCloudString(process.env.CODEX_CLOUD_BRANCH);
771
+ const codexBin = resolveCodexCliBin({ ...process.env, ...(envOverrides ?? {}) });
772
+ const cloudResult = await executor.execute({
773
+ codexBin,
774
+ prompt,
775
+ environmentId,
776
+ repoRoot: env.repoRoot,
777
+ runDir: paths.runDir,
778
+ pollIntervalSeconds,
779
+ timeoutSeconds,
780
+ attempts,
781
+ branch,
782
+ env: envOverrides
783
+ });
// Copy the executor outcome into the manifest and the target command entry.
784
+ success = cloudResult.success;
785
+ notes.push(...cloudResult.notes);
786
+ manifest.cloud_execution = cloudResult.cloudExecution;
787
+ targetEntry.log_path = cloudResult.cloudExecution.log_path;
788
+ targetEntry.completed_at = isoTimestamp();
789
+ targetEntry.exit_code = cloudResult.success ? 0 : 1;
790
+ targetEntry.status = cloudResult.success ? 'succeeded' : 'failed';
791
+ targetEntry.summary = cloudResult.summary;
792
+ if (!cloudResult.success) {
793
+ manifest.status_detail = `cloud:${targetStage.id}:failed`;
794
+ appendSummary(manifest, cloudResult.summary);
795
+ }
796
+ await schedulePersist({ manifest: true, force: true });
797
+ runEvents?.stageCompleted({
798
+ stageId: targetStage.id,
799
+ stageIndex: targetEntry.index,
800
+ title: targetStage.title,
801
+ kind: 'command',
802
+ status: targetEntry.status,
803
+ exitCode: targetEntry.exit_code,
804
+ summary: targetEntry.summary,
805
+ logPath: targetEntry.log_path
806
+ });
807
+ }
808
+ }
809
+ }
810
+ finally {
811
+ clearInterval(heartbeatInterval);
812
+ await schedulePersist({ force: true });
813
+ }
// Re-check control state: a cancellation observed here takes precedence over success/failure.
814
+ await controlWatcher.sync();
815
+ if (controlWatcher.isCanceled()) {
816
+ finalizeStatus(manifest, 'cancelled', manifest.status_detail ?? 'run-canceled');
817
+ }
818
+ else if (success) {
819
+ finalizeStatus(manifest, 'succeeded');
820
+ }
821
+ else {
822
+ finalizeStatus(manifest, 'failed', manifest.status_detail ?? 'cloud-execution-failed');
823
+ }
824
+ updateHeartbeat(manifest);
825
+ await schedulePersist({ manifest: true, heartbeat: true, force: true }).catch((error) => {
826
+ logger.warn(`Heartbeat update failed for run ${manifest.run_id}: ${error?.message ?? String(error)}`);
827
+ });
828
+ await schedulePersist({ force: true });
829
+ await appendMetricsEntry(env, paths, manifest, persister);
830
+ return {
831
+ success,
832
+ notes,
833
+ manifest,
834
+ manifestPath: relativeToRepo(env, paths.manifestPath),
835
+ logPath: relativeToRepo(env, paths.logPath)
836
+ };
837
+ }
838
+ resolveTargetStageId(target, pipeline) {
839
+ const metadataStageId = typeof target.metadata?.stageId === 'string' ? target.metadata.stageId : null;
840
+ if (metadataStageId && pipeline.stages.some((stage) => stage.id === metadataStageId)) {
841
+ return metadataStageId;
842
+ }
843
+ if (target.id.includes(':')) {
844
+ const suffix = target.id.split(':').pop() ?? null;
845
+ if (suffix && pipeline.stages.some((stage) => stage.id === suffix)) {
846
+ return suffix;
847
+ }
848
+ }
849
+ if (pipeline.stages.some((stage) => stage.id === target.id)) {
850
+ return target.id;
851
+ }
852
+ return null;
853
+ }
854
+ buildCloudPrompt(task, target, pipeline, stage) {
855
+ const lines = [
856
+ `Task ID: ${task.id}`,
857
+ `Task title: ${task.title}`,
858
+ task.description ? `Task description: ${task.description}` : null,
859
+ `Pipeline: ${pipeline.id}`,
860
+ `Target stage: ${stage.id} (${target.description})`,
861
+ '',
862
+ 'Apply the required repository changes for this target stage and produce a diff.'
863
+ ].filter((line) => Boolean(line));
864
+ return lines.join('\n');
865
+ }
610
866
  async performRunLifecycle(context) {
611
- const { env, pipeline, manifest, paths, planner, taskContext, runId, persister, envOverrides } = context;
612
- let pipelineResult = null;
613
- let executing = null;
614
- const executePipeline = async () => {
615
- if (!executing) {
616
- executing = this.executePipeline({
617
- env,
618
- pipeline,
619
- manifest,
620
- paths,
621
- runEvents: context.runEvents,
622
- eventStream: context.eventStream,
623
- onEventEntry: context.onEventEntry,
624
- persister,
625
- envOverrides
626
- }).then((result) => {
627
- pipelineResult = result;
628
- return result;
629
- });
867
+ const { env, pipeline, manifest, paths, planner, taskContext, runId, persister, envOverrides, executionModeOverride } = context;
868
+ let latestPipelineResult = null;
869
+ const executingByKey = new Map();
870
+ const executePipeline = async (input) => {
871
+ const key = `${input.mode}:${input.target.id}`;
872
+ const existing = executingByKey.get(key);
873
+ if (existing) {
874
+ return existing;
630
875
  }
876
+ const executing = this.executePipeline({
877
+ env,
878
+ pipeline,
879
+ manifest,
880
+ paths,
881
+ mode: input.mode,
882
+ executionModeOverride,
883
+ target: input.target,
884
+ task: taskContext,
885
+ runEvents: context.runEvents,
886
+ eventStream: context.eventStream,
887
+ onEventEntry: context.onEventEntry,
888
+ persister,
889
+ envOverrides
890
+ }).then((result) => {
891
+ latestPipelineResult = result;
892
+ return result;
893
+ });
894
+ executingByKey.set(key, executing);
631
895
  return executing;
632
896
  };
633
- const getResult = () => pipelineResult;
634
- const manager = this.createTaskManager(runId, pipeline, executePipeline, getResult, planner, env);
897
+ const getResult = () => latestPipelineResult;
898
+ const manager = this.createTaskManager(runId, pipeline, executePipeline, getResult, planner, env, executionModeOverride);
635
899
  this.attachPlanTargetTracker(manager, manifest, paths, persister);
636
900
  getPrivacyGuard().reset();
637
901
  const controlPlaneResult = await this.controlPlane.guard({
@@ -672,6 +936,7 @@ export class CodexOrchestrator {
672
936
  this.scheduler.applySchedulerToRunSummary(runSummary, schedulerPlan);
673
937
  applyHandlesToRunSummary(runSummary, manifest);
674
938
  applyPrivacyToRunSummary(runSummary, manifest);
939
+ applyCloudExecutionToRunSummary(runSummary, manifest);
675
940
  this.controlPlane.applyControlPlaneToRunSummary(runSummary, controlPlaneResult);
676
941
  await persistRunSummary(env, paths, manifest, runSummary, persister);
677
942
  context.runEvents?.runCompleted({
@@ -722,7 +987,8 @@ export class CodexOrchestrator {
722
987
  log_path: manifest.log_path,
723
988
  heartbeat_at: manifest.heartbeat_at,
724
989
  commands: manifest.commands,
725
- child_runs: manifest.child_runs
990
+ child_runs: manifest.child_runs,
991
+ cloud_execution: manifest.cloud_execution ?? null
726
992
  };
727
993
  }
728
994
  renderStatus(manifest) {
@@ -731,6 +997,10 @@ export class CodexOrchestrator {
731
997
  logger.info(`Started: ${manifest.started_at}`);
732
998
  logger.info(`Completed: ${manifest.completed_at ?? 'in-progress'}`);
733
999
  logger.info(`Manifest: ${manifest.artifact_root}/manifest.json`);
1000
+ if (manifest.cloud_execution?.task_id) {
1001
+ logger.info(`Cloud: ${manifest.cloud_execution.task_id} [${manifest.cloud_execution.status}]` +
1002
+ (manifest.cloud_execution.status_url ? ` ${manifest.cloud_execution.status_url}` : ''));
1003
+ }
734
1004
  logger.info('Commands:');
735
1005
  for (const command of manifest.commands) {
736
1006
  const summary = command.summary ? ` — ${command.summary}` : '';
@@ -55,6 +55,7 @@ export async function bootstrapManifest(runId, options) {
55
55
  instructions_sources: [],
56
56
  prompt_packs: [],
57
57
  guardrails_required: pipeline.guardrailsRequired !== false,
58
+ cloud_execution: null,
58
59
  learning: {
59
60
  validation: {
60
61
  mode: 'per-task',
@@ -170,6 +171,7 @@ export function resetForResume(manifest) {
170
171
  manifest.status = 'in_progress';
171
172
  manifest.status_detail = 'resuming';
172
173
  manifest.guardrail_status = undefined;
174
+ manifest.cloud_execution = null;
173
175
  }
174
176
  export function recordResumeEvent(manifest, event) {
175
177
  manifest.resume_events.push({ ...event, timestamp: isoTimestamp() });
@@ -27,6 +27,30 @@ export function applyPrivacyToRunSummary(runSummary, manifest) {
27
27
  allowedFrames: manifest.privacy.totals.allowed_frames
28
28
  };
29
29
  }
30
/**
 * Copy the manifest's cloud execution record onto the run summary, converting
 * snake_case manifest keys to camelCase summary keys.
 *
 * Does nothing when the manifest has no `cloud_execution` record.
 *
 * @param {object} runSummary - Summary object; receives a `cloudExecution` property.
 * @param {{ cloud_execution?: object | null }} manifest - Run manifest.
 * @returns {void}
 */
export function applyCloudExecutionToRunSummary(runSummary, manifest) {
  const cloud = manifest.cloud_execution;
  if (!cloud) {
    return;
  }
  // [summary key, manifest key] pairs, in the order they appear in the summary.
  const fieldPairs = [
    ['taskId', 'task_id'],
    ['environmentId', 'environment_id'],
    ['status', 'status'],
    ['statusUrl', 'status_url'],
    ['submittedAt', 'submitted_at'],
    ['completedAt', 'completed_at'],
    ['lastPolledAt', 'last_polled_at'],
    ['pollCount', 'poll_count'],
    ['pollIntervalSeconds', 'poll_interval_seconds'],
    ['timeoutSeconds', 'timeout_seconds'],
    ['attempts', 'attempts'],
    ['diffPath', 'diff_path'],
    ['diffUrl', 'diff_url'],
    ['diffStatus', 'diff_status'],
    ['applyStatus', 'apply_status'],
    ['logPath', 'log_path'],
    ['error', 'error']
  ];
  runSummary.cloudExecution = Object.fromEntries(
    fieldPairs.map(([summaryKey, manifestKey]) => [summaryKey, cloud[manifestKey]])
  );
}
30
54
  export async function persistRunSummary(env, paths, manifest, runSummary, persister) {
31
55
  const summaryPath = join(paths.runDir, 'run-summary.json');
32
56
  await writeJsonAtomic(summaryPath, runSummary);
@@ -0,0 +1,255 @@
1
+ import { spawn } from 'node:child_process';
2
+ import { appendFile, mkdir, writeFile } from 'node:fs/promises';
3
+ import { join, relative } from 'node:path';
4
+ import { setTimeout as sleep } from 'node:timers/promises';
5
+ import { isoTimestamp } from '../cli/utils/time.js';
6
+ const TASK_ID_PATTERN = /\btask_[a-z]_[a-f0-9]+\b/i;
7
+ const MAX_LOG_CHARS = 32 * 1024;
8
+ const STATUS_RETRY_LIMIT = 3;
9
+ const STATUS_RETRY_BACKOFF_MS = 1500;
10
+ const DEFAULT_LIST_LIMIT = 20;
11
/**
 * Returns the first substring of `text` that matches `TASK_ID_PATTERN`.
 *
 * @param {string} text - Text to scan (for example CLI stdout/stderr).
 * @returns {string|null} The matched task id, or `null` when nothing matches.
 */
export function extractCloudTaskId(text) {
  const [taskId] = TASK_ID_PATTERN.exec(text) ?? [];
  return taskId ? taskId : null;
}
18
/**
 * Extracts the first bracketed status token (e.g. `[READY]`) that opens a line
 * of `text`, ignoring leading whitespace.
 *
 * Only upper-case letters and underscores are accepted inside the brackets.
 *
 * @param {string} text - Text to scan.
 * @returns {string|null} The token without brackets, or `null` if none is found.
 */
export function parseCloudStatusToken(text) {
  const [, token] = /^\s*\[([A-Z_]+)\]/m.exec(text) ?? [];
  return token ? token.toUpperCase() : null;
}
25
// Lookup table from CLI status tokens to the normalized execution status.
// A Map is used so that arbitrary tokens (e.g. "constructor") never resolve
// to inherited object properties.
const CLOUD_STATUS_BY_TOKEN = new Map([
  ['READY', 'ready'],
  ['COMPLETED', 'ready'],
  ['SUCCEEDED', 'ready'],
  ['RUNNING', 'running'],
  ['IN_PROGRESS', 'running'],
  ['QUEUED', 'queued'],
  ['PENDING', 'queued'],
  ['ERROR', 'error'],
  ['FAILED', 'failed'],
  ['CANCELLED', 'cancelled'],
  ['CANCELED', 'cancelled']
]);

/**
 * Normalizes a status token into one of
 * `ready | running | queued | error | failed | cancelled | unknown`.
 *
 * Matching is exact and case-sensitive; a missing or unrecognized token maps
 * to `'unknown'`.
 *
 * @param {string|null|undefined} token - Status token without brackets.
 * @returns {string} The normalized status.
 */
export function mapCloudStatusToken(token) {
  if (!token) {
    return 'unknown';
  }
  return CLOUD_STATUS_BY_TOKEN.get(token) ?? 'unknown';
}
51
// Fallbacks applied only when a caller passes a non-finite numeric setting
// (undefined, NaN, Infinity). NOTE(review): callers are presumably expected to
// pass validated values; confirm these fallbacks match the CLI defaults.
const FALLBACK_POLL_INTERVAL_SECONDS = 10;
const FALLBACK_TIMEOUT_SECONDS = 1800;
const FALLBACK_ATTEMPTS = 1;

/**
 * Coerces a numeric setting to an integer >= 1.
 * Non-finite input yields `fallback`; fractional input is truncated.
 */
function toPositiveInteger(value, fallback) {
  const numeric = Number(value);
  if (!Number.isFinite(numeric)) {
    return fallback;
  }
  return Math.max(1, Math.trunc(numeric));
}

/**
 * Runs a prompt as a cloud task by invoking the `cloud` subcommands of the
 * configured Codex binary: `cloud exec` to submit, `cloud status` to poll until
 * a terminal status or the timeout, then `cloud diff` to capture the patch.
 *
 * Every command invocation is appended as one JSON line to
 * `<runDir>/cloud/commands.ndjson`.
 */
export class CodexCloudTaskExecutor {
  commandRunner;
  now;
  sleepFn;

  /**
   * @param {object} [options]
   * @param {Function} [options.commandRunner] - Async `({ command, args, cwd, env })`
   *   returning `{ exitCode, stdout, stderr }`; defaults to `defaultCloudCommandRunner`.
   * @param {Function} [options.now] - Returns the timestamp stored in the record;
   *   defaults to `isoTimestamp`.
   * @param {Function} [options.sleepFn] - Async sleep taking milliseconds.
   */
  constructor(options = {}) {
    this.commandRunner = options.commandRunner ?? defaultCloudCommandRunner;
    this.now = options.now ?? isoTimestamp;
    this.sleepFn = options.sleepFn ?? sleep;
  }

  /**
   * Submits the task, waits for it to finish and captures its diff.
   *
   * Failures inside the submit/poll/diff sequence are reported through the
   * returned object instead of a rejection. Creating the `cloud` directory
   * happens before that sequence, so a failure there does reject.
   *
   * @param {object} input - Reads `runDir`, `repoRoot`, `codexBin`,
   *   `environmentId`, `prompt`, optional `branch`, optional `env`,
   *   `pollIntervalSeconds`, `timeoutSeconds` and `attempts`.
   * @returns {Promise<{success: boolean, summary: string, notes: string[], cloudExecution: object}>}
   */
  async execute(input) {
    const cloudDir = join(input.runDir, 'cloud');
    await mkdir(cloudDir, { recursive: true });
    const commandLogPath = join(cloudDir, 'commands.ndjson');
    const env = { ...process.env, ...(input.env ?? {}) };
    const notes = [];
    const cloudExecution = {
      task_id: null,
      environment_id: input.environmentId,
      status: 'queued',
      status_url: null,
      submitted_at: null,
      completed_at: null,
      last_polled_at: null,
      poll_count: 0,
      // Sanitized so NaN never reaches the CLI arguments, the timeout
      // arithmetic or the persisted record.
      poll_interval_seconds: toPositiveInteger(input.pollIntervalSeconds, FALLBACK_POLL_INTERVAL_SECONDS),
      timeout_seconds: toPositiveInteger(input.timeoutSeconds, FALLBACK_TIMEOUT_SECONDS),
      attempts: toPositiveInteger(input.attempts, FALLBACK_ATTEMPTS),
      diff_path: null,
      diff_url: null,
      diff_status: 'pending',
      apply_status: 'not_requested',
      log_path: relative(input.repoRoot, commandLogPath),
      error: null
    };

    // Runs one CLI command and appends an audit line (with truncated output).
    const runCloudCommand = async (args) => {
      const result = await this.commandRunner({
        command: input.codexBin,
        args,
        cwd: input.repoRoot,
        env
      });
      await appendFile(commandLogPath, `${JSON.stringify({
        timestamp: this.now(),
        command: input.codexBin,
        args,
        exit_code: result.exitCode,
        stdout: truncate(result.stdout),
        stderr: truncate(result.stderr)
      })}\n`, 'utf8');
      return result;
    };

    try {
      const execArgs = ['cloud', 'exec', '--env', input.environmentId, '--attempts', String(cloudExecution.attempts)];
      if (input.branch && input.branch.trim()) {
        execArgs.push('--branch', input.branch.trim());
      }
      execArgs.push(input.prompt);
      const execResult = await runCloudCommand(execArgs);
      if (execResult.exitCode !== 0) {
        throw new Error(`codex cloud exec failed with exit ${execResult.exitCode}: ${compactError(execResult.stderr, execResult.stdout)}`);
      }
      const taskId = extractCloudTaskId(`${execResult.stdout}\n${execResult.stderr}`);
      if (!taskId) {
        throw new Error('Unable to parse cloud task id from codex cloud exec output.');
      }
      cloudExecution.task_id = taskId;
      cloudExecution.status = 'running';
      cloudExecution.submitted_at = this.now();
      notes.push(`Cloud task submitted: ${taskId}`);

      const metadata = await this.lookupTaskMetadata(taskId, runCloudCommand);
      if (metadata?.url) {
        cloudExecution.status_url = metadata.url;
      }

      const timeoutAt = Date.now() + cloudExecution.timeout_seconds * 1000;
      let statusRetries = 0;
      while (Date.now() < timeoutAt) {
        const statusResult = await runCloudCommand(['cloud', 'status', taskId]);
        cloudExecution.last_polled_at = this.now();
        cloudExecution.poll_count += 1;
        const token = parseCloudStatusToken(`${statusResult.stdout}\n${statusResult.stderr}`);
        const mapped = mapCloudStatusToken(token);
        // `codex cloud status` may return a non-zero exit while the task is still pending.
        // Treat non-zero as a retry only when no recognizable status token is present.
        if (statusResult.exitCode !== 0 && mapped === 'unknown') {
          statusRetries += 1;
          if (statusRetries > STATUS_RETRY_LIMIT) {
            throw new Error(`codex cloud status failed ${statusRetries} times: ${compactError(statusResult.stderr, statusResult.stdout)}`);
          }
          // Linear backoff: the wait grows with each consecutive failure.
          await this.sleepFn(STATUS_RETRY_BACKOFF_MS * statusRetries);
          continue;
        }
        statusRetries = 0;
        if (mapped !== 'unknown') {
          cloudExecution.status = mapped;
        }
        if (mapped === 'ready') {
          notes.push(`Cloud task completed: ${taskId}`);
          break;
        }
        if (mapped === 'error' || mapped === 'failed' || mapped === 'cancelled') {
          cloudExecution.error = `Cloud task ended with status ${mapped}.`;
          break;
        }
        await this.sleepFn(cloudExecution.poll_interval_seconds * 1000);
      }

      // Leaving the loop while still non-terminal means the deadline passed.
      if (cloudExecution.status === 'running' || cloudExecution.status === 'queued') {
        cloudExecution.status = 'failed';
        cloudExecution.error = `Timed out waiting for cloud task completion after ${cloudExecution.timeout_seconds}s.`;
      }

      if (cloudExecution.status === 'ready') {
        const diffResult = await runCloudCommand(['cloud', 'diff', taskId]);
        if (diffResult.exitCode === 0 && diffResult.stdout.trim().length > 0) {
          const diffPath = join(cloudDir, `${taskId}.diff.patch`);
          await writeFile(diffPath, diffResult.stdout, 'utf8');
          cloudExecution.diff_path = relative(input.repoRoot, diffPath);
          cloudExecution.diff_status = 'available';
          cloudExecution.diff_url = cloudExecution.status_url;
          notes.push(`Cloud diff captured: ${cloudExecution.diff_path}`);
        }
        else {
          // A missing diff does not fail the run; it is only recorded.
          cloudExecution.diff_status = 'unavailable';
          if (diffResult.exitCode !== 0) {
            notes.push(`Cloud diff unavailable (exit ${diffResult.exitCode}).`);
          }
          else {
            notes.push('Cloud diff unavailable (empty payload).');
          }
        }
      }
      else {
        cloudExecution.diff_status = 'unavailable';
      }

      cloudExecution.completed_at = this.now();
      const success = cloudExecution.status === 'ready';
      const summary = success
        ? `Cloud task ${cloudExecution.task_id} completed successfully.`
        : `Cloud task ${cloudExecution.task_id ?? '<unknown>'} failed (${cloudExecution.status}).`;
      return { success, summary, notes, cloudExecution };
    }
    catch (error) {
      // Preserve non-queued status to reflect last known remote state at failure time.
      cloudExecution.status = cloudExecution.status === 'queued' ? 'failed' : cloudExecution.status;
      cloudExecution.diff_status = 'unavailable';
      cloudExecution.error = error?.message ?? String(error);
      cloudExecution.completed_at = this.now();
      const summary = `Cloud execution failed: ${cloudExecution.error}`;
      notes.push(summary);
      return { success: false, summary, notes, cloudExecution };
    }
  }

  /**
   * Best-effort lookup of the task's URL via `cloud list --json`.
   *
   * Any failure (runner rejection, non-zero exit, unparsable or unexpected
   * JSON) yields `null` so that an already-submitted task is still polled.
   *
   * @param {string} taskId - Task id to look for in the listing.
   * @param {Function} runCloudCommand - Command helper created by `execute`.
   * @returns {Promise<{url: string|null}|null>}
   */
  async lookupTaskMetadata(taskId, runCloudCommand) {
    try {
      const listResult = await runCloudCommand(['cloud', 'list', '--json', '--limit', String(DEFAULT_LIST_LIMIT)]);
      if (listResult.exitCode !== 0) {
        return null;
      }
      const payload = JSON.parse(listResult.stdout);
      const match = payload.tasks?.find((task) => task.id === taskId) ?? null;
      return { url: match?.url ?? null };
    }
    catch {
      return null;
    }
  }
}
216
/**
 * Default command runner: spawns `request.command` with `request.args` and
 * collects everything the child writes to stdout and stderr.
 *
 * stdin is not connected. The promise resolves once the child's streams have
 * closed, whatever the exit status; it rejects only when the process cannot be
 * spawned (or another `error` event is emitted by the child).
 *
 * @param {{command: string, args: string[], cwd?: string, env?: object}} request
 * @returns {Promise<{exitCode: number, stdout: string, stderr: string}>}
 *   `exitCode` is 1 when the child ended without a numeric exit code.
 */
export async function defaultCloudCommandRunner(request) {
  return await new Promise((resolve, reject) => {
    const child = spawn(request.command, request.args, {
      cwd: request.cwd,
      env: request.env,
      stdio: ['ignore', 'pipe', 'pipe']
    });
    let stdout = '';
    let stderr = '';
    // Let the streams decode UTF-8 themselves: converting each Buffer chunk
    // separately corrupts a multi-byte character split across two chunks.
    child.stdout?.setEncoding('utf8');
    child.stderr?.setEncoding('utf8');
    child.stdout?.on('data', (chunk) => {
      stdout += chunk;
    });
    child.stderr?.on('data', (chunk) => {
      stderr += chunk;
    });
    child.once('error', (error) => {
      reject(error instanceof Error ? error : new Error(String(error)));
    });
    child.once('close', (code) => {
      resolve({
        exitCode: typeof code === 'number' ? code : 1,
        stdout,
        stderr
      });
    });
  });
}
243
/**
 * Caps `value` at `limit` UTF-16 code units, appending `…` when text was cut.
 *
 * Missing output (`null`/`undefined`) becomes an empty string and other
 * non-string values are stringified, so a command runner that omits stdout or
 * stderr cannot make logging throw. The cut never leaves half of a surrogate
 * pair at the end of the result.
 *
 * @param {*} value - Text to cap.
 * @param {number} [limit=MAX_LOG_CHARS] - Maximum number of code units kept.
 * @returns {string} The original text, or its capped prefix followed by `…`.
 */
function truncate(value, limit = MAX_LOG_CHARS) {
  const text = typeof value === 'string' ? value : String(value ?? '');
  if (text.length <= limit) {
    return text;
  }
  let end = limit;
  const lastUnit = text.charCodeAt(end - 1);
  // A high surrogate at the cut point would be separated from its low half.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    end -= 1;
  }
  return `${text.slice(0, end)}…`;
}
249
/**
 * Joins the non-blank inputs (typically stderr and stdout) into one
 * ` | `-separated string for use in an error message.
 *
 * Missing or non-string inputs are tolerated, so building an error message
 * never throws and hides the failure being reported.
 *
 * @param {...*} values - Output fragments; `null`/`undefined` count as empty.
 * @returns {string} The merged text passed through `truncate`, or a fixed
 *   placeholder when every input is blank.
 */
function compactError(...values) {
  const merged = values
    .map((value) => (typeof value === 'string' ? value : String(value ?? '')).trim())
    .filter((value) => value.length > 0)
    .join(' | ');
  return merged.length > 0 ? truncate(merged) : 'no stderr/stdout captured';
}
@@ -151,6 +151,7 @@ export class TaskManager {
151
151
  build,
152
152
  test,
153
153
  review,
154
+ cloudExecution: build.cloudExecution ?? null,
154
155
  timestamp
155
156
  };
156
157
  }
package/docs/README.md CHANGED
@@ -21,6 +21,7 @@ Codex Orchestrator is the coordination layer that glues together Codex-driven ag
21
21
 
22
22
  ## Release Notes
23
23
  - Shipped skills note: `docs/release-notes-template-addendum.md`.
24
+ - Optional overview override: add and commit a release overview file at `.github/release-overview.md` before tagging; the release workflow uses it when present.
24
25
 
25
26
  ## How It Works
26
27
  - **Planner → Builder → Tester → Reviewer:** The core `TaskManager` (see `orchestrator/src/manager.ts`) wires together agent interfaces that decide *what* to run (planner), execute the selected pipeline stage (builder), verify results (tester), and give a final decision (reviewer).
@@ -146,6 +147,7 @@ Notes:
146
147
  - `/prompts:diagnostics` takes `TASK=<task-id> MANIFEST=<path> [NOTES=<free text>]`, exports `MCP_RUNNER_TASK_ID=$TASK`, runs `npx @kbediako/codex-orchestrator start diagnostics --format json`, tails `.runs/$TASK/cli/<run-id>/manifest.json` (or `npx @kbediako/codex-orchestrator status --run <run-id> --watch --interval 10`), and records evidence to `/tasks`, `docs/TASKS.md`, `.agent/task/...`, `.runs/$TASK/metrics.json`, and `out/$TASK/state.json` using `$MANIFEST`.
147
148
  - `/prompts:review-handoff` takes `TASK=<task-id> MANIFEST=<path> NOTES=<goal + summary + risks + optional questions>`, re-exports `MCP_RUNNER_TASK_ID`, and (repo-only) runs `node scripts/delegation-guard.mjs`, `node scripts/spec-guard.mjs --dry-run`, `npm run lint`, `npm run test`, optional `npm run eval:test`, plus `npm run review` (wraps `codex review` against the current diff and includes the latest run manifest path as evidence). It also reminds you to log approvals in `$MANIFEST` and mirror the evidence to the same docs/metrics/state targets.
148
149
  - In CI / `--no-interactive` pipelines (or when stdin is not a TTY, or `CODEX_REVIEW_NON_INTERACTIVE=1` / `CODEX_NON_INTERACTIVE=1` / `CODEX_NO_INTERACTIVE=1`), `npm run review` prints the review handoff prompt (including evidence paths) and exits successfully instead of invoking `codex review`. Set `FORCE_CODEX_REVIEW=1` to run `codex review` in those environments.
150
+ - When forcing non-interactive review execution, `npm run review` enforces a timeout (`CODEX_REVIEW_TIMEOUT_SECONDS`, default `900`). Set `CODEX_REVIEW_TIMEOUT_SECONDS=0` to disable the timeout.
149
151
  - Always trigger diagnostics and review workflows through these prompts whenever you run the orchestrator so contributors consistently execute the required command sequences and capture auditable manifests.
150
152
 
151
153
  ### Identifier Guardrails
@@ -195,6 +197,7 @@ Note: the commands below assume a source checkout; `scripts/` helpers are not in
195
197
  | `npm run eval:test` | Optional evaluation harness (enable when `evaluation/fixtures/**` is populated). |
196
198
  | `npm run docs:check` | Deterministically validates scripts/pipelines/paths referenced in agent-facing docs. |
197
199
  | `npm run docs:freshness` | Validates docs registry coverage + review recency; writes `out/<task-id>/docs-freshness.json`. |
200
+ | `npm run ci:cloud-canary` | Runs the cloud canary harness (`scripts/cloud-canary-ci.mjs`) to verify cloud lifecycle manifest + run-summary evidence; credential-gated by `CODEX_CLOUD_ENV_ID` and optional auth secrets (`CODEX_CLOUD_BRANCH` defaults to `main`). |
198
201
  | `node scripts/delegation-guard.mjs` | Enforces subagent delegation evidence before review (repo-only). |
199
202
  | `node scripts/spec-guard.mjs --dry-run` | Validates spec freshness; required before review (repo-only). |
200
203
  | `node scripts/diff-budget.mjs` | Guards against oversized diffs before review (repo-only; defaults: 25 files / 800 lines; supports explicit overrides). |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kbediako/codex-orchestrator",
3
- "version": "0.1.13",
3
+ "version": "0.1.14-alpha.1",
4
4
  "license": "MIT",
5
5
  "type": "module",
6
6
  "bin": {
@@ -40,6 +40,7 @@
40
40
  "docs:archive-tasks": "node scripts/tasks-archive.mjs",
41
41
  "docs:freshness": "node scripts/docs-freshness.mjs --check",
42
42
  "docs:sync": "node --loader ts-node/esm scripts/docs-hygiene.ts --sync",
43
+ "ci:cloud-canary": "node scripts/cloud-canary-ci.mjs",
43
44
  "prelint": "node scripts/build-patterns-if-needed.mjs",
44
45
  "lint": "eslint orchestrator/src orchestrator/tests packages/orchestrator/src packages/orchestrator/tests packages/shared adapters evaluation/harness evaluation/tests --ext .ts,.tsx",
45
46
  "pack:audit": "node scripts/pack-audit.mjs",
@@ -75,6 +76,9 @@
75
76
  "eslint-plugin-patterns": "file:eslint-plugin-patterns",
76
77
  "jscodeshift": "^0.15.2",
77
78
  "json-schema-to-typescript": "^14.0.0",
79
+ "pixelmatch": "^7.1.0",
80
+ "playwright": "^1.57.0",
81
+ "pngjs": "^7.0.0",
78
82
  "ts-node": "^10.9.2",
79
83
  "typescript": "^5.4.0",
80
84
  "vitest": "^1.3.1"
@@ -317,6 +317,51 @@
317
317
  }
318
318
  }
319
319
  },
320
+ "cloud_execution": {
321
+ "type": ["object", "null"],
322
+ "additionalProperties": false,
323
+ "required": [
324
+ "task_id",
325
+ "environment_id",
326
+ "status",
327
+ "status_url",
328
+ "submitted_at",
329
+ "completed_at",
330
+ "last_polled_at",
331
+ "poll_count",
332
+ "poll_interval_seconds",
333
+ "timeout_seconds",
334
+ "attempts",
335
+ "diff_path",
336
+ "diff_url",
337
+ "diff_status",
338
+ "apply_status",
339
+ "log_path",
340
+ "error"
341
+ ],
342
+ "properties": {
343
+ "task_id": { "type": ["string", "null"] },
344
+ "environment_id": { "type": ["string", "null"] },
345
+ "status": {
346
+ "type": "string",
347
+ "enum": ["queued", "running", "ready", "error", "failed", "cancelled", "unknown"]
348
+ },
349
+ "status_url": { "type": ["string", "null"] },
350
+ "submitted_at": { "type": ["string", "null"] },
351
+ "completed_at": { "type": ["string", "null"] },
352
+ "last_polled_at": { "type": ["string", "null"] },
353
+ "poll_count": { "type": "integer", "minimum": 0 },
354
+ "poll_interval_seconds": { "type": "integer", "minimum": 1 },
355
+ "timeout_seconds": { "type": "integer", "minimum": 1 },
356
+ "attempts": { "type": "integer", "minimum": 1 },
357
+ "diff_path": { "type": ["string", "null"] },
358
+ "diff_url": { "type": ["string", "null"] },
359
+ "diff_status": { "type": "string", "enum": ["pending", "available", "unavailable"] },
360
+ "apply_status": { "type": "string", "enum": ["not_requested", "succeeded", "failed"] },
361
+ "log_path": { "type": ["string", "null"] },
362
+ "error": { "type": ["string", "null"] }
363
+ }
364
+ },
320
365
  "privacy": {
321
366
  "type": ["object", "null"],
322
367
  "additionalProperties": false,
@@ -158,6 +158,7 @@ repeat:
158
158
 
159
159
  - **Long waits:** `wait_ms` never blocks longer than 10s per call; use polling.
160
160
  - **Long-running delegate.spawn:** Prefer `start_only=true` (default) to avoid tool-call timeouts. If you must use `start_only=false`, keep runs short or run long jobs outside delegation (no question queue).
161
+ - **Cloud run branch mismatch:** cloud-mode orchestration against a local-only branch can fail with `couldn't find remote ref ...`; set `CODEX_CLOUD_BRANCH` to a pushed branch (typically `main`) before cloud execution.
161
162
  - **Tool profile mismatch:** child tool profile must be allowed by repo policy; invalid or unsafe names are ignored.
162
163
  - **Confirmation misuse:** never pass `confirm_nonce` from model/tool input; it is runner‑injected only.
163
164
  - **Secrets exposure:** never include secrets/tokens/PII in delegate prompts or files.
@@ -25,6 +25,7 @@ Use this skill when a task needs a spec-driven workflow. The objective is to cre
25
25
 
26
26
  3) Run docs-review before implementation
27
27
  - `npx codex-orchestrator start docs-review --format json --no-interactive --task <task-id>`
28
+ - If running in cloud mode, ensure the branch exists on remote. For local-only branches, set `CODEX_CLOUD_BRANCH=main` (or another pushed branch).
28
29
  - Link the manifest path in the checklists.
29
30
 
30
31
  4) Implement and validate