npm - @kbediako/codex-orchestrator - Versions diffs - 0.1.12 → 0.1.14-alpha.1 - Mend

@kbediako/codex-orchestrator 0.1.12 → 0.1.14-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

package/LICENSE +19 -5
package/README.md +47 -2
package/dist/bin/codex-orchestrator.js +93 -0
package/dist/orchestrator/src/cli/adapters/CommandBuilder.js +27 -3
package/dist/orchestrator/src/cli/adapters/CommandPlanner.js +17 -1
package/dist/orchestrator/src/cli/adapters/CommandReviewer.js +36 -1
package/dist/orchestrator/src/cli/adapters/CommandTester.js +28 -0
package/dist/orchestrator/src/cli/adapters/cloudFailureDiagnostics.js +45 -0
package/dist/orchestrator/src/cli/codexCliSetup.js +294 -0
package/dist/orchestrator/src/cli/init.js +3 -0
package/dist/orchestrator/src/cli/mcp.js +4 -2
package/dist/orchestrator/src/cli/orchestrator.js +298 -28
package/dist/orchestrator/src/cli/rlm/context.js +31 -3
package/dist/orchestrator/src/cli/rlm/symbolic.js +152 -15
package/dist/orchestrator/src/cli/rlmRunner.js +59 -5
package/dist/orchestrator/src/cli/run/manifest.js +3 -0
package/dist/orchestrator/src/cli/services/commandRunner.js +87 -0
package/dist/orchestrator/src/cli/services/runSummaryWriter.js +24 -0
package/dist/orchestrator/src/cli/skills.js +1 -1
package/dist/orchestrator/src/cli/utils/codexCli.js +94 -0
package/dist/orchestrator/src/cli/utils/codexPaths.js +13 -0
package/dist/orchestrator/src/cli/utils/devtools.js +9 -12
package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js +255 -0
package/dist/orchestrator/src/learning/crystalizer.js +2 -1
package/dist/orchestrator/src/manager.js +1 -0
package/dist/orchestrator/src/sync/CloudSyncWorker.js +37 -7
package/dist/scripts/design/pipeline/context.js +3 -2
package/dist/scripts/lib/run-manifests.js +14 -0
package/docs/README.md +22 -2
package/package.json +6 -2
package/schemas/manifest.json +83 -0
package/skills/collab-deliberation/SKILL.md +21 -0
package/skills/collab-evals/SKILL.md +32 -0
package/skills/delegate-early/SKILL.md +47 -0
package/skills/delegation-usage/DELEGATION_GUIDE.md +5 -4
package/skills/delegation-usage/SKILL.md +11 -5
package/skills/docs-first/SKILL.md +2 -1
package/templates/README.md +4 -0

package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js ADDED Viewed

@@ -0,0 +1,255 @@
+import { spawn } from 'node:child_process';
+import { appendFile, mkdir, writeFile } from 'node:fs/promises';
+import { join, relative } from 'node:path';
+import { setTimeout as sleep } from 'node:timers/promises';
+import { isoTimestamp } from '../cli/utils/time.js';
+// Matches a cloud task id of the form `task_<letter>_<hex digits>` as a whole word (case-insensitive).
+const TASK_ID_PATTERN = /\btask_[a-z]_[a-f0-9]+\b/i;
+// Upper bound, in UTF-16 code units, applied by `truncate` to logged stdout/stderr and error text.
+const MAX_LOG_CHARS = 32 * 1024;
+// Consecutive unusable `cloud status` results tolerated before `execute` gives up (it throws once the count exceeds this).
+const STATUS_RETRY_LIMIT = 3;
+// Base delay between status retries; `execute` multiplies it by the current retry count.
+const STATUS_RETRY_BACKOFF_MS = 1500;
+// Value passed as `--limit` to `cloud list` when looking up task metadata.
+const DEFAULT_LIST_LIMIT = 20;
+export function extractCloudTaskId(text) {
+    const match = TASK_ID_PATTERN.exec(text);
+    if (!match?.[0]) {
+        return null;
+    }
+    return match[0];
+}
+export function parseCloudStatusToken(text) {
+    const match = /^\s*\[([A-Z_]+)\]/m.exec(text);
+    if (!match?.[1]) {
+        return null;
+    }
+    return match[1].toUpperCase();
+}
+export function mapCloudStatusToken(token) {
+    if (!token) {
+        return 'unknown';
+    }
+    switch (token) {
+        case 'READY':
+        case 'COMPLETED':
+        case 'SUCCEEDED':
+            return 'ready';
+        case 'RUNNING':
+        case 'IN_PROGRESS':
+            return 'running';
+        case 'QUEUED':
+        case 'PENDING':
+            return 'queued';
+        case 'ERROR':
+            return 'error';
+        case 'FAILED':
+            return 'failed';
+        case 'CANCELLED':
+        case 'CANCELED':
+            return 'cancelled';
+        default:
+            return 'unknown';
+    }
+}
+/**
+ * Submits a prompt through the `cloud exec` subcommand of the configured codex binary,
+ * polls `cloud status` until the task reaches a terminal status or the timeout elapses,
+ * and captures the output of `cloud diff` for a task that finished as `ready`.
+ *
+ * The command runner, timestamp source, and sleep function can be injected through
+ * the constructor options.
+ */
+export class CodexCloudTaskExecutor {
+    commandRunner;
+    now;
+    sleepFn;
+    /**
+     * @param {object} [options]
+     * @param {Function} [options.commandRunner] - Runs one command; defaults to `defaultCloudCommandRunner`.
+     * @param {Function} [options.now] - Produces timestamps; defaults to `isoTimestamp`.
+     * @param {Function} [options.sleepFn] - Waits for a number of milliseconds; defaults to the promise-based `setTimeout`.
+     */
+    constructor(options = {}) {
+        this.commandRunner = options.commandRunner ?? defaultCloudCommandRunner;
+        this.now = options.now ?? isoTimestamp;
+        this.sleepFn = options.sleepFn ?? sleep;
+    }
+    /**
+     * Runs one cloud task end to end: `cloud exec`, a `cloud status` polling loop, then `cloud diff`.
+     *
+     * Every CLI invocation is appended as one JSON line to `<runDir>/cloud/commands.ndjson`.
+     * Errors raised inside the submit/poll/diff sequence are caught and reported through the
+     * returned object; a failure to create the `cloud` directory is not caught here.
+     *
+     * @param {object} input - Reads `runDir`, `repoRoot`, `codexBin`, `environmentId`, `prompt`,
+     *   `pollIntervalSeconds`, `timeoutSeconds`, `attempts`, and optionally `env` and `branch`.
+     * @returns {Promise<{success: boolean, summary: string, notes: string[], cloudExecution: object}>}
+     *   `success` is true only when the final status is `ready`.
+     */
+    async execute(input) {
+        const cloudDir = join(input.runDir, 'cloud');
+        await mkdir(cloudDir, { recursive: true });
+        const commandLogPath = join(cloudDir, 'commands.ndjson');
+        // Caller-supplied variables override the inherited process environment.
+        const env = { ...process.env, ...(input.env ?? {}) };
+        const notes = [];
+        const cloudExecution = {
+            task_id: null,
+            environment_id: input.environmentId,
+            status: 'queued',
+            status_url: null,
+            submitted_at: null,
+            completed_at: null,
+            last_polled_at: null,
+            poll_count: 0,
+            // The three numeric settings are clamped to a minimum of 1.
+            poll_interval_seconds: Math.max(1, input.pollIntervalSeconds),
+            timeout_seconds: Math.max(1, input.timeoutSeconds),
+            attempts: Math.max(1, input.attempts),
+            diff_path: null,
+            diff_url: null,
+            diff_status: 'pending',
+            apply_status: 'not_requested',
+            // Stored relative to the repository root.
+            log_path: relative(input.repoRoot, commandLogPath),
+            error: null
+        };
+        // Runs one codex command from the repo root and logs it (with truncated output) before returning the result.
+        const runCloudCommand = async (args) => {
+            const result = await this.commandRunner({
+                command: input.codexBin,
+                args,
+                cwd: input.repoRoot,
+                env
+            });
+            await appendFile(commandLogPath, `${JSON.stringify({
+                timestamp: this.now(),
+                command: input.codexBin,
+                args,
+                exit_code: result.exitCode,
+                stdout: truncate(result.stdout),
+                stderr: truncate(result.stderr)
+            })}\n`, 'utf8');
+            return result;
+        };
+        try {
+            const execArgs = ['cloud', 'exec', '--env', input.environmentId, '--attempts', String(cloudExecution.attempts)];
+            if (input.branch && input.branch.trim()) {
+                execArgs.push('--branch', input.branch.trim());
+            }
+            // The prompt is passed as the final positional argument.
+            execArgs.push(input.prompt);
+            const execResult = await runCloudCommand(execArgs);
+            if (execResult.exitCode !== 0) {
+                throw new Error(`codex cloud exec failed with exit ${execResult.exitCode}: ${compactError(execResult.stderr, execResult.stdout)}`);
+            }
+            // The task id is searched for in stdout first, then stderr.
+            const taskId = extractCloudTaskId(`${execResult.stdout}\n${execResult.stderr}`);
+            if (!taskId) {
+                throw new Error('Unable to parse cloud task id from codex cloud exec output.');
+            }
+            cloudExecution.task_id = taskId;
+            cloudExecution.status = 'running';
+            cloudExecution.submitted_at = this.now();
+            notes.push(`Cloud task submitted: ${taskId}`);
+            const metadata = await this.lookupTaskMetadata(taskId, runCloudCommand);
+            if (metadata?.url) {
+                cloudExecution.status_url = metadata.url;
+            }
+            // Deadline for the polling loop, measured from this point rather than from submission.
+            const timeoutAt = Date.now() + cloudExecution.timeout_seconds * 1000;
+            let statusRetries = 0;
+            while (Date.now() < timeoutAt) {
+                const statusResult = await runCloudCommand(['cloud', 'status', taskId]);
+                cloudExecution.last_polled_at = this.now();
+                cloudExecution.poll_count += 1;
+                const token = parseCloudStatusToken(`${statusResult.stdout}\n${statusResult.stderr}`);
+                const mapped = mapCloudStatusToken(token);
+                // `codex cloud status` may return a non-zero exit while the task is still pending.
+                // Treat non-zero as a retry only when no recognizable status token is present.
+                if (statusResult.exitCode !== 0 && mapped === 'unknown') {
+                    statusRetries += 1;
+                    if (statusRetries > STATUS_RETRY_LIMIT) {
+                        throw new Error(`codex cloud status failed ${statusRetries} times: ${compactError(statusResult.stderr, statusResult.stdout)}`);
+                    }
+                    // Back off linearly: the delay grows with each consecutive failure.
+                    await this.sleepFn(STATUS_RETRY_BACKOFF_MS * statusRetries);
+                    continue;
+                }
+                // Any usable response resets the consecutive-failure counter.
+                statusRetries = 0;
+                if (mapped !== 'unknown') {
+                    cloudExecution.status = mapped;
+                }
+                if (mapped === 'ready') {
+                    notes.push(`Cloud task completed: ${taskId}`);
+                    break;
+                }
+                if (mapped === 'error' || mapped === 'failed' || mapped === 'cancelled') {
+                    cloudExecution.error = `Cloud task ended with status ${mapped}.`;
+                    break;
+                }
+                await this.sleepFn(cloudExecution.poll_interval_seconds * 1000);
+            }
+            // A non-terminal status after the loop means the deadline passed.
+            if (cloudExecution.status === 'running' || cloudExecution.status === 'queued') {
+                cloudExecution.status = 'failed';
+                cloudExecution.error = `Timed out waiting for cloud task completion after ${cloudExecution.timeout_seconds}s.`;
+            }
+            if (cloudExecution.status === 'ready') {
+                const diffResult = await runCloudCommand(['cloud', 'diff', taskId]);
+                if (diffResult.exitCode === 0 && diffResult.stdout.trim().length > 0) {
+                    const diffPath = join(cloudDir, `${taskId}.diff.patch`);
+                    await writeFile(diffPath, diffResult.stdout, 'utf8');
+                    cloudExecution.diff_path = relative(input.repoRoot, diffPath);
+                    cloudExecution.diff_status = 'available';
+                    // The diff URL reuses the status URL found by the metadata lookup (may be null).
+                    cloudExecution.diff_url = cloudExecution.status_url;
+                    notes.push(`Cloud diff captured: ${cloudExecution.diff_path}`);
+                }
+                else {
+                    // A missing diff does not change the task status; it is only recorded in the notes.
+                    cloudExecution.diff_status = 'unavailable';
+                    if (diffResult.exitCode !== 0) {
+                        notes.push(`Cloud diff unavailable (exit ${diffResult.exitCode}).`);
+                    }
+                    else {
+                        notes.push('Cloud diff unavailable (empty payload).');
+                    }
+                }
+            }
+            else {
+                cloudExecution.diff_status = 'unavailable';
+            }
+            cloudExecution.completed_at = this.now();
+            const success = cloudExecution.status === 'ready';
+            const summary = success
+                ? `Cloud task ${cloudExecution.task_id} completed successfully.`
+                : `Cloud task ${cloudExecution.task_id ?? '<unknown>'} failed (${cloudExecution.status}).`;
+            return { success, summary, notes, cloudExecution };
+        }
+        catch (error) {
+            // Preserve non-queued status to reflect last known remote state at failure time.
+            cloudExecution.status = cloudExecution.status === 'queued' ? 'failed' : cloudExecution.status;
+            cloudExecution.diff_status = 'unavailable';
+            cloudExecution.error = error?.message ?? String(error);
+            cloudExecution.completed_at = this.now();
+            const summary = `Cloud execution failed: ${cloudExecution.error}`;
+            notes.push(summary);
+            return { success: false, summary, notes, cloudExecution };
+        }
+    }
+    /**
+     * Looks a task up in the JSON output of `cloud list` to find its URL. The listing is
+     * requested with `--limit DEFAULT_LIST_LIMIT`.
+     *
+     * @param {string} taskId - Task id to look for among the listed tasks.
+     * @param {Function} runCloudCommand - Logging command runner supplied by `execute`.
+     * @returns {Promise<{url: (string|null)}|null>} `null` when the command exits non-zero or its
+     *   stdout cannot be processed as JSON; otherwise an object whose `url` is the matched task's
+     *   URL, or `null` when no listed task has that id.
+     */
+    async lookupTaskMetadata(taskId, runCloudCommand) {
+        const listResult = await runCloudCommand(['cloud', 'list', '--json', '--limit', String(DEFAULT_LIST_LIMIT)]);
+        if (listResult.exitCode !== 0) {
+            return null;
+        }
+        try {
+            const payload = JSON.parse(listResult.stdout);
+            const match = payload.tasks?.find((task) => task.id === taskId) ?? null;
+            return { url: match?.url ?? null };
+        }
+        catch {
+            // Unparseable or unexpectedly shaped output is treated as "no metadata".
+            return null;
+        }
+    }
+}
+export async function defaultCloudCommandRunner(request) {
+    return await new Promise((resolve, reject) => {
+        const child = spawn(request.command, request.args, {
+            cwd: request.cwd,
+            env: request.env,
+            stdio: ['ignore', 'pipe', 'pipe']
+        });
+        let stdout = '';
+        let stderr = '';
+        child.stdout?.on('data', (chunk) => {
+            stdout += chunk.toString();
+        });
+        child.stderr?.on('data', (chunk) => {
+            stderr += chunk.toString();
+        });
+        child.once('error', (error) => {
+            reject(error instanceof Error ? error : new Error(String(error)));
+        });
+        child.once('close', (code) => {
+            resolve({
+                exitCode: typeof code === 'number' ? code : 1,
+                stdout,
+                stderr
+            });
+        });
+    });
+}
+function truncate(value) {
+    if (value.length <= MAX_LOG_CHARS) {
+        return value;
+    }
+    return `${value.slice(0, MAX_LOG_CHARS)}…`;
+}
+function compactError(...values) {
+    const merged = values
+        .map((value) => value.trim())
+        .filter((value) => value.length > 0)
+        .join(' | ');
+    return merged.length > 0 ? truncate(merged) : 'no stderr/stdout captured';
+}

package/dist/orchestrator/src/learning/crystalizer.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { mkdtemp, readFile, writeFile, mkdir, rm } from 'node:fs/promises';
 import { tmpdir } from 'node:os';
 import { join, relative } from 'node:path';
 import { isoTimestamp } from '../cli/utils/time.js';
+import { resolveCodexCliBin } from '../cli/utils/codexCli.js';
 import { slugify } from '../cli/utils/strings.js';
 import { appendLearningAlert, ensureLearningSection } from './manifest.js';
 import { computePromptPackStamp, loadPromptPacks } from '../../../packages/orchestrator/src/instructions/promptPacks.js';
@@ -86,7 +87,7 @@ function composePrompt(promptBody, packStamp, problem, patch, scenarioSummary) {
     ];
     return segments.filter(Boolean).join('\n\n');
 }
-export async function createCodexCliCrystalizerClient(binary = process.env.CODEX_CLI_BIN ?? 'codex') {
+export async function createCodexCliCrystalizerClient(binary = resolveCodexCliBin(process.env)) {
     const execFileAsync = promisify(execFile);
     return {
         async generate(prompt, options) {

package/dist/orchestrator/src/manager.js CHANGED Viewed

@@ -151,6 +151,7 @@ export class TaskManager {
             build,
             test,
             review,
+            cloudExecution: build.cloudExecution ?? null,
             timestamp
         };
     }

package/dist/orchestrator/src/sync/CloudSyncWorker.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { createHash } from 'node:crypto';
 import { CloudRunsHttpError } from './CloudRunsHttpClient.js';
 import { sanitizeTaskId } from '../persistence/sanitizeTaskId.js';
 import { sanitizeRunId } from '../persistence/sanitizeRunId.js';
+import { resolveRunDir } from '../../../scripts/lib/run-manifests.js';
 export class CloudSyncWorker {
     bus;
     client;
@@ -99,11 +100,25 @@ export class CloudSyncWorker {
             }
         }
     }
-    buildManifestPath(summary) {
+    buildManifestPaths(summary) {
         const safeTaskId = sanitizeTaskId(summary.taskId);
         const safeRunId = sanitizeRunId(summary.runId);
-        const runDir = join(this.runsDir, safeTaskId, safeRunId);
-        return join(runDir, 'manifest.json');
+        const primaryRunDir = resolveRunDir({
+            runsRoot: this.runsDir,
+            taskId: safeTaskId,
+            runId: safeRunId,
+            layout: 'cli'
+        });
+        const fallbackRunDir = resolveRunDir({
+            runsRoot: this.runsDir,
+            taskId: safeTaskId,
+            runId: safeRunId,
+            layout: 'legacy'
+        });
+        return {
+            primary: join(primaryRunDir, 'manifest.json'),
+            fallback: join(fallbackRunDir, 'manifest.json')
+        };
     }
     async appendAuditLog(entry) {
         const safeTaskId = sanitizeTaskId(entry.summary.taskId);
@@ -146,7 +161,7 @@ export class CloudSyncWorker {
         return true;
     }
     async readManifestWithRetry(summary) {
-        const manifestPath = this.buildManifestPath(summary);
+        const { primary, fallback } = this.buildManifestPaths(summary);
         let attempt = 0;
         let delay = this.manifestInitialDelayMs;
         let lastError;
@@ -154,13 +169,24 @@ export class CloudSyncWorker {
         while (attempt < this.manifestReadRetries) {
             attempt += 1;
             try {
-                const contents = await readFile(manifestPath, 'utf-8');
+                const contents = await readFile(primary, 'utf-8');
                 lastContents = contents;
                 return JSON.parse(contents);
             }
             catch (error) {
-                lastError = error;
-                if (shouldRetryManifestRead(error) && attempt < this.manifestReadRetries) {
+                let candidateError = error;
+                if (isMissingPathError(error)) {
+                    try {
+                        const contents = await readFile(fallback, 'utf-8');
+                        lastContents = contents;
+                        return JSON.parse(contents);
+                    }
+                    catch (fallbackError) {
+                        candidateError = fallbackError;
+                    }
+                }
+                lastError = candidateError;
+                if (shouldRetryManifestRead(candidateError) && attempt < this.manifestReadRetries) {
                     await new Promise((resolve) => setTimeout(resolve, delay));
                     delay *= 2;
                     continue;
@@ -196,6 +222,10 @@ function shouldRetryManifestRead(error) {
     const code = error?.code;
     return code === 'ENOENT' || code === 'EBUSY' || code === 'EMFILE';
 }
+function isMissingPathError(error) {
+    const code = error?.code;
+    return code === 'ENOENT' || code === 'ENOTDIR';
+}
 function attemptJsonRecovery(contents) {
     const lastBrace = contents.lastIndexOf('}');
     if (lastBrace === -1) {

package/dist/scripts/design/pipeline/context.js CHANGED Viewed

@@ -3,13 +3,14 @@ import { mkdir } from 'node:fs/promises';
 import { loadDesignConfig, designPipelineId } from '../../../packages/shared/config/index.js';
 import { sanitizeTaskId } from '../../../orchestrator/src/persistence/sanitizeTaskId.js';
 import { sanitizeRunId } from '../../../orchestrator/src/persistence/sanitizeRunId.js';
-import { resolveEnvironmentPaths } from '../../lib/run-manifests.js';
+import { resolveEnvironmentPaths, resolveRunDir } from '../../lib/run-manifests.js';
 export async function loadDesignContext() {
     const { repoRoot, runsRoot, outRoot } = resolveEnvironmentPaths();
     const taskId = sanitizeTaskId(process.env.CODEX_ORCHESTRATOR_TASK_ID ?? process.env.MCP_RUNNER_TASK_ID ?? 'unknown-task');
     const rawRunId = process.env.CODEX_ORCHESTRATOR_RUN_ID ?? 'run-local';
     const runId = sanitizeRunId(rawRunId);
-    const runDir = process.env.CODEX_ORCHESTRATOR_RUN_DIR ?? join(runsRoot, taskId, runId);
+    const runDir = process.env.CODEX_ORCHESTRATOR_RUN_DIR ??
+        resolveRunDir({ runsRoot, taskId, runId, layout: 'cli' });
     const manifestPath = process.env.CODEX_ORCHESTRATOR_MANIFEST_PATH ?? join(runDir, 'manifest.json');
     const designConfigPath = process.env.DESIGN_CONFIG_PATH ?? join(repoRoot, 'design.config.yaml');
     const config = await loadDesignConfig({ rootDir: repoRoot, filePath: designConfigPath });

package/dist/scripts/lib/run-manifests.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { access, readdir } from 'node:fs/promises';
 import { isAbsolute, join, resolve } from 'node:path';
 import process from 'node:process';
 const DEFAULT_TASK_ID = '0101';
+const DEFAULT_RUN_LAYOUT = 'cli';
 function resolveRepoRoot() {
     const configured = process.env.CODEX_ORCHESTRATOR_ROOT;
     if (!configured) {
@@ -33,6 +34,19 @@ export function resolveEnvironmentPaths() {
     const taskId = process.env.MCP_RUNNER_TASK_ID ?? DEFAULT_TASK_ID;
     return { repoRoot, runsRoot, outRoot, taskId };
 }
+export function resolveRunDir(options) {
+    const { runsRoot, taskId, runId, layout = DEFAULT_RUN_LAYOUT } = options ?? {};
+    if (!runsRoot || !taskId || !runId) {
+        throw new Error('resolveRunDir requires runsRoot, taskId, and runId');
+    }
+    if (layout !== 'cli' && layout !== 'legacy') {
+        throw new Error(`resolveRunDir received unsupported layout: ${layout}`);
+    }
+    if (layout === 'legacy') {
+        return join(runsRoot, taskId, runId);
+    }
+    return join(runsRoot, taskId, 'cli', runId);
+}
 export async function listDirectories(dirPath) {
     try {
         const entries = await readdir(dirPath, { withFileTypes: true });

package/docs/README.md CHANGED Viewed

@@ -1,11 +1,28 @@
 # Codex Orchestrator (Repository Guide)
-This document covers repository internals, contributor workflows, and deeper architecture. For end‑user install and usage instructions, see the main `README.md`.
+> **Internal/Contributor guide:** This document covers repository internals and workflow details. End‑user installation and usage live in `README.md`.
 Codex Orchestrator is the coordination layer that glues together Codex-driven agents, run pipelines, approval policies, and evidence capture for multi-stage automation projects. It wraps a reusable orchestration core with a CLI that produces auditable manifests, integrates with control-plane validators, and syncs run results to downstream systems.
 > **At a glance:** Every run starts from a task description, writes the active CLI manifest to `.runs/<task-id>/cli/<run-id>/manifest.json`, emits a persisted run summary at `.runs/<task-id>/<run-id>/manifest.json`, mirrors human-readable data to `out/<task-id>/`, and can optionally sync to a remote control plane. Pipelines define the concrete commands (build, lint, test, etc.) that execute for a given task.
+## Evaluation & Metrics
+- Evaluation playbook: `docs/guides/evaluation-playbook.md`.
+- Metrics reference: `docs/reference/metrics-collab-context-rot.md`.
+## Collab vs MCP
+- Decision guide: `docs/guides/collab-vs-mcp.md`.
+## Downstream init
+- See `README.md` for the recommended quick-start flow.
+## Upstream Sync
+- Codex CLI sync strategy: `docs/guides/upstream-codex-cli-sync.md`.
+## Release Notes
+- Shipped skills note: `docs/release-notes-template-addendum.md`.
+- Optional overview override: add and commit a release overview file at `.github/release-overview.md` before tagging; the release workflow uses it when present.
 ## How It Works
 - **Planner → Builder → Tester → Reviewer:** The core `TaskManager` (see `orchestrator/src/manager.ts`) wires together agent interfaces that decide *what* to run (planner), execute the selected pipeline stage (builder), verify results (tester), and give a final decision (reviewer).
 - **Execution modes:** Each plan item can flag `requires_cloud` and task metadata can set `execution.parallel`; the mode policy picks `mcp` (local MCP runtime) or `cloud` execution accordingly.
@@ -130,6 +147,7 @@ Notes:
 - `/prompts:diagnostics` takes `TASK=<task-id> MANIFEST=<path> [NOTES=<free text>]`, exports `MCP_RUNNER_TASK_ID=$TASK`, runs `npx @kbediako/codex-orchestrator start diagnostics --format json`, tails `.runs/$TASK/cli/<run-id>/manifest.json` (or `npx @kbediako/codex-orchestrator status --run <run-id> --watch --interval 10`), and records evidence to `/tasks`, `docs/TASKS.md`, `.agent/task/...`, `.runs/$TASK/metrics.json`, and `out/$TASK/state.json` using `$MANIFEST`.
 - `/prompts:review-handoff` takes `TASK=<task-id> MANIFEST=<path> NOTES=<goal + summary + risks + optional questions>`, re-exports `MCP_RUNNER_TASK_ID`, and (repo-only) runs `node scripts/delegation-guard.mjs`, `node scripts/spec-guard.mjs --dry-run`, `npm run lint`, `npm run test`, optional `npm run eval:test`, plus `npm run review` (wraps `codex review` against the current diff and includes the latest run manifest path as evidence). It also reminds you to log approvals in `$MANIFEST` and mirror the evidence to the same docs/metrics/state targets.
 - In CI / `--no-interactive` pipelines (or when stdin is not a TTY, or `CODEX_REVIEW_NON_INTERACTIVE=1` / `CODEX_NON_INTERACTIVE=1` / `CODEX_NO_INTERACTIVE=1`), `npm run review` prints the review handoff prompt (including evidence paths) and exits successfully instead of invoking `codex review`. Set `FORCE_CODEX_REVIEW=1` to run `codex review` in those environments.
+- When forcing non-interactive review execution, `npm run review` enforces a timeout (`CODEX_REVIEW_TIMEOUT_SECONDS`, default `900`). Set `CODEX_REVIEW_TIMEOUT_SECONDS=0` to disable the timeout.
 - Always trigger diagnostics and review workflows through these prompts whenever you run the orchestrator so contributors consistently execute the required command sequences and capture auditable manifests.
 ### Identifier Guardrails
@@ -159,6 +177,7 @@ Notes:
 ## Persistence & Observability
 - `TaskStateStore` writes per-task snapshots with bounded lock retries; failures degrade gracefully while still writing the main manifest.
 - `RunManifestWriter` generates the canonical manifest JSON for each run (mirrored under `.runs/`), while metrics appenders and summary writers keep `out/` up to date.
+- `collab_tool_calls` in the manifest captures collab tool call JSONL lines extracted from command stdout (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, default 200; set 0 to disable capture).
 - Heartbeat files and timestamps guard against stalled runs. `orchestrator/src/cli/metrics/metricsRecorder.ts` aggregates command durations, exit codes, and guardrail stats for later review.
 - Optional caps: `CODEX_ORCHESTRATOR_EXEC_EVENT_MAX_CHUNKS` limits captured exec chunk events per command (defaults to 500; set 0 for no cap), `CODEX_ORCHESTRATOR_TELEMETRY_MAX_EVENTS` caps in-memory telemetry events queued before flush (defaults to 1000; set 0 for no cap), and `CODEX_METRICS_PRIVACY_EVENTS_MAX` limits privacy decision events stored in `metrics.json` (-1 = no cap; `privacy_event_count` still reflects total).
@@ -178,6 +197,7 @@ Note: the commands below assume a source checkout; `scripts/` helpers are not in
 | `npm run eval:test` | Optional evaluation harness (enable when `evaluation/fixtures/**` is populated). |
 | `npm run docs:check` | Deterministically validates scripts/pipelines/paths referenced in agent-facing docs. |
 | `npm run docs:freshness` | Validates docs registry coverage + review recency; writes `out/<task-id>/docs-freshness.json`. |
+| `npm run ci:cloud-canary` | Runs the cloud canary harness (`scripts/cloud-canary-ci.mjs`) to verify cloud lifecycle manifest + run-summary evidence; credential-gated by `CODEX_CLOUD_ENV_ID` and optional auth secrets (`CODEX_CLOUD_BRANCH` defaults to `main`). |
 | `node scripts/delegation-guard.mjs` | Enforces subagent delegation evidence before review (repo-only). |
 | `node scripts/spec-guard.mjs --dry-run` | Validates spec freshness; required before review (repo-only). |
 | `node scripts/diff-budget.mjs` | Guards against oversized diffs before review (repo-only; defaults: 25 files / 800 lines; supports explicit overrides). |
@@ -241,7 +261,7 @@ Check readiness with `codex-orchestrator doctor --format json` (reports DevTools
 Use the hi-fi pipeline to snapshot complex marketing sites (motion, interactions, tokens) while keeping the repo cloneable:
 1. **Configure the source:** Update `design.config.yaml` → `pipelines.hi_fi_design_toolkit.sources` with the target URL, slug, title, and breakpoints (the repo defaults to an empty `sources` list until you add one).
-2. **Permit the domain:** Add (or update) the matching record in `compliance/permit.json` so Playwright, video capture, and live assets are explicitly approved for that origin.
+2. **Permit the domain:** Copy `compliance/permit.example.json` to `compliance/permit.json`, then add (or update) the matching record so Playwright, video capture, and live assets are explicitly approved for that origin.
 3. **Prep tooling:**
    - `npm install && npm run build`
    - `npm run setup:design-tools` (installs design-system deps) and ensure FFmpeg is available (`brew install ffmpeg` on macOS).

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@kbediako/codex-orchestrator",
-  "version": "0.1.12",
-  "license": "SEE LICENSE IN LICENSE",
+  "version": "0.1.14-alpha.1",
+  "license": "MIT",
   "type": "module",
   "bin": {
     "codex-orchestrator": "dist/bin/codex-orchestrator.js",
@@ -40,6 +40,7 @@
     "docs:archive-tasks": "node scripts/tasks-archive.mjs",
     "docs:freshness": "node scripts/docs-freshness.mjs --check",
     "docs:sync": "node --loader ts-node/esm scripts/docs-hygiene.ts --sync",
+    "ci:cloud-canary": "node scripts/cloud-canary-ci.mjs",
     "prelint": "node scripts/build-patterns-if-needed.mjs",
     "lint": "eslint orchestrator/src orchestrator/tests packages/orchestrator/src packages/orchestrator/tests packages/shared adapters evaluation/harness evaluation/tests --ext .ts,.tsx",
     "pack:audit": "node scripts/pack-audit.mjs",
@@ -75,6 +76,9 @@
     "eslint-plugin-patterns": "file:eslint-plugin-patterns",
     "jscodeshift": "^0.15.2",
     "json-schema-to-typescript": "^14.0.0",
+    "pixelmatch": "^7.1.0",
+    "playwright": "^1.57.0",
+    "pngjs": "^7.0.0",
     "ts-node": "^10.9.2",
     "typescript": "^5.4.0",
     "vitest": "^1.3.1"

package/schemas/manifest.json CHANGED Viewed

@@ -154,6 +154,10 @@
         }
       }
     },
+    "collab_tool_calls": {
+      "type": ["array", "null"],
+      "items": { "$ref": "#/definitions/collabToolCall" }
+    },
     "child_runs": {
       "type": "array",
       "items": {
@@ -313,6 +317,51 @@
         }
       }
     },
+    "cloud_execution": {
+      "type": ["object", "null"],
+      "additionalProperties": false,
+      "required": [
+        "task_id",
+        "environment_id",
+        "status",
+        "status_url",
+        "submitted_at",
+        "completed_at",
+        "last_polled_at",
+        "poll_count",
+        "poll_interval_seconds",
+        "timeout_seconds",
+        "attempts",
+        "diff_path",
+        "diff_url",
+        "diff_status",
+        "apply_status",
+        "log_path",
+        "error"
+      ],
+      "properties": {
+        "task_id": { "type": ["string", "null"] },
+        "environment_id": { "type": ["string", "null"] },
+        "status": {
+          "type": "string",
+          "enum": ["queued", "running", "ready", "error", "failed", "cancelled", "unknown"]
+        },
+        "status_url": { "type": ["string", "null"] },
+        "submitted_at": { "type": ["string", "null"] },
+        "completed_at": { "type": ["string", "null"] },
+        "last_polled_at": { "type": ["string", "null"] },
+        "poll_count": { "type": "integer", "minimum": 0 },
+        "poll_interval_seconds": { "type": "integer", "minimum": 1 },
+        "timeout_seconds": { "type": "integer", "minimum": 1 },
+        "attempts": { "type": "integer", "minimum": 1 },
+        "diff_path": { "type": ["string", "null"] },
+        "diff_url": { "type": ["string", "null"] },
+        "diff_status": { "type": "string", "enum": ["pending", "available", "unavailable"] },
+        "apply_status": { "type": "string", "enum": ["not_requested", "succeeded", "failed"] },
+        "log_path": { "type": ["string", "null"] },
+        "error": { "type": ["string", "null"] }
+      }
+    },
     "privacy": {
       "type": ["object", "null"],
       "additionalProperties": false,
@@ -756,6 +805,40 @@
       },
       "additionalProperties": true
     },
+    "collabToolCall": {
+      "type": "object",
+      "required": [
+        "observed_at",
+        "stage_id",
+        "command_index",
+        "event_type",
+        "item_id",
+        "tool",
+        "status",
+        "sender_thread_id",
+        "receiver_thread_ids"
+      ],
+      "additionalProperties": false,
+      "properties": {
+        "observed_at": { "type": "string", "minLength": 1 },
+        "stage_id": { "type": "string", "minLength": 1 },
+        "command_index": { "type": "integer", "minimum": 1 },
+        "event_type": { "type": "string", "enum": ["item.started", "item.completed", "item.updated"] },
+        "item_id": { "type": "string", "minLength": 1 },
+        "tool": { "type": "string", "minLength": 1 },
+        "status": { "type": "string", "enum": ["in_progress", "completed", "failed"] },
+        "sender_thread_id": { "type": "string", "minLength": 1 },
+        "receiver_thread_ids": {
+          "type": "array",
+          "items": { "type": "string", "minLength": 1 }
+        },
+        "prompt": { "type": ["string", "null"] },
+        "agents_states": {
+          "type": ["object", "null"],
+          "additionalProperties": true
+        }
+      }
+    },
     "designArtifact": {
       "type": "object",
       "required": ["stage", "status", "relative_path"],

package/skills/collab-deliberation/SKILL.md ADDED Viewed

@@ -0,0 +1,21 @@
+---
+name: collab-deliberation
+description: Structure multi-agent brainstorming and deliberation (options, tradeoffs, decision framing) without drifting into implementation.
+---
+# Collab Deliberation
+Use this skill when the user asks for brainstorming, multiple approaches, pros/cons, or decision support. This skill is for **ideas**, not implementation.
+## Workflow
+1) Clarify the decision: summarize the goal, constraints, and success criteria.
+2) Generate options: 3–5 distinct approaches with short descriptions.
+3) Compare tradeoffs: cost, risk, speed, maintenance, and alignment with guardrails.
+4) Recommend: choose a recommended approach and explain why.
+5) Open questions: list 1–3 questions that would change the recommendation.
+## Guardrails
+- Separate ideas from decisions.
+- Do not implement or modify code unless explicitly asked.
+- Keep outputs concise and action-oriented.

package/skills/collab-evals/SKILL.md ADDED Viewed

@@ -0,0 +1,32 @@
+---
+name: collab-evals
+description: Run collab/multi-agent eval scenarios (symbolic RLM, large-context, pause/resume, multi-hour checkpoints) and capture manifest-backed evidence.
+---
+# Collab Evals
+Use this skill to run repeatable collab evaluation scenarios and record evidence. Keep scope to evals; do not implement unrelated fixes.
+## Quick start
+1) Pick the scenario(s):
+- Large-context symbolic RLM with collab subcalls.
+- Multi-hour refactor with checkpoints.
+- 24h pause/resume context-rot regression.
+- Multi-day initiative (48–72h) with multiple resumes.
+2) Ensure task context:
+- `export MCP_RUNNER_TASK_ID=<task-id>`
+3) Run the scenario using `codex-orchestrator start <pipeline> --format json` and record the manifest path.
+## Evidence checklist
+- Manifest path under `.runs/<task-id>/cli/<run-id>/manifest.json`.
+- Log path under `.runs/<task-id>/cli/<run-id>/runner.ndjson`.
+- Findings recorded in `docs/findings/<date>-<topic>.md`.
+- Task mirror update in `docs/TASKS.md` and task spec.
+## Guardrails
+- Collab is additive; keep MCP as the control plane for approvals and audit trails.
+- Cap collab event capture with `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS` when needed.
+- If pause/resume is required, use control endpoints or `codex-orchestrator resume` with manifest evidence.