npm - @kbediako/codex-orchestrator - Versions diffs - 0.1.32 → 0.1.34 - Mend

@kbediako/codex-orchestrator 0.1.32 → 0.1.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

package/README.md +96 -12
package/codex.orchestrator.json +448 -0
package/dist/bin/codex-orchestrator.js +703 -136
package/dist/orchestrator/src/cli/codexCliSetup.js +1 -0
package/dist/orchestrator/src/cli/config/repoConfigPolicy.js +22 -0
package/dist/orchestrator/src/cli/config/userConfig.js +20 -9
package/dist/orchestrator/src/cli/delegationSetup.js +111 -14
package/dist/orchestrator/src/cli/doctor.js +264 -8
package/dist/orchestrator/src/cli/doctorIssueLog.js +350 -0
package/dist/orchestrator/src/cli/doctorUsage.js +150 -8
package/dist/orchestrator/src/cli/init.js +24 -1
package/dist/orchestrator/src/cli/mcpEnable.js +392 -0
package/dist/orchestrator/src/cli/orchestrator.js +180 -5
package/dist/orchestrator/src/cli/rlmRunner.js +289 -35
package/dist/orchestrator/src/cli/run/manifest.js +31 -6
package/dist/orchestrator/src/cli/services/commandRunner.js +10 -2
package/dist/orchestrator/src/cli/services/pipelineResolver.js +70 -18
package/dist/orchestrator/src/cli/services/runPreparation.js +2 -0
package/dist/orchestrator/src/cli/services/runSummaryWriter.js +35 -0
package/dist/orchestrator/src/cli/skills.js +3 -8
package/dist/orchestrator/src/cli/utils/advancedAutopilot.js +114 -0
package/dist/orchestrator/src/cli/utils/codexCli.js +21 -0
package/dist/orchestrator/src/cli/utils/commandPreview.js +10 -0
package/dist/orchestrator/src/cli/utils/delegationGuardRunner.js +85 -8
package/dist/orchestrator/src/cli/utils/devtools.js +2 -1
package/dist/orchestrator/src/cli/utils/specGuardRunner.js +79 -19
package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js +46 -6
package/dist/orchestrator/src/control-plane/request-builder.js +9 -8
package/dist/scripts/lib/pr-watch-merge.js +367 -3
package/docs/README.md +17 -11
package/package.json +2 -1
package/schemas/manifest.json +27 -0
package/skills/collab-deliberation/SKILL.md +6 -0
package/skills/collab-evals/SKILL.md +4 -0
package/skills/collab-subagents-first/SKILL.md +29 -7
package/skills/delegation-usage/DELEGATION_GUIDE.md +31 -5
package/skills/delegation-usage/SKILL.md +29 -4
package/skills/elegance-review/SKILL.md +14 -3
package/skills/standalone-review/SKILL.md +8 -2
package/templates/README.md +1 -1
package/templates/codex/AGENTS.md +12 -1

package/README.md CHANGED

@@ -47,7 +47,7 @@ Use this when you want Codex to drive work inside another repo with the CO defau
    ```bash
    codex-orchestrator init codex --cwd /path/to/repo
    ```
-   One-shot (templates + CO-managed Codex CLI):
+   One-shot (templates + optional CO-managed Codex CLI install):
    ```bash
    codex-orchestrator init codex --codex-cli --yes
    ```
@@ -59,7 +59,11 @@ Use this when you want Codex to drive work inside another repo with the CO defau
    ```bash
    codex-orchestrator codex setup
    ```
-   Use this when you want a pinned binary, build-from-source behavior, or a custom fork. Stock `codex` works for default flows.
+   Use this when you want a pinned binary, build-from-source behavior, or a custom fork.
+   Stock/global `codex` is still the default selection; activate managed binary routing with:
+   ```bash
+   export CODEX_CLI_USE_MANAGED=1
+   ```
 4. Optional (fast refresh helper for downstream users):
    ```bash
    scripts/codex-cli-refresh.sh --repo /path/to/codex --align-only
@@ -81,6 +85,58 @@ codex -c 'mcp_servers.delegation.enabled=true' ...
 ```
 `delegate-server` is the canonical name; `delegation-server` is supported as an alias (older docs may use it).
+## Agent role defaults (recommended)
+Codex built-ins are `default`, `explorer`, and `worker`. `researcher` is user-defined.
+- `spawn_agent` defaults to `default` when `agent_type` is omitted, so always set `agent_type` explicitly when using collab subagents.
+Built-in `explorer` in Codex currently uses `gpt-5.1-codex-mini` with `medium` reasoning unless you override it. If you want latest-codex defaults end-to-end, add role overrides in `~/.codex/config.toml`:
+```toml
+model = "gpt-5.3-codex"
+model_reasoning_effort = "xhigh"
+[agents]
+max_threads = 8
+[agents.explorer]
+description = "Explorer role override (no config_file): keep built-in explorer on top-level model defaults."
+[agents.explorer_fast]
+description = "Fast explorer (spark text-only)."
+config_file = "/absolute/path/to/.codex/agents/explorer-fast.toml"
+[agents.explorer_detailed]
+description = "Detailed explorer."
+config_file = "/absolute/path/to/.codex/agents/explorer-detailed.toml"
+[agents.worker_complex]
+description = "Complex worker role."
+config_file = "/absolute/path/to/.codex/agents/worker-complex.toml"
+```
+```toml
+# ~/.codex/agents/explorer-fast.toml
+model = "gpt-5.3-codex-spark"
+model_reasoning_effort = "xhigh"
+```
+```toml
+# ~/.codex/agents/explorer-detailed.toml
+model = "gpt-5.3-codex"
+model_reasoning_effort = "high"
+```
+```toml
+# ~/.codex/agents/worker-complex.toml
+model = "gpt-5.3-codex"
+model_reasoning_effort = "xhigh"
+```
+Caveats:
+- `gpt-5.3-codex-spark` is text-only (no image inputs). Keep it for fast search/synthesis.
+- Use `max_threads = 8` as a balanced default; only move to `12` after verifying your machine/tooling stays stable under higher concurrency.
 Delegation guard profile:
 - `CODEX_ORCHESTRATOR_GUARD_PROFILE=auto` (default): strict in CO-style repos, warn in lightweight repos.
 - Set `CODEX_ORCHESTRATOR_GUARD_PROFILE=warn` for ad-hoc/no-task-id runs.
@@ -88,8 +144,8 @@ Delegation guard profile:
 ## Delegation + RLM flow
-RLM (Recursive Language Model) is the long-horizon loop used by the `rlm` pipeline (`codex-orchestrator rlm "<goal>"` or `codex-orchestrator start rlm --goal "<goal>"`). Delegated runs only enter RLM when the child is launched with the `rlm` pipeline (or the rlm runner directly). In auto mode it resolves to symbolic when delegated, when `RLM_CONTEXT_PATH` is set, or when the context exceeds `RLM_SYMBOLIC_MIN_BYTES`; otherwise it stays iterative. The runner writes state to `.runs/<task-id>/cli/<run-id>/rlm/state.json` and stops when the validator passes or budgets are exhausted.
-Symbolic subcalls can optionally use collab tools. Fast path: `codex-orchestrator rlm --collab auto "<goal>"` (sets `RLM_SYMBOLIC_COLLAB=1` and implies symbolic mode). Collab requires `collab=true` in `codex features list`. Collab tool calls parsed from `codex exec --json --enable collab` are stored in `manifest.collab_tool_calls` (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, set to `0` to disable). `codex-orchestrator codex setup` remains available when you want a managed/pinned CLI path.
+RLM (Recursive Language Model) is the long-horizon loop used by the `rlm` pipeline (`codex-orchestrator rlm "<goal>"` or `codex-orchestrator start rlm --goal "<goal>"`). Delegated runs only enter RLM when the child is launched with the `rlm` pipeline (or the rlm runner directly). In auto mode it resolves to symbolic only when context is large (`RLM_SYMBOLIC_MIN_BYTES`) and an explicit context signal is present (`RLM_CONTEXT_PATH` or delegated run); otherwise it stays iterative. The runner writes state to `.runs/<task-id>/cli/<run-id>/rlm/state.json` and stops when the validator passes or budgets are exhausted.
+Symbolic subcalls can optionally use collab tools. Fast path: `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; sets `RLM_SYMBOLIC_MULTI_AGENT=1` plus legacy `RLM_SYMBOLIC_COLLAB=1` for compatibility, and implies symbolic mode). Collab requires `multi_agent=true` in `codex features list` (`collab` remains a legacy alias). Collab tool calls parsed from `codex exec --json --enable multi_agent` are stored in `manifest.collab_tool_calls` (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, set to `0` to disable). For auditable role routing, prefix spawned prompts with `[agent_type:<role>]` and set `spawn_agent.agent_type` when supported; lifecycle validation enforces prompt-role evidence and validates `agent_type` when present (`RLM_SYMBOLIC_MULTI_AGENT_ROLE_POLICY=warn|off`, legacy alias `RLM_COLLAB_ROLE_POLICY`; `RLM_SYMBOLIC_MULTI_AGENT_ALLOW_DEFAULT_ROLE=1`, legacy alias `RLM_COLLAB_ALLOW_DEFAULT_ROLE`). `codex-orchestrator codex setup` remains available when you want a managed/pinned CLI path (opt-in via `CODEX_CLI_USE_MANAGED=1`).
 ### Delegation flow
 ```mermaid
@@ -133,6 +189,8 @@ flowchart TB
 Recommended one-shot bootstrap (skills + delegation + DevTools wiring):
 ```bash
 codex-orchestrator setup --yes
+# Optional: overwrite existing bundled skills in $CODEX_HOME/skills
+# codex-orchestrator setup --yes --refresh-skills
 ```
 The release ships skills under `skills/` for downstream packaging. If you already have global skills installed, treat those as the primary reference and use bundled skills as the shipped fallback. Install bundled skills into `$CODEX_HOME/skills`:
@@ -172,15 +230,40 @@ Usage snapshot (scans local `.runs/`):
 ```bash
 codex-orchestrator doctor --usage
 ```
+`doctor --usage` prints adoption KPIs (advanced/cloud/rlm/collab/delegation coverage), and per-run `run-summary.json` now includes a `usageKpi` section plus cloud fallback metadata when preflight downgrades to MCP.
+Issue bundle logging (downstream dogfooding / repro handoff):
+```bash
+codex-orchestrator doctor --issue-log --issue-title "Observed failure" --issue-notes "what happened"
+```
+`doctor --issue-log` appends to `docs/codex-orchestrator-issues.md` (override via `--issue-log-path`) and writes a JSON bundle under `out/<resolved-task>/doctor/issue-bundles/` with doctor/cloud context (latest run context is included when available).
+Auto-capture issue bundles when runs fail:
+```bash
+codex-orchestrator start <pipeline> --auto-issue-log
+codex-orchestrator flow --task <task-id> --auto-issue-log
+```
+This captures both post-manifest run failures and setup failures that occur before a run manifest is created (for example strict repo-config enforcement).
+Cloud preflight check (without starting a pipeline):
+```bash
+codex-orchestrator doctor --cloud-preflight
+```
 ## Downstream usage cheatsheet (agent-first)
-- Bootstrap + wire everything: `codex-orchestrator setup --yes`
+- Bootstrap + wire everything: `codex-orchestrator setup --yes` (non-destructive for existing skills by default; add `--refresh-skills` to overwrite)
+- Enable required MCP servers with least privilege: `codex-orchestrator mcp enable --servers delegation --yes` (plan with `--format json`; omit `--servers` only when you intentionally want all disabled servers enabled; env/secret values are redacted in displayed command lines)
 - Low-friction docs->implementation guardrails: `codex-orchestrator flow --task <task-id>`
 - Validate + measure adoption locally: `codex-orchestrator doctor --usage --format json`
+- Capture reproducible downstream failures: `codex-orchestrator doctor --issue-log --issue-title "<title>" --issue-notes "<notes>"`
+- Auto-capture failed run issue bundles: `codex-orchestrator start <pipeline> --auto-issue-log` or `codex-orchestrator flow --task <task-id> --auto-issue-log`
 - Delegation: `codex-orchestrator doctor --apply --yes`, then enable for a Codex run with: `codex -c 'mcp_servers.delegation.enabled=true' ...`
-- Collab (symbolic RLM subagents): `codex-orchestrator rlm --collab auto "<goal>"` (requires collab feature enabled in Codex)
+- Collab (symbolic RLM subagents): `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; requires Codex `features.multi_agent=true`)
 - Cloud: set `CODEX_CLOUD_ENV_ID` (and optional `CODEX_CLOUD_BRANCH`), then run: `codex-orchestrator start <pipeline> --cloud --target <stage-id>`
+- Cloud fail-fast (avoid fallback reliance): set `CODEX_ORCHESTRATOR_CLOUD_FALLBACK=deny`
+- Repo-config fail-fast (deny packaged config fallback): set `CODEX_ORCHESTRATOR_REPO_CONFIG_REQUIRED=1` or pass `--repo-config-required`
+- Cloud status retry tuning (optional): `CODEX_CLOUD_STATUS_RETRY_LIMIT`, `CODEX_CLOUD_STATUS_RETRY_BACKOFF_MS`
 Print DevTools MCP setup guidance:
 ```bash
@@ -189,16 +272,17 @@ codex-orchestrator devtools setup
 ## Common commands
-- `codex-orchestrator start <pipeline>` — run a pipeline.
-- `codex-orchestrator flow --task <task-id>` — run `docs-review` then `implementation-gate` in sequence.
+- `codex-orchestrator start <pipeline>` — run a pipeline (add `--auto-issue-log` for automatic failure bundle capture; add `--repo-config-required` for strict repo-local config mode).
+- `codex-orchestrator flow --task <task-id>` — run `docs-review` then `implementation-gate` in sequence (supports `--auto-issue-log` and `--repo-config-required`).
 - `codex-orchestrator plan <pipeline>` — preview pipeline stages.
 - `codex-orchestrator exec <cmd>` — run a one-off command with the exec runtime.
-- `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`) into a repo.
-- `codex-orchestrator setup --yes` — install bundled skills and configure delegation + DevTools wiring.
-- `codex-orchestrator init codex --codex-cli --yes --codex-source <path>` — optionally provision a CO-managed Codex CLI binary (build-from-source default; set `CODEX_CLI_SOURCE` to avoid passing `--codex-source` every time).
+- `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`, `codex.orchestrator.json`) into a repo.
+- `codex-orchestrator setup --yes` — install bundled skills and configure delegation + DevTools wiring (add `--refresh-skills` to overwrite existing skills in `$CODEX_HOME/skills`).
+- `codex-orchestrator init codex --codex-cli --yes --codex-source <path>` — optionally provision a CO-managed Codex CLI binary (build-from-source default; set `CODEX_CLI_SOURCE` to avoid passing `--codex-source` every time, and `CODEX_CLI_USE_MANAGED=1` to route runs to it).
 - `codex-orchestrator init codex --codex-cli --yes --codex-download-url <url> --codex-download-sha256 <sha>` — opt-in to a prebuilt Codex CLI download.
-- `codex-orchestrator codex setup` — plan/apply a CO-managed Codex CLI install (optional managed/pinned path; use `--download-url` + `--download-sha256` for prebuilts).
+- `codex-orchestrator codex setup` — plan/apply a CO-managed Codex CLI install (optional managed/pinned path; use `--download-url` + `--download-sha256` for prebuilts; activate with `CODEX_CLI_USE_MANAGED=1`).
 - `codex-orchestrator delegation setup --yes` — configure delegation MCP server wiring.
+- `codex-orchestrator mcp enable --servers <csv> --yes` — enable specific disabled MCP servers from existing Codex config entries.
 - `codex-orchestrator self-check --format json` — JSON health payload.
 - `codex-orchestrator mcp serve` — Codex MCP stdio server.

package/codex.orchestrator.json ADDED

@@ -0,0 +1,448 @@
+{
+  "stageSets": {
+    "build-lint-test": [
+      {
+        "kind": "command",
+        "id": "build",
+        "title": "npm run build",
+        "command": "npm run build"
+      },
+      {
+        "kind": "command",
+        "id": "lint",
+        "title": "npm run lint",
+        "command": "npm run lint"
+      },
+      {
+        "kind": "command",
+        "id": "test",
+        "title": "npm run test",
+        "command": "npm run test"
+      }
+    ],
+    "delegation-guard-stage": [
+      {
+        "kind": "command",
+        "id": "delegation-guard",
+        "title": "Run delegation guard",
+        "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/utils/delegationGuardRunner.js\""
+      }
+    ],
+    "diagnostics-spec-guard": [
+      {
+        "kind": "command",
+        "id": "spec-guard",
+        "title": "node scripts/spec-guard.mjs --dry-run",
+        "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/utils/specGuardRunner.js\" --dry-run"
+      }
+    ],
+    "docs-review-checks": [
+      {
+        "kind": "command",
+        "id": "docs-check",
+        "title": "npm run docs:check",
+        "command": "npm run docs:check"
+      },
+      {
+        "kind": "command",
+        "id": "docs-freshness",
+        "title": "npm run docs:freshness",
+        "command": "npm run docs:freshness"
+      }
+    ],
+    "design-artifacts": [
+      {
+        "kind": "command",
+        "id": "design-spec-guard",
+        "title": "Validate specs via spec-guard",
+        "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/utils/specGuardRunner.js\" --dry-run",
+        "env": {
+          "DESIGN_PIPELINE": "1"
+        },
+        "summaryHint": "Ensures design specs are fresh before artifact write"
+      },
+      {
+        "kind": "command",
+        "id": "design-artifact-writer",
+        "title": "Persist design artifact manifests",
+        "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/write-artifacts.js\"",
+        "env": {
+          "DESIGN_PIPELINE": "1"
+        }
+      }
+    ]
+  },
+  "pipelines": [
+    {
+      "id": "diagnostics",
+      "title": "Diagnostics Pipeline",
+      "description": "Build, lint, test, and spec-guard the repository with grouped runner support.",
+      "tags": [
+        "diagnostics-primary",
+        "diagnostics-secondary"
+      ],
+      "stages": [
+        {
+          "kind": "stage-set",
+          "ref": "delegation-guard-stage"
+        },
+        {
+          "kind": "stage-set",
+          "ref": "build-lint-test"
+        },
+        {
+          "kind": "stage-set",
+          "ref": "diagnostics-spec-guard"
+        }
+      ]
+    },
+    {
+      "id": "rlm",
+      "title": "RLM Runner",
+      "description": "Runs the recursive language model loop with validator gating.",
+      "tags": [
+        "rlm"
+      ],
+      "guardrailsRequired": false,
+      "stages": [
+        {
+          "kind": "command",
+          "id": "rlm-runner",
+          "title": "Run RLM loop",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/rlmRunner.js\"",
+          "summaryHint": "RLM loop completed"
+        }
+      ]
+    },
+    {
+      "id": "implementation-gate",
+      "title": "Implementation Complete Gate",
+      "description": "Runs the required implementation validations (spec-guard, build, lint, test, docs:check, docs:freshness, diff-budget) and launches the Codex review handoff, writing a single manifest for evidence.",
+      "tags": [
+        "implementation-gate"
+      ],
+      "stages": [
+        {
+          "kind": "stage-set",
+          "ref": "delegation-guard-stage"
+        },
+        {
+          "kind": "stage-set",
+          "ref": "diagnostics-spec-guard"
+        },
+        {
+          "kind": "stage-set",
+          "ref": "build-lint-test"
+        },
+        {
+          "kind": "stage-set",
+          "ref": "docs-review-checks"
+        },
+        {
+          "kind": "command",
+          "id": "diff-budget",
+          "title": "node scripts/diff-budget.mjs",
+          "command": "node scripts/diff-budget.mjs"
+        },
+        {
+          "kind": "command",
+          "id": "review",
+          "title": "npm run review",
+          "command": "npm run review",
+          "env": {
+            "DIFF_BUDGET_STAGE": "1",
+            "CODEX_REVIEW_NON_INTERACTIVE": "1",
+            "NOTES": "Goal: implementation gate review handoff | Summary: automated prompt with manifest evidence + scope hints | Risks: review output depends on local Codex CLI capabilities"
+          }
+        }
+      ]
+    },
+    {
+      "id": "docs-review",
+      "title": "Docs Review Gate",
+      "description": "Pre-implementation docs review: spec-guard, docs:check, docs:freshness, and review (diff budget skipped).",
+      "tags": [
+        "docs-review"
+      ],
+      "stages": [
+        {
+          "kind": "stage-set",
+          "ref": "delegation-guard-stage"
+        },
+        {
+          "kind": "stage-set",
+          "ref": "diagnostics-spec-guard"
+        },
+        {
+          "kind": "stage-set",
+          "ref": "docs-review-checks"
+        },
+        {
+          "kind": "command",
+          "id": "review",
+          "title": "npm run review",
+          "command": "npm run review",
+          "env": {
+            "SKIP_DIFF_BUDGET": "1",
+            "CODEX_REVIEW_NON_INTERACTIVE": "1",
+            "NOTES": "Goal: docs review gate review handoff | Summary: automated prompt with manifest evidence + scope hints | Risks: review output depends on local Codex CLI capabilities"
+          }
+        }
+      ]
+    },
+    {
+      "id": "frontend-testing",
+      "title": "Frontend Testing",
+      "description": "Runs the frontend testing runner (DevTools off by default).",
+      "tags": [
+        "frontend-testing"
+      ],
+      "guardrailsRequired": false,
+      "stages": [
+        {
+          "kind": "command",
+          "id": "frontend-testing",
+          "title": "Run frontend testing",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/frontendTestingRunner.js\"",
+          "env": {
+            "CODEX_NON_INTERACTIVE": "1"
+          }
+        }
+      ]
+    },
+    {
+      "id": "diagnostics-with-eval",
+      "title": "Diagnostics Pipeline (with Eval Harness)",
+      "description": "Build, lint, test, run evaluation harness, and verify specs for guardrail evidence capture.",
+      "stages": [
+        {
+          "kind": "stage-set",
+          "ref": "delegation-guard-stage"
+        },
+        {
+          "kind": "stage-set",
+          "ref": "build-lint-test"
+        },
+        {
+          "kind": "command",
+          "id": "eval-test",
+          "title": "npm run eval:test",
+          "command": "npm run eval:test"
+        },
+        {
+          "kind": "stage-set",
+          "ref": "diagnostics-spec-guard"
+        }
+      ]
+    },
+    {
+      "id": "design-reference",
+      "title": "Design Reference Pipeline",
+      "description": "Extracts design reference assets, stages Storybook-ready components, and records manifest evidence.",
+      "tags": [
+        "design",
+        "reference"
+      ],
+      "stages": [
+        {
+          "kind": "stage-set",
+          "ref": "delegation-guard-stage"
+        },
+        {
+          "kind": "command",
+          "id": "design-config",
+          "title": "Resolve design configuration",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/prepare.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1"
+          }
+        },
+        {
+          "kind": "command",
+          "id": "design-extract",
+          "title": "Run Playwright design extractor",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/extract.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1"
+          }
+        },
+        {
+          "kind": "command",
+          "id": "design-reference",
+          "title": "Build motherduck reference page",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/reference.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1"
+          }
+        },
+        {
+          "kind": "command",
+          "id": "design-componentize",
+          "title": "Componentize artifacts via packages/design-system",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/componentize.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1"
+          }
+        },
+        {
+          "kind": "command",
+          "id": "design-advanced-assets",
+          "title": "Generate advanced design assets",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/advanced-assets.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1"
+          },
+          "allowFailure": true,
+          "summaryHint": "Optional Framer Motion and FFmpeg assets"
+        },
+        {
+          "kind": "command",
+          "id": "design-visual-regression",
+          "title": "Run visual regression tests",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/visual-regression.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1"
+          },
+          "allowFailure": true,
+          "summaryHint": "Visual regression diffs stored under design/visual-regression/"
+        },
+        {
+          "kind": "stage-set",
+          "ref": "design-artifacts"
+        }
+      ]
+    },
+    {
+      "id": "hi-fi-design-toolkit",
+      "title": "Hi-Fi Design Toolkit",
+      "description": "Runs the hi-fi design toolkit pipeline to extract, tokenize, self-correct, and publish design artifacts.",
+      "tags": [
+        "design",
+        "hi-fi"
+      ],
+      "stages": [
+        {
+          "kind": "stage-set",
+          "ref": "delegation-guard-stage"
+        },
+        {
+          "kind": "command",
+          "id": "design-config",
+          "title": "Resolve design configuration",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/prepare.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1",
+            "DESIGN_TOOLKIT": "1"
+          }
+        },
+        {
+          "kind": "command",
+          "id": "design-toolkit-extract",
+          "title": "Wrap external toolkit extractor",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/extract.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1",
+            "DESIGN_TOOLKIT": "1"
+          }
+        },
+        {
+          "kind": "command",
+          "id": "design-toolkit-tokens",
+          "title": "Generate tokens and style guides",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/tokens.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1",
+            "DESIGN_TOOLKIT": "1"
+          }
+        },
+        {
+          "kind": "command",
+          "id": "design-toolkit-reference",
+          "title": "Build reference pages + self-correction",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/reference.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1",
+            "DESIGN_TOOLKIT": "1"
+          }
+        },
+        {
+          "kind": "command",
+          "id": "design-advanced-assets",
+          "title": "Generate advanced design assets",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/advanced-assets.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1",
+            "DESIGN_TOOLKIT": "1"
+          },
+          "allowFailure": true,
+          "summaryHint": "Optional motion capture via Framer Motion + FFmpeg"
+        },
+        {
+          "kind": "command",
+          "id": "design-toolkit-publish",
+          "title": "Publish toolkit outputs to packages/design-system",
+          "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/publish.js\"",
+          "env": {
+            "DESIGN_PIPELINE": "1",
+            "DESIGN_TOOLKIT": "1"
+          }
+        },
+        {
+          "kind": "stage-set",
+          "ref": "design-artifacts"
+        }
+      ]
+    },
+    {
+      "id": "tfgrpo-learning",
+      "title": "TF-GRPO Learning Loop",
+      "description": "Run three TF-GRPO epochs (~100 samples, train temp 0.7, eval temp 0.3, G>=2) and capture guardrail evidence.",
+      "tags": [
+        "tfgrpo-learning",
+        "tfgrpo-learning-secondary"
+      ],
+      "stages": [
+        {
+          "kind": "stage-set",
+          "ref": "delegation-guard-stage"
+        },
+        {
+          "kind": "command",
+          "id": "tfgrpo-loop",
+          "title": "tfgrpo learning schedule (3 epochs, 100 samples)",
+          "command": "TFGRPO_GROUP_SIZE=2 TFGRPO_REWARDERS=gt,relative TFGRPO_EPOCHS=3 TFGRPO_SAMPLE_SIZE=100 TFGRPO_TRAIN_TEMP=0.7 TFGRPO_EVAL_TEMP=0.3 node --loader ts-node/esm evaluation/harness/scripts/tfgrpo-runner.ts"
+        },
+        {
+          "kind": "stage-set",
+          "ref": "diagnostics-spec-guard"
+        }
+      ]
+    },
+    {
+      "id": "pause-eval",
+      "title": "Pause/Resume Eval",
+      "description": "Utility pipeline for long pause/resume evaluations (sleep + resume marker).",
+      "tags": [
+        "eval",
+        "pause-resume"
+      ],
+      "guardrailsRequired": false,
+      "stages": [
+        {
+          "kind": "command",
+          "id": "pause-window",
+          "title": "Sleep to allow pause",
+          "command": "sleep 120",
+          "summaryHint": "Pause window elapsed"
+        },
+        {
+          "kind": "command",
+          "id": "resume-marker",
+          "title": "Resume marker",
+          "command": "node -e \"console.log('resume-ok')\"",
+          "summaryHint": "Resume marker written"
+        }
+      ]
+    }
+  ]
+}