@kbediako/codex-orchestrator 0.1.32 → 0.1.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.md +96 -12
  2. package/codex.orchestrator.json +448 -0
  3. package/dist/bin/codex-orchestrator.js +703 -136
  4. package/dist/orchestrator/src/cli/codexCliSetup.js +1 -0
  5. package/dist/orchestrator/src/cli/config/repoConfigPolicy.js +22 -0
  6. package/dist/orchestrator/src/cli/config/userConfig.js +20 -9
  7. package/dist/orchestrator/src/cli/delegationSetup.js +111 -14
  8. package/dist/orchestrator/src/cli/doctor.js +264 -8
  9. package/dist/orchestrator/src/cli/doctorIssueLog.js +350 -0
  10. package/dist/orchestrator/src/cli/doctorUsage.js +150 -8
  11. package/dist/orchestrator/src/cli/init.js +24 -1
  12. package/dist/orchestrator/src/cli/mcpEnable.js +392 -0
  13. package/dist/orchestrator/src/cli/orchestrator.js +180 -5
  14. package/dist/orchestrator/src/cli/rlmRunner.js +289 -35
  15. package/dist/orchestrator/src/cli/run/manifest.js +31 -6
  16. package/dist/orchestrator/src/cli/services/commandRunner.js +10 -2
  17. package/dist/orchestrator/src/cli/services/pipelineResolver.js +70 -18
  18. package/dist/orchestrator/src/cli/services/runPreparation.js +2 -0
  19. package/dist/orchestrator/src/cli/services/runSummaryWriter.js +35 -0
  20. package/dist/orchestrator/src/cli/skills.js +3 -8
  21. package/dist/orchestrator/src/cli/utils/advancedAutopilot.js +114 -0
  22. package/dist/orchestrator/src/cli/utils/codexCli.js +21 -0
  23. package/dist/orchestrator/src/cli/utils/commandPreview.js +10 -0
  24. package/dist/orchestrator/src/cli/utils/delegationGuardRunner.js +85 -8
  25. package/dist/orchestrator/src/cli/utils/devtools.js +2 -1
  26. package/dist/orchestrator/src/cli/utils/specGuardRunner.js +79 -19
  27. package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js +46 -6
  28. package/dist/orchestrator/src/control-plane/request-builder.js +9 -8
  29. package/dist/scripts/lib/pr-watch-merge.js +367 -3
  30. package/docs/README.md +17 -11
  31. package/package.json +2 -1
  32. package/schemas/manifest.json +27 -0
  33. package/skills/collab-deliberation/SKILL.md +6 -0
  34. package/skills/collab-evals/SKILL.md +4 -0
  35. package/skills/collab-subagents-first/SKILL.md +29 -7
  36. package/skills/delegation-usage/DELEGATION_GUIDE.md +31 -5
  37. package/skills/delegation-usage/SKILL.md +29 -4
  38. package/skills/elegance-review/SKILL.md +14 -3
  39. package/skills/standalone-review/SKILL.md +8 -2
  40. package/templates/README.md +1 -1
  41. package/templates/codex/AGENTS.md +12 -1
package/README.md CHANGED
@@ -47,7 +47,7 @@ Use this when you want Codex to drive work inside another repo with the CO defau
47
47
  ```bash
48
48
  codex-orchestrator init codex --cwd /path/to/repo
49
49
  ```
50
- One-shot (templates + CO-managed Codex CLI):
50
+ One-shot (templates + optional CO-managed Codex CLI install):
51
51
  ```bash
52
52
  codex-orchestrator init codex --codex-cli --yes
53
53
  ```
@@ -59,7 +59,11 @@ Use this when you want Codex to drive work inside another repo with the CO defau
59
59
  ```bash
60
60
  codex-orchestrator codex setup
61
61
  ```
62
- Use this when you want a pinned binary, build-from-source behavior, or a custom fork. Stock `codex` works for default flows.
62
+ Use this when you want a pinned binary, build-from-source behavior, or a custom fork.
63
+ Stock/global `codex` is still the default selection; activate managed binary routing with:
64
+ ```bash
65
+ export CODEX_CLI_USE_MANAGED=1
66
+ ```
63
67
  4. Optional (fast refresh helper for downstream users):
64
68
  ```bash
65
69
  scripts/codex-cli-refresh.sh --repo /path/to/codex --align-only
@@ -81,6 +85,58 @@ codex -c 'mcp_servers.delegation.enabled=true' ...
81
85
  ```
82
86
  `delegate-server` is the canonical name; `delegation-server` is supported as an alias (older docs may use it).
83
87
 
88
+ ## Agent role defaults (recommended)
89
+
90
+ Codex built-ins are `default`, `explorer`, and `worker`. `researcher` is user-defined.
91
+ - `spawn_agent` defaults to `default` when `agent_type` is omitted, so always set `agent_type` explicitly when using collab subagents.
92
+
93
+ Built-in `explorer` in Codex currently uses `gpt-5.1-codex-mini` with `medium` reasoning unless you override it. If you want latest-codex defaults end-to-end, add role overrides in `~/.codex/config.toml`:
94
+
95
+ ```toml
96
+ model = "gpt-5.3-codex"
97
+ model_reasoning_effort = "xhigh"
98
+
99
+ [agents]
100
+ max_threads = 8
101
+
102
+ [agents.explorer]
103
+ description = "Explorer role override (no config_file): keep built-in explorer on top-level model defaults."
104
+
105
+ [agents.explorer_fast]
106
+ description = "Fast explorer (spark text-only)."
107
+ config_file = "/absolute/path/to/.codex/agents/explorer-fast.toml"
108
+
109
+ [agents.explorer_detailed]
110
+ description = "Detailed explorer."
111
+ config_file = "/absolute/path/to/.codex/agents/explorer-detailed.toml"
112
+
113
+ [agents.worker_complex]
114
+ description = "Complex worker role."
115
+ config_file = "/absolute/path/to/.codex/agents/worker-complex.toml"
116
+ ```
117
+
118
+ ```toml
119
+ # ~/.codex/agents/explorer-fast.toml
120
+ model = "gpt-5.3-codex-spark"
121
+ model_reasoning_effort = "xhigh"
122
+ ```
123
+
124
+ ```toml
125
+ # ~/.codex/agents/explorer-detailed.toml
126
+ model = "gpt-5.3-codex"
127
+ model_reasoning_effort = "high"
128
+ ```
129
+
130
+ ```toml
131
+ # ~/.codex/agents/worker-complex.toml
132
+ model = "gpt-5.3-codex"
133
+ model_reasoning_effort = "xhigh"
134
+ ```
135
+
136
+ Caveats:
137
+ - `gpt-5.3-codex-spark` is text-only (no image inputs). Keep it for fast search/synthesis.
138
+ - Use `max_threads = 8` as a balanced default; only move to `12` after verifying your machine/tooling stays stable under higher concurrency.
139
+
84
140
  Delegation guard profile:
85
141
  - `CODEX_ORCHESTRATOR_GUARD_PROFILE=auto` (default): strict in CO-style repos, warn in lightweight repos.
86
142
  - Set `CODEX_ORCHESTRATOR_GUARD_PROFILE=warn` for ad-hoc/no-task-id runs.
@@ -88,8 +144,8 @@ Delegation guard profile:
88
144
 
89
145
  ## Delegation + RLM flow
90
146
 
91
- RLM (Recursive Language Model) is the long-horizon loop used by the `rlm` pipeline (`codex-orchestrator rlm "<goal>"` or `codex-orchestrator start rlm --goal "<goal>"`). Delegated runs only enter RLM when the child is launched with the `rlm` pipeline (or the rlm runner directly). In auto mode it resolves to symbolic when delegated, when `RLM_CONTEXT_PATH` is set, or when the context exceeds `RLM_SYMBOLIC_MIN_BYTES`; otherwise it stays iterative. The runner writes state to `.runs/<task-id>/cli/<run-id>/rlm/state.json` and stops when the validator passes or budgets are exhausted.
92
- Symbolic subcalls can optionally use collab tools. Fast path: `codex-orchestrator rlm --collab auto "<goal>"` (sets `RLM_SYMBOLIC_COLLAB=1` and implies symbolic mode). Collab requires `collab=true` in `codex features list`. Collab tool calls parsed from `codex exec --json --enable collab` are stored in `manifest.collab_tool_calls` (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, set to `0` to disable). `codex-orchestrator codex setup` remains available when you want a managed/pinned CLI path.
147
+ RLM (Recursive Language Model) is the long-horizon loop used by the `rlm` pipeline (`codex-orchestrator rlm "<goal>"` or `codex-orchestrator start rlm --goal "<goal>"`). Delegated runs only enter RLM when the child is launched with the `rlm` pipeline (or the rlm runner directly). In auto mode it resolves to symbolic only when context is large (`RLM_SYMBOLIC_MIN_BYTES`) and an explicit context signal is present (`RLM_CONTEXT_PATH` or delegated run); otherwise it stays iterative. The runner writes state to `.runs/<task-id>/cli/<run-id>/rlm/state.json` and stops when the validator passes or budgets are exhausted.
148
+ Symbolic subcalls can optionally use collab tools. Fast path: `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; sets `RLM_SYMBOLIC_MULTI_AGENT=1` plus legacy `RLM_SYMBOLIC_COLLAB=1` for compatibility, and implies symbolic mode). Collab requires `multi_agent=true` in `codex features list` (`collab` remains a legacy alias). Collab tool calls parsed from `codex exec --json --enable multi_agent` are stored in `manifest.collab_tool_calls` (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, set to `0` to disable). For auditable role routing, prefix spawned prompts with `[agent_type:<role>]` and set `spawn_agent.agent_type` when supported; lifecycle validation enforces prompt-role evidence and validates `agent_type` when present (`RLM_SYMBOLIC_MULTI_AGENT_ROLE_POLICY=warn|off`, legacy alias `RLM_COLLAB_ROLE_POLICY`; `RLM_SYMBOLIC_MULTI_AGENT_ALLOW_DEFAULT_ROLE=1`, legacy alias `RLM_COLLAB_ALLOW_DEFAULT_ROLE`). `codex-orchestrator codex setup` remains available when you want a managed/pinned CLI path (opt-in via `CODEX_CLI_USE_MANAGED=1`).
93
149
 
94
150
  ### Delegation flow
95
151
  ```mermaid
@@ -133,6 +189,8 @@ flowchart TB
133
189
  Recommended one-shot bootstrap (skills + delegation + DevTools wiring):
134
190
  ```bash
135
191
  codex-orchestrator setup --yes
192
+ # Optional: overwrite existing bundled skills in $CODEX_HOME/skills
193
+ # codex-orchestrator setup --yes --refresh-skills
136
194
  ```
137
195
 
138
196
  The release ships skills under `skills/` for downstream packaging. If you already have global skills installed, treat those as the primary reference and use bundled skills as the shipped fallback. Install bundled skills into `$CODEX_HOME/skills`:
@@ -172,15 +230,40 @@ Usage snapshot (scans local `.runs/`):
172
230
  ```bash
173
231
  codex-orchestrator doctor --usage
174
232
  ```
233
+ `doctor --usage` prints adoption KPIs (advanced/cloud/rlm/collab/delegation coverage), and per-run `run-summary.json` now includes a `usageKpi` section plus cloud fallback metadata when preflight downgrades to MCP.
234
+
235
+ Issue bundle logging (downstream dogfooding / repro handoff):
236
+ ```bash
237
+ codex-orchestrator doctor --issue-log --issue-title "Observed failure" --issue-notes "what happened"
238
+ ```
239
+ `doctor --issue-log` appends to `docs/codex-orchestrator-issues.md` (override via `--issue-log-path`) and writes a JSON bundle under `out/<resolved-task>/doctor/issue-bundles/` with doctor/cloud context (latest run context is included when available).
240
+
241
+ Auto-capture issue bundles when runs fail:
242
+ ```bash
243
+ codex-orchestrator start <pipeline> --auto-issue-log
244
+ codex-orchestrator flow --task <task-id> --auto-issue-log
245
+ ```
246
+ This captures both post-manifest run failures and setup failures that occur before a run manifest is created (for example strict repo-config enforcement).
247
+
248
+ Cloud preflight check (without starting a pipeline):
249
+ ```bash
250
+ codex-orchestrator doctor --cloud-preflight
251
+ ```
175
252
 
176
253
  ## Downstream usage cheatsheet (agent-first)
177
254
 
178
- - Bootstrap + wire everything: `codex-orchestrator setup --yes`
255
+ - Bootstrap + wire everything: `codex-orchestrator setup --yes` (non-destructive for existing skills by default; add `--refresh-skills` to overwrite)
256
+ - Enable required MCP servers with least privilege: `codex-orchestrator mcp enable --servers delegation --yes` (plan with `--format json`; omit `--servers` only when you intentionally want all disabled servers enabled; env/secret values are redacted in displayed command lines)
179
257
  - Low-friction docs->implementation guardrails: `codex-orchestrator flow --task <task-id>`
180
258
  - Validate + measure adoption locally: `codex-orchestrator doctor --usage --format json`
259
+ - Capture reproducible downstream failures: `codex-orchestrator doctor --issue-log --issue-title "<title>" --issue-notes "<notes>"`
260
+ - Auto-capture failed run issue bundles: `codex-orchestrator start <pipeline> --auto-issue-log` or `codex-orchestrator flow --task <task-id> --auto-issue-log`
181
261
  - Delegation: `codex-orchestrator doctor --apply --yes`, then enable for a Codex run with: `codex -c 'mcp_servers.delegation.enabled=true' ...`
182
- - Collab (symbolic RLM subagents): `codex-orchestrator rlm --collab auto "<goal>"` (requires collab feature enabled in Codex)
262
+ - Collab (symbolic RLM subagents): `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; requires Codex `features.multi_agent=true`)
183
263
  - Cloud: set `CODEX_CLOUD_ENV_ID` (and optional `CODEX_CLOUD_BRANCH`), then run: `codex-orchestrator start <pipeline> --cloud --target <stage-id>`
264
+ - Cloud fail-fast (avoid fallback reliance): set `CODEX_ORCHESTRATOR_CLOUD_FALLBACK=deny`
265
+ - Repo-config fail-fast (deny packaged config fallback): set `CODEX_ORCHESTRATOR_REPO_CONFIG_REQUIRED=1` or pass `--repo-config-required`
266
+ - Cloud status retry tuning (optional): `CODEX_CLOUD_STATUS_RETRY_LIMIT`, `CODEX_CLOUD_STATUS_RETRY_BACKOFF_MS`
184
267
 
185
268
  Print DevTools MCP setup guidance:
186
269
  ```bash
@@ -189,16 +272,17 @@ codex-orchestrator devtools setup
189
272
 
190
273
  ## Common commands
191
274
 
192
- - `codex-orchestrator start <pipeline>` — run a pipeline.
193
- - `codex-orchestrator flow --task <task-id>` — run `docs-review` then `implementation-gate` in sequence.
275
+ - `codex-orchestrator start <pipeline>` — run a pipeline (add `--auto-issue-log` for automatic failure bundle capture; add `--repo-config-required` for strict repo-local config mode).
276
+ - `codex-orchestrator flow --task <task-id>` — run `docs-review` then `implementation-gate` in sequence (supports `--auto-issue-log` and `--repo-config-required`).
194
277
  - `codex-orchestrator plan <pipeline>` — preview pipeline stages.
195
278
  - `codex-orchestrator exec <cmd>` — run a one-off command with the exec runtime.
196
- - `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`) into a repo.
197
- - `codex-orchestrator setup --yes` — install bundled skills and configure delegation + DevTools wiring.
198
- - `codex-orchestrator init codex --codex-cli --yes --codex-source <path>` — optionally provision a CO-managed Codex CLI binary (build-from-source default; set `CODEX_CLI_SOURCE` to avoid passing `--codex-source` every time).
279
+ - `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`, `codex.orchestrator.json`) into a repo.
280
+ - `codex-orchestrator setup --yes` — install bundled skills and configure delegation + DevTools wiring (add `--refresh-skills` to overwrite existing skills in `$CODEX_HOME/skills`).
281
+ - `codex-orchestrator init codex --codex-cli --yes --codex-source <path>` — optionally provision a CO-managed Codex CLI binary (build-from-source default; set `CODEX_CLI_SOURCE` to avoid passing `--codex-source` every time, and `CODEX_CLI_USE_MANAGED=1` to route runs to it).
199
282
  - `codex-orchestrator init codex --codex-cli --yes --codex-download-url <url> --codex-download-sha256 <sha>` — opt-in to a prebuilt Codex CLI download.
200
- - `codex-orchestrator codex setup` — plan/apply a CO-managed Codex CLI install (optional managed/pinned path; use `--download-url` + `--download-sha256` for prebuilts).
283
+ - `codex-orchestrator codex setup` — plan/apply a CO-managed Codex CLI install (optional managed/pinned path; use `--download-url` + `--download-sha256` for prebuilts; activate with `CODEX_CLI_USE_MANAGED=1`).
201
284
  - `codex-orchestrator delegation setup --yes` — configure delegation MCP server wiring.
285
+ - `codex-orchestrator mcp enable --servers <csv> --yes` — enable specific disabled MCP servers from existing Codex config entries.
202
286
  - `codex-orchestrator self-check --format json` — JSON health payload.
203
287
  - `codex-orchestrator mcp serve` — Codex MCP stdio server.
204
288
 
package/codex.orchestrator.json ADDED
@@ -0,0 +1,448 @@
1
+ {
2
+ "stageSets": {
3
+ "build-lint-test": [
4
+ {
5
+ "kind": "command",
6
+ "id": "build",
7
+ "title": "npm run build",
8
+ "command": "npm run build"
9
+ },
10
+ {
11
+ "kind": "command",
12
+ "id": "lint",
13
+ "title": "npm run lint",
14
+ "command": "npm run lint"
15
+ },
16
+ {
17
+ "kind": "command",
18
+ "id": "test",
19
+ "title": "npm run test",
20
+ "command": "npm run test"
21
+ }
22
+ ],
23
+ "delegation-guard-stage": [
24
+ {
25
+ "kind": "command",
26
+ "id": "delegation-guard",
27
+ "title": "Run delegation guard",
28
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/utils/delegationGuardRunner.js\""
29
+ }
30
+ ],
31
+ "diagnostics-spec-guard": [
32
+ {
33
+ "kind": "command",
34
+ "id": "spec-guard",
35
+ "title": "node scripts/spec-guard.mjs --dry-run",
36
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/utils/specGuardRunner.js\" --dry-run"
37
+ }
38
+ ],
39
+ "docs-review-checks": [
40
+ {
41
+ "kind": "command",
42
+ "id": "docs-check",
43
+ "title": "npm run docs:check",
44
+ "command": "npm run docs:check"
45
+ },
46
+ {
47
+ "kind": "command",
48
+ "id": "docs-freshness",
49
+ "title": "npm run docs:freshness",
50
+ "command": "npm run docs:freshness"
51
+ }
52
+ ],
53
+ "design-artifacts": [
54
+ {
55
+ "kind": "command",
56
+ "id": "design-spec-guard",
57
+ "title": "Validate specs via spec-guard",
58
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/utils/specGuardRunner.js\" --dry-run",
59
+ "env": {
60
+ "DESIGN_PIPELINE": "1"
61
+ },
62
+ "summaryHint": "Ensures design specs are fresh before artifact write"
63
+ },
64
+ {
65
+ "kind": "command",
66
+ "id": "design-artifact-writer",
67
+ "title": "Persist design artifact manifests",
68
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/write-artifacts.js\"",
69
+ "env": {
70
+ "DESIGN_PIPELINE": "1"
71
+ }
72
+ }
73
+ ]
74
+ },
75
+ "pipelines": [
76
+ {
77
+ "id": "diagnostics",
78
+ "title": "Diagnostics Pipeline",
79
+ "description": "Build, lint, test, and spec-guard the repository with grouped runner support.",
80
+ "tags": [
81
+ "diagnostics-primary",
82
+ "diagnostics-secondary"
83
+ ],
84
+ "stages": [
85
+ {
86
+ "kind": "stage-set",
87
+ "ref": "delegation-guard-stage"
88
+ },
89
+ {
90
+ "kind": "stage-set",
91
+ "ref": "build-lint-test"
92
+ },
93
+ {
94
+ "kind": "stage-set",
95
+ "ref": "diagnostics-spec-guard"
96
+ }
97
+ ]
98
+ },
99
+ {
100
+ "id": "rlm",
101
+ "title": "RLM Runner",
102
+ "description": "Runs the recursive language model loop with validator gating.",
103
+ "tags": [
104
+ "rlm"
105
+ ],
106
+ "guardrailsRequired": false,
107
+ "stages": [
108
+ {
109
+ "kind": "command",
110
+ "id": "rlm-runner",
111
+ "title": "Run RLM loop",
112
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/rlmRunner.js\"",
113
+ "summaryHint": "RLM loop completed"
114
+ }
115
+ ]
116
+ },
117
+ {
118
+ "id": "implementation-gate",
119
+ "title": "Implementation Complete Gate",
120
+ "description": "Runs the required implementation validations (spec-guard, build, lint, test, docs:check, docs:freshness, diff-budget) and launches the Codex review handoff, writing a single manifest for evidence.",
121
+ "tags": [
122
+ "implementation-gate"
123
+ ],
124
+ "stages": [
125
+ {
126
+ "kind": "stage-set",
127
+ "ref": "delegation-guard-stage"
128
+ },
129
+ {
130
+ "kind": "stage-set",
131
+ "ref": "diagnostics-spec-guard"
132
+ },
133
+ {
134
+ "kind": "stage-set",
135
+ "ref": "build-lint-test"
136
+ },
137
+ {
138
+ "kind": "stage-set",
139
+ "ref": "docs-review-checks"
140
+ },
141
+ {
142
+ "kind": "command",
143
+ "id": "diff-budget",
144
+ "title": "node scripts/diff-budget.mjs",
145
+ "command": "node scripts/diff-budget.mjs"
146
+ },
147
+ {
148
+ "kind": "command",
149
+ "id": "review",
150
+ "title": "npm run review",
151
+ "command": "npm run review",
152
+ "env": {
153
+ "DIFF_BUDGET_STAGE": "1",
154
+ "CODEX_REVIEW_NON_INTERACTIVE": "1",
155
+ "NOTES": "Goal: implementation gate review handoff | Summary: automated prompt with manifest evidence + scope hints | Risks: review output depends on local Codex CLI capabilities"
156
+ }
157
+ }
158
+ ]
159
+ },
160
+ {
161
+ "id": "docs-review",
162
+ "title": "Docs Review Gate",
163
+ "description": "Pre-implementation docs review: spec-guard, docs:check, docs:freshness, and review (diff budget skipped).",
164
+ "tags": [
165
+ "docs-review"
166
+ ],
167
+ "stages": [
168
+ {
169
+ "kind": "stage-set",
170
+ "ref": "delegation-guard-stage"
171
+ },
172
+ {
173
+ "kind": "stage-set",
174
+ "ref": "diagnostics-spec-guard"
175
+ },
176
+ {
177
+ "kind": "stage-set",
178
+ "ref": "docs-review-checks"
179
+ },
180
+ {
181
+ "kind": "command",
182
+ "id": "review",
183
+ "title": "npm run review",
184
+ "command": "npm run review",
185
+ "env": {
186
+ "SKIP_DIFF_BUDGET": "1",
187
+ "CODEX_REVIEW_NON_INTERACTIVE": "1",
188
+ "NOTES": "Goal: docs review gate review handoff | Summary: automated prompt with manifest evidence + scope hints | Risks: review output depends on local Codex CLI capabilities"
189
+ }
190
+ }
191
+ ]
192
+ },
193
+ {
194
+ "id": "frontend-testing",
195
+ "title": "Frontend Testing",
196
+ "description": "Runs the frontend testing runner (DevTools off by default).",
197
+ "tags": [
198
+ "frontend-testing"
199
+ ],
200
+ "guardrailsRequired": false,
201
+ "stages": [
202
+ {
203
+ "kind": "command",
204
+ "id": "frontend-testing",
205
+ "title": "Run frontend testing",
206
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/frontendTestingRunner.js\"",
207
+ "env": {
208
+ "CODEX_NON_INTERACTIVE": "1"
209
+ }
210
+ }
211
+ ]
212
+ },
213
+ {
214
+ "id": "diagnostics-with-eval",
215
+ "title": "Diagnostics Pipeline (with Eval Harness)",
216
+ "description": "Build, lint, test, run evaluation harness, and verify specs for guardrail evidence capture.",
217
+ "stages": [
218
+ {
219
+ "kind": "stage-set",
220
+ "ref": "delegation-guard-stage"
221
+ },
222
+ {
223
+ "kind": "stage-set",
224
+ "ref": "build-lint-test"
225
+ },
226
+ {
227
+ "kind": "command",
228
+ "id": "eval-test",
229
+ "title": "npm run eval:test",
230
+ "command": "npm run eval:test"
231
+ },
232
+ {
233
+ "kind": "stage-set",
234
+ "ref": "diagnostics-spec-guard"
235
+ }
236
+ ]
237
+ },
238
+ {
239
+ "id": "design-reference",
240
+ "title": "Design Reference Pipeline",
241
+ "description": "Extracts design reference assets, stages Storybook-ready components, and records manifest evidence.",
242
+ "tags": [
243
+ "design",
244
+ "reference"
245
+ ],
246
+ "stages": [
247
+ {
248
+ "kind": "stage-set",
249
+ "ref": "delegation-guard-stage"
250
+ },
251
+ {
252
+ "kind": "command",
253
+ "id": "design-config",
254
+ "title": "Resolve design configuration",
255
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/prepare.js\"",
256
+ "env": {
257
+ "DESIGN_PIPELINE": "1"
258
+ }
259
+ },
260
+ {
261
+ "kind": "command",
262
+ "id": "design-extract",
263
+ "title": "Run Playwright design extractor",
264
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/extract.js\"",
265
+ "env": {
266
+ "DESIGN_PIPELINE": "1"
267
+ }
268
+ },
269
+ {
270
+ "kind": "command",
271
+ "id": "design-reference",
272
+ "title": "Build motherduck reference page",
273
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/reference.js\"",
274
+ "env": {
275
+ "DESIGN_PIPELINE": "1"
276
+ }
277
+ },
278
+ {
279
+ "kind": "command",
280
+ "id": "design-componentize",
281
+ "title": "Componentize artifacts via packages/design-system",
282
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/componentize.js\"",
283
+ "env": {
284
+ "DESIGN_PIPELINE": "1"
285
+ }
286
+ },
287
+ {
288
+ "kind": "command",
289
+ "id": "design-advanced-assets",
290
+ "title": "Generate advanced design assets",
291
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/advanced-assets.js\"",
292
+ "env": {
293
+ "DESIGN_PIPELINE": "1"
294
+ },
295
+ "allowFailure": true,
296
+ "summaryHint": "Optional Framer Motion and FFmpeg assets"
297
+ },
298
+ {
299
+ "kind": "command",
300
+ "id": "design-visual-regression",
301
+ "title": "Run visual regression tests",
302
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/visual-regression.js\"",
303
+ "env": {
304
+ "DESIGN_PIPELINE": "1"
305
+ },
306
+ "allowFailure": true,
307
+ "summaryHint": "Visual regression diffs stored under design/visual-regression/"
308
+ },
309
+ {
310
+ "kind": "stage-set",
311
+ "ref": "design-artifacts"
312
+ }
313
+ ]
314
+ },
315
+ {
316
+ "id": "hi-fi-design-toolkit",
317
+ "title": "Hi-Fi Design Toolkit",
318
+ "description": "Runs the hi-fi design toolkit pipeline to extract, tokenize, self-correct, and publish design artifacts.",
319
+ "tags": [
320
+ "design",
321
+ "hi-fi"
322
+ ],
323
+ "stages": [
324
+ {
325
+ "kind": "stage-set",
326
+ "ref": "delegation-guard-stage"
327
+ },
328
+ {
329
+ "kind": "command",
330
+ "id": "design-config",
331
+ "title": "Resolve design configuration",
332
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/prepare.js\"",
333
+ "env": {
334
+ "DESIGN_PIPELINE": "1",
335
+ "DESIGN_TOOLKIT": "1"
336
+ }
337
+ },
338
+ {
339
+ "kind": "command",
340
+ "id": "design-toolkit-extract",
341
+ "title": "Wrap external toolkit extractor",
342
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/extract.js\"",
343
+ "env": {
344
+ "DESIGN_PIPELINE": "1",
345
+ "DESIGN_TOOLKIT": "1"
346
+ }
347
+ },
348
+ {
349
+ "kind": "command",
350
+ "id": "design-toolkit-tokens",
351
+ "title": "Generate tokens and style guides",
352
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/tokens.js\"",
353
+ "env": {
354
+ "DESIGN_PIPELINE": "1",
355
+ "DESIGN_TOOLKIT": "1"
356
+ }
357
+ },
358
+ {
359
+ "kind": "command",
360
+ "id": "design-toolkit-reference",
361
+ "title": "Build reference pages + self-correction",
362
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/reference.js\"",
363
+ "env": {
364
+ "DESIGN_PIPELINE": "1",
365
+ "DESIGN_TOOLKIT": "1"
366
+ }
367
+ },
368
+ {
369
+ "kind": "command",
370
+ "id": "design-advanced-assets",
371
+ "title": "Generate advanced design assets",
372
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/advanced-assets.js\"",
373
+ "env": {
374
+ "DESIGN_PIPELINE": "1",
375
+ "DESIGN_TOOLKIT": "1"
376
+ },
377
+ "allowFailure": true,
378
+ "summaryHint": "Optional motion capture via Framer Motion + FFmpeg"
379
+ },
380
+ {
381
+ "kind": "command",
382
+ "id": "design-toolkit-publish",
383
+ "title": "Publish toolkit outputs to packages/design-system",
384
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/publish.js\"",
385
+ "env": {
386
+ "DESIGN_PIPELINE": "1",
387
+ "DESIGN_TOOLKIT": "1"
388
+ }
389
+ },
390
+ {
391
+ "kind": "stage-set",
392
+ "ref": "design-artifacts"
393
+ }
394
+ ]
395
+ },
396
+ {
397
+ "id": "tfgrpo-learning",
398
+ "title": "TF-GRPO Learning Loop",
399
+ "description": "Run three TF-GRPO epochs (~100 samples, train temp 0.7, eval temp 0.3, G>=2) and capture guardrail evidence.",
400
+ "tags": [
401
+ "tfgrpo-learning",
402
+ "tfgrpo-learning-secondary"
403
+ ],
404
+ "stages": [
405
+ {
406
+ "kind": "stage-set",
407
+ "ref": "delegation-guard-stage"
408
+ },
409
+ {
410
+ "kind": "command",
411
+ "id": "tfgrpo-loop",
412
+ "title": "tfgrpo learning schedule (3 epochs, 100 samples)",
413
+ "command": "TFGRPO_GROUP_SIZE=2 TFGRPO_REWARDERS=gt,relative TFGRPO_EPOCHS=3 TFGRPO_SAMPLE_SIZE=100 TFGRPO_TRAIN_TEMP=0.7 TFGRPO_EVAL_TEMP=0.3 node --loader ts-node/esm evaluation/harness/scripts/tfgrpo-runner.ts"
414
+ },
415
+ {
416
+ "kind": "stage-set",
417
+ "ref": "diagnostics-spec-guard"
418
+ }
419
+ ]
420
+ },
421
+ {
422
+ "id": "pause-eval",
423
+ "title": "Pause/Resume Eval",
424
+ "description": "Utility pipeline for long pause/resume evaluations (sleep + resume marker).",
425
+ "tags": [
426
+ "eval",
427
+ "pause-resume"
428
+ ],
429
+ "guardrailsRequired": false,
430
+ "stages": [
431
+ {
432
+ "kind": "command",
433
+ "id": "pause-window",
434
+ "title": "Sleep to allow pause",
435
+ "command": "sleep 120",
436
+ "summaryHint": "Pause window elapsed"
437
+ },
438
+ {
439
+ "kind": "command",
440
+ "id": "resume-marker",
441
+ "title": "Resume marker",
442
+ "command": "node -e \"console.log('resume-ok')\"",
443
+ "summaryHint": "Resume marker written"
444
+ }
445
+ ]
446
+ }
447
+ ]
448
+ }