@kbediako/codex-orchestrator 0.1.34 → 0.1.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -25
- package/codex.orchestrator.json +39 -0
- package/dist/bin/codex-orchestrator.js +243 -32
- package/dist/orchestrator/src/cli/codexDefaultsSetup.js +274 -0
- package/dist/orchestrator/src/cli/doctor.js +132 -1
- package/dist/orchestrator/src/cli/doctorIssueLog.js +42 -16
- package/dist/orchestrator/src/cli/doctorUsage.js +4 -4
- package/dist/orchestrator/src/cli/rlm/alignment.js +956 -0
- package/dist/orchestrator/src/cli/rlm/symbolic.js +96 -0
- package/dist/orchestrator/src/cli/rlmRunner.js +148 -1
- package/dist/scripts/lib/pr-watch-merge.js +170 -9
- package/dist/scripts/run-review.js +1983 -0
- package/docs/README.md +12 -10
- package/package.json +3 -1
- package/skills/agent-first-adoption-steering/SKILL.md +116 -0
- package/skills/chrome-devtools/SKILL.md +6 -0
- package/skills/collab-deliberation/SKILL.md +6 -0
- package/skills/collab-evals/SKILL.md +15 -0
- package/skills/collab-subagents-first/SKILL.md +14 -1
- package/skills/delegate-early/SKILL.md +6 -0
- package/skills/delegation-usage/DELEGATION_GUIDE.md +7 -4
- package/skills/delegation-usage/SKILL.md +21 -4
- package/skills/docs-first/SKILL.md +13 -0
- package/skills/elegance-review/SKILL.md +4 -0
- package/skills/long-poll-wait/SKILL.md +82 -0
- package/skills/release/SKILL.md +6 -2
- package/skills/standalone-review/SKILL.md +9 -3
- package/templates/README.md +5 -0
- package/templates/codex/.codex/agents/awaiter-high.toml +38 -0
- package/templates/codex/.codex/agents/explorer-fast.toml +2 -0
- package/templates/codex/.codex/agents/worker-complex.toml +2 -0
- package/templates/codex/.codex/config.toml +19 -0
- package/templates/codex/AGENTS.md +10 -4
package/README.md
CHANGED
|
@@ -51,6 +51,7 @@ Use this when you want Codex to drive work inside another repo with the CO defau
|
|
|
51
51
|
```bash
|
|
52
52
|
codex-orchestrator init codex --codex-cli --yes
|
|
53
53
|
```
|
|
54
|
+
This seeds `AGENTS.md`, `mcp-client.json`, and the downstream `.codex/config.toml` + `.codex/agents/*` role files (sourced from `templates/codex/.codex/*`), plus `codex.orchestrator.json`.
|
|
54
55
|
2. Register the delegation MCP server (one-time per machine):
|
|
55
56
|
```bash
|
|
56
57
|
codex mcp add delegation -- codex-orchestrator delegate-server --repo /path/to/repo
|
|
@@ -64,7 +65,13 @@ Use this when you want Codex to drive work inside another repo with the CO defau
|
|
|
64
65
|
```bash
|
|
65
66
|
export CODEX_CLI_USE_MANAGED=1
|
|
66
67
|
```
|
|
67
|
-
4. Optional (
|
|
68
|
+
4. Optional (additive global defaults in `~/.codex/config.toml`):
|
|
69
|
+
```bash
|
|
70
|
+
codex-orchestrator codex defaults
|
|
71
|
+
codex-orchestrator codex defaults --yes
|
|
72
|
+
```
|
|
73
|
+
This updates only the CO baseline keys/role wiring and preserves unrelated config entries.
|
|
74
|
+
5. Optional (fast refresh helper for downstream users):
|
|
68
75
|
```bash
|
|
69
76
|
scripts/codex-cli-refresh.sh --repo /path/to/codex --align-only
|
|
70
77
|
```
|
|
@@ -87,55 +94,57 @@ codex -c 'mcp_servers.delegation.enabled=true' ...
|
|
|
87
94
|
|
|
88
95
|
## Agent role defaults (recommended)
|
|
89
96
|
|
|
90
|
-
Codex built-ins are `default`, `explorer`, and `
|
|
91
|
-
- `spawn_agent` defaults to `default` when `agent_type` is omitted, so always set `agent_type` explicitly
|
|
97
|
+
Codex built-ins are `default`, `explorer`, `worker`, and `awaiter`. `researcher` is user-defined.
|
|
98
|
+
- `spawn_agent` defaults to `default` when `agent_type` is omitted, so always set `agent_type` explicitly.
|
|
99
|
+
- Multi-turn loops are supported (`spawn_agent` -> `send_input` -> `wait`/`resume_agent` -> `close_agent`), so subagents can iterate before parent synthesis.
|
|
92
100
|
|
|
93
|
-
|
|
101
|
+
In Codex CLI `0.105.0`, built-in `explorer` no longer pins an older model profile; it inherits top-level defaults unless you attach a role `config_file`.
|
|
102
|
+
CO now ships this downstream starter config via `init codex` (source template: `templates/codex/.codex/config.toml`; installed as `.codex/config.toml` in target repos):
|
|
94
103
|
|
|
95
104
|
```toml
|
|
96
105
|
model = "gpt-5.3-codex"
|
|
97
106
|
model_reasoning_effort = "xhigh"
|
|
98
107
|
|
|
99
108
|
[agents]
|
|
100
|
-
max_threads =
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
description = "Explorer role override (no config_file): keep built-in explorer on top-level model defaults."
|
|
109
|
+
max_threads = 12
|
|
110
|
+
max_depth = 4
|
|
111
|
+
max_spawn_depth = 4
|
|
104
112
|
|
|
105
113
|
[agents.explorer_fast]
|
|
106
114
|
description = "Fast explorer (spark text-only)."
|
|
107
|
-
config_file = "
|
|
108
|
-
|
|
109
|
-
[agents.explorer_detailed]
|
|
110
|
-
description = "Detailed explorer."
|
|
111
|
-
config_file = "/absolute/path/to/.codex/agents/explorer-detailed.toml"
|
|
115
|
+
config_file = "./agents/explorer-fast.toml"
|
|
112
116
|
|
|
113
117
|
[agents.worker_complex]
|
|
114
118
|
description = "Complex worker role."
|
|
115
|
-
config_file = "
|
|
119
|
+
config_file = "./agents/worker-complex.toml"
|
|
120
|
+
|
|
121
|
+
[agents.awaiter]
|
|
122
|
+
description = "Awaiter override (keeps awaiter behavior with latest codex/high reasoning)."
|
|
123
|
+
config_file = "./agents/awaiter-high.toml"
|
|
116
124
|
```
|
|
117
125
|
|
|
118
126
|
```toml
|
|
119
|
-
#
|
|
127
|
+
# .codex/agents/explorer-fast.toml
|
|
120
128
|
model = "gpt-5.3-codex-spark"
|
|
121
129
|
model_reasoning_effort = "xhigh"
|
|
122
130
|
```
|
|
123
131
|
|
|
124
132
|
```toml
|
|
125
|
-
#
|
|
126
|
-
model = "gpt-5.3-codex"
|
|
127
|
-
model_reasoning_effort = "high"
|
|
128
|
-
```
|
|
129
|
-
|
|
130
|
-
```toml
|
|
131
|
-
# ~/.codex/agents/worker-complex.toml
|
|
133
|
+
# .codex/agents/worker-complex.toml
|
|
132
134
|
model = "gpt-5.3-codex"
|
|
133
135
|
model_reasoning_effort = "xhigh"
|
|
134
136
|
```
|
|
135
137
|
|
|
138
|
+
`init codex` also writes the downstream `.codex/agents/awaiter-high.toml` from `templates/codex/.codex/agents/awaiter-high.toml` so CO users can keep awaiter semantics while meeting a high-reasoning minimum.
|
|
139
|
+
|
|
136
140
|
Caveats:
|
|
137
141
|
- `gpt-5.3-codex-spark` is text-only (no image inputs). Keep it for fast search/synthesis.
|
|
138
|
-
-
|
|
142
|
+
- Leave `agents.explorer` undefined unless you intentionally want to override built-in explorer behavior.
|
|
143
|
+
- Keep RLM/collab built-ins-first by default; add specialist custom roles only when a measured benefit justifies ongoing maintenance.
|
|
144
|
+
- `max_threads = 12`, `max_depth = 4`, and `max_spawn_depth = 4` are CO's standard multi-agent baseline.
|
|
145
|
+
- Fallbacks are contingency-only (values are `max_threads`/`max_depth`/`max_spawn_depth`): use `8/2/2` on constrained hosts or deterministic high-risk lanes; use `6/1/1` only as break-glass under severe contention.
|
|
146
|
+
- Awaiter triage: long waits are expected for long-running jobs; treat an awaiter as stuck only after multiple polling windows with no status/progress movement.
|
|
147
|
+
- `codex review` delegates with collab tools disabled in review threads; keep review expectations single-agent even when multi-agent is enabled elsewhere.
|
|
139
148
|
|
|
140
149
|
Delegation guard profile:
|
|
141
150
|
- `CODEX_ORCHESTRATOR_GUARD_PROFILE=auto` (default): strict in CO-style repos, warn in lightweight repos.
|
|
@@ -145,7 +154,9 @@ Delegation guard profile:
|
|
|
145
154
|
## Delegation + RLM flow
|
|
146
155
|
|
|
147
156
|
RLM (Recursive Language Model) is the long-horizon loop used by the `rlm` pipeline (`codex-orchestrator rlm "<goal>"` or `codex-orchestrator start rlm --goal "<goal>"`). Delegated runs only enter RLM when the child is launched with the `rlm` pipeline (or the rlm runner directly). In auto mode it resolves to symbolic only when context is large (`RLM_SYMBOLIC_MIN_BYTES`) and an explicit context signal is present (`RLM_CONTEXT_PATH` or delegated run); otherwise it stays iterative. The runner writes state to `.runs/<task-id>/cli/<run-id>/rlm/state.json` and stops when the validator passes or budgets are exhausted.
|
|
157
|
+
For symbolic mode, the Option 2 alignment checker is enabled by default (`RLM_ALIGNMENT_CHECKER=1`) and writes append-only alignment artifacts under `.runs/<task-id>/cli/<run-id>/rlm/alignment/` (ledger + projection). Rollback toggle: set `RLM_ALIGNMENT_CHECKER=0`. Enforcement is opt-in via `RLM_ALIGNMENT_CHECKER_ENFORCE=1`.
|
|
148
158
|
Symbolic subcalls can optionally use collab tools. Fast path: `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; sets `RLM_SYMBOLIC_MULTI_AGENT=1` plus legacy `RLM_SYMBOLIC_COLLAB=1` for compatibility, and implies symbolic mode). Collab requires `multi_agent=true` in `codex features list` (`collab` remains a legacy alias). Collab tool calls parsed from `codex exec --json --enable multi_agent` are stored in `manifest.collab_tool_calls` (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, set to `0` to disable). For auditable role routing, prefix spawned prompts with `[agent_type:<role>]` and set `spawn_agent.agent_type` when supported; lifecycle validation enforces prompt-role evidence and validates `agent_type` when present (`RLM_SYMBOLIC_MULTI_AGENT_ROLE_POLICY=warn|off`, legacy alias `RLM_COLLAB_ROLE_POLICY`; `RLM_SYMBOLIC_MULTI_AGENT_ALLOW_DEFAULT_ROLE=1`, legacy alias `RLM_COLLAB_ALLOW_DEFAULT_ROLE`). `codex-orchestrator codex setup` remains available when you want a managed/pinned CLI path (opt-in via `CODEX_CLI_USE_MANAGED=1`).
|
|
159
|
+
For batch fan-out jobs, prefer native `spawn_agents_on_csv` before building custom orchestration wrappers.
|
|
149
160
|
|
|
150
161
|
### Delegation flow
|
|
151
162
|
```mermaid
|
|
@@ -211,7 +222,9 @@ Bundled skills (may vary by release):
|
|
|
211
222
|
- `docs-first`
|
|
212
223
|
- `collab-evals`
|
|
213
224
|
- `collab-deliberation`
|
|
225
|
+
- `long-poll-wait`
|
|
214
226
|
- `release`
|
|
227
|
+
- `agent-first-adoption-steering`
|
|
215
228
|
- `delegate-early` (compatibility alias; use `delegation-usage`)
|
|
216
229
|
|
|
217
230
|
## DevTools readiness
|
|
@@ -231,6 +244,7 @@ Usage snapshot (scans local `.runs/`):
|
|
|
231
244
|
codex-orchestrator doctor --usage
|
|
232
245
|
```
|
|
233
246
|
`doctor --usage` prints adoption KPIs (advanced/cloud/rlm/collab/delegation coverage), and per-run `run-summary.json` now includes a `usageKpi` section plus cloud fallback metadata when preflight downgrades to MCP.
|
|
247
|
+
`doctor` also includes a codex-defaults advisory section (model/reasoning/agent baseline drift) and points to additive remediation via `codex-orchestrator codex defaults --yes`.
|
|
234
248
|
|
|
235
249
|
Issue bundle logging (downstream dogfooding / repro handoff):
|
|
236
250
|
```bash
|
|
@@ -256,8 +270,13 @@ codex-orchestrator doctor --cloud-preflight
|
|
|
256
270
|
- Enable required MCP servers with least privilege: `codex-orchestrator mcp enable --servers delegation --yes` (plan with `--format json`; omit `--servers` only when you intentionally want all disabled servers enabled; env/secret values are redacted in displayed command lines)
|
|
257
271
|
- Low-friction docs->implementation guardrails: `codex-orchestrator flow --task <task-id>`
|
|
258
272
|
- Validate + measure adoption locally: `codex-orchestrator doctor --usage --format json`
|
|
273
|
+
- Run docs relevance as an advisory lane (non-blocking): `codex-orchestrator start docs-relevance-advisory --task <task-id>`
|
|
259
274
|
- Capture reproducible downstream failures: `codex-orchestrator doctor --issue-log --issue-title "<title>" --issue-notes "<notes>"`
|
|
260
275
|
- Auto-capture failed run issue bundles: `codex-orchestrator start <pipeline> --auto-issue-log` or `codex-orchestrator flow --auto-issue-log`
|
|
276
|
+
- Active PR watch-resolve-merge loop: `codex-orchestrator pr resolve-merge --pr <number> --quiet-minutes <window>` (add `--auto-merge` when approved; exits early when author action is required).
|
|
277
|
+
- Passive PR monitor loop: `codex-orchestrator pr watch-merge --pr <number> --quiet-minutes <window>` (monitor-only behavior; keeps waiting unless terminal/timeout).
|
|
278
|
+
- Review checkpoints (npm-only safe): `NOTES="Goal: ... | Summary: ... | Risks: ..." codex-orchestrator review --task <task-id>` for manifest-backed standalone review wrapper behavior (auto-skips repo-only diff-budget script when unavailable in downstream installs); use `codex review "<focus>"` for quick prompt-only checks; use `codex-orchestrator start implementation-gate --task <task-id> --format json` when you want a full gate run.
|
|
279
|
+
- Downstream simulation before shipping wrapper/skill changes: `npm run pack:smoke` (packaged CLI in temp mock repo; validates `review` artifacts and `long-poll-wait` install path).
|
|
261
280
|
- Delegation: `codex-orchestrator doctor --apply --yes`, then enable for a Codex run with: `codex -c 'mcp_servers.delegation.enabled=true' ...`
|
|
262
281
|
- Collab (symbolic RLM subagents): `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; requires Codex `features.multi_agent=true`)
|
|
263
282
|
- Cloud: set `CODEX_CLOUD_ENV_ID` (and optional `CODEX_CLOUD_BRANCH`), then run: `codex-orchestrator start <pipeline> --cloud --target <stage-id>`
|
|
@@ -274,17 +293,21 @@ codex-orchestrator devtools setup
|
|
|
274
293
|
|
|
275
294
|
- `codex-orchestrator start <pipeline>` — run a pipeline (add `--auto-issue-log` for automatic failure bundle capture; add `--repo-config-required` for strict repo-local config mode).
|
|
276
295
|
- `codex-orchestrator flow --task <task-id>` — run `docs-review` then `implementation-gate` in sequence (supports `--auto-issue-log` and `--repo-config-required`).
|
|
296
|
+
- `codex-orchestrator start docs-relevance-advisory --task <task-id>` — run non-blocking docs relevance signals (warn-mode freshness + advisory review lane).
|
|
297
|
+
- `NOTES="Goal: ... | Summary: ... | Risks: ..." codex-orchestrator review --task <task-id>` — run standalone review wrapper with manifest-backed evidence (supports run-review flags/env).
|
|
277
298
|
- `codex-orchestrator plan <pipeline>` — preview pipeline stages.
|
|
278
299
|
- `codex-orchestrator exec <cmd>` — run a one-off command with the exec runtime.
|
|
279
|
-
- `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`, `codex.orchestrator.json`) into a repo.
|
|
300
|
+
- `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`, downstream `.codex/config.toml` + `.codex/agents/*` role files sourced from `templates/codex/.codex/*`, `codex.orchestrator.json`) into a repo.
|
|
280
301
|
- `codex-orchestrator setup --yes` — install bundled skills and configure delegation + DevTools wiring (add `--refresh-skills` to overwrite existing skills in `$CODEX_HOME/skills`).
|
|
281
302
|
- `codex-orchestrator init codex --codex-cli --yes --codex-source <path>` — optionally provision a CO-managed Codex CLI binary (build-from-source default; set `CODEX_CLI_SOURCE` to avoid passing `--codex-source` every time, and `CODEX_CLI_USE_MANAGED=1` to route runs to it).
|
|
282
303
|
- `codex-orchestrator init codex --codex-cli --yes --codex-download-url <url> --codex-download-sha256 <sha>` — opt-in to a prebuilt Codex CLI download.
|
|
283
304
|
- `codex-orchestrator codex setup` — plan/apply a CO-managed Codex CLI install (optional managed/pinned path; use `--download-url` + `--download-sha256` for prebuilts; activate with `CODEX_CLI_USE_MANAGED=1`).
|
|
305
|
+
- `codex-orchestrator codex defaults` — plan/apply additive global defaults in `~/.codex/config.toml` and `~/.codex/agents/*.toml` (`--yes` applies, `--force` allows role file overwrite).
|
|
284
306
|
- `codex-orchestrator delegation setup --yes` — configure delegation MCP server wiring.
|
|
285
307
|
- `codex-orchestrator mcp enable --servers <csv> --yes` — enable specific disabled MCP servers from existing Codex config entries.
|
|
286
308
|
- `codex-orchestrator self-check --format json` — JSON health payload.
|
|
287
309
|
- `codex-orchestrator mcp serve` — Codex MCP stdio server.
|
|
310
|
+
- `npm run pack:smoke` — maintainer smoke gate for packaged downstream behavior (tarball install + review/skill checks).
|
|
288
311
|
|
|
289
312
|
## What ships in the npm release
|
|
290
313
|
|
|
@@ -301,7 +324,8 @@ Repo internals, development workflows, and deeper architecture notes (contributo
|
|
|
301
324
|
- `docs/guides/collab-vs-mcp.md` (agent-first decision guide)
|
|
302
325
|
- `docs/guides/rlm-recursion-v2.md` (RLM recursion reference)
|
|
303
326
|
- `docs/guides/cloud-mode-preflight.md` (cloud-mode preflight + fallback guidance)
|
|
304
|
-
- `docs/guides/review-artifacts.md` (where `npm run review`
|
|
327
|
+
- `docs/guides/review-artifacts.md` (where `codex-orchestrator review` / `npm run review` write prompt/output artifacts)
|
|
328
|
+
- `docs/standalone-review-guide.md` (repo-local wrapper behavior + downstream-safe review alternatives)
|
|
305
329
|
|
|
306
330
|
## RLM benchmark graphs
|
|
307
331
|
|
package/codex.orchestrator.json
CHANGED
|
@@ -50,6 +50,29 @@
|
|
|
50
50
|
"command": "npm run docs:freshness"
|
|
51
51
|
}
|
|
52
52
|
],
|
|
53
|
+
"docs-relevance-advisory-checks": [
|
|
54
|
+
{
|
|
55
|
+
"kind": "command",
|
|
56
|
+
"id": "docs-freshness-advisory",
|
|
57
|
+
"title": "npm run docs:freshness -- --warn",
|
|
58
|
+
"command": "npm run docs:freshness -- --warn",
|
|
59
|
+
"allowFailure": true,
|
|
60
|
+
"summaryHint": "Advisory docs-freshness signal (non-blocking)"
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"kind": "command",
|
|
64
|
+
"id": "docs-relevance-review",
|
|
65
|
+
"title": "npm run review (docs relevance advisory)",
|
|
66
|
+
"command": "npm run review",
|
|
67
|
+
"env": {
|
|
68
|
+
"SKIP_DIFF_BUDGET": "1",
|
|
69
|
+
"CODEX_REVIEW_NON_INTERACTIVE": "1",
|
|
70
|
+
"NOTES": "Goal: docs relevance advisory | Summary: semantic docs relevance review lane with manifest context | Risks: advisory-only lane; false positives/negatives possible"
|
|
71
|
+
},
|
|
72
|
+
"allowFailure": true,
|
|
73
|
+
"summaryHint": "Agent-first docs relevance advisory review"
|
|
74
|
+
}
|
|
75
|
+
],
|
|
53
76
|
"design-artifacts": [
|
|
54
77
|
{
|
|
55
78
|
"kind": "command",
|
|
@@ -190,6 +213,22 @@
|
|
|
190
213
|
}
|
|
191
214
|
]
|
|
192
215
|
},
|
|
216
|
+
{
|
|
217
|
+
"id": "docs-relevance-advisory",
|
|
218
|
+
"title": "Docs Relevance Advisory",
|
|
219
|
+
"description": "Runs a non-blocking docs relevance signal lane (warn-mode freshness + advisory review).",
|
|
220
|
+
"tags": [
|
|
221
|
+
"docs",
|
|
222
|
+
"advisory"
|
|
223
|
+
],
|
|
224
|
+
"guardrailsRequired": false,
|
|
225
|
+
"stages": [
|
|
226
|
+
{
|
|
227
|
+
"kind": "stage-set",
|
|
228
|
+
"ref": "docs-relevance-advisory-checks"
|
|
229
|
+
}
|
|
230
|
+
]
|
|
231
|
+
},
|
|
193
232
|
{
|
|
194
233
|
"id": "frontend-testing",
|
|
195
234
|
"title": "Frontend Testing",
|