npm - @kbediako/codex-orchestrator - Versions diffs - 0.1.34 → 0.1.36 - Mend

@kbediako/codex-orchestrator 0.1.34 → 0.1.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/README.md +49 -25
package/codex.orchestrator.json +39 -0
package/dist/bin/codex-orchestrator.js +243 -32
package/dist/orchestrator/src/cli/codexDefaultsSetup.js +274 -0
package/dist/orchestrator/src/cli/doctor.js +132 -1
package/dist/orchestrator/src/cli/doctorIssueLog.js +42 -16
package/dist/orchestrator/src/cli/doctorUsage.js +4 -4
package/dist/orchestrator/src/cli/rlm/alignment.js +956 -0
package/dist/orchestrator/src/cli/rlm/symbolic.js +96 -0
package/dist/orchestrator/src/cli/rlmRunner.js +148 -1
package/dist/scripts/lib/pr-watch-merge.js +170 -9
package/dist/scripts/run-review.js +1983 -0
package/docs/README.md +12 -10
package/package.json +3 -1
package/skills/agent-first-adoption-steering/SKILL.md +116 -0
package/skills/chrome-devtools/SKILL.md +6 -0
package/skills/collab-deliberation/SKILL.md +6 -0
package/skills/collab-evals/SKILL.md +15 -0
package/skills/collab-subagents-first/SKILL.md +14 -1
package/skills/delegate-early/SKILL.md +6 -0
package/skills/delegation-usage/DELEGATION_GUIDE.md +7 -4
package/skills/delegation-usage/SKILL.md +21 -4
package/skills/docs-first/SKILL.md +13 -0
package/skills/elegance-review/SKILL.md +4 -0
package/skills/long-poll-wait/SKILL.md +82 -0
package/skills/release/SKILL.md +6 -2
package/skills/standalone-review/SKILL.md +9 -3
package/templates/README.md +5 -0
package/templates/codex/.codex/agents/awaiter-high.toml +38 -0
package/templates/codex/.codex/agents/explorer-fast.toml +2 -0
package/templates/codex/.codex/agents/worker-complex.toml +2 -0
package/templates/codex/.codex/config.toml +19 -0
package/templates/codex/AGENTS.md +10 -4

package/README.md CHANGED Viewed

@@ -51,6 +51,7 @@ Use this when you want Codex to drive work inside another repo with the CO defau
    ```bash
    codex-orchestrator init codex --codex-cli --yes
    ```
+   This seeds `AGENTS.md`, `mcp-client.json`, and downstream `.codex/config.toml` + `.codex/agents/*` role files (sourced from `templates/codex/.codex/*`), plus `codex.orchestrator.json`.
 2. Register the delegation MCP server (one-time per machine):
    ```bash
    codex mcp add delegation -- codex-orchestrator delegate-server --repo /path/to/repo
@@ -64,7 +65,13 @@ Use this when you want Codex to drive work inside another repo with the CO defau
    ```bash
    export CODEX_CLI_USE_MANAGED=1
    ```
-4. Optional (fast refresh helper for downstream users):
+4. Optional (additive global defaults in `~/.codex/config.toml`):
+   ```bash
+   codex-orchestrator codex defaults
+   codex-orchestrator codex defaults --yes
+   ```
+   This updates only the CO baseline keys/role wiring and preserves unrelated config entries.
+5. Optional (fast refresh helper for downstream users):
    ```bash
    scripts/codex-cli-refresh.sh --repo /path/to/codex --align-only
    ```
@@ -87,55 +94,57 @@ codex -c 'mcp_servers.delegation.enabled=true' ...
 ## Agent role defaults (recommended)
-Codex built-ins are `default`, `explorer`, and `worker`. `researcher` is user-defined.
-- `spawn_agent` defaults to `default` when `agent_type` is omitted, so always set `agent_type` explicitly when using collab subagents.
+Codex built-ins are `default`, `explorer`, `worker`, and `awaiter`. `researcher` is user-defined.
+- `spawn_agent` defaults to `default` when `agent_type` is omitted, so always set `agent_type` explicitly.
+- Multi-turn loops are supported (`spawn_agent` -> `send_input` -> `wait`/`resume_agent` -> `close_agent`), so subagents can iterate before parent synthesis.
-Built-in `explorer` in Codex currently uses `gpt-5.1-codex-mini` with `medium` reasoning unless you override it. If you want latest-codex defaults end-to-end, add role overrides in `~/.codex/config.toml`:
+In Codex CLI `0.105.0`, built-in `explorer` no longer pins an older model profile; it inherits top-level defaults unless you attach a role `config_file`.
+CO now ships this downstream starter config via `init codex` (source template: `templates/codex/.codex/config.toml`; installed as `.codex/config.toml` in target repos):
 ```toml
 model = "gpt-5.3-codex"
 model_reasoning_effort = "xhigh"
 [agents]
-max_threads = 8
-[agents.explorer]
-description = "Explorer role override (no config_file): keep built-in explorer on top-level model defaults."
+max_threads = 12
+max_depth = 4
+max_spawn_depth = 4
 [agents.explorer_fast]
 description = "Fast explorer (spark text-only)."
-config_file = "/absolute/path/to/.codex/agents/explorer-fast.toml"
-[agents.explorer_detailed]
-description = "Detailed explorer."
-config_file = "/absolute/path/to/.codex/agents/explorer-detailed.toml"
+config_file = "./agents/explorer-fast.toml"
 [agents.worker_complex]
 description = "Complex worker role."
-config_file = "/absolute/path/to/.codex/agents/worker-complex.toml"
+config_file = "./agents/worker-complex.toml"
+[agents.awaiter]
+description = "Awaiter override (keeps awaiter behavior with latest codex/high reasoning)."
+config_file = "./agents/awaiter-high.toml"
 ```
 ```toml
-# ~/.codex/agents/explorer-fast.toml
+# .codex/agents/explorer-fast.toml
 model = "gpt-5.3-codex-spark"
 model_reasoning_effort = "xhigh"
 ```
 ```toml
-# ~/.codex/agents/explorer-detailed.toml
-model = "gpt-5.3-codex"
-model_reasoning_effort = "high"
-```
-```toml
-# ~/.codex/agents/worker-complex.toml
+# .codex/agents/worker-complex.toml
 model = "gpt-5.3-codex"
 model_reasoning_effort = "xhigh"
 ```
+`init codex` also writes downstream `.codex/agents/awaiter-high.toml` from `templates/codex/.codex/agents/awaiter-high.toml` so CO users can keep awaiter semantics while meeting a high-reasoning minimum.
 Caveats:
 - `gpt-5.3-codex-spark` is text-only (no image inputs). Keep it for fast search/synthesis.
-- Use `max_threads = 8` as a balanced default; only move to `12` after verifying your machine/tooling stays stable under higher concurrency.
+- Leave `agents.explorer` undefined unless you intentionally want to override built-in explorer behavior.
+- Keep RLM/collab built-ins-first by default; add specialist custom roles only when a measured benefit justifies ongoing maintenance.
+- `max_threads = 12`, `max_depth = 4`, and `max_spawn_depth = 4` are CO's standard multi-agent baseline.
+- Fallbacks are contingency-only: use `8/2/2` on constrained hosts or deterministic high-risk lanes; use `6/1/1` only as break-glass under severe contention.
+- Awaiter triage: long waits are expected for long-running jobs; treat an awaiter as stuck only after multiple polling windows with no status/progress movement.
+- `codex review` delegates with collab tools disabled in review threads; keep review expectations single-agent even when multi-agent is enabled elsewhere.
 Delegation guard profile:
 - `CODEX_ORCHESTRATOR_GUARD_PROFILE=auto` (default): strict in CO-style repos, warn in lightweight repos.
@@ -145,7 +154,9 @@ Delegation guard profile:
 ## Delegation + RLM flow
 RLM (Recursive Language Model) is the long-horizon loop used by the `rlm` pipeline (`codex-orchestrator rlm "<goal>"` or `codex-orchestrator start rlm --goal "<goal>"`). Delegated runs only enter RLM when the child is launched with the `rlm` pipeline (or the rlm runner directly). In auto mode it resolves to symbolic only when context is large (`RLM_SYMBOLIC_MIN_BYTES`) and an explicit context signal is present (`RLM_CONTEXT_PATH` or delegated run); otherwise it stays iterative. The runner writes state to `.runs/<task-id>/cli/<run-id>/rlm/state.json` and stops when the validator passes or budgets are exhausted.
+For symbolic mode, the Option 2 alignment checker is enabled by default (`RLM_ALIGNMENT_CHECKER=1`) and writes append-only alignment artifacts under `.runs/<task-id>/cli/<run-id>/rlm/alignment/` (ledger + projection). Rollback toggle: set `RLM_ALIGNMENT_CHECKER=0`. Enforcement is opt-in via `RLM_ALIGNMENT_CHECKER_ENFORCE=1`.
 Symbolic subcalls can optionally use collab tools. Fast path: `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; sets `RLM_SYMBOLIC_MULTI_AGENT=1` plus legacy `RLM_SYMBOLIC_COLLAB=1` for compatibility, and implies symbolic mode). Collab requires `multi_agent=true` in `codex features list` (`collab` remains a legacy alias). Collab tool calls parsed from `codex exec --json --enable multi_agent` are stored in `manifest.collab_tool_calls` (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, set to `0` to disable). For auditable role routing, prefix spawned prompts with `[agent_type:<role>]` and set `spawn_agent.agent_type` when supported; lifecycle validation enforces prompt-role evidence and validates `agent_type` when present (`RLM_SYMBOLIC_MULTI_AGENT_ROLE_POLICY=warn|off`, legacy alias `RLM_COLLAB_ROLE_POLICY`; `RLM_SYMBOLIC_MULTI_AGENT_ALLOW_DEFAULT_ROLE=1`, legacy alias `RLM_COLLAB_ALLOW_DEFAULT_ROLE`). `codex-orchestrator codex setup` remains available when you want a managed/pinned CLI path (opt-in via `CODEX_CLI_USE_MANAGED=1`).
+For batch fan-out jobs, prefer native `spawn_agents_on_csv` before building custom orchestration wrappers.
 ### Delegation flow
 ```mermaid
@@ -211,7 +222,9 @@ Bundled skills (may vary by release):
 - `docs-first`
 - `collab-evals`
 - `collab-deliberation`
+- `long-poll-wait`
 - `release`
+- `agent-first-adoption-steering`
 - `delegate-early` (compatibility alias; use `delegation-usage`)
 ## DevTools readiness
@@ -231,6 +244,7 @@ Usage snapshot (scans local `.runs/`):
 codex-orchestrator doctor --usage
 ```
 `doctor --usage` prints adoption KPIs (advanced/cloud/rlm/collab/delegation coverage), and per-run `run-summary.json` now includes a `usageKpi` section plus cloud fallback metadata when preflight downgrades to MCP.
+`doctor` also includes a codex-defaults advisory section (model/reasoning/agent baseline drift) and points to additive remediation via `codex-orchestrator codex defaults --yes`.
 Issue bundle logging (downstream dogfooding / repro handoff):
 ```bash
@@ -256,8 +270,13 @@ codex-orchestrator doctor --cloud-preflight
 - Enable required MCP servers with least privilege: `codex-orchestrator mcp enable --servers delegation --yes` (plan with `--format json`; omit `--servers` only when you intentionally want all disabled servers enabled; env/secret values are redacted in displayed command lines)
 - Low-friction docs->implementation guardrails: `codex-orchestrator flow --task <task-id>`
 - Validate + measure adoption locally: `codex-orchestrator doctor --usage --format json`
+- Run docs relevance as an advisory lane (non-blocking): `codex-orchestrator start docs-relevance-advisory --task <task-id>`
 - Capture reproducible downstream failures: `codex-orchestrator doctor --issue-log --issue-title "<title>" --issue-notes "<notes>"`
 - Auto-capture failed run issue bundles: `codex-orchestrator start <pipeline> --auto-issue-log` or `codex-orchestrator flow --auto-issue-log`
+- Active PR watch-resolve-merge loop: `codex-orchestrator pr resolve-merge --pr <number> --quiet-minutes <window>` (add `--auto-merge` when approved; exits early when author action is required).
+- Passive PR monitor loop: `codex-orchestrator pr watch-merge --pr <number> --quiet-minutes <window>` (monitor-only behavior; keeps waiting unless terminal/timeout).
+- Review checkpoints (npm-only safe): `NOTES="Goal: ... | Summary: ... | Risks: ..." codex-orchestrator review --task <task-id>` for manifest-backed standalone review wrapper behavior (auto-skips repo-only diff-budget script when unavailable in downstream installs); use `codex review "<focus>"` for quick prompt-only checks; use `codex-orchestrator start implementation-gate --task <task-id> --format json` when you want a full gate run.
+- Downstream simulation before shipping wrapper/skill changes: `npm run pack:smoke` (packaged CLI in temp mock repo; validates `review` artifacts and `long-poll-wait` install path).
 - Delegation: `codex-orchestrator doctor --apply --yes`, then enable for a Codex run with: `codex -c 'mcp_servers.delegation.enabled=true' ...`
 - Collab (symbolic RLM subagents): `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; requires Codex `features.multi_agent=true`)
 - Cloud: set `CODEX_CLOUD_ENV_ID` (and optional `CODEX_CLOUD_BRANCH`), then run: `codex-orchestrator start <pipeline> --cloud --target <stage-id>`
@@ -274,17 +293,21 @@ codex-orchestrator devtools setup
 - `codex-orchestrator start <pipeline>` — run a pipeline (add `--auto-issue-log` for automatic failure bundle capture; add `--repo-config-required` for strict repo-local config mode).
 - `codex-orchestrator flow --task <task-id>` — run `docs-review` then `implementation-gate` in sequence (supports `--auto-issue-log` and `--repo-config-required`).
+- `codex-orchestrator start docs-relevance-advisory --task <task-id>` — run non-blocking docs relevance signals (warn-mode freshness + advisory review lane).
+- `NOTES="Goal: ... | Summary: ... | Risks: ..." codex-orchestrator review --task <task-id>` — run standalone review wrapper with manifest-backed evidence (supports run-review flags/env).
 - `codex-orchestrator plan <pipeline>` — preview pipeline stages.
 - `codex-orchestrator exec <cmd>` — run a one-off command with the exec runtime.
-- `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`, `codex.orchestrator.json`) into a repo.
+- `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`, downstream `.codex/config.toml` + `.codex/agents/*` role files sourced from `templates/codex/.codex/*`, `codex.orchestrator.json`) into a repo.
 - `codex-orchestrator setup --yes` — install bundled skills and configure delegation + DevTools wiring (add `--refresh-skills` to overwrite existing skills in `$CODEX_HOME/skills`).
 - `codex-orchestrator init codex --codex-cli --yes --codex-source <path>` — optionally provision a CO-managed Codex CLI binary (build-from-source default; set `CODEX_CLI_SOURCE` to avoid passing `--codex-source` every time, and `CODEX_CLI_USE_MANAGED=1` to route runs to it).
 - `codex-orchestrator init codex --codex-cli --yes --codex-download-url <url> --codex-download-sha256 <sha>` — opt-in to a prebuilt Codex CLI download.
 - `codex-orchestrator codex setup` — plan/apply a CO-managed Codex CLI install (optional managed/pinned path; use `--download-url` + `--download-sha256` for prebuilts; activate with `CODEX_CLI_USE_MANAGED=1`).
+- `codex-orchestrator codex defaults` — plan/apply additive global defaults in `~/.codex/config.toml` and `~/.codex/agents/*.toml` (`--yes` applies, `--force` allows role file overwrite).
 - `codex-orchestrator delegation setup --yes` — configure delegation MCP server wiring.
 - `codex-orchestrator mcp enable --servers <csv> --yes` — enable specific disabled MCP servers from existing Codex config entries.
 - `codex-orchestrator self-check --format json` — JSON health payload.
 - `codex-orchestrator mcp serve` — Codex MCP stdio server.
+- `npm run pack:smoke` — maintainer smoke gate for packaged downstream behavior (tarball install + review/skill checks).
 ## What ships in the npm release
@@ -301,7 +324,8 @@ Repo internals, development workflows, and deeper architecture notes (contributo
 - `docs/guides/collab-vs-mcp.md` (agent-first decision guide)
 - `docs/guides/rlm-recursion-v2.md` (RLM recursion reference)
 - `docs/guides/cloud-mode-preflight.md` (cloud-mode preflight + fallback guidance)
-- `docs/guides/review-artifacts.md` (where `npm run review` writes prompt/output artifacts)
+- `docs/guides/review-artifacts.md` (where `codex-orchestrator review` / `npm run review` write prompt/output artifacts)
+- `docs/standalone-review-guide.md` (repo-local wrapper behavior + downstream-safe review alternatives)
 ## RLM benchmark graphs

package/codex.orchestrator.json CHANGED Viewed

@@ -50,6 +50,29 @@
         "command": "npm run docs:freshness"
       }
     ],
+    "docs-relevance-advisory-checks": [
+      {
+        "kind": "command",
+        "id": "docs-freshness-advisory",
+        "title": "npm run docs:freshness -- --warn",
+        "command": "npm run docs:freshness -- --warn",
+        "allowFailure": true,
+        "summaryHint": "Advisory docs-freshness signal (non-blocking)"
+      },
+      {
+        "kind": "command",
+        "id": "docs-relevance-review",
+        "title": "npm run review (docs relevance advisory)",
+        "command": "npm run review",
+        "env": {
+          "SKIP_DIFF_BUDGET": "1",
+          "CODEX_REVIEW_NON_INTERACTIVE": "1",
+          "NOTES": "Goal: docs relevance advisory | Summary: semantic docs relevance review lane with manifest context | Risks: advisory-only lane; false positives/negatives possible"
+        },
+        "allowFailure": true,
+        "summaryHint": "Agent-first docs relevance advisory review"
+      }
+    ],
     "design-artifacts": [
       {
         "kind": "command",
@@ -190,6 +213,22 @@
         }
       ]
     },
+    {
+      "id": "docs-relevance-advisory",
+      "title": "Docs Relevance Advisory",
+      "description": "Runs a non-blocking docs relevance signal lane (warn-mode freshness + advisory review).",
+      "tags": [
+        "docs",
+        "advisory"
+      ],
+      "guardrailsRequired": false,
+      "stages": [
+        {
+          "kind": "stage-set",
+          "ref": "docs-relevance-advisory-checks"
+        }
+      ]
+    },
     {
       "id": "frontend-testing",
       "title": "Frontend Testing",