npm - @ai-dev-methodologies/rlp-desk - Versions diffs - 0.9.3 → 0.10.1 - Mend

@ai-dev-methodologies/rlp-desk 0.9.3 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +5 -5
package/docs/protocol-reference.md +1 -1
package/docs/superpowers/plans/2026-04-24-gpt-5-5-default.md +517 -0
package/docs/superpowers/specs/2026-04-24-gpt-5-5-default.md +107 -0
package/package.json +1 -1
package/src/commands/rlp-desk.md +23 -23
package/src/governance.md +7 -7
package/src/model-upgrade-table.md +5 -5
package/src/node/run.mjs +2 -2
package/src/node/runner/campaign-main-loop.mjs +3 -4
package/docs/superpowers/plans/2026-04-12-flywheel-redesign.md +0 -704
package/docs/superpowers/specs/2026-04-12-flywheel-redesign.md +0 -161

package/src/commands/rlp-desk.md CHANGED

@@ -75,13 +75,13 @@ Ask about these items one by one (or in small groups):
    | Complexity | Worker | per-US Verifier | Final Verifier | Consensus |
    |------------|--------|-----------------|----------------|-----------|
-   | LOW | gpt-5.4:medium | sonnet | opus | final-only |
-   | MEDIUM | gpt-5.4:medium | opus | opus | final-only |
-   | HIGH | gpt-5.4:high | opus | opus | all |
-   | CRITICAL | gpt-5.4:high | opus | opus + human | all |
+   | LOW | gpt-5.5:medium | sonnet | opus | final-only |
+   | MEDIUM | gpt-5.5:medium | opus | opus | final-only |
+   | HIGH | gpt-5.5:high | opus | opus | all |
+   | CRITICAL | gpt-5.5:high | opus | opus + human | all |
    **Worker model selection** (cross-engine):
-   - **gpt-5.4:medium** — default recommendation (full context window, progressive upgrade handles harder US)
+   - **gpt-5.5:medium** — default recommendation (full context window, progressive upgrade handles harder US)
    - **spark:high** — only when US is small enough for spark's 100k context (single-file, AC count <= 4, simple logic). Do NOT use as primary recommendation — spark context window is too small for most tasks
    Present complexity score with evidence to the user, e.g.: "I rate this MEDIUM because: US count=4 (MEDIUM), file scope=2 (MEDIUM), logic=conditionals (MEDIUM), deps=none (LOW), impact=modify (MEDIUM). Highest=MEDIUM."
@@ -91,8 +91,8 @@ Ask about these items one by one (or in small groups):
    **If codex is NOT installed** — say: "Codex is not installed. Defaulting to claude-only Worker. Note: without a second engine, your Verifier shares the same perspective as the Worker — there is a risk of blind spots where both Worker and Verifier miss the same issue. To unlock cross-engine coverage: `npm install -g @openai/codex`"
 8. **Batch Capacity Check** — when verify-mode is batch and PRD is large:
-   - batch + spark + AC > 4 → warn "spark 100k context limit — switch to gpt-5.4 or split smaller"
-   - batch + gpt-5.4 + AC > 15 → warn "too many ACs for single batch — consider wave split (3-4 US per wave)"
+   - batch + spark + AC > 4 → warn "spark 100k context limit — switch to gpt-5.5 or split smaller"
+   - batch + gpt-5.5 + AC > 15 → warn "too many ACs for single batch — consider wave split (3-4 US per wave)"
    - per-us → no warning (US-level processing, no limit concern)
 9. **Verify Mode** — per-us (default) or batch. Ask: "Verify after each user story (per-us, recommended) or only after all stories are done (batch)?" Default recommendation: per-us for 2+ stories.
 10. **Consensus** — Ask: "Use cross-engine consensus? off (single engine), final-only (cross-engine on final verify only), or all (cross-engine on every verify). Requires codex CLI." Default: off. Recommended: final-only when codex is installed.
@@ -164,26 +164,26 @@ Tell the user:
    Available run commands (copy the one you want):
    # ★ Recommended: cross-engine + final-consensus (full context + blind-spot coverage):
-   /rlp-desk run <actual-slug> --mode tmux --worker-model gpt-5.4:medium --consensus final-only --debug
+   /rlp-desk run <actual-slug> --mode tmux --worker-model gpt-5.5:medium --consensus final-only --debug
    # Small tasks only (single-file, AC <= 4, simple logic — spark 100k context limit):
    /rlp-desk run <actual-slug> --mode tmux --worker-model spark:high --consensus final-only --debug
    # Critical (full consensus on every verify):
-   /rlp-desk run <actual-slug> --mode tmux --worker-model gpt-5.4:high --consensus all --debug
+   /rlp-desk run <actual-slug> --mode tmux --worker-model gpt-5.5:high --consensus all --debug
    # Claude-only:
    /rlp-desk run <actual-slug> --debug
    # Full options reference:
    #   --mode agent|tmux                      (default: agent)
-   #   --worker-model MODEL                   haiku|sonnet|opus or gpt-5.4:high|spark:high (default: haiku)
+   #   --worker-model MODEL                   haiku|sonnet|opus or gpt-5.5:high|spark:high (default: haiku)
    #   --lock-worker-model                    disable auto model upgrade
    #   --verifier-model MODEL                 per-US verifier (default: sonnet)
    #   --final-verifier-model MODEL           final ALL verifier (default: opus)
    #   --consensus off|all|final-only         cross-engine consensus (default: off)
-   #   --consensus-model MODEL                per-US cross-verifier (default: gpt-5.4:medium)
-   #   --final-consensus-model MODEL          final cross-verifier (default: gpt-5.4:high)
+   #   --consensus-model MODEL                per-US cross-verifier (default: gpt-5.5:medium)
+   #   --final-consensus-model MODEL          final cross-verifier (default: gpt-5.5:high)
    #   --verify-mode per-us|batch             (default: per-us)
    #   --cb-threshold N                       (default: 6)
    #   --max-iter N                           (default: 100)
@@ -240,16 +240,16 @@ Tell the user:
 Options (parse from `$ARGUMENTS`):
 - `--mode agent|tmux` (default: `agent`) — execution mode
-- `--worker-model MODEL` (default: `haiku`) — Worker model. Format: `model` = claude engine, `model:reasoning` = codex engine. Examples: `haiku`, `sonnet`, `opus`, `spark:high`, `gpt-5.4:high`. Parsed by `parse_model_flag()` which auto-splits engine/model/reasoning.
-- `--lock-worker-model` — disable automatic model upgrade on failure (check_model_upgrade). Worker stays on the specified model regardless of consecutive failures.
+- `--worker-model MODEL` (default: `haiku`) — Worker model. Format: `model` = claude engine, `model:reasoning` = codex engine. Examples: `haiku`, `sonnet`, `opus`, `spark:high`, `gpt-5.5:high`. Parsed by `parse_model_flag()` which auto-splits engine/model/reasoning.
+- `--lock-worker-model` — disable automatic model upgrade on failure. Worker stays on the specified model regardless of consecutive failures.
 - `--verifier-model MODEL` (default: `sonnet`) — per-US verification model. Campaign-fixed (no progressive upgrade). Lighter than final verifier.
 - `--final-verifier-model MODEL` (default: `opus`) — final ALL verification model. Independent from per-US verifier. Used only for the final full-AC verify pass.
 - `--consensus off|all|final-only` (default: `off`) — cross-engine consensus verification mode.
   - `off`: single-engine verification only
   - `all`: cross-engine consensus on every verify (per-US and final)
   - `final-only`: cross-engine consensus only on the final ALL verify
-- `--consensus-model MODEL` (default: `gpt-5.4:medium`) — per-US cross-verifier model. Lighter weight for cost efficiency.
-- `--final-consensus-model MODEL` (default: `gpt-5.4:high`) — final cross-verifier model. Stricter. Note: spark is not allowed here (100k output limit).
+- `--consensus-model MODEL` (default: `gpt-5.5:medium`) — per-US cross-verifier model. Lighter weight for cost efficiency.
+- `--final-consensus-model MODEL` (default: `gpt-5.5:high`) — final cross-verifier model. Stricter. Note: spark is not allowed here (100k output limit).
 - `--verify-mode per-us|batch` (default: `per-us`) — verification strategy
   - `per-us`: verify after each US, then final full verify of all AC
   - `batch`: verify only after all US done (legacy behavior)
@@ -292,8 +292,8 @@ VERIFIER_MODEL=<--verifier-model value, default: sonnet> \
 FINAL_VERIFIER_MODEL=<--final-verifier-model value, default: opus> \
 VERIFY_MODE=<--verify-mode value, default: per-us> \
 CONSENSUS_MODE=<--consensus value, default: off> \
-CONSENSUS_MODEL=<--consensus-model value, default: gpt-5.4:medium> \
-FINAL_CONSENSUS_MODEL=<--final-consensus-model value, default: gpt-5.4:high> \
+CONSENSUS_MODEL=<--consensus-model value, default: gpt-5.5:medium> \
+FINAL_CONSENSUS_MODEL=<--final-consensus-model value, default: gpt-5.5:high> \
 CB_THRESHOLD=<--cb-threshold value, default: 6> \
 ITER_TIMEOUT=<--iter-timeout value, default: 600> \
 DEBUG=<1 if --debug, else 0> \
@@ -473,8 +473,8 @@ Bash("codex exec --model <codex_model> --reasoning-effort <codex_reasoning> <ful
 **⑦b Consensus Verification** (when `--consensus` is `all`, or `final-only` and scope is ALL):
 After the primary verifier runs, run a cross-engine second verifier:
 - Determine cross-verifier model based on scope:
-  - per-US verify → use `--consensus-model` (default: gpt-5.4:medium)
-  - final ALL verify → use `--final-consensus-model` (default: gpt-5.4:high)
+  - per-US verify → use `--consensus-model` (default: gpt-5.5:medium)
+  - final ALL verify → use `--final-consensus-model` (default: gpt-5.5:high)
 - If primary engine is claude → cross-verifier uses codex (the consensus model)
 - If primary engine is codex → cross-verifier uses claude `opus` (fixed)
 - Both produce `verify-verdict.json` (Leader renames to `verify-verdict-claude.json` and `verify-verdict-codex.json`)
@@ -732,13 +732,13 @@ Example:
 Run options:
   --mode agent|tmux                    Execution mode (default: agent)
-  --worker-model MODEL                 Worker model: haiku|sonnet|opus or gpt-5.4:high|spark:high (default: haiku)
+  --worker-model MODEL                 Worker model: haiku|sonnet|opus or gpt-5.5:high|spark:high (default: haiku)
   --lock-worker-model                  Disable auto model upgrade on failure
   --verifier-model MODEL               per-US verifier (default: sonnet)
   --final-verifier-model MODEL         Final ALL verifier (default: opus)
   --consensus off|all|final-only       Cross-engine consensus (default: off)
-  --consensus-model MODEL              per-US cross-verifier (default: gpt-5.4:medium)
-  --final-consensus-model MODEL        Final cross-verifier (default: gpt-5.4:high)
+  --consensus-model MODEL              per-US cross-verifier (default: gpt-5.5:medium)
+  --final-consensus-model MODEL        Final cross-verifier (default: gpt-5.5:high)
   --verify-mode per-us|batch           Verification strategy (default: per-us)
   --cb-threshold N                     Consecutive failures before BLOCKED (default: 6)
   --max-iter N                         Max iterations (default: 100)

package/src/governance.md CHANGED

@@ -14,7 +14,7 @@ The Leader orchestrates, while Worker/Verifier run in isolated fresh contexts ev
 - **Worker must NEVER modify Claude Code settings** (settings.json, settings.local.json). Permission prompts must be reported as blocked, not bypassed by editing settings.
 - **Verifier is independent**: The Verifier judges based on evidence alone, without knowledge of the Worker's reasoning process.
 - **Sentinels are Leader-owned**: Only the Leader writes COMPLETE/BLOCKED sentinels.
-- **Supported engines**: claude (default; models: haiku, sonnet, opus) and codex (opt-in via `--worker-model spark:high` or `--worker-model gpt-5.4:high`).
+- **Supported engines**: claude (default; models: haiku, sonnet, opus) and codex (opt-in via `--worker-model spark:high` or `--worker-model gpt-5.5:high`).
 ## 1a. Iron Laws
@@ -300,11 +300,11 @@ The Leader decides each iteration. Decision criteria:
 ### Codex (opt-in engine)
-Model routing uses `--worker-model` and `--verifier-model` with codex format: `spark:high` or `gpt-5.4:high`.
+Model routing uses `--worker-model` and `--verifier-model` with codex format: `spark:high` or `gpt-5.5:high`.
 ```
 --worker-model spark:high        # codex worker, spark model, high reasoning
---verifier-model gpt-5.4:high    # codex verifier, gpt-5.4, high reasoning
+--verifier-model gpt-5.5:high    # codex verifier, gpt-5.5, high reasoning
 ```
 `parse_model_flag()` auto-detects engine from the model name: plain names (haiku, sonnet, opus) = claude; `name:reasoning` format = codex. Claude is the default engine; codex is explicitly opt-in.
@@ -331,7 +331,7 @@ Agent(
 )
 ```
-If `--worker-model` or `--verifier-model` uses codex format (e.g., `spark:high`, `gpt-5.4:high`) (opt-in):
+If `--worker-model` or `--verifier-model` uses codex format (e.g., `spark:high`, `gpt-5.5:high`) (opt-in):
 ```
 # Worker or Verifier (codex engine)
 Bash("codex -m <codex_model> -c model_reasoning_effort=<codex_reasoning> --dangerously-bypass-approvals-and-sandbox <prompt>")
@@ -377,7 +377,7 @@ claude -p "$(cat /path/to/prompt.md)" \
 When `WORKER_ENGINE=codex` or `VERIFIER_ENGINE=codex`, the `codex` CLI is used instead:
 ```bash
 # codex engine (opt-in)
-codex -m gpt-5.4 \
+codex -m gpt-5.5 \
   -c model_reasoning_effort="high" \
   --dangerously-bypass-approvals-and-sandbox \
   "$(cat /path/to/prompt.md)"
@@ -569,9 +569,9 @@ Worker completes US → signal verify
 | Scenario | Primary verifier | Cross verifier |
 |----------|-----------------|----------------|
-| per-US, primary=claude | `--verifier-model` (sonnet) | `--consensus-model` (gpt-5.4:medium) |
+| per-US, primary=claude | `--verifier-model` (sonnet) | `--consensus-model` (gpt-5.5:medium) |
 | per-US, primary=codex | `--verifier-model` | claude opus (fixed) |
-| final, primary=claude | `--final-verifier-model` (opus) | `--final-consensus-model` (gpt-5.4:high) |
+| final, primary=claude | `--final-verifier-model` (opus) | `--final-consensus-model` (gpt-5.5:high) |
 | final, primary=codex | `--final-verifier-model` | claude opus (fixed) |
 - Both must pass. No engine priority.

package/src/model-upgrade-table.md CHANGED

@@ -18,14 +18,14 @@ CB default: 6. Override: `--cb-threshold N`. Worker only — Verifier fixed at c
 | HIGH | gpt-5.3-codex-spark:high | gpt-5.3-codex-spark:xhigh | gpt-5.3-codex-spark:xhigh | BLOCKED |
 | CRITICAL | gpt-5.3-codex-spark:xhigh | gpt-5.3-codex-spark:xhigh | gpt-5.3-codex-spark:xhigh | BLOCKED |
-## Non-Pro (gpt-5.4)
+## Non-Pro (gpt-5.5)
 | Complexity | 1-2 | 3-4 | 5-6 | 7+ |
 |------------|-----|-----|-----|-----|
-| LOW | gpt-5.4:low | gpt-5.4:medium | gpt-5.4:high | BLOCKED |
-| MEDIUM | gpt-5.4:medium | gpt-5.4:high | gpt-5.4:xhigh | BLOCKED |
-| HIGH | gpt-5.4:high | gpt-5.4:xhigh | gpt-5.4:xhigh | BLOCKED |
-| CRITICAL | gpt-5.4:xhigh | gpt-5.4:xhigh | gpt-5.4:xhigh | BLOCKED |
+| LOW | gpt-5.5:low | gpt-5.5:medium | gpt-5.5:high | BLOCKED |
+| MEDIUM | gpt-5.5:medium | gpt-5.5:high | gpt-5.5:xhigh | BLOCKED |
+| HIGH | gpt-5.5:high | gpt-5.5:xhigh | gpt-5.5:xhigh | BLOCKED |
+| CRITICAL | gpt-5.5:xhigh | gpt-5.5:xhigh | gpt-5.5:xhigh | BLOCKED |
 ## Claude-only

package/src/node/run.mjs CHANGED

@@ -11,8 +11,8 @@ const RUN_DEFAULTS = {
   verifierModel: 'sonnet',
   finalVerifierModel: 'opus',
   consensusMode: 'off',
-  consensusModel: 'gpt-5.4:medium',
-  finalConsensusModel: 'gpt-5.4:high',
+  consensusModel: 'gpt-5.5:medium',
+  finalConsensusModel: 'gpt-5.5:high',
   verifyMode: 'per-us',
   cbThreshold: 6,
   maxIterations: 100,

package/src/node/runner/campaign-main-loop.mjs CHANGED

@@ -24,11 +24,10 @@ import {
 const execFileAsync = promisify(execFile);
 const REQUIRED_SCAFFOLD_NAMES = ['workerPrompt', 'verifierPrompt', 'memoryFile', 'prdFile', 'testSpecFile'];
-const CLAUDE_MODELS = new Set(['haiku', 'sonnet', 'opus']);
 const MODEL_UPGRADES = {
-  'gpt-5.4:medium': 'gpt-5.4:high',
-  'gpt-5.4:high': 'gpt-5.4:xhigh',
-  'gpt-5.4:xhigh': 'BLOCKED',
+  'gpt-5.5:medium': 'gpt-5.5:high',
+  'gpt-5.5:high': 'gpt-5.5:xhigh',
+  'gpt-5.5:xhigh': 'BLOCKED',
   'gpt-5.3-codex-spark:medium': 'gpt-5.3-codex-spark:high',
   'gpt-5.3-codex-spark:high': 'gpt-5.3-codex-spark:xhigh',
   'gpt-5.3-codex-spark:xhigh': 'BLOCKED',