npm - devlyn-cli - Versions diffs - 1.15.0 → 2.1.0 - Mend

devlyn-cli 1.15.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (158) hide show

package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-pass.json ADDED Viewed

@@ -0,0 +1,223 @@
+{
+  "accepted_invariants": [
+    {
+      "authority": "expected.json/forbidden_patterns",
+      "id": "forbidden_pattern__silent_catch_returning_a_fallback_value_violates_no_silent_c__bin_cli_js",
+      "operational_check": "variant arm output MUST NOT contain regex pattern \"catch\\\\s*\\\\([^)]*\\\\)\\\\s*\\\\{[^}]*return\\\\s+(\\\\[\\\\]|null|undefined|\\\\{|false|'')\" in files ['bin/cli.js']; rationale: silent catch returning a fallback value — violates no-silent-catches policy",
+      "paraphrase": "variant arm output MUST NOT contain regex pattern \"catch\\\\s*\\\\([^)]*\\\\)\\\\s*\\\\{[^}]*return\\\\s+(\\\\[\\\\]|null|undefined|\\\\{|",
+      "source_refs": [
+        "expected.json:forbidden_patterns[0]"
+      ]
+    },
+    {
+      "authority": "expected.json/forbidden_patterns",
+      "id": "forbidden_pattern__ts_ignore_escape_hatch__bin_cli_js",
+      "operational_check": "variant arm output MUST NOT contain regex pattern '@ts-ignore' in files ['bin/cli.js']; rationale: @ts-ignore escape hatch",
+      "paraphrase": "variant arm output MUST NOT contain regex pattern '@ts-ignore' in files ['bin/cli.js']; rationale: @ts-ignore escape hat",
+      "source_refs": [
+        "expected.json:forbidden_patterns[1]"
+      ]
+    },
+    {
+      "authority": "expected.json/max_deps_added",
+      "id": "max_deps_added__0",
+      "operational_check": "variant arm MUST NOT add more than 0 new npm dependencies (count delta of package.json:dependencies + devDependencies)",
+      "paraphrase": "variant arm MUST NOT add more than 0 new npm dependencies (count delta of package.json:dependencies + devDependencies)",
+      "source_refs": [
+        "expected.json:max_deps_added"
+      ]
+    },
+    {
+      "authority": "expected.json/required_files",
+      "id": "required_file__bin_cli_js",
+      "operational_check": "variant arm output MUST contain file 'bin/cli.js' (created or preserved)",
+      "paraphrase": "variant arm output MUST contain file 'bin/cli.js' (created or preserved)",
+      "source_refs": [
+        "expected.json:required_files[bin/cli.js]"
+      ]
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "scope-tier-a:lockfile-deletion",
+      "operational_check": "variant arm MUST NOT delete a scaffold-present lockfile",
+      "paraphrase": "variant arm MUST NOT delete a scaffold-present lockfile",
+      "source_refs": [
+        "oracle-scope-tier-a.py"
+      ]
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "scope-tier-a:tier-a-violation",
+      "operational_check": "variant arm MUST NOT add or modify paths matching: docs/roadmap/** | docs/VISION.md | docs/ROADMAP.md | .github/** | node_modules/** | **/node_modules/** | test-results/** | coverage/** | .nyc_output/** | basename suffix .log | basename prefix .env or secrets.",
+      "paraphrase": "variant arm MUST NOT add or modify paths matching: docs/roadmap/** | docs/VISION.md | docs/ROADMAP.md | .github/** | nod",
+      "source_refs": [
+        "oracle-scope-tier-a.py"
+      ]
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "scope-tier-b:scope-unmatched",
+      "operational_check": "every variant-touched file MUST be either inside spec_output_files (Tier C) OR reachable from a Tier C seed via static JS/TS imports OR matched by expected.json:tier_a_waivers",
+      "paraphrase": "every variant-touched file MUST be either inside spec_output_files (Tier C) OR reachable from a Tier C seed via static J",
+      "source_refs": [
+        "oracle-scope-tier-b.py"
+      ]
+    },
+    {
+      "authority": "expected.json/spec_output_files",
+      "id": "spec_output_file__bin_cli_js",
+      "operational_check": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'bin/cli.js' is one Tier C seed",
+      "paraphrase": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'bin/cli.js' is o",
+      "source_refs": [
+        "expected.json:spec_output_files[bin/cli.js]"
+      ]
+    },
+    {
+      "authority": "expected.json/spec_output_files",
+      "id": "spec_output_file__tests_cli_test_js",
+      "operational_check": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'tests/cli.test.js' is one Tier C seed",
+      "paraphrase": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'tests/cli.test.j",
+      "source_refs": [
+        "expected.json:spec_output_files[tests/cli.test.js]"
+      ]
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "test-fidelity:assertion-regression",
+      "operational_check": "effective assertion count MUST NOT drop and skipped-test count MUST NOT rise; vacuous expect.assertions(0) is treated as a real regression",
+      "paraphrase": "effective assertion count MUST NOT drop and skipped-test count MUST NOT rise; vacuous expect.assertions(0) is treated as",
+      "source_refs": [
+        "oracle-test-fidelity.py"
+      ]
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "test-fidelity:mock-swap",
+      "operational_check": "post-arm test file MUST NOT swap REAL_PATTERNS hits for MOCK_PATTERNS hits (jest/vi/sinon, nock/msw, app.handle/inject/callback, hand-rolled IncomingMessage/ServerResponse, etc.); a drop in real_calls combined with a rise in mock_calls is a mock-swap flag",
+      "paraphrase": "post-arm test file MUST NOT swap REAL_PATTERNS hits for MOCK_PATTERNS hits (jest/vi/sinon, nock/msw, app.handle/inject/c",
+      "source_refs": [
+        "oracle-test-fidelity.py"
+      ]
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "test-fidelity:test-file-deleted",
+      "operational_check": "no scaffold-present test file may be deleted by the variant arm; deletion of an existing tests/*.test.* / *.spec.* / *.e2e.* file is a flag-severity finding",
+      "paraphrase": "no scaffold-present test file may be deleted by the variant arm; deletion of an existing tests/*.test.* / *.spec.* / *.e",
+      "source_refs": [
+        "oracle-test-fidelity.py"
+      ]
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "test-fidelity:test-file-renamed",
+      "operational_check": "rename of a scaffold-present test file is warn-severity (content fidelity not verified across renames in step 1)",
+      "paraphrase": "rename of a scaffold-present test file is warn-severity (content fidelity not verified across renames in step 1)",
+      "source_refs": [
+        "oracle-test-fidelity.py"
+      ]
+    },
+    {
+      "authority": "expected.json/verification_commands",
+      "id": "verification__3f35982a",
+      "operational_check": "running `node bin/cli.js doctor` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor:']; stdout MUST NOT contain any of ['undefined', 'Error:']",
+      "paraphrase": "running `node bin/cli.js doctor` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor:'];",
+      "source_refs": [
+        "expected.json:verification_commands[0]"
+      ]
+    },
+    {
+      "authority": "expected.json/verification_commands",
+      "id": "verification__460fce04",
+      "operational_check": "running `HOME=/nonexistent node bin/cli.js doctor` in the post-arm work dir MUST exit with code 1; stdout MUST contain all of ['/nonexistent']; stdout MUST NOT contain any of []",
+      "paraphrase": "running `HOME=/nonexistent node bin/cli.js doctor` in the post-arm work dir MUST exit with code 1; stdout MUST contain a",
+      "source_refs": [
+        "expected.json:verification_commands[1]"
+      ]
+    },
+    {
+      "authority": "expected.json/verification_commands",
+      "id": "verification__973e287e",
+      "operational_check": "running `python3 -c \"import subprocess; r = subprocess.run(['node', 'bin/cli.js', 'doctor'], capture_output=True); n = r.stdout.count(b'\\x1b['); print(n); exit(0 if n == 0 else 1)\"` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['0']; stdout MUST NOT contain any of []",
+      "paraphrase": "running `python3 -c \"import subprocess; r = subprocess.run(['node', 'bin/cli.js', 'doctor'], capture_output=True); n = r",
+      "source_refs": [
+        "expected.json:verification_commands[2]"
+      ]
+    },
+    {
+      "authority": "expected.json/verification_commands",
+      "id": "verification__d6253a97",
+      "operational_check": "running `node bin/cli.js doctor --help` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor']; stdout MUST NOT contain any of []",
+      "paraphrase": "running `node bin/cli.js doctor --help` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doc",
+      "source_refs": [
+        "expected.json:verification_commands[3]"
+      ]
+    },
+    {
+      "authority": "expected.json/verification_commands",
+      "id": "verification__e0f149e4",
+      "operational_check": "running `node bin/cli.js --help` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor']; stdout MUST NOT contain any of []",
+      "paraphrase": "running `node bin/cli.js --help` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor']; ",
+      "source_refs": [
+        "expected.json:verification_commands[4]"
+      ]
+    },
+    {
+      "authority": "expected.json/verification_commands",
+      "id": "verification__fdbcd321",
+      "operational_check": "running `node bin/cli.js doctor --verbose` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor:']; stdout MUST NOT contain any of ['Error:']",
+      "paraphrase": "running `node bin/cli.js doctor --verbose` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['",
+      "source_refs": [
+        "expected.json:verification_commands[5]"
+      ]
+    }
+  ],
+  "authority_order": [
+    "spec.md",
+    "expected.json/rubric",
+    "phase prompt",
+    "model preference"
+  ],
+  "escalated_to_user": [],
+  "fixture_id": "F2-cli-medium-subcommand",
+  "model_stamps": {
+    "claude": {
+      "blocked_ids": [],
+      "model": "claude-opus-4-7",
+      "signed_plan_sha256": "7329b8955a94ac680a7a58d09bfc53ce4de17609495b4bf4658cf2d3a43dacd5",
+      "status": "sign",
+      "timestamp": "2026-04-29T18:30:00Z"
+    },
+    "codex": {
+      "blocked_ids": [],
+      "model": "gpt-5.5",
+      "signed_plan_sha256": "7329b8955a94ac680a7a58d09bfc53ce4de17609495b4bf4658cf2d3a43dacd5",
+      "status": "sign",
+      "timestamp": "2026-04-29T18:31:00Z"
+    }
+  },
+  "plan_status": "final",
+  "planning_mode": "pair",
+  "rejected_alternatives": [],
+  "rounds": [
+    {
+      "claude_draft_sha256": "0000000000000000000000000000000000000000000000000000000000000000",
+      "codex_draft_sha256": "1111111111111111111111111111111111111111111111111111111111111111",
+      "merged_sha256": "2222222222222222222222222222222222222222222222222222222222222222",
+      "note": "sample-pass synthetic round (test fixture)",
+      "round": 1
+    }
+  ],
+  "schema_version": "1",
+  "source": {
+    "canonical_id_registry_path": "benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected-pair-plan-registry.json",
+    "canonical_id_registry_sha256": "98ac16e4536ea3ef2e51d3c728982c014211c193a742cea74f1331e4fbba76be",
+    "expected_path": "benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected.json",
+    "expected_sha256": "ddef8feba49f20b6957e37840bc6a03e78e554776e380d81ad6390944c72fcab",
+    "rubric_path": "benchmark/auto-resolve/RUBRIC.md",
+    "rubric_sha256": "5b5b709a0b57f7e6f4fbc072af91e1edbc8d7910ae16b9b7be7170616aeaa9af",
+    "spec_path": "benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/spec.md",
+    "spec_sha256": "9b0949c2afd4a522de2bdbbf267d93907fd908bf0f1d0dc5e111ee30ba875bb7"
+  },
+  "unresolved": []
+}

package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/setup.sh ADDED Viewed

@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+# F2 setup — no base-repo modifications needed. The task starts from the
+# stock test-repo baseline (hello/version CLI) and adds a new subcommand.
+set -e
+exit 0

package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/spec.md ADDED Viewed

@@ -0,0 +1,56 @@
+---
+id: "F2-cli-medium-subcommand"
+title: "Add `doctor` subcommand to bench-test-repo CLI"
+status: planned
+complexity: medium
+depends-on: []
+---
+# F2 Add `doctor` subcommand
+## Context
+`bench-test-repo` users need a one-command way to diagnose their local
+environment — node version, Claude Code install, plugins, skills — without
+digging through the filesystem. A `doctor` subcommand lands that capability
+inside the CLI itself.
+## Requirements
+- [ ] `node bin/cli.js doctor` produces a status report and exits 0 on a clean machine.
+- [ ] Node version check — requires `process.version >= v18.0.0`, emits a status line, marks FAIL if below.
+- [ ] `$HOME/.claude/` check — exists as directory AND is writable. Missing → FAIL. Exists but not writable (EACCES) → FAIL with a distinct "permission" message.
+- [ ] Installed plugins scan — read subdirectories of `$HOME/.claude/plugins/cache/` and print a summary line with the count; `--verbose` lists names.
+- [ ] Installed skills scan — count files matching `$HOME/.claude/skills/**/SKILL.md`; print count; `--verbose` lists relative paths.
+- [ ] Colored output with `[OK]` (green), `[WARN]` (yellow), `[FAIL]` (red) via ANSI escape codes **only when `process.stdout.isTTY` is true** — piped output must contain no `\x1b[` sequences.
+- [ ] Summary line: `doctor: <N> ok, <M> warn, <K> fail`.
+- [ ] Exit code: `0` if zero fails, `1` otherwise.
+- [ ] `--verbose` flag expands details for plugins/skills scans.
+- [ ] `node bin/cli.js doctor --help` prints a short help block and exits 0.
+- [ ] `node bin/cli.js --help` lists `doctor` as an available subcommand.
+- [ ] `HOME=/nonexistent node bin/cli.js doctor` prints a FAIL line clearly referencing the missing `/nonexistent/.claude` and exits 1.
+## Constraints
+- **Zero new npm dependencies.** Use only Node.js built-ins (`fs`, `path`, `os`, `process`).
+- **No silent error catches.** Do not wrap operations in `try { … } catch { return fallbackValue }`. All errors visible to the user with actionable messages.
+- **HOME guard.** If `process.env.HOME` is undefined or empty, emit a clear FAIL line ("HOME environment variable is not set") and exit 1.
+- **EACCES handling.** If `readdirSync` fails with EACCES, emit a permission-specific message quoting the offending path. Do not silently return an empty list.
+- **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
+## Out of Scope
+- Auto-repair (report only; do not offer to fix detected problems).
+- Checking remote/registry state (npm, GitHub).
+- Any feature requiring a new npm dependency.
+## Verification
+- `node bin/cli.js doctor` exits 0 on a machine with `~/.claude` present.
+- `HOME=/nonexistent node bin/cli.js doctor` prints a FAIL line referencing `/nonexistent/.claude` and exits 1.
+- `node bin/cli.js doctor | cat` contains no `\x1b[` sequences.
+- `node bin/cli.js doctor --help` prints help, exits 0.
+- `node bin/cli.js --help` mentions `doctor`.
+- `git diff -- package.json` is empty.
+- `node bin/cli.js doctor --verbose` lists plugins and skills.

package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/task.txt ADDED Viewed

@@ -0,0 +1,14 @@
+Add a `doctor` subcommand to bench-test-repo's CLI (`bin/cli.js`) that diagnoses the local environment. When a user runs `node bin/cli.js doctor`, it should check:
+1. Node version is >= v18.0.0
+2. $HOME/.claude/ exists and is writable
+3. How many plugins are installed under $HOME/.claude/plugins/cache/
+4. How many skills are installed (SKILL.md files under $HOME/.claude/skills/)
+Each check produces a status line with a tag — `[OK]`, `[WARN]`, or `[FAIL]`. The tags should be colored (green/yellow/red) when output is a TTY, but plain text when piped. End with a summary line like `doctor: 3 ok, 1 warn, 0 fail` and exit 0 if no fails, 1 otherwise.
+Add a `--verbose` flag that lists the individual plugins and skill paths. Add `--help` for the subcommand. Make sure `node bin/cli.js --help` mentions `doctor`.
+Keep it to Node.js built-ins only (fs, path, os, process) — no new npm dependencies. Handle errors explicitly: if HOME is unset, fail cleanly with a helpful message; if a directory can't be read due to permissions (EACCES), say so with the path.
+Running `HOME=/nonexistent node bin/cli.js doctor` should exit 1 and mention the missing `/nonexistent/.claude`.

package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/NOTES.md ADDED Viewed

@@ -0,0 +1,28 @@
+# F3 — Notes
+## Purpose
+High-risk contract change. Exercises the pipeline's ability to catch
+**breaking changes hidden inside a reasonable-looking refactor**: lazy
+implementations wrap `items` in an envelope but forget to update tests, or
+update tests but forget backward-compat requirements (single-item route,
+`items` key), or paginate without validating query params.
+## Failure modes detected
+- **Test lie**: arm changes the handler but leaves old `assert.ok(Array.isArray(body.items))` that still passes against `{ items: [...] }` inside the envelope → test passes but new paging fields aren't asserted. Fixture requires ≥ 2 NEW tests.
+- **Query-param trust**: arm accepts `?per_page=abc` → `parseInt` returns `NaN` → handler throws or silently falls back to the default. Fixture requires an explicit 400 response.
+- **Contract drift on single-item lookup**: arm paginates `/items/:id` too, breaking existing clients.
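+- **Silent catch**: wrapping `Number(req.query.page)` in a `try { … } catch (e) { return null }` — caught by the forbidden pattern, which matches `return null`, `return undefined`, or `return ''` inside a `catch (…)` block (it does not match `return []`).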
+## Pipeline exercise
+- Phase 0 routing: likely `strict` (no risk keywords, but cross-file multi-function change may escalate in Stage B).
+- Phase 1 BUILD: Codex BUILD produces the implementation.
+- Phase 1.4 BUILD GATE: `node --test tests/server.test.js` must pass.
+- Phase 2 EVAL: scrutinizes that new tests actually cover the new paging behavior (not merely renamed versions of existing passing tests).
+- Phase 3 CRITIC design: checks invalid-query branch and backward-compat.
+## Rotation trigger
+Retire when both arms consistently score > 95 AND produce 2+ new tests covering paging edge cases without pipeline intervention.

package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected-pair-plan-registry.json ADDED Viewed

@@ -0,0 +1,162 @@
+{
+  "fixture_id": "F3-backend-contract-risk",
+  "generated_at": "2026-04-29T09:57:53Z",
+  "generated_from": {
+    "expected_path": "benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json",
+    "expected_sha256": "c0925ee948179fbc1c76836d98fba0c14c7eba56f462f2922903951923cb22e6",
+    "metadata_path": "benchmark/auto-resolve/fixtures/F3-backend-contract-risk/metadata.json",
+    "metadata_sha256": "c54530db26dbb04ce50b698fed2608206eae6f9a5dc2f666f127695e15d3fa30",
+    "oracle_script_shas": {
+      "scope-tier-a": "baaf21ed4a67f35d2a8af825e72869ef9737b5dfe08d65dd1a11c26fafe297ae",
+      "scope-tier-b": "9349d00a5c7456a4df9142923334e7004407d53f2443f2e210945bb771971e25",
+      "test-fidelity": "401184da51ae500cecfc75a6c5819b0d28acb63a397f788fb628c2913562f903"
+    }
+  },
+  "required_invariants": [
+    {
+      "authority": "expected.json/forbidden_patterns",
+      "id": "forbidden_pattern__eslint_disable_without_explicit_per_issue_justification__server_index_js",
+      "operational_check": "variant arm output MUST NOT contain regex pattern '/\\\\*\\\\s*eslint-disable' in files ['server/index.js']; rationale: eslint-disable without explicit per-issue justification",
+      "severity": "disqualifier",
+      "source_field": "expected.json/forbidden_patterns/1",
+      "source_ref": "expected.json:forbidden_patterns[1]"
+    },
+    {
+      "authority": "expected.json/forbidden_patterns",
+      "id": "forbidden_pattern__silent_catch_returning_fallback__server_index_js",
+      "operational_check": "variant arm output MUST NOT contain regex pattern \"catch\\\\s*\\\\([^)]*\\\\)\\\\s*\\\\{[^}]*return\\\\s+(null|undefined|'')\" in files ['server/index.js']; rationale: silent catch returning fallback",
+      "severity": "disqualifier",
+      "source_field": "expected.json/forbidden_patterns/0",
+      "source_ref": "expected.json:forbidden_patterns[0]"
+    },
+    {
+      "authority": "expected.json/max_deps_added",
+      "id": "max_deps_added__0",
+      "operational_check": "variant arm MUST NOT add more than 0 new npm dependencies (count delta of package.json:dependencies + devDependencies)",
+      "severity": "hard",
+      "source_field": "expected.json/max_deps_added",
+      "source_ref": "expected.json:max_deps_added"
+    },
+    {
+      "authority": "expected.json/required_files",
+      "id": "required_file__server_index_js",
+      "operational_check": "variant arm output MUST contain file 'server/index.js' (created or preserved)",
+      "severity": "hard",
+      "source_field": "expected.json/required_files",
+      "source_ref": "expected.json:required_files[server/index.js]"
+    },
+    {
+      "authority": "expected.json/required_files",
+      "id": "required_file__tests_server_test_js",
+      "operational_check": "variant arm output MUST contain file 'tests/server.test.js' (created or preserved)",
+      "severity": "hard",
+      "source_field": "expected.json/required_files",
+      "source_ref": "expected.json:required_files[tests/server.test.js]"
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "scope-tier-a:lockfile-deletion",
+      "operational_check": "variant arm MUST NOT delete a scaffold-present lockfile",
+      "severity": "hard",
+      "source_field": "oracle/scope-tier-a/scope-tier-a:lockfile-deletion",
+      "source_ref": "oracle-scope-tier-a.py"
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "scope-tier-a:tier-a-violation",
+      "operational_check": "variant arm MUST NOT add or modify paths matching: docs/roadmap/** | docs/VISION.md | docs/ROADMAP.md | .github/** | node_modules/** | **/node_modules/** | test-results/** | coverage/** | .nyc_output/** | basename suffix .log | basename prefix .env or secrets.",
+      "severity": "hard",
+      "source_field": "oracle/scope-tier-a/scope-tier-a:tier-a-violation",
+      "source_ref": "oracle-scope-tier-a.py"
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "scope-tier-b:scope-unmatched",
+      "operational_check": "every variant-touched file MUST be either inside spec_output_files (Tier C) OR reachable from a Tier C seed via static JS/TS imports OR matched by expected.json:tier_a_waivers",
+      "severity": "warn",
+      "source_field": "oracle/scope-tier-b/scope-tier-b:scope-unmatched",
+      "source_ref": "oracle-scope-tier-b.py"
+    },
+    {
+      "authority": "expected.json/spec_output_files",
+      "id": "spec_output_file__server_index_js",
+      "operational_check": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'server/index.js' is one Tier C seed",
+      "severity": "warn",
+      "source_field": "expected.json/spec_output_files",
+      "source_ref": "expected.json:spec_output_files[server/index.js]"
+    },
+    {
+      "authority": "expected.json/spec_output_files",
+      "id": "spec_output_file__tests_server_test_js",
+      "operational_check": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'tests/server.test.js' is one Tier C seed",
+      "severity": "warn",
+      "source_field": "expected.json/spec_output_files",
+      "source_ref": "expected.json:spec_output_files[tests/server.test.js]"
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "test-fidelity:assertion-regression",
+      "operational_check": "effective assertion count MUST NOT drop and skipped-test count MUST NOT rise; vacuous expect.assertions(0) is treated as a real regression",
+      "severity": "warn",
+      "source_field": "oracle/test-fidelity/test-fidelity:assertion-regression",
+      "source_ref": "oracle-test-fidelity.py"
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "test-fidelity:mock-swap",
+      "operational_check": "post-arm test file MUST NOT swap REAL_PATTERNS hits for MOCK_PATTERNS hits (jest/vi/sinon, nock/msw, app.handle/inject/callback, hand-rolled IncomingMessage/ServerResponse, etc.); a drop in real_calls combined with a rise in mock_calls is a mock-swap flag",
+      "severity": "flag",
+      "source_field": "oracle/test-fidelity/test-fidelity:mock-swap",
+      "source_ref": "oracle-test-fidelity.py"
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "test-fidelity:test-file-deleted",
+      "operational_check": "no scaffold-present test file may be deleted by the variant arm; deletion of an existing tests/*.test.* / *.spec.* / *.e2e.* file is a flag-severity finding",
+      "severity": "flag",
+      "source_field": "oracle/test-fidelity/test-fidelity:test-file-deleted",
+      "source_ref": "oracle-test-fidelity.py"
+    },
+    {
+      "authority": "metadata/oracle-allowlist",
+      "id": "test-fidelity:test-file-renamed",
+      "operational_check": "rename of a scaffold-present test file is warn-severity (content fidelity not verified across renames in step 1)",
+      "severity": "warn",
+      "source_field": "oracle/test-fidelity/test-fidelity:test-file-renamed",
+      "source_ref": "oracle-test-fidelity.py"
+    },
+    {
+      "authority": "expected.json/verification_commands",
+      "id": "verification__6001efe2",
+      "operational_check": "running `node -e 'const { app } = require(\"./server\"); const http = require(\"http\"); const s = http.createServer(app).listen(0, () => { const { port } = s.address(); http.get(`http://127.0.0.1:${port}/items?per_page=abc`, r => { console.log(r.statusCode); s.close(); }); });'` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['400']; stdout MUST NOT contain any of []",
+      "severity": "hard",
+      "source_field": "expected.json/verification_commands/3",
+      "source_ref": "expected.json:verification_commands[3]"
+    },
+    {
+      "authority": "expected.json/verification_commands",
+      "id": "verification__6517d995",
+      "operational_check": "running `node -e 'const { app } = require(\"./server\"); const http = require(\"http\"); const s = http.createServer(app).listen(0, () => { const { port } = s.address(); http.get(`http://127.0.0.1:${port}/items`, r => { let b = \"\"; r.on(\"data\", c=>b+=c); r.on(\"end\", () => { const d = JSON.parse(b); console.log(JSON.stringify({ total: d.total, page: d.page, per_page: d.per_page, items_len: d.items.length })); s.close(); }); }); });'` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['\"total\":2', '\"page\":1']; stdout MUST NOT contain any of []",
+      "severity": "hard",
+      "source_field": "expected.json/verification_commands/1",
+      "source_ref": "expected.json:verification_commands[1]"
+    },
+    {
+      "authority": "expected.json/verification_commands",
+      "id": "verification__73df5e81",
+      "operational_check": "running `node -e 'const { app } = require(\"./server\"); const http = require(\"http\"); const s = http.createServer(app).listen(0, () => { const { port } = s.address(); http.get(`http://127.0.0.1:${port}/items?page=2&per_page=1`, r => { let b = \"\"; r.on(\"data\", c=>b+=c); r.on(\"end\", () => { const d = JSON.parse(b); console.log(d.items[0] && d.items[0].name); s.close(); }); }); });'` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['beta']; stdout MUST NOT contain any of []",
+      "severity": "hard",
+      "source_field": "expected.json/verification_commands/2",
+      "source_ref": "expected.json:verification_commands[2]"
+    },
+    {
+      "authority": "expected.json/verification_commands",
+      "id": "verification__7c5f3637",
+      "operational_check": "running `node --test tests/server.test.js` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of []; stdout MUST NOT contain any of ['not ok ']",
+      "severity": "hard",
+      "source_field": "expected.json/verification_commands/0",
+      "source_ref": "expected.json:verification_commands[0]"
+    }
+  ],
+  "schema_version": "1"
+}

package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json ADDED Viewed

@@ -0,0 +1,65 @@
+{
+  "verification_commands": [
+    {
+      "cmd": "node --test tests/server.test.js",
+      "exit_code": 0,
+      "stdout_contains": [],
+      "stdout_not_contains": [
+        "not ok "
+      ]
+    },
+    {
+      "cmd": "node -e 'const { app } = require(\"./server\"); const http = require(\"http\"); const s = http.createServer(app).listen(0, () => { const { port } = s.address(); http.get(`http://127.0.0.1:${port}/items`, r => { let b = \"\"; r.on(\"data\", c=>b+=c); r.on(\"end\", () => { const d = JSON.parse(b); console.log(JSON.stringify({ total: d.total, page: d.page, per_page: d.per_page, items_len: d.items.length })); s.close(); }); }); });'",
+      "exit_code": 0,
+      "stdout_contains": [
+        "\"total\":2",
+        "\"page\":1"
+      ],
+      "stdout_not_contains": []
+    },
+    {
+      "cmd": "node -e 'const { app } = require(\"./server\"); const http = require(\"http\"); const s = http.createServer(app).listen(0, () => { const { port } = s.address(); http.get(`http://127.0.0.1:${port}/items?page=2&per_page=1`, r => { let b = \"\"; r.on(\"data\", c=>b+=c); r.on(\"end\", () => { const d = JSON.parse(b); console.log(d.items[0] && d.items[0].name); s.close(); }); }); });'",
+      "exit_code": 0,
+      "stdout_contains": [
+        "beta"
+      ],
+      "stdout_not_contains": []
+    },
+    {
+      "cmd": "node -e 'const { app } = require(\"./server\"); const http = require(\"http\"); const s = http.createServer(app).listen(0, () => { const { port } = s.address(); http.get(`http://127.0.0.1:${port}/items?per_page=abc`, r => { console.log(r.statusCode); s.close(); }); });'",
+      "exit_code": 0,
+      "stdout_contains": [
+        "400"
+      ],
+      "stdout_not_contains": []
+    }
+  ],
+  "forbidden_patterns": [
+    {
+      "pattern": "catch\\s*\\([^)]*\\)\\s*\\{[^}]*return\\s+(null|undefined|'')",
+      "description": "silent catch returning fallback",
+      "files": [
+        "server/index.js"
+      ],
+      "severity": "disqualifier"
+    },
+    {
+      "pattern": "/\\*\\s*eslint-disable",
+      "description": "eslint-disable without explicit per-issue justification",
+      "files": [
+        "server/index.js"
+      ],
+      "severity": "disqualifier"
+    }
+  ],
+  "required_files": [
+    "server/index.js",
+    "tests/server.test.js"
+  ],
+  "forbidden_files": [],
+  "max_deps_added": 0,
+  "spec_output_files": [
+    "server/index.js",
+    "tests/server.test.js"
+  ]
+}

package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/metadata.json ADDED Viewed

@@ -0,0 +1,19 @@
+{
+  "id": "F3-backend-contract-risk",
+  "category": "high-risk",
+  "difficulty": "high",
+  "timeout_seconds": 1500,
+  "required_tools": ["node"],
+  "browser": false,
+  "deps_change_expected": false,
+  "intent": "Change the GET /items response shape from { items } to a paginated { items, total, page, per_page } while keeping the existing 1-based id semantics and updating tests. A lazy implementation will leave tests broken or drop the items array — the spec requires both passing tests and the items array preserved in the envelope.",
+  "pair_plan_oracle_categories": [
+    "scope-tier-a:lockfile-deletion",
+    "scope-tier-a:tier-a-violation",
+    "scope-tier-b:scope-unmatched",
+    "test-fidelity:assertion-regression",
+    "test-fidelity:mock-swap",
+    "test-fidelity:test-file-deleted",
+    "test-fidelity:test-file-renamed"
+  ]
+}

package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/setup.sh ADDED Viewed

@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+# F3 setup — no changes to base test-repo. Task modifies existing server/index.js and tests/server.test.js.
+set -e
+exit 0

package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md ADDED Viewed

@@ -0,0 +1,56 @@
+---
+id: "F3-backend-contract-risk"
+title: "Paginate GET /items response"
+status: planned
+complexity: high
+depends-on: []
+---
+# F3 Paginate `GET /items`
+## Context
+`server/index.js` currently returns `{ items: [...] }` for `GET /items` with
+no pagination metadata. As the dataset grows, clients need a `total` and
+paging information. The task: wrap the existing response in a pagination
+envelope, accept `?page` and `?per_page` query parameters, and update tests
+so existing assertions continue to pass alongside new paging assertions.
+## Requirements
+- [ ] `GET /items` (no query) returns `{ items, total, page, per_page }` where:
+  - `items` is the full list (baseline repo has 2 items).
+  - `total` is the full item count.
+  - `page` is `1`.
+  - `per_page` is the full item count when no pagination was requested.
+- [ ] `GET /items?page=1&per_page=1` returns the first item wrapped in the envelope with `items.length === 1`, `total === 2`, `page === 1`, `per_page === 1`.
+- [ ] `GET /items?page=2&per_page=1` returns the second item similarly.
+- [ ] `GET /items?page=99&per_page=1` returns `items: []`, `total === 2`, `page === 99`, `per_page === 1` (an out-of-range page is allowed — respond with the normal envelope containing an empty `items` array, never a 404).
+- [ ] `GET /items/:id` behavior unchanged (the per-item route does NOT get paginated).
+- [ ] `tests/server.test.js` is updated so every existing assertion still holds (semantically) AND the new paging behavior is covered by at least two new tests.
+- [ ] `GET /health` continues to return `{ status: 'ok' }` unchanged.
+## Constraints
+- **No new npm dependencies.** Use only Express + built-ins already in the repo.
+- **No silent catches.** Invalid `page` or `per_page` (non-numeric, zero, negative) must respond 400 with `{ error: 'invalid_query', field }`, where `field` is the name of the offending query parameter (`'page'` or `'per_page'`).
+- **No breaking change to `/items/:id`.** The per-item route must keep its current contract (the fixture explicitly does NOT paginate single-item lookups).
+- **Backward-compat note**: clients that previously read `response.items` MUST still get the array at the same key inside the new envelope.
+- **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
+## Out of Scope
+- Caching, rate limiting, authentication.
+- Converting `items` to a database-backed list.
+- Touching `bin/cli.js`, `web/`, or `tests/cli.test.js`.
+- Adding a new route.
+## Verification
+- Server start: `node server/index.js` listens on port 3000 (exit via SIGINT).
+- `curl -s http://127.0.0.1:3000/items | jq '.total'` returns `2`.
+- `curl -s 'http://127.0.0.1:3000/items?per_page=1&page=2' | jq '.items[0].name'` returns `"beta"`.
+- `curl -s 'http://127.0.0.1:3000/items?per_page=abc' -o /dev/null -w '%{http_code}'` returns `400`.
+- `node --test tests/server.test.js` passes; must include ≥ 2 new paging tests.
+- `git diff --stat` shows only `server/index.js` and `tests/server.test.js` touched.