npm - devlyn-cli - Versions diffs - 2.3.0 → 2.3.1 - Mend

devlyn-cli 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (219) hide show

package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/NOTES.md ADDED Viewed

@@ -0,0 +1,34 @@
+# S4-cli-return-routing NOTES
+## What failure mode does this fixture detect?
+Priority-sensitive return routing where policy decisions and mutable destination
+capacity interact. Bare implementations commonly route in input order, decrement
+capacity for rejected rows, apply the dispose/window rule after condition
+branches, or emit rejected rows in processing order.
+## What pipeline phase(s) is this testing?
+- **PLAN / RISK_PROBES**: must notice priority ordering, condition/window rule
+  order, capacity mutation, duplicate id handling, and output-shape contracts.
+- **IMPLEMENT**: must add a CLI command without broadening scope or adding deps.
+- **VERIFY**: hidden oracles exercise compound behavior that simple unit tests
+  often miss.
+## Why can't another fixture cover this?
+S2 uses single-SKU inventory and S3 uses skill/capacity assignment. S4 adds a
+policy-derived destination before capacity mutation, so it catches rule-order
+and output-order failures that those fixtures do not.
+## When should this fixture be retired?
+Retire or replace it if two consecutive measured runs show that both bare and
+solo_claude consistently satisfy priority ordering, policy rule order, capacity
+mutation, duplicate id handling, and exact output shape without pair assistance.
+## Calibration status
+- `20260513-s4-return-headroom`: bare `33`, solo_claude `98`, headroom gate
+  `FAIL` because solo_claude scored above the `80` ceiling and timed out. Treat
+  S4 as a shadow control unless it is reworked to preserve solo_claude headroom.

package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/expected.json ADDED Viewed

@@ -0,0 +1,55 @@
+{
+  "verification_commands": [
+    {
+      "cmd": "node --test tests/cli.test.js",
+      "exit_code": 0,
+      "stdout_contains": [],
+      "stdout_not_contains": ["not ok "]
+    },
+    {
+      "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/priority-return-routing.js\"",
+      "exit_code": 0,
+      "stdout_contains": ["\"ok\":true"],
+      "stdout_not_contains": [],
+      "contract_refs": [
+        "Process returns globally by `priority` descending, then original input order ascending.",
+        "A return with an unknown category rejects with reason `unknown_category` and does not change capacity.",
+        "For a known category, choose the target destination by this rule order: `damaged` condition routes to `dispose`; otherwise, if `days_since_purchase` is greater than `restock_window_days`, route to `dispose`; otherwise `sealed` routes to `restock`; otherwise `opened` routes to `refurbish`.",
+        "A return accepts only when the chosen destination exists in `capacity` and has positive remaining capacity. On accept, decrement that destination by `1`.",
+        "A return rejects with reason `destination_full` when the chosen destination is absent from `capacity` or has zero remaining capacity.",
+        "`routed` is ordered by processing order. Each row has keys `id`, `destination`.",
+        "`rejected` is ordered in the original input order. Each row has keys `id`, `reason`.",
+        "`capacity` is an object whose keys are sorted alphabetically and whose values are remaining capacities.",
+        "On success, write exactly one JSON object to stdout and no stderr. Keys: `routed`, `rejected`, `capacity`."
+      ]
+    },
+    {
+      "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/duplicate-return-error.js\"",
+      "exit_code": 0,
+      "stdout_contains": ["\"ok\":true"],
+      "stdout_not_contains": [],
+      "contract_refs": [
+        "Before routing any return, duplicate return ids are invalid input: exit `2`, write exactly one JSON error object `{ \"error\": \"duplicate_return_id\", \"id\": string }` to stderr, and write no stdout."
+      ]
+    }
+  ],
+  "forbidden_patterns": [
+    {
+      "pattern": "catch\\s*\\([^)]*\\)\\s*\\{[^}]*return\\s+(null|undefined|''|\\{\\}|\\[\\])",
+      "description": "silent catch returning fallback in route-returns path",
+      "files": ["bin/cli.js", "tests/cli.test.js"],
+      "severity": "disqualifier"
+    },
+    {
+      "pattern": "catch\\s*\\([^)]*\\)\\s*\\{\\s*\\}",
+      "description": "empty catch block",
+      "files": ["bin/cli.js", "tests/cli.test.js"],
+      "severity": "disqualifier"
+    }
+  ],
+  "required_files": ["bin/cli.js", "tests/cli.test.js"],
+  "forbidden_files": [],
+  "tier_a_waivers": [],
+  "spec_output_files": ["bin/cli.js", "tests/cli.test.js"],
+  "max_deps_added": 0
+}

package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/metadata.json ADDED Viewed

@@ -0,0 +1,10 @@
+{
+  "id": "S4-cli-return-routing",
+  "category": "high-risk",
+  "difficulty": "high",
+  "timeout_seconds": 900,
+  "required_tools": ["node"],
+  "browser": false,
+  "deps_change_expected": false,
+  "intent": "Add a return routing CLI command that applies category policy, priority ordering, destination capacity mutation, duplicate id rejection, and exact JSON output shape."
+}

package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/setup.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# S4 reuses the baseline test-repo state.

package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/spec.md ADDED Viewed

@@ -0,0 +1,55 @@
+---
+id: "S4-cli-return-routing"
+title: "Add return routing command"
+status: planned
+complexity: high
+depends-on: []
+---
+# S4 Add Return Routing Command
+## Context
+Operations needs a deterministic CLI command that routes product returns to
+restock, refurbish, or dispose destinations. The command must combine category
+policy, condition/window rules, priority ordering, destination capacity mutation,
+and exact machine-readable output.
+## Requirements
+- [ ] Add `route-returns` to `bin/cli.js`.
+- [ ] Accept `--policies <json>` as a JSON array of policy objects. Each policy has keys `category`, `restock_window_days`, and `destinations`.
+- [ ] Each `destinations` object has keys `restock`, `refurbish`, and `dispose`, whose values are destination ids.
+- [ ] Accept `--capacity <json>` as a JSON object mapping destination ids to non-negative integer remaining capacity.
+- [ ] Accept `--returns <json>` as a JSON array of return objects. Each return has keys `id`, `category`, `condition`, `days_since_purchase`, and `priority`.
+- [ ] Before routing any return, duplicate return ids are invalid input: exit `2`, write exactly one JSON error object `{ "error": "duplicate_return_id", "id": string }` to stderr, and write no stdout.
+- [ ] Process returns globally by `priority` descending, then original input order ascending.
+- [ ] A return with an unknown category rejects with reason `unknown_category` and does not change capacity.
+- [ ] For a known category, choose the target destination by this rule order: `damaged` condition routes to `dispose`; otherwise, if `days_since_purchase` is greater than `restock_window_days`, route to `dispose`; otherwise `sealed` routes to `restock`; otherwise `opened` routes to `refurbish`.
+- [ ] A return rejects with reason `unsupported_condition` when condition is not `sealed`, `opened`, or `damaged`, and does not change capacity.
+- [ ] A return accepts only when the chosen destination exists in `capacity` and has positive remaining capacity. On accept, decrement that destination by `1`.
+- [ ] A return rejects with reason `destination_full` when the chosen destination is absent from `capacity` or has zero remaining capacity.
+- [ ] `routed` is ordered by processing order. Each row has keys `id`, `destination`.
+- [ ] `rejected` is ordered in the original input order. Each row has keys `id`, `reason`.
+- [ ] `capacity` is an object whose keys are sorted alphabetically and whose values are remaining capacities.
+- [ ] On success, write exactly one JSON object to stdout and no stderr. Keys: `routed`, `rejected`, `capacity`.
+## Constraints
+- Use only Node.js built-ins; add no npm dependencies.
+- Touch only `bin/cli.js` and `tests/cli.test.js`.
+- Do not silently catch JSON parse or validation errors. Surface invalid input as a user-visible error with nonzero exit.
+- Do not persist destination capacity between command invocations.
+## Out of Scope
+- Reading input from files.
+- SKU catalogs, refund amounts, shipping labels, or warehouse zones.
+- Changing `hello`, `version`, server routes, or package metadata.
+## Verification
+- `node --test tests/cli.test.js` passes.
+- `node "$BENCH_FIXTURE_DIR/verifiers/priority-return-routing.js"` prints `{"ok":true}`.
+- `node "$BENCH_FIXTURE_DIR/verifiers/duplicate-return-error.js"` prints `{"ok":true}`.
+- Solo-headroom hypothesis: solo_claude is expected to miss destination policy precedence or capacity mutation under priority routing; observable command `node "$BENCH_FIXTURE_DIR/verifiers/priority-return-routing.js"` exposes the miss.

package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/task.txt ADDED Viewed

@@ -0,0 +1 @@

+ Add a `route-returns` CLI command. It should accept `--policies <json>`, `--capacity <json>`, and `--returns <json>`, route known-category returns by condition/window policy, process higher priority returns first, decrement destination capacity only for accepted returns, reject duplicate return ids with exit 2 and a JSON error, and print exactly one JSON object with `routed`, `rejected`, and `capacity`. Use no new dependencies and update CLI tests.

package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/verifiers/duplicate-return-error.js ADDED Viewed

@@ -0,0 +1,43 @@
+'use strict';
+const assert = require('node:assert');
+const { spawnSync } = require('node:child_process');
+const path = require('node:path');
+const work = process.env.BENCH_WORKDIR || process.cwd();
+const cli = path.join(work, 'bin', 'cli.js');
+const policies = JSON.stringify([
+  {
+    category: 'electronics',
+    restock_window_days: 30,
+    destinations: { restock: 'restock-a', refurbish: 'refurb-a', dispose: 'dispose-a' }
+  }
+]);
+const capacity = JSON.stringify({ 'restock-a': 1 });
+const returns = JSON.stringify([
+  { id: 'dup', category: 'electronics', condition: 'sealed', days_since_purchase: 1, priority: 2 },
+  { id: 'dup', category: 'electronics', condition: 'opened', days_since_purchase: 1, priority: 1 }
+]);
+const result = spawnSync('node', [
+  cli,
+  'route-returns',
+  '--policies',
+  policies,
+  '--capacity',
+  capacity,
+  '--returns',
+  returns
+], {
+  cwd: work,
+  encoding: 'utf8'
+});
+assert.strictEqual(result.status, 2);
+assert.strictEqual(result.stdout, '');
+assert.deepStrictEqual(JSON.parse(result.stderr), {
+  error: 'duplicate_return_id',
+  id: 'dup'
+});
+console.log(JSON.stringify({ ok: true }));

package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/verifiers/priority-return-routing.js ADDED Viewed

@@ -0,0 +1,70 @@
+'use strict';
+const assert = require('node:assert');
+const { spawnSync } = require('node:child_process');
+const path = require('node:path');
+const work = process.env.BENCH_WORKDIR || process.cwd();
+const cli = path.join(work, 'bin', 'cli.js');
+const policies = JSON.stringify([
+  {
+    category: 'electronics',
+    restock_window_days: 30,
+    destinations: {
+      restock: 'restock-a',
+      refurbish: 'refurb-a',
+      dispose: 'dispose-a'
+    }
+  }
+]);
+const capacity = JSON.stringify({
+  'dispose-a': 1,
+  'refurb-a': 1,
+  'restock-a': 1
+});
+const returns = JSON.stringify([
+  { id: 'low-sealed', category: 'electronics', condition: 'sealed', days_since_purchase: 10, priority: 1 },
+  { id: 'vip-opened', category: 'electronics', condition: 'opened', days_since_purchase: 20, priority: 10 },
+  { id: 'vip-damaged', category: 'electronics', condition: 'damaged', days_since_purchase: 5, priority: 9 },
+  { id: 'std-sealed', category: 'electronics', condition: 'sealed', days_since_purchase: 10, priority: 5 },
+  { id: 'late-sealed', category: 'electronics', condition: 'sealed', days_since_purchase: 10, priority: 4 },
+  { id: 'unknown-cat', category: 'furniture', condition: 'sealed', days_since_purchase: 1, priority: 3 }
+]);
+const result = spawnSync('node', [
+  cli,
+  'route-returns',
+  '--policies',
+  policies,
+  '--capacity',
+  capacity,
+  '--returns',
+  returns
+], {
+  cwd: work,
+  encoding: 'utf8'
+});
+assert.strictEqual(result.status, 0, result.stderr || result.stdout);
+assert.strictEqual(result.stderr, '');
+const parsed = JSON.parse(result.stdout);
+assert.deepStrictEqual(parsed, {
+  routed: [
+    { id: 'vip-opened', destination: 'refurb-a' },
+    { id: 'vip-damaged', destination: 'dispose-a' },
+    { id: 'std-sealed', destination: 'restock-a' }
+  ],
+  rejected: [
+    { id: 'low-sealed', reason: 'destination_full' },
+    { id: 'late-sealed', reason: 'destination_full' },
+    { id: 'unknown-cat', reason: 'unknown_category' }
+  ],
+  capacity: {
+    'dispose-a': 0,
+    'refurb-a': 0,
+    'restock-a': 0
+  }
+});
+console.log(JSON.stringify({ ok: true }));

package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/NOTES.md ADDED Viewed

@@ -0,0 +1,37 @@
+# S5-cli-credit-grant-ledger NOTES
+## What failure mode does this fixture detect?
+Billing credit settlement where date ordering, same-day priority, grant
+expiration, account isolation, and mutable grant balances interact. Bare or
+single-pass implementations commonly process input order, use expired grants,
+let one account consume another account's grant, or compute balances without
+respecting earlier charge consumption.
+## What pipeline phase(s) is this testing?
+- **PLAN / RISK_PROBES**: must notice charge ordering, expiration boundaries,
+  grant consumption order, account scoping, mutation, duplicate id handling, and
+  exact output-shape contracts.
+- **IMPLEMENT**: must add a CLI command without broadening scope or adding deps.
+- **VERIFY**: hidden oracles exercise compound ledger behavior that simple unit
+  tests often miss.
+## Why can't another fixture cover this?
+S2 covers inventory reservation, S3 covers agent assignment, and S4 covers return
+routing. S5 adds money-like credit balances with date-bounded grant eligibility,
+which catches a different class of ledger mutation and ordering failures.
+## When should this fixture be retired?
+Retire or replace it if two consecutive measured runs show that both bare and
+solo_claude consistently satisfy charge ordering, account scoping, expiration
+boundaries, mutable grant balances, duplicate id handling, and exact output
+shape without pair assistance.
+## Calibration status
+- `20260513-s5-credit-headroom`: bare `33`, solo_claude `98`, headroom gate
+  `FAIL` because solo_claude scored above the `80` ceiling and timed out. Treat
+  S5 as a shadow control unless it is reworked to preserve solo_claude headroom.

package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/expected.json ADDED Viewed

@@ -0,0 +1,54 @@
+{
+  "verification_commands": [
+    {
+      "cmd": "node --test tests/cli.test.js",
+      "exit_code": 0,
+      "stdout_contains": [],
+      "stdout_not_contains": ["not ok "]
+    },
+    {
+      "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/credit-ledger-priority.js\"",
+      "exit_code": 0,
+      "stdout_contains": ["\"ok\":true"],
+      "stdout_not_contains": [],
+      "contract_refs": [
+        "Process charges globally by `occurred_on` ascending, then `priority` descending, then original input order ascending.",
+        "A grant is usable for a charge only when it belongs to the same `account`, has positive remaining `cents`, and `expires_on` is greater than or equal to the charge's `occurred_on`.",
+        "For each charge, consume usable grants by `expires_on` ascending, then grant `id` ascending, until the charge is fully covered or no usable grant remains.",
+        "Consuming a grant decrements that grant's remaining `cents` immediately; later charges see the reduced balance.",
+        "Each `settled` row is ordered by processing order and has keys `id`, `covered_cents`, `uncovered_cents`, and `grants`.",
+        "`balances` is ordered by grant `id` ascending. Each row has keys `id` and `remaining_cents`.",
+        "`expired` is ordered by grant `id` ascending and includes grants with positive remaining `cents` whose `expires_on` is less than `--as-of`. Each row has keys `id` and `remaining_cents`.",
+        "On success, write exactly one JSON object to stdout and no stderr. Keys: `settled`, `balances`, `expired`."
+      ]
+    },
+    {
+      "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/duplicate-charge-error.js\"",
+      "exit_code": 0,
+      "stdout_contains": ["\"ok\":true"],
+      "stdout_not_contains": [],
+      "contract_refs": [
+        "Before settling any charge, duplicate charge ids are invalid input: exit `2`, write exactly one JSON error object `{ \"error\": \"duplicate_charge_id\", \"id\": string }` to stderr, and write no stdout."
+      ]
+    }
+  ],
+  "forbidden_patterns": [
+    {
+      "pattern": "catch\\s*\\([^)]*\\)\\s*\\{[^}]*return\\s+(null|undefined|''|\\{\\}|\\[\\])",
+      "description": "silent catch returning fallback in settle-credits path",
+      "files": ["bin/cli.js", "tests/cli.test.js"],
+      "severity": "disqualifier"
+    },
+    {
+      "pattern": "catch\\s*\\([^)]*\\)\\s*\\{\\s*\\}",
+      "description": "empty catch block",
+      "files": ["bin/cli.js", "tests/cli.test.js"],
+      "severity": "disqualifier"
+    }
+  ],
+  "required_files": ["bin/cli.js", "tests/cli.test.js"],
+  "forbidden_files": [],
+  "tier_a_waivers": [],
+  "spec_output_files": ["bin/cli.js", "tests/cli.test.js"],
+  "max_deps_added": 0
+}

package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/metadata.json ADDED Viewed

@@ -0,0 +1,10 @@
+{
+  "id": "S5-cli-credit-grant-ledger",
+  "category": "high-risk",
+  "difficulty": "high",
+  "timeout_seconds": 900,
+  "required_tools": ["node"],
+  "browser": false,
+  "deps_change_expected": false,
+  "intent": "Add a credit grant ledger CLI command that applies account-scoped grant expiration, priority-ordered charges, mutable balances, duplicate charge rejection, and exact JSON output shape."
+}

package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/setup.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# S5 reuses the baseline test-repo state.

package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/spec.md ADDED Viewed

@@ -0,0 +1,59 @@
+---
+id: "S5-cli-credit-grant-ledger"
+title: "Add credit grant ledger command"
+status: planned
+complexity: high
+depends-on: []
+---
+# S5 Add Credit Grant Ledger Command
+## Context
+Billing operations needs a deterministic CLI command that settles account
+charges against expiring promotional credit grants. The command must combine
+account isolation, date ordering, same-day priority, grant expiration, mutable
+balances, duplicate id rejection, and exact machine-readable output.
+## Requirements
+- [ ] Add `settle-credits` to `bin/cli.js`.
+- [ ] Accept `--grants <json>` as a JSON array of grant objects. Each grant has
+  keys `id`, `account`, `cents`, and `expires_on`.
+- [ ] Accept `--charges <json>` as a JSON array of charge objects. Each charge
+  has keys `id`, `account`, `cents`, `occurred_on`, and `priority`.
+- [ ] Accept `--as-of <YYYY-MM-DD>` as the ledger close date.
+- [ ] Before settling any charge, duplicate charge ids are invalid input: exit `2`, write exactly one JSON error object `{ "error": "duplicate_charge_id", "id": string }` to stderr, and write no stdout.
+- [ ] Process charges globally by `occurred_on` ascending, then `priority` descending, then original input order ascending.
+- [ ] A grant is usable for a charge only when it belongs to the same `account`, has positive remaining `cents`, and `expires_on` is greater than or equal to the charge's `occurred_on`.
+- [ ] For each charge, consume usable grants by `expires_on` ascending, then grant `id` ascending, until the charge is fully covered or no usable grant remains.
+- [ ] Consuming a grant decrements that grant's remaining `cents` immediately; later charges see the reduced balance.
+- [ ] Each `settled` row is ordered by processing order and has keys `id`, `covered_cents`, `uncovered_cents`, and `grants`.
+- [ ] Each row's `grants` array is ordered by actual consumption order. Each
+  item has keys `id` and `cents`.
+- [ ] `balances` is ordered by grant `id` ascending. Each row has keys `id` and `remaining_cents`.
+- [ ] `expired` is ordered by grant `id` ascending and includes grants with positive remaining `cents` whose `expires_on` is less than `--as-of`. Each row has keys `id` and `remaining_cents`.
+- [ ] On success, write exactly one JSON object to stdout and no stderr. Keys: `settled`, `balances`, `expired`.
+## Constraints
+- Use only Node.js built-ins; add no npm dependencies.
+- Touch only `bin/cli.js` and `tests/cli.test.js`.
+- Do not silently catch JSON parse or validation errors. Surface invalid input
+  as a user-visible error with nonzero exit.
+- Do not persist grant balances between command invocations.
+## Out of Scope
+- Reading input from files.
+- Taxes, invoices, refunds, currency conversion, or account creation.
+- Changing `hello`, `version`, server routes, or package metadata.
+## Verification
+- `node --test tests/cli.test.js` passes.
+- `node "$BENCH_FIXTURE_DIR/verifiers/credit-ledger-priority.js"` prints
+  `{"ok":true}`.
+- `node "$BENCH_FIXTURE_DIR/verifiers/duplicate-charge-error.js"` prints
+  `{"ok":true}`.
+- Solo-headroom hypothesis: solo_claude is expected to miss mutable grant balances across priority/date-ordered charges; observable command `node "$BENCH_FIXTURE_DIR/verifiers/credit-ledger-priority.js"` exposes the miss.

package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/task.txt ADDED Viewed

@@ -0,0 +1 @@

+ Add a `settle-credits` CLI command. It should accept `--grants <json>`, `--charges <json>`, and `--as-of <YYYY-MM-DD>`, settle account-scoped charges against unexpired credit grants, process earlier charge dates first with higher priority first on the same date, consume grants by earliest expiration then grant id, reject duplicate charge ids with exit 2 and a JSON error, and print exactly one JSON object with `settled`, `balances`, and `expired`. Use no new dependencies and update CLI tests.

package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/verifiers/credit-ledger-priority.js ADDED Viewed

@@ -0,0 +1,98 @@
+'use strict';
+const assert = require('node:assert');
+const { spawnSync } = require('node:child_process');
+const path = require('node:path');
+const work = process.env.BENCH_WORKDIR || process.cwd();
+const cli = path.join(work, 'bin', 'cli.js');
+const grants = JSON.stringify([
+  { id: 'g-late', account: 'acct-1', cents: 700, expires_on: '2026-03-31' },
+  { id: 'g-early-b', account: 'acct-1', cents: 300, expires_on: '2026-01-31' },
+  { id: 'g-early-a', account: 'acct-1', cents: 500, expires_on: '2026-01-31' },
+  { id: 'g-other', account: 'acct-2', cents: 400, expires_on: '2026-01-31' },
+  { id: 'g-expired-unused', account: 'acct-1', cents: 250, expires_on: '2026-01-05' }
+]);
+const charges = JSON.stringify([
+  { id: 'late-low', account: 'acct-1', cents: 500, occurred_on: '2026-01-20', priority: 1 },
+  { id: 'early', account: 'acct-1', cents: 450, occurred_on: '2026-01-10', priority: 1 },
+  { id: 'same-day-high', account: 'acct-1', cents: 600, occurred_on: '2026-01-20', priority: 9 },
+  { id: 'other-account', account: 'acct-2', cents: 350, occurred_on: '2026-01-20', priority: 10 },
+  { id: 'after-expiry', account: 'acct-1', cents: 500, occurred_on: '2026-02-02', priority: 10 }
+]);
+const result = spawnSync('node', [
+  cli,
+  'settle-credits',
+  '--grants',
+  grants,
+  '--charges',
+  charges,
+  '--as-of',
+  '2026-04-01'
+], {
+  cwd: work,
+  encoding: 'utf8'
+});
+assert.strictEqual(result.status, 0, result.stderr || result.stdout);
+assert.strictEqual(result.stderr, '');
+const parsed = JSON.parse(result.stdout);
+assert.deepStrictEqual(parsed, {
+  settled: [
+    {
+      id: 'early',
+      covered_cents: 450,
+      uncovered_cents: 0,
+      grants: [
+        { id: 'g-early-a', cents: 450 }
+      ]
+    },
+    {
+      id: 'other-account',
+      covered_cents: 350,
+      uncovered_cents: 0,
+      grants: [
+        { id: 'g-other', cents: 350 }
+      ]
+    },
+    {
+      id: 'same-day-high',
+      covered_cents: 600,
+      uncovered_cents: 0,
+      grants: [
+        { id: 'g-early-a', cents: 50 },
+        { id: 'g-early-b', cents: 300 },
+        { id: 'g-late', cents: 250 }
+      ]
+    },
+    {
+      id: 'late-low',
+      covered_cents: 450,
+      uncovered_cents: 50,
+      grants: [
+        { id: 'g-late', cents: 450 }
+      ]
+    },
+    {
+      id: 'after-expiry',
+      covered_cents: 0,
+      uncovered_cents: 500,
+      grants: []
+    }
+  ],
+  balances: [
+    { id: 'g-early-a', remaining_cents: 0 },
+    { id: 'g-early-b', remaining_cents: 0 },
+    { id: 'g-expired-unused', remaining_cents: 250 },
+    { id: 'g-late', remaining_cents: 0 },
+    { id: 'g-other', remaining_cents: 50 }
+  ],
+  expired: [
+    { id: 'g-expired-unused', remaining_cents: 250 },
+    { id: 'g-other', remaining_cents: 50 }
+  ]
+});
+console.log(JSON.stringify({ ok: true }));

package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/verifiers/duplicate-charge-error.js ADDED Viewed

@@ -0,0 +1,38 @@
+'use strict';
+const assert = require('node:assert');
+const { spawnSync } = require('node:child_process');
+const path = require('node:path');
+const work = process.env.BENCH_WORKDIR || process.cwd();
+const cli = path.join(work, 'bin', 'cli.js');
+const grants = JSON.stringify([
+  { id: 'g1', account: 'acct-1', cents: 100, expires_on: '2026-01-31' }
+]);
+const charges = JSON.stringify([
+  { id: 'dup', account: 'acct-1', cents: 50, occurred_on: '2026-01-10', priority: 1 },
+  { id: 'dup', account: 'acct-1', cents: 50, occurred_on: '2026-01-11', priority: 2 }
+]);
+const result = spawnSync('node', [
+  cli,
+  'settle-credits',
+  '--grants',
+  grants,
+  '--charges',
+  charges,
+  '--as-of',
+  '2026-02-01'
+], {
+  cwd: work,
+  encoding: 'utf8'
+});
+assert.strictEqual(result.status, 2);
+assert.strictEqual(result.stdout, '');
+assert.deepStrictEqual(JSON.parse(result.stderr), {
+  error: 'duplicate_charge_id',
+  id: 'dup'
+});
+console.log(JSON.stringify({ ok: true }));

package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/NOTES.md ADDED Viewed

@@ -0,0 +1,36 @@
+# S6-cli-refund-window-ledger NOTES
+## What failure mode does this fixture detect?
+Refund settlement where category windows, priority ordering, mutable per-order
+remaining refundable cents, fee calculation, and exact output-shape obligations
+interact. Bare or single-pass implementations commonly process input order,
+forget that an earlier high-priority refund consumes later refundable balance,
+or report rejected rows in processing order.
+## What pipeline phase(s) is this testing?
+- **PLAN / RISK_PROBES**: must notice priority ordering, date-window policy,
+  mutable per-order balances, duplicate id handling, and output-shape contracts.
+- **IMPLEMENT**: must add a CLI command without broadening scope or adding deps.
+- **VERIFY**: hidden oracles exercise compound refund ledger behavior that
+  simple unit tests often miss.
+## Why can't another fixture cover this?
+S5 covers credit grant consumption by charge date. S6 flips the money-like
+mutation to customer refunds, where priority ordering and refund-window policy
+jointly decide whether a later input row consumes the balance first.
+## When should this fixture be retired?
+Retire or replace it if two consecutive measured runs show that both bare and
+solo_claude consistently satisfy priority ordering, refund-window policy,
+cumulative refundable balances, duplicate id handling, and exact output shape
+without pair assistance.
+## Calibration status
+- `20260514-s6-refund-headroom-v1`: bare `33`, solo_claude `98`, headroom
+  gate `FAIL` because solo_claude scored above the `80` ceiling and timed out.
+  Treat S6 as a shadow control unless it is reworked to preserve solo_claude
+  headroom.