npm - devlyn-cli - Versions diffs - 2.1.0 → 2.2.1 - Mend

devlyn-cli 2.1.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (127) hide show

package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/verifiers/stock-error.js ADDED Viewed

@@ -0,0 +1,34 @@
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { spawnSync } = require('node:child_process');
+const workdir = process.env.BENCH_WORKDIR || process.cwd();
+const input = path.join(os.tmpdir(), `cart-stock-${process.pid}.json`);
+fs.writeFileSync(input, JSON.stringify({
+  state: 'OR',
+  coupon: null,
+  items: [
+    { sku: 'BAG', qty: 2 },
+    { sku: 'MUG', qty: 1 },
+    { sku: 'BAG', qty: 3 }
+  ]
+}));
+const proc = spawnSync('node', ['bin/cli.js', 'cart', '--input', input], {
+  cwd: workdir,
+  encoding: 'utf8'
+});
+assert.strictEqual(proc.status, 2);
+assert.strictEqual(proc.stdout, '');
+assert.deepStrictEqual(JSON.parse(proc.stderr), {
+  error: 'invalid_stock',
+  sku: 'BAG',
+  available: 4,
+  requested: 5
+});
+process.stdout.write(JSON.stringify({ ok: true }) + '\n');

package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/NOTES.md ADDED Viewed

@@ -0,0 +1,25 @@
+# F26 CLI payout ledger rules
+## Failure mode
+This fixture detects settlement implementations that pass simple payout tests
+while mishandling idempotent events, conflicting duplicates, fee ordering,
+dispute fees, reserves, minimum payout holds, and top-level totals.
+## Pipeline phase target
+PLAN must preserve event deduplication and arithmetic order. IMPLEMENT must keep
+fee/reserve math in integer cents and avoid hardcoded rules. VERIFY should build
+adversarial ledger examples with repeated IDs, refunds, disputes, and reserves.
+## Why existing fixtures do not cover it
+F16 covers quote math and F25 covers cart promotions, but neither has ledger
+idempotency or conflicting duplicate events. F21/F23 became oracle-control
+fixtures, so this adds a fresh visible-contract stateful arithmetic candidate.
+## Retirement
+Retire or replace this fixture if solo consistently reaches ceiling or if
+another fixture provides the same idempotent-ledger signal with cleaner
+full-pipeline pair lift.

package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/expected.json ADDED Viewed

@@ -0,0 +1,68 @@
+{
+  "verification_commands": [
+    {
+      "cmd": "node --test tests/cli.test.js",
+      "exit_code": 0,
+      "stdout_contains": [],
+      "stdout_not_contains": ["not ok "]
+    },
+    {
+      "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/exact-payout.js\"",
+      "exit_code": 0,
+      "stdout_contains": ["\"ok\":true"],
+      "stdout_not_contains": [],
+      "contract_refs": [
+        "Identical duplicate event IDs are applied only once before merchant totals are computed.",
+        "A payout with charges, a refund, and a dispute computes processing fees, dispute fees, reserves, merchant payouts, and top-level totals exactly.",
+        "Processing fees apply to charges only; refunds do not reverse processing fees.",
+        "Dispute events subtract the dispute amount and add one dispute fee per dispute event.",
+        "Merchant rows preserve first-seen merchant order after idempotent duplicate removal."
+      ]
+    },
+    {
+      "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/conflicting-duplicate.js\"",
+      "exit_code": 0,
+      "stdout_contains": ["\"ok\":true"],
+      "stdout_not_contains": [],
+      "contract_refs": [
+        "Conflicting duplicate events use exact error shape `{ \"error\": \"conflicting_duplicate\", \"id\": string }`.",
+        "A conflicting duplicate exits `2`, prints one JSON error to stderr, and prints no stdout.",
+        "The conflicting duplicate error object includes `error` and `id`."
+      ]
+    },
+    {
+      "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/rules-source.js\"",
+      "exit_code": 0,
+      "stdout_contains": ["\"ok\":true"],
+      "stdout_not_contains": [],
+      "contract_refs": [
+        "Processing fee percent, fixed fee, dispute fee, reserve percent, and minimum payout threshold come from `data/payout-rules.json`. Do not hardcode these values in the command implementation.",
+        "Changing `data/payout-rules.json` fee or reserve settings changes command output without code changes."
+      ]
+    }
+  ],
+  "forbidden_patterns": [
+    {
+      "pattern": "catch\\s*\\([^)]*\\)\\s*\\{[^}]*return\\s+(null|undefined|''|\\{\\})",
+      "description": "silent catch returning fallback in payout path",
+      "files": ["bin/cli.js"],
+      "severity": "disqualifier"
+    },
+    {
+      "pattern": "catch\\s*\\([^)]*\\)\\s*\\{\\s*\\}",
+      "description": "empty catch block",
+      "files": ["bin/cli.js"],
+      "severity": "disqualifier"
+    },
+    {
+      "pattern": "/\\*\\s*eslint-disable",
+      "description": "eslint-disable without scoped justification",
+      "files": ["bin/cli.js"],
+      "severity": "disqualifier"
+    }
+  ],
+  "required_files": ["bin/cli.js", "tests/cli.test.js", "data/payout-rules.json"],
+  "forbidden_files": [],
+  "max_deps_added": 0,
+  "spec_output_files": ["bin/cli.js", "tests/cli.test.js"]
+}

package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/metadata.json ADDED Viewed

@@ -0,0 +1,10 @@
+{
+  "id": "F26-cli-payout-ledger-rules",
+  "category": "high-risk",
+  "difficulty": "high",
+  "timeout_seconds": 1500,
+  "required_tools": ["node"],
+  "browser": false,
+  "deps_change_expected": false,
+  "intent": "Add a bench-cli payout command that reads ledger events from a JSON file, applies idempotent event handling and payout rules from data/payout-rules.json, and prints exact merchant payout totals with processing fees, dispute fees, reserves, and payouts in integer cents."
+}

package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/setup.sh ADDED Viewed

@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# F26 setup — seed payout ledger rules.
+set -e
+mkdir -p data
+cat > data/payout-rules.json <<'JSON'
+{
+  "processing_fee_percent": 2.9,
+  "fixed_fee_cents": 30,
+  "dispute_fee_cents": 1500,
+  "reserve_percent": 10,
+  "minimum_payout_cents": 1000
+}
+JSON
+exit 0

package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/spec.md ADDED Viewed

@@ -0,0 +1,68 @@
+---
+id: "F26-cli-payout-ledger-rules"
+title: "Payout command with ledger rules"
+status: planned
+complexity: high
+depends-on: []
+---
+# F26 Payout command with ledger rules
+## Context
+`bench-cli` currently has greeting and version commands only. The task:
+add a `payout` command that reads ledger events from a JSON file, applies
+idempotent event handling and payout rules from `data/payout-rules.json`, and
+prints exact merchant payout totals with processing fees, dispute fees,
+reserves, and payouts in integer cents.
+This is settlement math, so duplicate events must not corrupt totals and every
+public amount must be integer cents.
+## Requirements
+- [ ] `bench-cli payout --input <path>` reads JSON shaped as `{ "events": [{ "id": string, "merchant_id": string, "type": "charge" | "refund" | "dispute", "amount_cents": number }] }`.
+- [ ] Processing fee percent, fixed fee, dispute fee, reserve percent, and minimum payout threshold come from `data/payout-rules.json`. Do not hardcode these values in the command implementation.
+- [ ] Events with the same `id` and identical JSON content are idempotent duplicates and are applied only once.
+- [ ] Events with the same `id` but different JSON content are conflicting duplicates. Validation happens before payout totals are printed, exits `2`, writes exactly one JSON error object to stderr, and writes no stdout.
+- [ ] Conflicting duplicate events use exact error shape `{ "error": "conflicting_duplicate", "id": string }`.
+- [ ] Unknown event type, missing `merchant_id`, missing `id`, non-positive or non-integer `amount_cents`, missing `events`, invalid JSON, or unreadable input exits `2`, writes exactly one JSON error object to stderr, and writes no stdout.
+- [ ] Merchant rows are emitted in first-seen merchant order after idempotent duplicate removal.
+- [ ] A `charge` increases `gross_charge_cents` and adds a processing fee of `Math.round(amount_cents * processing_fee_percent / 100) + fixed_fee_cents`.
+- [ ] A `refund` increases `refund_cents`. Refunds do not reverse processing fees.
+- [ ] A `dispute` increases `dispute_cents` and adds `dispute_fee_cents` from the rules for each dispute event.
+- [ ] For each merchant, compute `net_before_reserve = gross_charge_cents - refund_cents - dispute_cents - processing_fee_cents - dispute_fee_cents`.
+- [ ] `reserve_cents` is `Math.round(net_before_reserve * reserve_percent / 100)` when `net_before_reserve > 0`; otherwise `0`.
+- [ ] `payout_cents = net_before_reserve - reserve_cents`.
+- [ ] If `0 < payout_cents < minimum_payout_cents`, keep the merchant row but set `payout_cents` to `0` and add the withheld amount (the original positive `payout_cents`) to `reserve_cents`.
+- [ ] On success, write exactly one JSON object to stdout and no stderr. Keys: `total_payout_cents`, `total_processing_fee_cents`, `total_dispute_fee_cents`, `total_reserve_cents`, `merchants`.
+- [ ] Each merchant row has keys `merchant_id`, `gross_charge_cents`, `refund_cents`, `dispute_cents`, `processing_fee_cents`, `dispute_fee_cents`, `reserve_cents`, `payout_cents`.
+- [ ] `tests/cli.test.js` is updated. Existing tests still pass AND at least two new tests cover `payout`: one successful payout and one validation failure.
+## Constraints
+- **No new npm dependencies.**
+- **No floating-money output.** All public amounts are integer cents.
+- **No silent catches.** If parsing or file reading fails, emit a visible JSON error to stderr and exit `2`.
+- **No extra stdout/stderr text** on the success path; downstream tooling parses stdout as JSON.
+## Out of Scope
+- Persisting payouts or mutating a ledger.
+- Currency conversion.
+- Time zones, reporting periods, or settlement dates.
+- Adding web UI or server routes.
+- Touching `server/`, `web/`, or `tests/server.test.js`.
+## Verification
+- `node --test tests/cli.test.js` exits 0.
+- Identical duplicate event IDs are applied only once before merchant totals are computed.
+- A payout with charges, a refund, and a dispute computes processing fees, dispute fees, reserves, merchant payouts, and top-level totals exactly.
+- Processing fees apply to charges only; refunds do not reverse processing fees.
+- Dispute events subtract the dispute amount and add one dispute fee per dispute event.
+- Merchant rows preserve first-seen merchant order after idempotent duplicate removal.
+- A conflicting duplicate exits `2`, prints one JSON error to stderr, and prints no stdout.
+- The conflicting duplicate error object includes `error` and `id`.
+- Changing `data/payout-rules.json` fee or reserve settings changes command output without code changes.
+- `git diff --stat` shows only `bin/cli.js` and `tests/cli.test.js` touched (the payout rules seed comes from setup, not the arm).

package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/task.txt ADDED Viewed

@@ -0,0 +1,7 @@
+Add a bench-cli payout command that reads ledger events from a JSON file, applies idempotent event handling and payout rules from data/payout-rules.json, and prints exact merchant payout totals with processing fees, dispute fees, reserves, and payouts in integer cents.
+The command should be `bench-cli payout --input <path>`. Input JSON has an events array. Events have id, merchant_id, type, and amount_cents. Use the payout rules JSON for processing fee percent, fixed fee, dispute fee, reserve percent, and minimum payout threshold. Do not hardcode those values.
+Identical duplicate event IDs are idempotent and should be applied only once. The same event ID with different content is a conflicting duplicate and must fail before printing totals. Successful output must be one JSON object with top-level totals and merchant rows in first-seen merchant order. Validation errors must exit 2, write one JSON error object to stderr, and write no stdout.
+Update `tests/cli.test.js` so existing tests still pass and at least two new tests cover the payout command, including one successful payout and one validation failure. Do not add dependencies or touch the server/web files.

package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/conflicting-duplicate.js ADDED Viewed

@@ -0,0 +1,29 @@
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { spawnSync } = require('node:child_process');
+const workdir = process.env.BENCH_WORKDIR || process.cwd();
+const input = path.join(os.tmpdir(), `payout-conflict-${process.pid}.json`);
+fs.writeFileSync(input, JSON.stringify({
+  events: [
+    { id: 'evt-conflict', merchant_id: 'm_1', type: 'charge', amount_cents: 1000 },
+    { id: 'evt-conflict', merchant_id: 'm_1', type: 'charge', amount_cents: 1001 }
+  ]
+}));
+const proc = spawnSync('node', ['bin/cli.js', 'payout', '--input', input], {
+  cwd: workdir,
+  encoding: 'utf8'
+});
+assert.strictEqual(proc.status, 2);
+assert.strictEqual(proc.stdout, '');
+assert.deepStrictEqual(JSON.parse(proc.stderr), {
+  error: 'conflicting_duplicate',
+  id: 'evt-conflict'
+});
+process.stdout.write(JSON.stringify({ ok: true }) + '\n');

package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/exact-payout.js ADDED Viewed

@@ -0,0 +1,58 @@
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { spawnSync } = require('node:child_process');
+const workdir = process.env.BENCH_WORKDIR || process.cwd();
+const input = path.join(os.tmpdir(), `payout-success-${process.pid}.json`);
+const charge1 = { id: 'evt-1', merchant_id: 'm_1', type: 'charge', amount_cents: 10000 };
+fs.writeFileSync(input, JSON.stringify({
+  events: [
+    charge1,
+    { id: 'evt-2', merchant_id: 'm_2', type: 'charge', amount_cents: 5000 },
+    charge1,
+    { id: 'evt-3', merchant_id: 'm_1', type: 'refund', amount_cents: 2500 },
+    { id: 'evt-4', merchant_id: 'm_1', type: 'charge', amount_cents: 3333 },
+    { id: 'evt-5', merchant_id: 'm_2', type: 'dispute', amount_cents: 2000 }
+  ]
+}));
+const proc = spawnSync('node', ['bin/cli.js', 'payout', '--input', input], {
+  cwd: workdir,
+  encoding: 'utf8'
+});
+assert.strictEqual(proc.status, 0, proc.stderr || proc.stdout);
+assert.strictEqual(proc.stderr, '');
+assert.deepStrictEqual(JSON.parse(proc.stdout), {
+  total_payout_cents: 10539,
+  total_processing_fee_cents: 622,
+  total_dispute_fee_cents: 1500,
+  total_reserve_cents: 1172,
+  merchants: [
+    {
+      merchant_id: 'm_1',
+      gross_charge_cents: 13333,
+      refund_cents: 2500,
+      dispute_cents: 0,
+      processing_fee_cents: 447,
+      dispute_fee_cents: 0,
+      reserve_cents: 1039,
+      payout_cents: 9347
+    },
+    {
+      merchant_id: 'm_2',
+      gross_charge_cents: 5000,
+      refund_cents: 0,
+      dispute_cents: 2000,
+      processing_fee_cents: 175,
+      dispute_fee_cents: 1500,
+      reserve_cents: 133,
+      payout_cents: 1192
+    }
+  ]
+});
+process.stdout.write(JSON.stringify({ ok: true }) + '\n');

package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/rules-source.js ADDED Viewed

@@ -0,0 +1,56 @@
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { spawnSync } = require('node:child_process');
+const workdir = process.env.BENCH_WORKDIR || process.cwd();
+const rulesPath = path.join(workdir, 'data', 'payout-rules.json');
+const original = fs.readFileSync(rulesPath, 'utf8');
+try {
+  fs.writeFileSync(rulesPath, JSON.stringify({
+    processing_fee_percent: 1,
+    fixed_fee_cents: 10,
+    dispute_fee_cents: 77,
+    reserve_percent: 0,
+    minimum_payout_cents: 1
+  }, null, 2) + '\n');
+  const input = path.join(os.tmpdir(), `payout-rules-${process.pid}.json`);
+  fs.writeFileSync(input, JSON.stringify({
+    events: [
+      { id: 'evt-1', merchant_id: 'm_1', type: 'charge', amount_cents: 10000 }
+    ]
+  }));
+  const proc = spawnSync('node', ['bin/cli.js', 'payout', '--input', input], {
+    cwd: workdir,
+    encoding: 'utf8'
+  });
+  assert.strictEqual(proc.status, 0, proc.stderr || proc.stdout);
+  assert.strictEqual(proc.stderr, '');
+  assert.deepStrictEqual(JSON.parse(proc.stdout), {
+    total_payout_cents: 9890,
+    total_processing_fee_cents: 110,
+    total_dispute_fee_cents: 0,
+    total_reserve_cents: 0,
+    merchants: [
+      {
+        merchant_id: 'm_1',
+        gross_charge_cents: 10000,
+        refund_cents: 0,
+        dispute_cents: 0,
+        processing_fee_cents: 110,
+        dispute_fee_cents: 0,
+        reserve_cents: 0,
+        payout_cents: 9890
+      }
+    ]
+  });
+} finally {
+  fs.writeFileSync(rulesPath, original);
+}
+process.stdout.write(JSON.stringify({ ok: true }) + '\n');

package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md CHANGED Viewed

@@ -37,7 +37,6 @@ so existing assertions continue to pass alongside new paging assertions.
 - **No breaking change to `/items/:id`.** The per-item route must keep its current contract (the fixture explicitly does NOT paginate single-item lookups).
 - **Backward-compat note**: clients that previously read `response.items` MUST still get the array at the same key inside the new envelope.
-- **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
 ## Out of Scope

package/benchmark/auto-resolve/fixtures/F4-web-browser-design/spec.md CHANGED Viewed

@@ -31,7 +31,6 @@ and italicized — using only the page's own CSS/JS.
 - **No inline JS frameworks.** Stick to the vanilla pattern already in `index.html`.
 - **Accessibility.** Both buttons must have accessible names equal to their visible labels; `#whisper` adds `aria-label="whisper"` only if its visible text differs (it doesn't, so leave it off).
-- **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
 ## Out of Scope

package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/spec.md CHANGED Viewed

@@ -31,7 +31,6 @@ Implement it so every test passes.
 - **Do not modify `tests/count.test.js`.** If a test looks wrong, that's a signal to revisit the implementation, not the test.
 - **No silent catches.** Errors reading stdin must surface with a clear message (not suppressed).
-- **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
 ## Out of Scope

package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/spec.md CHANGED Viewed

@@ -30,7 +30,6 @@ already provides everything needed; no external dependency is warranted.
 - **Stream-friendly.** Large files should not be read fully into memory. Use a hash stream (`crypto.createHash('sha256')` + pipe from `fs.createReadStream`).
 - **No silent catches.** File I/O errors must surface with an informative message and the appropriate exit code.
-- **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
 ## Out of Scope

package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/spec.md CHANGED Viewed

@@ -27,7 +27,6 @@ version without string manipulation. Add a `--format json` flag that makes
 - **Touch only `bin/cli.js` (`version` handler + argument parsing) and `tests/cli.test.js` (new test).** Do NOT modify the `hello` subcommand or any other file.
 - **No silent catches.** Unknown `--format` values must surface an error.
-- **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
 ## Out of Scope

package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/spec.md CHANGED Viewed

@@ -42,9 +42,6 @@ inside `/devlyn:resolve` (no separate preflight skill in the 2-skill design).
 - **No silent catches.**
 - **Non-git-repo handling.** Do not assume the user is always in a repo.
-- **Lifecycle note.** The harness's CLEANUP/VERIFY phases may flip this
-  spec's frontmatter `status` after implementation completes — that is
-  benchmark lifecycle bookkeeping, not a scope violation.
 ## Out of Scope

package/benchmark/auto-resolve/fixtures/SCHEMA.md CHANGED Viewed

@@ -73,7 +73,8 @@ Machine-readable acceptance criteria used by both `run-fixture.sh` verification
       "cmd": "node bin/cli.js doctor",
       "exit_code": 0,
       "stdout_contains": ["doctor: "],
-      "stdout_not_contains": ["undefined"]
+      "stdout_not_contains": ["undefined"],
+      "contract_refs": []
     }
   ],
   "forbidden_patterns": [
@@ -86,14 +87,25 @@ Machine-readable acceptance criteria used by both `run-fixture.sh` verification
   ],
   "required_files": ["bin/cli.js"],
   "forbidden_files": [],
+  "tier_a_waivers": [],
+  "spec_output_files": ["bin/cli.js"],
   "max_deps_added": 0
 }
 ```
 - **verification_commands** — runner executes each. Each command's pass/fail contributes to the arm's `verify_score`.
+  Commands run with `BENCH_WORKDIR` (fresh arm work tree) and
+  `BENCH_FIXTURE_DIR` (the fixture directory outside the arm work tree) in
+  the environment. Put discriminator/oracle scripts under the fixture
+  directory when the arm should not read the verifier source.
+  Any command that references `BENCH_FIXTURE_DIR` is a hidden oracle and must
+  include `contract_refs`: exact substrings from `spec.md` proving the oracle
+  tests a visible contract rather than inventing a narrower one.
 - **forbidden_patterns** — regexes scanned across `diff.patch`. Match at `severity: "disqualifier"` is a hard-floor fail. Match at `severity: "warning"` goes into the judge's critical-findings report.
 - **required_files** — must exist after the arm runs.
 - **forbidden_files** — must NOT appear in the arm's diff.
+- **tier_a_waivers** — optional globs for files the spec explicitly authorizes even though the Tier A scope oracle would normally flag them.
+- **spec_output_files** — files or globs that define the authorized output surface for Tier B scope tracing.
 - **max_deps_added** — count of new entries under `dependencies`/`devDependencies` in `package.json`. Exceeds → hard-floor fail.
 ## NOTES.md

package/benchmark/auto-resolve/scripts/collect-swebench-predictions.py ADDED Viewed

@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+"""Collect patch.diff files into SWE-bench prediction JSONL."""
+from __future__ import annotations
+import argparse
+import json
+from pathlib import Path
+from typing import Any
+def read_jsonl(path: Path) -> list[dict[str, Any]]:
+    rows: list[dict[str, Any]] = []
+    with path.open(encoding="utf8") as f:
+        for line_no, line in enumerate(f, start=1):
+            if not line.strip():
+                continue
+            value = json.loads(line)
+            if not isinstance(value, dict):
+                raise ValueError(f"{path}:{line_no}: expected JSON object")
+            rows.append(value)
+    return rows
+def instance_ids_from_jsonl(path: Path | None) -> set[str] | None:
+    if path is None:
+        return None
+    ids: set[str] = set()
+    for row in read_jsonl(path):
+        instance_id = row.get("instance_id")
+        if not isinstance(instance_id, str) or not instance_id:
+            raise ValueError(f"{path}: row missing non-empty instance_id")
+        ids.add(instance_id)
+    return ids
+def collect_from_root(root: Path, patch_name: str, keep: set[str] | None) -> list[tuple[str, Path]]:
+    patches: list[tuple[str, Path]] = []
+    for patch_path in sorted(root.glob(f"*/{patch_name}")):
+        instance_id = patch_path.parent.name
+        if keep is not None and instance_id not in keep:
+            continue
+        patches.append((instance_id, patch_path))
+    return patches
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--patch-root", required=True, type=Path)
+    parser.add_argument("--patch-name", default="patch.diff")
+    parser.add_argument("--instances-jsonl", type=Path, help="Optional filter/validation set.")
+    parser.add_argument("--model-name", required=True)
+    parser.add_argument("--out", required=True, type=Path)
+    parser.add_argument("--allow-empty", action="store_true")
+    args = parser.parse_args()
+    keep = instance_ids_from_jsonl(args.instances_jsonl)
+    patches = collect_from_root(args.patch_root, args.patch_name, keep)
+    if not patches:
+        raise ValueError(f"no {args.patch_name} files found under {args.patch_root}")
+    args.out.parent.mkdir(parents=True, exist_ok=True)
+    written = 0
+    skipped_empty: list[str] = []
+    with args.out.open("w", encoding="utf8") as f:
+        for instance_id, patch_path in patches:
+            patch = patch_path.read_text(encoding="utf8")
+            if not patch.strip():
+                if args.allow_empty:
+                    skipped_empty.append(instance_id)
+                    continue
+                raise ValueError(f"empty patch for {instance_id}: {patch_path}")
+            f.write(
+                json.dumps(
+                    {
+                        "instance_id": instance_id,
+                        "model_name_or_path": args.model_name,
+                        "model_patch": patch,
+                    }
+                )
+                + "\n"
+            )
+            written += 1
+    report = {
+        "patch_root": str(args.patch_root),
+        "patch_name": args.patch_name,
+        "model_name_or_path": args.model_name,
+        "out": str(args.out),
+        "predictions_written": written,
+        "empty_skipped": skipped_empty,
+    }
+    print(json.dumps(report, indent=2))
+    return 0
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    raise SystemExit(main())  # main()'s return value becomes the exit status