devlyn-cli 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/CLAUDE.md +1 -1
  2. package/README.md +1 -1
  3. package/benchmark/auto-resolve/README.md +318 -2
  4. package/benchmark/auto-resolve/RUBRIC.md +6 -0
  5. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/NOTES.md +63 -0
  6. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/expected.json +60 -0
  7. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/metadata.json +10 -0
  8. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/setup.sh +17 -0
  9. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/spec.md +52 -0
  10. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/task.txt +9 -0
  11. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/verifiers/invalid.js +29 -0
  12. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/verifiers/parallel.js +50 -0
  13. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/NOTES.md +70 -0
  14. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/expected.json +52 -0
  15. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/metadata.json +10 -0
  16. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/setup.sh +171 -0
  17. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/spec.md +51 -0
  18. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/task.txt +9 -0
  19. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/NOTES.md +83 -0
  20. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/expected.json +74 -0
  21. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/metadata.json +10 -0
  22. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/setup.sh +251 -0
  23. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/spec.md +58 -0
  24. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/task.txt +13 -0
  25. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/verifiers/replay-malformed-body.js +64 -0
  26. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/NOTES.md +98 -0
  27. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/expected.json +46 -0
  28. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/metadata.json +10 -0
  29. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/setup.sh +336 -0
  30. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/spec.md +52 -0
  31. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/task.txt +9 -0
  32. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/NOTES.md +26 -0
  33. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/expected.json +64 -0
  34. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/metadata.json +10 -0
  35. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/setup.sh +32 -0
  36. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/spec.md +58 -0
  37. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/task.txt +7 -0
  38. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/verifiers/exact-success.js +54 -0
  39. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/verifiers/no-hardcoded-pricing.js +47 -0
  40. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/verifiers/stock-error.js +45 -0
  41. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/NOTES.md +27 -0
  42. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/expected.json +62 -0
  43. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/metadata.json +10 -0
  44. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/setup.sh +2 -0
  45. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/spec.md +62 -0
  46. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/task.txt +7 -0
  47. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/verifiers/error-order.js +55 -0
  48. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/verifiers/priority-blocked.js +48 -0
  49. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/NOTES.md +27 -0
  50. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/expected.json +56 -0
  51. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/metadata.json +10 -0
  52. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/setup.sh +2 -0
  53. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/spec.md +65 -0
  54. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/task.txt +7 -0
  55. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/verifiers/conflicting-duplicate.js +34 -0
  56. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/verifiers/idempotent-close.js +41 -0
  57. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/NOTES.md +27 -0
  58. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/expected.json +56 -0
  59. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/metadata.json +10 -0
  60. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/setup.sh +2 -0
  61. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/spec.md +71 -0
  62. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/task.txt +7 -0
  63. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/verifiers/priority-rollback.js +64 -0
  64. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/verifiers/single-warehouse-fefo.js +66 -0
  65. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/NOTES.md +28 -0
  66. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/expected.json +66 -0
  67. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/metadata.json +10 -0
  68. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/setup.sh +36 -0
  69. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/spec.md +65 -0
  70. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/task.txt +7 -0
  71. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/verifiers/catalog-source.js +57 -0
  72. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/verifiers/exact-success.js +63 -0
  73. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/verifiers/stock-error.js +34 -0
  74. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/NOTES.md +25 -0
  75. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/expected.json +68 -0
  76. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/metadata.json +10 -0
  77. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/setup.sh +17 -0
  78. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/spec.md +69 -0
  79. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/task.txt +7 -0
  80. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/conflicting-duplicate.js +29 -0
  81. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/exact-payout.js +58 -0
  82. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/rules-source.js +56 -0
  83. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/NOTES.md +24 -0
  84. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/expected.json +66 -0
  85. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/metadata.json +10 -0
  86. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/setup.sh +22 -0
  87. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/spec.md +62 -0
  88. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/task.txt +9 -0
  89. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/verifiers/exact-success.js +48 -0
  90. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/verifiers/insufficient-balance.js +36 -0
  91. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/verifiers/rules-source.js +55 -0
  92. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/NOTES.md +20 -0
  93. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/expected.json +66 -0
  94. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/metadata.json +10 -0
  95. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/setup.sh +23 -0
  96. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/spec.md +66 -0
  97. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/task.txt +11 -0
  98. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/verifiers/exact-success.js +44 -0
  99. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/verifiers/rules-source.js +58 -0
  100. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/verifiers/unavailable-inventory.js +35 -0
  101. package/benchmark/auto-resolve/fixtures/SCHEMA.md +13 -1
  102. package/benchmark/auto-resolve/scripts/collect-swebench-predictions.py +98 -0
  103. package/benchmark/auto-resolve/scripts/fetch-swebench-instances.py +111 -0
  104. package/benchmark/auto-resolve/scripts/frozen-verify-gate.py +289 -0
  105. package/benchmark/auto-resolve/scripts/full-pipeline-pair-gate.py +250 -0
  106. package/benchmark/auto-resolve/scripts/headroom-gate.py +147 -0
  107. package/benchmark/auto-resolve/scripts/judge.sh +82 -3
  108. package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-case.py +244 -0
  109. package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-corpus.py +118 -0
  110. package/benchmark/auto-resolve/scripts/prepare-swebench-solver-worktree.py +192 -0
  111. package/benchmark/auto-resolve/scripts/run-fixture.sh +234 -40
  112. package/benchmark/auto-resolve/scripts/run-frozen-verify-pair.sh +511 -0
  113. package/benchmark/auto-resolve/scripts/run-full-pipeline-pair-candidate.sh +162 -0
  114. package/benchmark/auto-resolve/scripts/run-headroom-candidate.sh +93 -0
  115. package/benchmark/auto-resolve/scripts/run-swebench-frozen-corpus.sh +209 -0
  116. package/benchmark/auto-resolve/scripts/run-swebench-solver-batch.sh +239 -0
  117. package/benchmark/auto-resolve/scripts/swebench-frozen-matrix.py +265 -0
  118. package/benchmark/auto-resolve/scripts/test-frozen-verify-gate.sh +192 -0
  119. package/benchmark/auto-resolve/scripts/test-full-pipeline-pair-gate.sh +131 -0
  120. package/benchmark/auto-resolve/scripts/test-headroom-gate.sh +84 -0
  121. package/benchmark/auto-resolve/scripts/test-swebench-frozen-case.sh +302 -0
  122. package/bin/devlyn.js +56 -10
  123. package/config/skills/_shared/archive_run.py +3 -0
  124. package/config/skills/_shared/codex-config.md +2 -2
  125. package/config/skills/_shared/codex-monitored.sh +72 -7
  126. package/config/skills/_shared/collect-codex-findings.py +125 -0
  127. package/config/skills/_shared/engine-preflight.md +1 -1
  128. package/config/skills/_shared/expected.schema.json +18 -0
  129. package/config/skills/_shared/spec-verify-check.py +312 -10
  130. package/config/skills/_shared/verify-merge-findings.py +327 -0
  131. package/config/skills/devlyn:ideate/SKILL.md +1 -1
  132. package/config/skills/devlyn:resolve/SKILL.md +62 -8
  133. package/config/skills/devlyn:resolve/references/phases/build-gate.md +1 -1
  134. package/config/skills/devlyn:resolve/references/phases/probe-derive.md +164 -0
  135. package/config/skills/devlyn:resolve/references/phases/verify.md +156 -4
  136. package/config/skills/devlyn:resolve/references/state-schema.md +10 -4
  137. package/package.json +1 -1
  138. package/scripts/lint-skills.sh +32 -0
@@ -0,0 +1,34 @@
1
+ const assert = require('node:assert');
2
+ const fs = require('node:fs');
3
+ const os = require('node:os');
4
+ const path = require('node:path');
5
+ const { spawnSync } = require('node:child_process');
6
+
7
+ const workdir = process.env.BENCH_WORKDIR || process.cwd();
8
+ const input = path.join(os.tmpdir(), `cart-stock-${process.pid}.json`);
9
+
10
+ fs.writeFileSync(input, JSON.stringify({
11
+ state: 'OR',
12
+ coupon: null,
13
+ items: [
14
+ { sku: 'BAG', qty: 2 },
15
+ { sku: 'MUG', qty: 1 },
16
+ { sku: 'BAG', qty: 3 }
17
+ ]
18
+ }));
19
+
20
+ const proc = spawnSync('node', ['bin/cli.js', 'cart', '--input', input], {
21
+ cwd: workdir,
22
+ encoding: 'utf8'
23
+ });
24
+
25
+ assert.strictEqual(proc.status, 2);
26
+ assert.strictEqual(proc.stdout, '');
27
+ assert.deepStrictEqual(JSON.parse(proc.stderr), {
28
+ error: 'invalid_stock',
29
+ sku: 'BAG',
30
+ available: 4,
31
+ requested: 5
32
+ });
33
+
34
+ process.stdout.write(JSON.stringify({ ok: true }) + '\n');
@@ -0,0 +1,25 @@
1
+ # F26 CLI payout ledger rules
2
+
3
+ ## Failure mode
4
+
5
+ This fixture detects settlement implementations that pass simple payout tests
6
+ while mishandling idempotent events, conflicting duplicates, fee ordering,
7
+ dispute fees, reserves, minimum payout holds, and top-level totals.
8
+
9
+ ## Pipeline phase target
10
+
11
+ PLAN must preserve event deduplication and arithmetic order. IMPLEMENT must keep
12
+ fee/reserve math in integer cents and avoid hardcoded rules. VERIFY should build
13
+ adversarial ledger examples with repeated IDs, refunds, disputes, and reserves.
14
+
15
+ ## Why existing fixtures do not cover it
16
+
17
+ F16 covers quote math and F25 covers cart promotions, but neither has ledger
18
+ idempotency or conflicting duplicate events. F21/F23 became oracle-control
19
+ fixtures, so this adds a fresh visible-contract stateful arithmetic candidate.
20
+
21
+ ## Retirement
22
+
23
+ Retire or replace this fixture if solo consistently reaches the ceiling or if
24
+ another fixture provides the same idempotent-ledger signal with cleaner
25
+ full-pipeline pair lift.
@@ -0,0 +1,68 @@
1
+ {
2
+ "verification_commands": [
3
+ {
4
+ "cmd": "node --test tests/cli.test.js",
5
+ "exit_code": 0,
6
+ "stdout_contains": [],
7
+ "stdout_not_contains": ["not ok "]
8
+ },
9
+ {
10
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/exact-payout.js\"",
11
+ "exit_code": 0,
12
+ "stdout_contains": ["\"ok\":true"],
13
+ "stdout_not_contains": [],
14
+ "contract_refs": [
15
+ "Identical duplicate event IDs are applied only once before merchant totals are computed.",
16
+ "A payout with charges, a refund, and a dispute computes processing fees, dispute fees, reserves, merchant payouts, and top-level totals exactly.",
17
+ "Processing fees apply to charges only; refunds do not reverse processing fees.",
18
+ "Dispute events subtract the dispute amount and add one dispute fee per dispute event.",
19
+ "Merchant rows preserve first-seen merchant order after idempotent duplicate removal."
20
+ ]
21
+ },
22
+ {
23
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/conflicting-duplicate.js\"",
24
+ "exit_code": 0,
25
+ "stdout_contains": ["\"ok\":true"],
26
+ "stdout_not_contains": [],
27
+ "contract_refs": [
28
+ "Conflicting duplicate events use exact error shape `{ \"error\": \"conflicting_duplicate\", \"id\": string }`.",
29
+ "A conflicting duplicate exits `2`, prints one JSON error to stderr, and prints no stdout.",
30
+ "The conflicting duplicate error object includes `error` and `id`."
31
+ ]
32
+ },
33
+ {
34
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/rules-source.js\"",
35
+ "exit_code": 0,
36
+ "stdout_contains": ["\"ok\":true"],
37
+ "stdout_not_contains": [],
38
+ "contract_refs": [
39
+ "Processing fee percent, fixed fee, dispute fee, reserve percent, and minimum payout threshold come from `data/payout-rules.json`. Do not hardcode these values in the command implementation.",
40
+ "Changing `data/payout-rules.json` fee or reserve settings changes command output without code changes."
41
+ ]
42
+ }
43
+ ],
44
+ "forbidden_patterns": [
45
+ {
46
+ "pattern": "catch\\s*\\([^)]*\\)\\s*\\{[^}]*return\\s+(null|undefined|''|\\{\\})",
47
+ "description": "silent catch returning fallback in payout path",
48
+ "files": ["bin/cli.js"],
49
+ "severity": "disqualifier"
50
+ },
51
+ {
52
+ "pattern": "catch\\s*\\([^)]*\\)\\s*\\{\\s*\\}",
53
+ "description": "empty catch block",
54
+ "files": ["bin/cli.js"],
55
+ "severity": "disqualifier"
56
+ },
57
+ {
58
+ "pattern": "/\\*\\s*eslint-disable",
59
+ "description": "eslint-disable without scoped justification",
60
+ "files": ["bin/cli.js"],
61
+ "severity": "disqualifier"
62
+ }
63
+ ],
64
+ "required_files": ["bin/cli.js", "tests/cli.test.js", "data/payout-rules.json"],
65
+ "forbidden_files": [],
66
+ "max_deps_added": 0,
67
+ "spec_output_files": ["bin/cli.js", "tests/cli.test.js"]
68
+ }
@@ -0,0 +1,10 @@
1
+ {
2
+ "id": "F26-cli-payout-ledger-rules",
3
+ "category": "high-risk",
4
+ "difficulty": "high",
5
+ "timeout_seconds": 1500,
6
+ "required_tools": ["node"],
7
+ "browser": false,
8
+ "deps_change_expected": false,
9
+ "intent": "Add a bench-cli payout command that reads ledger events from a JSON file, applies idempotent event handling and payout rules from data/payout-rules.json, and prints exact merchant payout totals with processing fees, dispute fees, reserves, and payouts in integer cents."
10
+ }
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env bash
2
+ # F26 setup — seed payout ledger rules.
3
+ set -e
4
+
5
+ mkdir -p data
6
+
7
+ cat > data/payout-rules.json <<'JSON'
8
+ {
9
+ "processing_fee_percent": 2.9,
10
+ "fixed_fee_cents": 30,
11
+ "dispute_fee_cents": 1500,
12
+ "reserve_percent": 10,
13
+ "minimum_payout_cents": 1000
14
+ }
15
+ JSON
16
+
17
+ exit 0
@@ -0,0 +1,69 @@
1
+ ---
2
+ id: "F26-cli-payout-ledger-rules"
3
+ title: "Payout command with ledger rules"
4
+ status: planned
5
+ complexity: high
6
+ depends-on: []
7
+ ---
8
+
9
+ # F26 Payout command with ledger rules
10
+
11
+ ## Context
12
+
13
+ `bench-cli` currently has greeting and version commands only. The task:
14
+ add a `payout` command that reads ledger events from a JSON file, applies
15
+ idempotent event handling and payout rules from `data/payout-rules.json`, and
16
+ prints exact merchant payout totals with processing fees, dispute fees,
17
+ reserves, and payouts in integer cents.
18
+
19
+ This is settlement math, so duplicate events must not corrupt totals and every
20
+ public amount must be integer cents.
21
+
22
+ ## Requirements
23
+
24
+ - [ ] `bench-cli payout --input <path>` reads JSON shaped as `{ "events": [{ "id": string, "merchant_id": string, "type": "charge" | "refund" | "dispute", "amount_cents": number }] }`.
25
+ - [ ] Processing fee percent, fixed fee, dispute fee, reserve percent, and minimum payout threshold come from `data/payout-rules.json`. Do not hardcode these values in the command implementation.
26
+ - [ ] Events with the same `id` and identical JSON content are idempotent duplicates and are applied only once.
27
+ - [ ] Events with the same `id` but different JSON content are conflicting duplicates. They must be detected during validation, before any payout totals are printed; the command then exits `2`, writes exactly one JSON error object to stderr, and writes no stdout.
28
+ - [ ] Conflicting duplicate events use exact error shape `{ "error": "conflicting_duplicate", "id": string }`.
29
+ - [ ] Unknown event type, missing `merchant_id`, missing `id`, non-positive or non-integer `amount_cents`, missing `events`, invalid JSON, or unreadable input exits `2` and writes exactly one JSON error object to stderr.
30
+ - [ ] Merchant rows are emitted in first-seen merchant order after idempotent duplicate removal.
31
+ - [ ] A `charge` increases `gross_charge_cents` and adds a processing fee of `Math.round(amount_cents * processing_fee_percent / 100) + fixed_fee_cents`.
32
+ - [ ] A `refund` increases `refund_cents`. Refunds do not reverse processing fees.
33
+ - [ ] A `dispute` increases `dispute_cents` and adds `dispute_fee_cents` from the rules for each dispute event.
34
+ - [ ] For each merchant, compute `net_before_reserve = gross_charge_cents - refund_cents - dispute_cents - processing_fee_cents - dispute_fee_cents`.
35
+ - [ ] `reserve_cents` is `Math.round(net_before_reserve * reserve_percent / 100)` when `net_before_reserve > 0`; otherwise `0`.
36
+ - [ ] `payout_cents = net_before_reserve - reserve_cents`.
37
+ - [ ] If `0 < payout_cents < minimum_payout_cents`, keep the merchant row but set `payout_cents` to `0` and add the withheld amount (the original positive `payout_cents`) to `reserve_cents`.
38
+ - [ ] On success, write exactly one JSON object to stdout and no stderr. Keys: `total_payout_cents`, `total_processing_fee_cents`, `total_dispute_fee_cents`, `total_reserve_cents`, `merchants`.
39
+ - [ ] Each merchant row has keys `merchant_id`, `gross_charge_cents`, `refund_cents`, `dispute_cents`, `processing_fee_cents`, `dispute_fee_cents`, `reserve_cents`, `payout_cents`.
40
+ - [ ] `tests/cli.test.js` is updated. Existing tests still pass AND at least two new tests cover `payout`: one successful payout and one validation failure.
41
+
42
+ ## Constraints
43
+
44
+ - **No new npm dependencies.**
45
+ - **No floating-money output.** All public amounts are integer cents.
46
+ - **No silent catches.** If parsing or file reading fails, emit a visible JSON error to stderr and exit `2`.
47
+ - **No extra stdout/stderr text** on the success path; downstream tooling parses stdout as JSON.
48
+ - **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
49
+
50
+ ## Out of Scope
51
+
52
+ - Persisting payouts or mutating a ledger.
53
+ - Currency conversion.
54
+ - Time zones, reporting periods, or settlement dates.
55
+ - Adding web UI or server routes.
56
+ - Touching `server/`, `web/`, or `tests/server.test.js`.
57
+
58
+ ## Verification
59
+
60
+ - `node --test tests/cli.test.js` exits 0.
61
+ - Identical duplicate event IDs are applied only once before merchant totals are computed.
62
+ - A payout with charges, a refund, and a dispute computes processing fees, dispute fees, reserves, merchant payouts, and top-level totals exactly.
63
+ - Processing fees apply to charges only; refunds do not reverse processing fees.
64
+ - Dispute events subtract the dispute amount and add one dispute fee per dispute event.
65
+ - Merchant rows preserve first-seen merchant order after idempotent duplicate removal.
66
+ - A conflicting duplicate exits `2`, prints one JSON error to stderr, and prints no stdout.
67
+ - The conflicting duplicate error object includes `error` and `id`.
68
+ - Changing `data/payout-rules.json` fee or reserve settings changes command output without code changes.
69
+ - `git diff --stat` shows only `bin/cli.js` and `tests/cli.test.js` touched (the payout rules seed comes from setup, not the arm).
@@ -0,0 +1,7 @@
1
+ Add a bench-cli payout command that reads ledger events from a JSON file, applies idempotent event handling and payout rules from data/payout-rules.json, and prints exact merchant payout totals with processing fees, dispute fees, reserves, and payouts in integer cents.
2
+
3
+ The command should be `bench-cli payout --input <path>`. Input JSON has an events array. Events have id, merchant_id, type, and amount_cents. Use the payout rules JSON for processing fee percent, fixed fee, dispute fee, reserve percent, and minimum payout threshold. Do not hardcode those values.
4
+
5
+ Identical duplicate event IDs are idempotent and should be applied only once. The same event ID with different content is a conflicting duplicate and must fail before printing totals. Successful output must be one JSON object with top-level totals and merchant rows in first-seen merchant order. Validation errors must exit 2, write one JSON error object to stderr, and write no stdout.
6
+
7
+ Update `tests/cli.test.js` so existing tests still pass and at least two new tests cover the payout command, including one successful payout and one validation failure. Do not add dependencies or touch the server/web files.
@@ -0,0 +1,29 @@
1
+ const assert = require('node:assert');
2
+ const fs = require('node:fs');
3
+ const os = require('node:os');
4
+ const path = require('node:path');
5
+ const { spawnSync } = require('node:child_process');
6
+
7
+ const workdir = process.env.BENCH_WORKDIR || process.cwd();
8
+ const input = path.join(os.tmpdir(), `payout-conflict-${process.pid}.json`);
9
+
10
+ fs.writeFileSync(input, JSON.stringify({
11
+ events: [
12
+ { id: 'evt-conflict', merchant_id: 'm_1', type: 'charge', amount_cents: 1000 },
13
+ { id: 'evt-conflict', merchant_id: 'm_1', type: 'charge', amount_cents: 1001 }
14
+ ]
15
+ }));
16
+
17
+ const proc = spawnSync('node', ['bin/cli.js', 'payout', '--input', input], {
18
+ cwd: workdir,
19
+ encoding: 'utf8'
20
+ });
21
+
22
+ assert.strictEqual(proc.status, 2);
23
+ assert.strictEqual(proc.stdout, '');
24
+ assert.deepStrictEqual(JSON.parse(proc.stderr), {
25
+ error: 'conflicting_duplicate',
26
+ id: 'evt-conflict'
27
+ });
28
+
29
+ process.stdout.write(JSON.stringify({ ok: true }) + '\n');
@@ -0,0 +1,58 @@
1
+ const assert = require('node:assert');
2
+ const fs = require('node:fs');
3
+ const os = require('node:os');
4
+ const path = require('node:path');
5
+ const { spawnSync } = require('node:child_process');
6
+
7
+ const workdir = process.env.BENCH_WORKDIR || process.cwd();
8
+ const input = path.join(os.tmpdir(), `payout-success-${process.pid}.json`);
9
+
10
+ const charge1 = { id: 'evt-1', merchant_id: 'm_1', type: 'charge', amount_cents: 10000 };
11
+ fs.writeFileSync(input, JSON.stringify({
12
+ events: [
13
+ charge1,
14
+ { id: 'evt-2', merchant_id: 'm_2', type: 'charge', amount_cents: 5000 },
15
+ charge1,
16
+ { id: 'evt-3', merchant_id: 'm_1', type: 'refund', amount_cents: 2500 },
17
+ { id: 'evt-4', merchant_id: 'm_1', type: 'charge', amount_cents: 3333 },
18
+ { id: 'evt-5', merchant_id: 'm_2', type: 'dispute', amount_cents: 2000 }
19
+ ]
20
+ }));
21
+
22
+ const proc = spawnSync('node', ['bin/cli.js', 'payout', '--input', input], {
23
+ cwd: workdir,
24
+ encoding: 'utf8'
25
+ });
26
+
27
+ assert.strictEqual(proc.status, 0, proc.stderr || proc.stdout);
28
+ assert.strictEqual(proc.stderr, '');
29
+ assert.deepStrictEqual(JSON.parse(proc.stdout), {
30
+ total_payout_cents: 10539,
31
+ total_processing_fee_cents: 622,
32
+ total_dispute_fee_cents: 1500,
33
+ total_reserve_cents: 1172,
34
+ merchants: [
35
+ {
36
+ merchant_id: 'm_1',
37
+ gross_charge_cents: 13333,
38
+ refund_cents: 2500,
39
+ dispute_cents: 0,
40
+ processing_fee_cents: 447,
41
+ dispute_fee_cents: 0,
42
+ reserve_cents: 1039,
43
+ payout_cents: 9347
44
+ },
45
+ {
46
+ merchant_id: 'm_2',
47
+ gross_charge_cents: 5000,
48
+ refund_cents: 0,
49
+ dispute_cents: 2000,
50
+ processing_fee_cents: 175,
51
+ dispute_fee_cents: 1500,
52
+ reserve_cents: 133,
53
+ payout_cents: 1192
54
+ }
55
+ ]
56
+ });
57
+
58
+ process.stdout.write(JSON.stringify({ ok: true }) + '\n');
@@ -0,0 +1,56 @@
1
+ const assert = require('node:assert');
2
+ const fs = require('node:fs');
3
+ const os = require('node:os');
4
+ const path = require('node:path');
5
+ const { spawnSync } = require('node:child_process');
6
+
7
+ const workdir = process.env.BENCH_WORKDIR || process.cwd();
8
+ const rulesPath = path.join(workdir, 'data', 'payout-rules.json');
9
+ const original = fs.readFileSync(rulesPath, 'utf8');
10
+
11
+ try {
12
+ fs.writeFileSync(rulesPath, JSON.stringify({
13
+ processing_fee_percent: 1,
14
+ fixed_fee_cents: 10,
15
+ dispute_fee_cents: 77,
16
+ reserve_percent: 0,
17
+ minimum_payout_cents: 1
18
+ }, null, 2) + '\n');
19
+
20
+ const input = path.join(os.tmpdir(), `payout-rules-${process.pid}.json`);
21
+ fs.writeFileSync(input, JSON.stringify({
22
+ events: [
23
+ { id: 'evt-1', merchant_id: 'm_1', type: 'charge', amount_cents: 10000 }
24
+ ]
25
+ }));
26
+
27
+ const proc = spawnSync('node', ['bin/cli.js', 'payout', '--input', input], {
28
+ cwd: workdir,
29
+ encoding: 'utf8'
30
+ });
31
+
32
+ assert.strictEqual(proc.status, 0, proc.stderr || proc.stdout);
33
+ assert.strictEqual(proc.stderr, '');
34
+ assert.deepStrictEqual(JSON.parse(proc.stdout), {
35
+ total_payout_cents: 9890,
36
+ total_processing_fee_cents: 110,
37
+ total_dispute_fee_cents: 0,
38
+ total_reserve_cents: 0,
39
+ merchants: [
40
+ {
41
+ merchant_id: 'm_1',
42
+ gross_charge_cents: 10000,
43
+ refund_cents: 0,
44
+ dispute_cents: 0,
45
+ processing_fee_cents: 110,
46
+ dispute_fee_cents: 0,
47
+ reserve_cents: 0,
48
+ payout_cents: 9890
49
+ }
50
+ ]
51
+ });
52
+ } finally {
53
+ fs.writeFileSync(rulesPath, original);
54
+ }
55
+
56
+ process.stdout.write(JSON.stringify({ ok: true }) + '\n');
@@ -0,0 +1,24 @@
1
+ # F27 CLI gift card redemption
2
+
3
+ ## Why this fixture exists
4
+
5
+ F16 showed a valid full-pipeline pair lift when the solo arm implemented the
6
+ happy path but missed the exact validation-error contract. F25 was rejected
7
+ after an oracle correction made solo pass. F26 was rejected because solo reached
8
+ the ceiling.
9
+
10
+ F27 keeps the useful F16 shape but removes checkout tax complexity: success is
11
+ straight integer aggregation, while the risk is the exact failure object after
12
+ combining duplicate card redemption rows before balance validation.
13
+
14
+ ## Pair expectation
15
+
16
+ PLAN must preserve the order of aggregation before validation. IMPLEMENT must
17
+ read `data/gift-cards.json` and keep all public amounts in integer cents.
18
+ VERIFY should construct an adversarial request where two individually valid
19
+ redemptions for the same card become invalid only after combination.
20
+
21
+ ## Isolation
22
+
23
+ F16 covers quote tax rules. F27 covers non-persistent balance redemption and
24
+ exact validation shape after duplicate aggregation.
@@ -0,0 +1,66 @@
1
+ {
2
+ "verification_commands": [
3
+ {
4
+ "cmd": "node --test tests/cli.test.js",
5
+ "exit_code": 0,
6
+ "stdout_contains": [],
7
+ "stdout_not_contains": ["not ok "]
8
+ },
9
+ {
10
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/exact-success.js\"",
11
+ "exit_code": 0,
12
+ "stdout_contains": ["\"ok\":true"],
13
+ "stdout_not_contains": [],
14
+ "contract_refs": [
15
+ "Duplicate SKUs are combined before line totals are computed.",
16
+ "A successful redemption emits exact item rows, redemption rows, applied total, and amount due.",
17
+ "On success, write exactly one JSON object to stdout and no stderr. Keys: `order_id`, `subtotal_cents`, `gift_card_applied_cents`, `amount_due_cents`, `items`, `redemptions`."
18
+ ]
19
+ },
20
+ {
21
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/insufficient-balance.js\"",
22
+ "exit_code": 0,
23
+ "stdout_contains": ["\"ok\":true"],
24
+ "stdout_not_contains": [],
25
+ "contract_refs": [
26
+ "Duplicate `card_id` redemption rows are combined before balance validation.",
27
+ "Combined card redemption over balance uses exact error shape `{ \"error\": \"insufficient_balance\", \"card_id\": string, \"available_cents\": number, \"requested_cents\": number }`.",
28
+ "The insufficient balance error object includes `error`, `card_id`, `available_cents`, and `requested_cents`."
29
+ ]
30
+ },
31
+ {
32
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/rules-source.js\"",
33
+ "exit_code": 0,
34
+ "stdout_contains": ["\"ok\":true"],
35
+ "stdout_not_contains": [],
36
+ "contract_refs": [
37
+ "Product prices, gift-card balances, and gift-card active flags come from `data/gift-cards.json`. Do not hardcode product prices, card balances, or active flags in the command implementation.",
38
+ "Changing `data/gift-cards.json` product prices or card balances changes command output without code changes."
39
+ ]
40
+ }
41
+ ],
42
+ "forbidden_patterns": [
43
+ {
44
+ "pattern": "catch\\s*\\([^)]*\\)\\s*\\{[^}]*return\\s+(null|undefined|''|\\{\\})",
45
+ "description": "silent catch returning fallback in gift-card path",
46
+ "files": ["bin/cli.js"],
47
+ "severity": "disqualifier"
48
+ },
49
+ {
50
+ "pattern": "catch\\s*\\([^)]*\\)\\s*\\{\\s*\\}",
51
+ "description": "empty catch block",
52
+ "files": ["bin/cli.js"],
53
+ "severity": "disqualifier"
54
+ },
55
+ {
56
+ "pattern": "/\\*\\s*eslint-disable",
57
+ "description": "eslint-disable without scoped justification",
58
+ "files": ["bin/cli.js"],
59
+ "severity": "disqualifier"
60
+ }
61
+ ],
62
+ "required_files": ["bin/cli.js", "tests/cli.test.js", "data/gift-cards.json"],
63
+ "forbidden_files": [],
64
+ "max_deps_added": 0,
65
+ "spec_output_files": ["bin/cli.js", "tests/cli.test.js"]
66
+ }
@@ -0,0 +1,10 @@
1
+ {
2
+ "id": "F27-cli-gift-card-redemption",
3
+ "category": "high-risk",
4
+ "difficulty": "high",
5
+ "timeout_seconds": 1500,
6
+ "required_tools": ["node"],
7
+ "browser": false,
8
+ "deps_change_expected": false,
9
+ "intent": "Add a bench-cli gift-card command that reads cart lines and gift-card redemption requests, prices products from data/gift-cards.json, combines duplicate SKUs and duplicate card redemptions before validation, and prints exact remaining balances and amount due in integer cents."
10
+ }
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env bash
2
+ # F27 setup - seed data/gift-cards.json with product prices, gift-card balances, and active flags.
3
+ set -e  # abort on the first failing command
4
+
5
+ mkdir -p data  # create data/ under the current directory if it does not exist
6
+
7
+ cat > data/gift-cards.json <<'JSON'  # quoted delimiter: the body below is written verbatim, with no shell expansion
8
+ {
9
+ "products": {
10
+ "TEE": { "unit_cents": 2500 },
11
+ "SOCKS": { "unit_cents": 700 },
12
+ "BAG": { "unit_cents": 3200 }
13
+ },
14
+ "cards": {
15
+ "GC-100": { "balance_cents": 5000, "active": true },
16
+ "GC-200": { "balance_cents": 2500, "active": true },
17
+ "GC-LOCKED": { "balance_cents": 9999, "active": false }
18
+ }
19
+ }
20
+ JSON
21
+
22
+ exit 0  # explicit success status once the seed file has been written
@@ -0,0 +1,62 @@
1
+ ---
2
+ id: "F27-cli-gift-card-redemption"
3
+ title: "Gift card command with redemption rules"
4
+ status: planned
5
+ complexity: high
6
+ depends-on: []
7
+ ---
8
+
9
+ # F27 Gift card command with redemption rules
10
+
11
+ ## Context
12
+
13
+ `bench-cli` currently has greeting and version commands only. The task:
14
+ add a `gift-card` command that reads cart lines and gift-card redemption
15
+ requests from a JSON file, prices products from `data/gift-cards.json`, and
16
+ prints exact gift-card application totals in integer cents.
17
+
18
+ Gift-card balances are money-like state, so duplicate cart lines and duplicate
19
+ redemption requests must be combined before validation. The command only
20
+ calculates the result; it does not persist balance changes.
21
+
22
+ ## Requirements
23
+
24
+ - [ ] `bench-cli gift-card --input <path>` reads JSON shaped as `{ "order_id": string, "lines": [{ "sku": string, "qty": number }], "redeems": [{ "card_id": string, "amount_cents": number }] }`.
25
+ - [ ] Product prices, gift-card balances, and gift-card active flags come from `data/gift-cards.json`. Do not hardcode product prices, card balances, or active flags in the command implementation.
26
+ - [ ] Combine duplicate SKUs before computing line totals. The output `items` array must contain one row per SKU in first-seen order.
27
+ - [ ] Combine duplicate `card_id` redemption requests before validating balances and before computing remaining balances. The output `redemptions` array must contain one row per card in first-seen order.
28
+ - [ ] Validation happens before any result is printed. Invalid JSON, missing `lines`, missing `redeems`, unknown SKU, unknown card, inactive card, non-positive or non-integer `qty`, non-positive or non-integer `amount_cents`, missing `order_id`, combined card redemption over balance, or total redemption over subtotal exits `2` and writes exactly one JSON error object to stderr.
29
+ - [ ] Combined card redemption over balance uses exact error shape `{ "error": "insufficient_balance", "card_id": string, "available_cents": number, "requested_cents": number }`.
30
+ - [ ] Total redemption over subtotal uses exact error shape `{ "error": "redemption_exceeds_subtotal", "subtotal_cents": number, "requested_cents": number }`.
31
+ - [ ] On success, write exactly one JSON object to stdout and no stderr. Keys: `order_id`, `subtotal_cents`, `gift_card_applied_cents`, `amount_due_cents`, `items`, `redemptions`.
32
+ - [ ] Each output item row has keys `sku`, `qty`, `line_cents`. `line_cents` is `unit_cents * combined_qty`.
33
+ - [ ] Each redemption row has keys `card_id`, `applied_cents`, `remaining_balance_cents`. `remaining_balance_cents` is the starting balance from `data/gift-cards.json` minus the combined requested redemption for that card.
34
+ - [ ] `gift_card_applied_cents` is the sum of combined redemption amounts.
35
+ - [ ] `amount_due_cents = subtotal_cents - gift_card_applied_cents`.
36
+ - [ ] `tests/cli.test.js` is updated. Existing tests still pass AND at least two new tests cover `gift-card`: one successful redemption and one validation failure.
37
+
38
+ ## Constraints
39
+
40
+ - **No new npm dependencies.**
41
+ - **No floating-money output.** All public amounts are integer cents.
42
+ - **No silent catches.** If parsing or file reading fails, emit a visible JSON error to stderr and exit `2`.
43
+ - **No extra stdout/stderr text** on the success path; downstream tooling parses stdout as JSON.
44
+ - **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
45
+
46
+ ## Out of Scope
47
+
48
+ - Persisting changed gift-card balances.
49
+ - Taxes, shipping, coupons, or currency handling beyond integer cents.
50
+ - Adding server routes or web UI.
51
+ - Touching `server/`, `web/`, or `tests/server.test.js`.
52
+
53
+ ## Verification
54
+
55
+ - `node --test tests/cli.test.js` exits 0.
56
+ - Duplicate SKUs are combined before line totals are computed.
57
+ - Duplicate `card_id` redemption rows are combined before balance validation.
58
+ - A successful redemption emits exact item rows, redemption rows, applied total, and amount due.
59
+ - Changing `data/gift-cards.json` product prices or card balances changes command output without code changes.
60
+ - A combined card redemption over balance exits `2`, prints one JSON error to stderr, and prints no stdout.
61
+ - The insufficient balance error object includes `error`, `card_id`, `available_cents`, and `requested_cents`.
62
+ - `git diff --stat` shows only `bin/cli.js` and `tests/cli.test.js` touched (the gift-card seed file `data/gift-cards.json` is created by the fixture's setup script, not by the arm).
@@ -0,0 +1,9 @@
1
+ Add a `gift-card` command to `bench-cli` so users can run `bench-cli gift-card --input <path>` with a JSON file shaped as `{ "order_id": string, "lines": [{ "sku": string, "qty": number }], "redeems": [{ "card_id": string, "amount_cents": number }] }`.
2
+
3
+ Read product prices, card balances, and active flags from `data/gift-cards.json`. Do not hardcode those values. Combine duplicate SKUs before computing line totals, and combine duplicate `card_id` redemption requests before validating balances or computing remaining balances.
4
+
5
+ On success, write one JSON object to stdout and no stderr with keys `order_id`, `subtotal_cents`, `gift_card_applied_cents`, `amount_due_cents`, `items`, and `redemptions`. Item rows have `sku`, `qty`, `line_cents`. Redemption rows have `card_id`, `applied_cents`, `remaining_balance_cents`. Preserve first-seen order for combined SKU rows and combined card rows.
6
+
7
+ Validation happens before any result is printed. Invalid JSON, missing `lines`, missing `redeems`, unknown SKU, unknown card, inactive card, non-positive or non-integer `qty`, non-positive or non-integer `amount_cents`, missing `order_id`, combined card redemption over balance, or total redemption over subtotal exits `2` and writes exactly one JSON error object to stderr. Combined card redemption over balance must use exact shape `{ "error": "insufficient_balance", "card_id": string, "available_cents": number, "requested_cents": number }`. Total redemption over subtotal must use exact shape `{ "error": "redemption_exceeds_subtotal", "subtotal_cents": number, "requested_cents": number }`.
8
+
9
+ Update `tests/cli.test.js` so existing tests still pass and at least two new tests cover `gift-card`: one successful redemption and one validation failure.
@@ -0,0 +1,48 @@
1
+ const assert = require('node:assert'); // Verifier script: runs `bin/cli.js gift-card` on a fixture order and asserts the exact success output.
2
+ const fs = require('node:fs');
3
+ const os = require('node:os');
4
+ const path = require('node:path');
5
+ const { spawnSync } = require('node:child_process');
6
+
7
+ const workdir = process.env.BENCH_WORKDIR || process.cwd(); // directory the CLI is run from; falls back to the current directory when BENCH_WORKDIR is unset or empty
8
+ const input = path.join(os.tmpdir(), `gift-card-success-${process.pid}.json`); // per-process temp input path. NOTE(review): this script never deletes the file - consider cleaning it up.
9
+
10
+ fs.writeFileSync(input, JSON.stringify({ // write the order fixture that the CLI will read via --input
11
+ order_id: 'order-27',
12
+ lines: [
13
+ { sku: 'TEE', qty: 1 }, // TEE appears twice (1 + 2); the expected output below has a single TEE row with qty 3
14
+ { sku: 'SOCKS', qty: 2 },
15
+ { sku: 'TEE', qty: 2 },
16
+ { sku: 'BAG', qty: 1 }
17
+ ],
18
+ redeems: [
19
+ { card_id: 'GC-100', amount_cents: 3000 }, // GC-100 appears twice (3000 + 500); expected as one row with 3500 applied
20
+ { card_id: 'GC-200', amount_cents: 1200 },
21
+ { card_id: 'GC-100', amount_cents: 500 }
22
+ ]
23
+ }));
24
+
25
+ const proc = spawnSync('node', ['bin/cli.js', 'gift-card', '--input', input], { // run the command synchronously; bin/cli.js is resolved relative to cwd
26
+ cwd: workdir,
27
+ encoding: 'utf8' // stdout/stderr are returned as strings rather than Buffers
28
+ });
29
+
30
+ assert.strictEqual(proc.status, 0, proc.stderr || proc.stdout); // must exit 0; on mismatch the CLI's stderr (or stdout) becomes the assertion message
31
+ assert.strictEqual(proc.stderr, ''); // success path must write nothing to stderr
32
+ assert.deepStrictEqual(JSON.parse(proc.stdout), { // stdout must parse as a single JSON value that matches this object exactly
33
+ order_id: 'order-27',
34
+ subtotal_cents: 12100, // 7500 + 1400 + 3200
35
+ gift_card_applied_cents: 4700, // 3500 + 1200
36
+ amount_due_cents: 7400, // 12100 - 4700
37
+ items: [ // one row per SKU, in first-seen order
38
+ { sku: 'TEE', qty: 3, line_cents: 7500 },
39
+ { sku: 'SOCKS', qty: 2, line_cents: 1400 },
40
+ { sku: 'BAG', qty: 1, line_cents: 3200 }
41
+ ],
42
+ redemptions: [ // one row per card, in first-seen order
43
+ { card_id: 'GC-100', applied_cents: 3500, remaining_balance_cents: 1500 }, // expects a seeded starting balance of 5000 in data/gift-cards.json
44
+ { card_id: 'GC-200', applied_cents: 1200, remaining_balance_cents: 1300 } // expects a seeded starting balance of 2500 in data/gift-cards.json
45
+ ]
46
+ });
47
+
48
+ process.stdout.write(JSON.stringify({ ok: true }) + '\n'); // reached only when no assertion above threw; prints {"ok":true} and a newline