devlyn-cli 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/CLAUDE.md +1 -1
  2. package/README.md +1 -1
  3. package/benchmark/auto-resolve/README.md +318 -2
  4. package/benchmark/auto-resolve/RUBRIC.md +6 -0
  5. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/NOTES.md +63 -0
  6. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/expected.json +60 -0
  7. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/metadata.json +10 -0
  8. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/setup.sh +17 -0
  9. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/spec.md +52 -0
  10. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/task.txt +9 -0
  11. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/verifiers/invalid.js +29 -0
  12. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/verifiers/parallel.js +50 -0
  13. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/NOTES.md +70 -0
  14. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/expected.json +52 -0
  15. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/metadata.json +10 -0
  16. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/setup.sh +171 -0
  17. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/spec.md +51 -0
  18. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/task.txt +9 -0
  19. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/NOTES.md +83 -0
  20. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/expected.json +74 -0
  21. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/metadata.json +10 -0
  22. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/setup.sh +251 -0
  23. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/spec.md +58 -0
  24. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/task.txt +13 -0
  25. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/verifiers/replay-malformed-body.js +64 -0
  26. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/NOTES.md +98 -0
  27. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/expected.json +46 -0
  28. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/metadata.json +10 -0
  29. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/setup.sh +336 -0
  30. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/spec.md +52 -0
  31. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/task.txt +9 -0
  32. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/NOTES.md +26 -0
  33. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/expected.json +64 -0
  34. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/metadata.json +10 -0
  35. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/setup.sh +32 -0
  36. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/spec.md +58 -0
  37. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/task.txt +7 -0
  38. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/verifiers/exact-success.js +54 -0
  39. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/verifiers/no-hardcoded-pricing.js +47 -0
  40. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/verifiers/stock-error.js +45 -0
  41. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/NOTES.md +27 -0
  42. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/expected.json +62 -0
  43. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/metadata.json +10 -0
  44. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/setup.sh +2 -0
  45. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/spec.md +62 -0
  46. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/task.txt +7 -0
  47. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/verifiers/error-order.js +55 -0
  48. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/verifiers/priority-blocked.js +48 -0
  49. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/NOTES.md +27 -0
  50. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/expected.json +56 -0
  51. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/metadata.json +10 -0
  52. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/setup.sh +2 -0
  53. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/spec.md +65 -0
  54. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/task.txt +7 -0
  55. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/verifiers/conflicting-duplicate.js +34 -0
  56. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/verifiers/idempotent-close.js +41 -0
  57. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/NOTES.md +27 -0
  58. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/expected.json +56 -0
  59. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/metadata.json +10 -0
  60. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/setup.sh +2 -0
  61. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/spec.md +71 -0
  62. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/task.txt +7 -0
  63. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/verifiers/priority-rollback.js +64 -0
  64. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/verifiers/single-warehouse-fefo.js +66 -0
  65. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/NOTES.md +28 -0
  66. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/expected.json +66 -0
  67. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/metadata.json +10 -0
  68. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/setup.sh +36 -0
  69. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/spec.md +65 -0
  70. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/task.txt +7 -0
  71. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/verifiers/catalog-source.js +57 -0
  72. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/verifiers/exact-success.js +63 -0
  73. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/verifiers/stock-error.js +34 -0
  74. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/NOTES.md +25 -0
  75. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/expected.json +68 -0
  76. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/metadata.json +10 -0
  77. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/setup.sh +17 -0
  78. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/spec.md +69 -0
  79. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/task.txt +7 -0
  80. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/conflicting-duplicate.js +29 -0
  81. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/exact-payout.js +58 -0
  82. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/rules-source.js +56 -0
  83. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/NOTES.md +24 -0
  84. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/expected.json +66 -0
  85. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/metadata.json +10 -0
  86. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/setup.sh +22 -0
  87. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/spec.md +62 -0
  88. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/task.txt +9 -0
  89. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/verifiers/exact-success.js +48 -0
  90. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/verifiers/insufficient-balance.js +36 -0
  91. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/verifiers/rules-source.js +55 -0
  92. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/NOTES.md +20 -0
  93. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/expected.json +66 -0
  94. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/metadata.json +10 -0
  95. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/setup.sh +23 -0
  96. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/spec.md +66 -0
  97. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/task.txt +11 -0
  98. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/verifiers/exact-success.js +44 -0
  99. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/verifiers/rules-source.js +58 -0
  100. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/verifiers/unavailable-inventory.js +35 -0
  101. package/benchmark/auto-resolve/fixtures/SCHEMA.md +13 -1
  102. package/benchmark/auto-resolve/scripts/collect-swebench-predictions.py +98 -0
  103. package/benchmark/auto-resolve/scripts/fetch-swebench-instances.py +111 -0
  104. package/benchmark/auto-resolve/scripts/frozen-verify-gate.py +289 -0
  105. package/benchmark/auto-resolve/scripts/full-pipeline-pair-gate.py +250 -0
  106. package/benchmark/auto-resolve/scripts/headroom-gate.py +147 -0
  107. package/benchmark/auto-resolve/scripts/judge.sh +82 -3
  108. package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-case.py +244 -0
  109. package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-corpus.py +118 -0
  110. package/benchmark/auto-resolve/scripts/prepare-swebench-solver-worktree.py +192 -0
  111. package/benchmark/auto-resolve/scripts/run-fixture.sh +234 -40
  112. package/benchmark/auto-resolve/scripts/run-frozen-verify-pair.sh +511 -0
  113. package/benchmark/auto-resolve/scripts/run-full-pipeline-pair-candidate.sh +162 -0
  114. package/benchmark/auto-resolve/scripts/run-headroom-candidate.sh +93 -0
  115. package/benchmark/auto-resolve/scripts/run-swebench-frozen-corpus.sh +209 -0
  116. package/benchmark/auto-resolve/scripts/run-swebench-solver-batch.sh +239 -0
  117. package/benchmark/auto-resolve/scripts/swebench-frozen-matrix.py +265 -0
  118. package/benchmark/auto-resolve/scripts/test-frozen-verify-gate.sh +192 -0
  119. package/benchmark/auto-resolve/scripts/test-full-pipeline-pair-gate.sh +131 -0
  120. package/benchmark/auto-resolve/scripts/test-headroom-gate.sh +84 -0
  121. package/benchmark/auto-resolve/scripts/test-swebench-frozen-case.sh +302 -0
  122. package/bin/devlyn.js +56 -10
  123. package/config/skills/_shared/archive_run.py +3 -0
  124. package/config/skills/_shared/codex-config.md +2 -2
  125. package/config/skills/_shared/codex-monitored.sh +72 -7
  126. package/config/skills/_shared/collect-codex-findings.py +125 -0
  127. package/config/skills/_shared/engine-preflight.md +1 -1
  128. package/config/skills/_shared/expected.schema.json +18 -0
  129. package/config/skills/_shared/spec-verify-check.py +312 -10
  130. package/config/skills/_shared/verify-merge-findings.py +327 -0
  131. package/config/skills/devlyn:ideate/SKILL.md +1 -1
  132. package/config/skills/devlyn:resolve/SKILL.md +62 -8
  133. package/config/skills/devlyn:resolve/references/phases/build-gate.md +1 -1
  134. package/config/skills/devlyn:resolve/references/phases/probe-derive.md +164 -0
  135. package/config/skills/devlyn:resolve/references/phases/verify.md +156 -4
  136. package/config/skills/devlyn:resolve/references/state-schema.md +10 -4
  137. package/package.json +1 -1
  138. package/scripts/lint-skills.sh +32 -0
@@ -0,0 +1,36 @@
1
+ const assert = require('node:assert');
2
+ const fs = require('node:fs');
3
+ const os = require('node:os');
4
+ const path = require('node:path');
5
+ const { spawnSync } = require('node:child_process');
6
+
7
+ const workdir = process.env.BENCH_WORKDIR || process.cwd();
8
+ const input = path.join(os.tmpdir(), `gift-card-balance-${process.pid}.json`);
9
+
10
+ fs.writeFileSync(input, JSON.stringify({
11
+ order_id: 'order-balance',
12
+ lines: [
13
+ { sku: 'TEE', qty: 3 },
14
+ { sku: 'BAG', qty: 1 }
15
+ ],
16
+ redeems: [
17
+ { card_id: 'GC-100', amount_cents: 3000 },
18
+ { card_id: 'GC-100', amount_cents: 2500 }
19
+ ]
20
+ }));
21
+
22
+ const proc = spawnSync('node', ['bin/cli.js', 'gift-card', '--input', input], {
23
+ cwd: workdir,
24
+ encoding: 'utf8'
25
+ });
26
+
27
+ assert.strictEqual(proc.status, 2);
28
+ assert.strictEqual(proc.stdout, '');
29
+ assert.deepStrictEqual(JSON.parse(proc.stderr), {
30
+ error: 'insufficient_balance',
31
+ card_id: 'GC-100',
32
+ available_cents: 5000,
33
+ requested_cents: 5500
34
+ });
35
+
36
+ process.stdout.write(JSON.stringify({ ok: true }) + '\n');
@@ -0,0 +1,55 @@
1
+ const assert = require('node:assert');
2
+ const fs = require('node:fs');
3
+ const os = require('node:os');
4
+ const path = require('node:path');
5
+ const { spawnSync } = require('node:child_process');
6
+
7
+ const workdir = process.env.BENCH_WORKDIR || process.cwd();
8
+ const rulesPath = path.join(workdir, 'data', 'gift-cards.json');
9
+ const original = fs.readFileSync(rulesPath, 'utf8');
10
+
11
+ try {
12
+ fs.writeFileSync(rulesPath, JSON.stringify({
13
+ products: {
14
+ TEE: { unit_cents: 1000 }
15
+ },
16
+ cards: {
17
+ 'GC-SOURCE': { balance_cents: 900, active: true }
18
+ }
19
+ }, null, 2) + '\n');
20
+
21
+ const input = path.join(os.tmpdir(), `gift-card-source-${process.pid}.json`);
22
+ fs.writeFileSync(input, JSON.stringify({
23
+ order_id: 'order-source',
24
+ lines: [
25
+ { sku: 'TEE', qty: 1 }
26
+ ],
27
+ redeems: [
28
+ { card_id: 'GC-SOURCE', amount_cents: 700 }
29
+ ]
30
+ }));
31
+
32
+ const proc = spawnSync('node', ['bin/cli.js', 'gift-card', '--input', input], {
33
+ cwd: workdir,
34
+ encoding: 'utf8'
35
+ });
36
+
37
+ assert.strictEqual(proc.status, 0, proc.stderr || proc.stdout);
38
+ assert.strictEqual(proc.stderr, '');
39
+ assert.deepStrictEqual(JSON.parse(proc.stdout), {
40
+ order_id: 'order-source',
41
+ subtotal_cents: 1000,
42
+ gift_card_applied_cents: 700,
43
+ amount_due_cents: 300,
44
+ items: [
45
+ { sku: 'TEE', qty: 1, line_cents: 1000 }
46
+ ],
47
+ redemptions: [
48
+ { card_id: 'GC-SOURCE', applied_cents: 700, remaining_balance_cents: 200 }
49
+ ]
50
+ });
51
+ } finally {
52
+ fs.writeFileSync(rulesPath, original);
53
+ }
54
+
55
+ process.stdout.write(JSON.stringify({ ok: true }) + '\n');
@@ -0,0 +1,20 @@
1
+ # F28 CLI rental quote rules
2
+
3
+ ## Why this fixture exists
4
+
5
+ F27 was rejected because the direct bare arm passed every verifier. F28 returns
6
+ to the F16 pattern that produced valid lift: exact success shape plus exact
7
+ validation shape, with enough arithmetic and date handling that a direct
8
+ implementation is likely to leak extra fields or miss one contract.
9
+
10
+ ## Pair expectation
11
+
12
+ PLAN must preserve the date-counting and duplicate-combine invariants.
13
+ IMPLEMENT must keep all public amounts in integer cents and read rules from
14
+ `data/rental-rules.json`. VERIFY should probe both the Friday-to-Tuesday
15
+ weekend count and the combined-stock exact error shape.
16
+
17
+ ## Isolation
18
+
19
+ F16 covers checkout tax rules. F28 covers rental-day UTC math, weekend
20
+ surcharges, deposits, protection fees, and non-persistent inventory validation.
@@ -0,0 +1,66 @@
1
+ {
2
+ "verification_commands": [
3
+ {
4
+ "cmd": "node --test tests/cli.test.js",
5
+ "exit_code": 0,
6
+ "stdout_contains": [],
7
+ "stdout_not_contains": ["not ok "]
8
+ },
9
+ {
10
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/exact-success.js\"",
11
+ "exit_code": 0,
12
+ "stdout_contains": ["\"ok\":true"],
13
+ "stdout_not_contains": [],
14
+ "contract_refs": [
15
+ "A Friday-to-Tuesday rental counts four rental days and two weekend days.",
16
+ "Duplicate SKUs are combined before stock validation and pricing.",
17
+ "A successful quote emits exact integer-cent weekend surcharge, discount, protection, deposit, and total fields."
18
+ ]
19
+ },
20
+ {
21
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/unavailable-inventory.js\"",
22
+ "exit_code": 0,
23
+ "stdout_contains": ["\"ok\":true"],
24
+ "stdout_not_contains": [],
25
+ "contract_refs": [
26
+ "A combined quantity over stock exits `2`, prints one JSON error to stderr, and prints no stdout.",
27
+ "Combined quantity over stock uses exact error shape `{ \"error\": \"unavailable_inventory\", \"sku\": string, \"available\": number, \"requested\": number }`.",
28
+ "The unavailable inventory error object includes `error`, `sku`, `available`, and `requested`."
29
+ ]
30
+ },
31
+ {
32
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/rules-source.js\"",
33
+ "exit_code": 0,
34
+ "stdout_contains": ["\"ok\":true"],
35
+ "stdout_not_contains": [],
36
+ "contract_refs": [
37
+ "Item daily rates, stock, deposits, weekend surcharge percent, protection daily price, and coupons come from `data/rental-rules.json`. Do not hardcode those values in the command implementation.",
38
+ "Changing `data/rental-rules.json` rates, deposits, or surcharge settings changes command output without code changes."
39
+ ]
40
+ }
41
+ ],
42
+ "forbidden_patterns": [
43
+ {
44
+ "pattern": "catch\\s*\\([^)]*\\)\\s*\\{[^}]*return\\s+(null|undefined|''|\\{\\})",
45
+ "description": "silent catch returning fallback in rental quote path",
46
+ "files": ["bin/cli.js"],
47
+ "severity": "disqualifier"
48
+ },
49
+ {
50
+ "pattern": "catch\\s*\\([^)]*\\)\\s*\\{\\s*\\}",
51
+ "description": "empty catch block",
52
+ "files": ["bin/cli.js"],
53
+ "severity": "disqualifier"
54
+ },
55
+ {
56
+ "pattern": "/\\*\\s*eslint-disable",
57
+ "description": "eslint-disable without scoped justification",
58
+ "files": ["bin/cli.js"],
59
+ "severity": "disqualifier"
60
+ }
61
+ ],
62
+ "required_files": ["bin/cli.js", "tests/cli.test.js", "data/rental-rules.json"],
63
+ "forbidden_files": [],
64
+ "max_deps_added": 0,
65
+ "spec_output_files": ["bin/cli.js", "tests/cli.test.js"]
66
+ }
@@ -0,0 +1,10 @@
1
+ {
2
+ "id": "F28-cli-rental-quote-rules",
3
+ "category": "high-risk",
4
+ "difficulty": "high",
5
+ "timeout_seconds": 1500,
6
+ "required_tools": ["node"],
7
+ "browser": false,
8
+ "deps_change_expected": false,
9
+ "intent": "Add a bench-cli rental-quote command that prices equipment rentals from data/rental-rules.json using rental-day, weekend surcharge, discount, protection, deposit, and combined stock-validation rules."
10
+ }
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env bash
2
+ # F28 setup - seed rental quote rules.
3
+ set -e
4
+
5
+ mkdir -p data
6
+
7
+ cat > data/rental-rules.json <<'JSON'
8
+ {
9
+ "items": {
10
+ "CAM": { "daily_cents": 1200, "stock": 2, "deposit_cents": 5000 },
11
+ "LIGHT": { "daily_cents": 700, "stock": 3, "deposit_cents": 2000 },
12
+ "TRIPOD": { "daily_cents": 400, "stock": 5, "deposit_cents": 1000 }
13
+ },
14
+ "weekend_surcharge_percent": 25,
15
+ "protection_daily_cents": 300,
16
+ "coupons": {
17
+ "LONG3": { "percent": 10, "min_rental_days": 3 },
18
+ "NONE": { "percent": 0, "min_rental_days": 1 }
19
+ }
20
+ }
21
+ JSON
22
+
23
+ exit 0
@@ -0,0 +1,66 @@
1
+ ---
2
+ id: "F28-cli-rental-quote-rules"
3
+ title: "Rental quote command with weekend and deposit rules"
4
+ status: planned
5
+ complexity: high
6
+ depends-on: []
7
+ ---
8
+
9
+ # F28 Rental quote command with weekend and deposit rules
10
+
11
+ ## Context
12
+
13
+ `bench-cli` currently has greeting and version commands only. The task:
14
+ add a `rental-quote` command that reads an equipment rental request, prices it
15
+ from `data/rental-rules.json`, validates combined inventory, and prints one
16
+ exact integer-cents quote.
17
+
18
+ Rental quotes are operational contracts: duplicate item rows must be combined
19
+ before stock validation, weekend surcharge rules must be deterministic, and
20
+ success output must stay machine-readable without extra fields.
21
+
22
+ ## Requirements
23
+
24
+ - [ ] `bench-cli rental-quote --input <path>` reads JSON shaped as `{ "start_date": "YYYY-MM-DD", "end_date": "YYYY-MM-DD", "coupon": string | null, "protection": boolean, "items": [{ "sku": string, "qty": number }] }`.
25
+ - [ ] Item daily rates, stock, deposits, weekend surcharge percent, protection daily price, and coupons come from `data/rental-rules.json`. Do not hardcode those values in the command implementation.
26
+ - [ ] `rental_days` is the number of calendar days from `start_date` inclusive to `end_date` exclusive, using UTC date math. `end_date` must be after `start_date`.
27
+ - [ ] A rental day is a weekend day when its UTC day is Saturday or Sunday.
28
+ - [ ] Combine duplicate SKUs before validating stock and before computing item rows. The output `items` array must contain one row per SKU in first-seen order.
29
+ - [ ] Validation happens before any quote is printed. Invalid JSON, missing `items`, unknown SKU, non-positive or non-integer `qty`, combined quantity over stock, invalid date format, `end_date` not after `start_date`, unknown coupon, or non-boolean `protection` exits `2` and writes exactly one JSON error object to stderr.
30
+ - [ ] Combined quantity over stock uses exact error shape `{ "error": "unavailable_inventory", "sku": string, "available": number, "requested": number }`.
31
+ - [ ] `subtotal_cents` is the sum of `daily_cents * combined_qty * rental_days` for all item rows.
32
+ - [ ] `weekend_surcharge_cents` is the sum of `Math.round(daily_cents * combined_qty * weekend_days * weekend_surcharge_percent / 100)` for all item rows.
33
+ - [ ] `discount_cents` is `Math.round((subtotal_cents + weekend_surcharge_cents) * coupon.percent / 100)` when a coupon is present and `rental_days >= coupon.min_rental_days`; otherwise `0`.
34
+ - [ ] `protection_cents` is `protection_daily_cents * total_combined_qty * rental_days` when `protection` is true; otherwise `0`.
35
+ - [ ] `deposit_cents` is the sum of `deposit_cents * combined_qty` for all item rows. Deposits are never discounted.
36
+ - [ ] `total_cents = subtotal_cents + weekend_surcharge_cents - discount_cents + protection_cents + deposit_cents`.
37
+ - [ ] On success, write exactly one JSON object to stdout and no stderr. Keys: `rental_days`, `weekend_days`, `subtotal_cents`, `weekend_surcharge_cents`, `discount_cents`, `protection_cents`, `deposit_cents`, `total_cents`, `items`.
38
+ - [ ] Each output item row has keys `sku`, `qty`, `rental_cents`, `deposit_cents`. `rental_cents` is `daily_cents * combined_qty * rental_days`, and row `deposit_cents` is the item's deposit times combined quantity.
39
+ - [ ] `tests/cli.test.js` is updated. Existing tests still pass AND at least two new tests cover `rental-quote`: one successful quote and one validation failure.
40
+
41
+ ## Constraints
42
+
43
+ - **No new npm dependencies.**
44
+ - **No floating-money output.** All public amounts are integer cents.
45
+ - **No silent catches.** If parsing or file reading fails, emit a visible JSON error to stderr and exit `2`.
46
+ - **No extra stdout/stderr text** on the success path; downstream tooling parses stdout as JSON.
47
+ - **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
48
+
49
+ ## Out of Scope
50
+
51
+ - Persisting reservations or mutating inventory.
52
+ - Hourly pricing, time zones beyond UTC date math, holidays, or blackout dates.
53
+ - Taxes, shipping, currencies, or payment capture.
54
+ - Adding server routes or web UI.
55
+ - Touching `server/`, `web/`, or `tests/server.test.js`.
56
+
57
+ ## Verification
58
+
59
+ - `node --test tests/cli.test.js` exits 0.
60
+ - A Friday-to-Tuesday rental counts four rental days and two weekend days.
61
+ - Duplicate SKUs are combined before stock validation and pricing.
62
+ - A successful quote emits exact integer-cent weekend surcharge, discount, protection, deposit, and total fields.
63
+ - A combined quantity over stock exits `2`, prints one JSON error to stderr, and prints no stdout.
64
+ - The unavailable inventory error object includes `error`, `sku`, `available`, and `requested`.
65
+ - Changing `data/rental-rules.json` rates, deposits, or surcharge settings changes command output without code changes.
66
+ - `git diff --stat` shows only `bin/cli.js` and `tests/cli.test.js` touched (the rental rules seed comes from setup, not the arm).
@@ -0,0 +1,11 @@
1
+ Add a `rental-quote` command to `bench-cli` so users can run `bench-cli rental-quote --input <path>` with a JSON file shaped as `{ "start_date": "YYYY-MM-DD", "end_date": "YYYY-MM-DD", "coupon": string | null, "protection": boolean, "items": [{ "sku": string, "qty": number }] }`.
2
+
3
+ Read item daily rates, stock, deposits, weekend surcharge percent, protection daily price, and coupons from `data/rental-rules.json`. Do not hardcode those values. `rental_days` is the number of calendar days from `start_date` inclusive to `end_date` exclusive using UTC date math. A rental day is a weekend day when its UTC day is Saturday or Sunday.
4
+
5
+ Combine duplicate SKUs before stock validation and pricing. On success, write one JSON object to stdout and no stderr with keys `rental_days`, `weekend_days`, `subtotal_cents`, `weekend_surcharge_cents`, `discount_cents`, `protection_cents`, `deposit_cents`, `total_cents`, and `items`. Each item row has keys `sku`, `qty`, `rental_cents`, and `deposit_cents`.
6
+
7
+ Use these formulas: `subtotal_cents` is the sum of `daily_cents * combined_qty * rental_days`; `weekend_surcharge_cents` is the sum of `Math.round(daily_cents * combined_qty * weekend_days * weekend_surcharge_percent / 100)`; `discount_cents` is `Math.round((subtotal_cents + weekend_surcharge_cents) * coupon.percent / 100)` when a coupon is present and `rental_days >= coupon.min_rental_days`, otherwise `0`; `protection_cents` is `protection_daily_cents * total_combined_qty * rental_days` when `protection` is true, otherwise `0`; `deposit_cents` is the sum of item deposits times combined quantities; `total_cents = subtotal_cents + weekend_surcharge_cents - discount_cents + protection_cents + deposit_cents`.
8
+
9
+ Validation happens before any quote is printed. Invalid JSON, missing `items`, unknown SKU, non-positive or non-integer `qty`, combined quantity over stock, invalid date format, `end_date` not after `start_date`, unknown coupon, or non-boolean `protection` exits `2` and writes exactly one JSON error object to stderr. Combined quantity over stock must use exact shape `{ "error": "unavailable_inventory", "sku": string, "available": number, "requested": number }`.
10
+
11
+ Update `tests/cli.test.js` so existing tests still pass and at least two new tests cover `rental-quote`: one successful quote and one validation failure.
@@ -0,0 +1,44 @@
1
+ const assert = require('node:assert');
2
+ const fs = require('node:fs');
3
+ const os = require('node:os');
4
+ const path = require('node:path');
5
+ const { spawnSync } = require('node:child_process');
6
+
7
+ const workdir = process.env.BENCH_WORKDIR || process.cwd();
8
+ const input = path.join(os.tmpdir(), `rental-success-${process.pid}.json`);
9
+
10
+ fs.writeFileSync(input, JSON.stringify({
11
+ start_date: '2026-05-08',
12
+ end_date: '2026-05-12',
13
+ coupon: 'LONG3',
14
+ protection: true,
15
+ items: [
16
+ { sku: 'CAM', qty: 1 },
17
+ { sku: 'LIGHT', qty: 1 },
18
+ { sku: 'CAM', qty: 1 }
19
+ ]
20
+ }));
21
+
22
+ const proc = spawnSync('node', ['bin/cli.js', 'rental-quote', '--input', input], {
23
+ cwd: workdir,
24
+ encoding: 'utf8'
25
+ });
26
+
27
+ assert.strictEqual(proc.status, 0, proc.stderr || proc.stdout);
28
+ assert.strictEqual(proc.stderr, '');
29
+ assert.deepStrictEqual(JSON.parse(proc.stdout), {
30
+ rental_days: 4,
31
+ weekend_days: 2,
32
+ subtotal_cents: 12400,
33
+ weekend_surcharge_cents: 1550,
34
+ discount_cents: 1395,
35
+ protection_cents: 3600,
36
+ deposit_cents: 12000,
37
+ total_cents: 28155,
38
+ items: [
39
+ { sku: 'CAM', qty: 2, rental_cents: 9600, deposit_cents: 10000 },
40
+ { sku: 'LIGHT', qty: 1, rental_cents: 2800, deposit_cents: 2000 }
41
+ ]
42
+ });
43
+
44
+ process.stdout.write(JSON.stringify({ ok: true }) + '\n');
@@ -0,0 +1,58 @@
1
+ const assert = require('node:assert');
2
+ const fs = require('node:fs');
3
+ const os = require('node:os');
4
+ const path = require('node:path');
5
+ const { spawnSync } = require('node:child_process');
6
+
7
+ const workdir = process.env.BENCH_WORKDIR || process.cwd();
8
+ const rulesPath = path.join(workdir, 'data', 'rental-rules.json');
9
+ const original = fs.readFileSync(rulesPath, 'utf8');
10
+
11
+ try {
12
+ fs.writeFileSync(rulesPath, JSON.stringify({
13
+ items: {
14
+ CAM: { daily_cents: 1000, stock: 3, deposit_cents: 3000 }
15
+ },
16
+ weekend_surcharge_percent: 50,
17
+ protection_daily_cents: 100,
18
+ coupons: {
19
+ HALF: { percent: 50, min_rental_days: 1 }
20
+ }
21
+ }, null, 2) + '\n');
22
+
23
+ const input = path.join(os.tmpdir(), `rental-source-${process.pid}.json`);
24
+ fs.writeFileSync(input, JSON.stringify({
25
+ start_date: '2026-05-09',
26
+ end_date: '2026-05-11',
27
+ coupon: 'HALF',
28
+ protection: true,
29
+ items: [
30
+ { sku: 'CAM', qty: 1 }
31
+ ]
32
+ }));
33
+
34
+ const proc = spawnSync('node', ['bin/cli.js', 'rental-quote', '--input', input], {
35
+ cwd: workdir,
36
+ encoding: 'utf8'
37
+ });
38
+
39
+ assert.strictEqual(proc.status, 0, proc.stderr || proc.stdout);
40
+ assert.strictEqual(proc.stderr, '');
41
+ assert.deepStrictEqual(JSON.parse(proc.stdout), {
42
+ rental_days: 2,
43
+ weekend_days: 2,
44
+ subtotal_cents: 2000,
45
+ weekend_surcharge_cents: 1000,
46
+ discount_cents: 1500,
47
+ protection_cents: 200,
48
+ deposit_cents: 3000,
49
+ total_cents: 4700,
50
+ items: [
51
+ { sku: 'CAM', qty: 1, rental_cents: 2000, deposit_cents: 3000 }
52
+ ]
53
+ });
54
+ } finally {
55
+ fs.writeFileSync(rulesPath, original);
56
+ }
57
+
58
+ process.stdout.write(JSON.stringify({ ok: true }) + '\n');
@@ -0,0 +1,35 @@
1
+ const assert = require('node:assert');
2
+ const fs = require('node:fs');
3
+ const os = require('node:os');
4
+ const path = require('node:path');
5
+ const { spawnSync } = require('node:child_process');
6
+
7
+ const workdir = process.env.BENCH_WORKDIR || process.cwd();
8
+ const input = path.join(os.tmpdir(), `rental-stock-${process.pid}.json`);
9
+
10
+ fs.writeFileSync(input, JSON.stringify({
11
+ start_date: '2026-05-08',
12
+ end_date: '2026-05-10',
13
+ coupon: null,
14
+ protection: false,
15
+ items: [
16
+ { sku: 'CAM', qty: 1 },
17
+ { sku: 'CAM', qty: 2 }
18
+ ]
19
+ }));
20
+
21
+ const proc = spawnSync('node', ['bin/cli.js', 'rental-quote', '--input', input], {
22
+ cwd: workdir,
23
+ encoding: 'utf8'
24
+ });
25
+
26
+ assert.strictEqual(proc.status, 2);
27
+ assert.strictEqual(proc.stdout, '');
28
+ assert.deepStrictEqual(JSON.parse(proc.stderr), {
29
+ error: 'unavailable_inventory',
30
+ sku: 'CAM',
31
+ available: 2,
32
+ requested: 3
33
+ });
34
+
35
+ process.stdout.write(JSON.stringify({ ok: true }) + '\n');
@@ -73,7 +73,8 @@ Machine-readable acceptance criteria used by both `run-fixture.sh` verification
73
73
  "cmd": "node bin/cli.js doctor",
74
74
  "exit_code": 0,
75
75
  "stdout_contains": ["doctor: "],
76
- "stdout_not_contains": ["undefined"]
76
+ "stdout_not_contains": ["undefined"],
77
+ "contract_refs": []
77
78
  }
78
79
  ],
79
80
  "forbidden_patterns": [
@@ -86,14 +87,25 @@ Machine-readable acceptance criteria used by both `run-fixture.sh` verification
86
87
  ],
87
88
  "required_files": ["bin/cli.js"],
88
89
  "forbidden_files": [],
90
+ "tier_a_waivers": [],
91
+ "spec_output_files": ["bin/cli.js"],
89
92
  "max_deps_added": 0
90
93
  }
91
94
  ```
92
95
 
93
96
  - **verification_commands** — runner executes each. Each command's pass/fail contributes to the arm's `verify_score`.
97
+ Commands run with `BENCH_WORKDIR` (fresh arm work tree) and
98
+ `BENCH_FIXTURE_DIR` (the fixture directory outside the arm work tree) in
99
+ the environment. Put discriminator/oracle scripts under the fixture
100
+ directory when the arm should not read the verifier source.
101
+ Any command that references `BENCH_FIXTURE_DIR` is a hidden oracle and must
102
+ include `contract_refs`: exact substrings from `spec.md` proving the oracle
103
+ tests a visible contract rather than inventing a narrower one.
94
104
  - **forbidden_patterns** — regexes scanned across `diff.patch`. Match at `severity: "disqualifier"` is a hard-floor fail. Match at `severity: "warning"` goes into the judge's critical-findings report.
95
105
  - **required_files** — must exist after the arm runs.
96
106
  - **forbidden_files** — must NOT appear in the arm's diff.
107
+ - **tier_a_waivers** — optional globs for files the spec explicitly authorizes even though the Tier A scope oracle would normally flag them.
108
+ - **spec_output_files** — files or globs that define the authorized output surface for Tier B scope tracing.
97
109
  - **max_deps_added** — count of new entries under `dependencies`/`devDependencies` in `package.json`. Exceeds → hard-floor fail.
98
110
 
99
111
  ## NOTES.md
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env python3
2
+ """Collect patch.diff files into SWE-bench prediction JSONL."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import argparse
7
+ import json
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+
12
def read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Load a JSON Lines file into a list of dicts.

    Lines that are empty or contain only whitespace are skipped.

    Args:
        path: JSONL file to read; it is decoded as UTF-8.

    Returns:
        One dict per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: A non-blank line is not valid JSON. The message
            is prefixed with ``path:line_no`` so the offending line can be
            located.
        ValueError: A line parses to something other than a JSON object.
    """
    rows: list[dict[str, Any]] = []
    with path.open(encoding="utf8") as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                value = json.loads(line)
            except json.JSONDecodeError as err:
                # Re-raise the same exception type with the file location
                # added, matching the "expected JSON object" error below.
                raise json.JSONDecodeError(
                    f"{path}:{line_no}: {err.msg}", err.doc, err.pos
                ) from err
            if not isinstance(value, dict):
                raise ValueError(f"{path}:{line_no}: expected JSON object")
            rows.append(value)
    return rows
23
+
24
+
25
def instance_ids_from_jsonl(path: Path | None) -> set[str] | None:
    """Gather the ``instance_id`` of every row in a JSONL file.

    Args:
        path: JSONL file to read, or ``None`` when no file was supplied.

    Returns:
        The set of instance ids found, or ``None`` if ``path`` is ``None``.

    Raises:
        ValueError: A row has no ``instance_id``, or its value is not a
            non-empty string.
    """
    if path is None:
        return None

    collected: set[str] = set()
    for record in read_jsonl(path):
        candidate = record.get("instance_id")
        if isinstance(candidate, str) and candidate:
            collected.add(candidate)
            continue
        raise ValueError(f"{path}: row missing non-empty instance_id")
    return collected
35
+
36
+
37
def collect_from_root(root: Path, patch_name: str, keep: set[str] | None) -> list[tuple[str, Path]]:
    """Find patch files located exactly one directory level below ``root``.

    The name of the directory that holds a patch file is used as its
    instance id.

    Args:
        root: Directory whose immediate subdirectories are searched.
        patch_name: File name to look for inside each subdirectory.
        keep: When not ``None``, only instance ids in this set are returned.

    Returns:
        ``(instance_id, patch_path)`` pairs ordered by sorted patch path.
    """
    candidates = sorted(root.glob(f"*/{patch_name}"))
    return [
        (found.parent.name, found)
        for found in candidates
        if keep is None or found.parent.name in keep
    ]
45
+
46
+
47
def main() -> int:
    """Collect per-instance patch files into one prediction JSONL file.

    Command-line arguments:
        --patch-root: Directory containing one subdirectory per instance.
        --patch-name: Patch file name inside each subdirectory
            (default ``patch.diff``).
        --instances-jsonl: Optional JSONL file; only instance ids listed in
            it are collected.
        --model-name: Value written to ``model_name_or_path`` in every row.
        --out: Output JSONL path; parent directories are created as needed.
        --allow-empty: Skip blank patches instead of failing on them.

    A JSON report of what was written is printed to stdout.

    Returns:
        ``0`` on success.

    Raises:
        ValueError: No patch files were collected, or a patch is blank and
            ``--allow-empty`` was not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--patch-root", required=True, type=Path)
    parser.add_argument("--patch-name", default="patch.diff")
    parser.add_argument("--instances-jsonl", type=Path, help="Optional filter/validation set.")
    parser.add_argument("--model-name", required=True)
    parser.add_argument("--out", required=True, type=Path)
    parser.add_argument("--allow-empty", action="store_true")
    args = parser.parse_args()

    keep = instance_ids_from_jsonl(args.instances_jsonl)
    patches = collect_from_root(args.patch_root, args.patch_name, keep)
    if not patches:
        raise ValueError(f"no {args.patch_name} files found under {args.patch_root}")

    # Read and validate every patch before touching the output file, so that
    # a rejected empty patch cannot leave a truncated predictions file behind.
    prediction_lines: list[str] = []
    skipped_empty: list[str] = []
    for instance_id, patch_path in patches:
        patch = patch_path.read_text(encoding="utf8")
        if not patch.strip():
            if args.allow_empty:
                skipped_empty.append(instance_id)
                continue
            raise ValueError(f"empty patch for {instance_id}: {patch_path}")
        prediction_lines.append(
            json.dumps(
                {
                    "instance_id": instance_id,
                    "model_name_or_path": args.model_name,
                    "model_patch": patch,
                }
            )
            + "\n"
        )

    args.out.parent.mkdir(parents=True, exist_ok=True)
    with args.out.open("w", encoding="utf8") as f:
        f.writelines(prediction_lines)

    report = {
        "patch_root": str(args.patch_root),
        "patch_name": args.patch_name,
        "model_name_or_path": args.model_name,
        "out": str(args.out),
        "predictions_written": len(prediction_lines),
        "empty_skipped": skipped_empty,
    }
    print(json.dumps(report, indent=2))
    return 0
95
+
96
+
97
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)