devlyn-cli 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/CLAUDE.md +1 -1
  2. package/README.md +1 -1
  3. package/benchmark/auto-resolve/README.md +318 -2
  4. package/benchmark/auto-resolve/RUBRIC.md +6 -0
  5. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/NOTES.md +63 -0
  6. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/expected.json +60 -0
  7. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/metadata.json +10 -0
  8. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/setup.sh +17 -0
  9. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/spec.md +52 -0
  10. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/task.txt +9 -0
  11. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/verifiers/invalid.js +29 -0
  12. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/verifiers/parallel.js +50 -0
  13. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/NOTES.md +70 -0
  14. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/expected.json +52 -0
  15. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/metadata.json +10 -0
  16. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/setup.sh +171 -0
  17. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/spec.md +51 -0
  18. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/task.txt +9 -0
  19. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/NOTES.md +83 -0
  20. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/expected.json +74 -0
  21. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/metadata.json +10 -0
  22. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/setup.sh +251 -0
  23. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/spec.md +58 -0
  24. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/task.txt +13 -0
  25. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/verifiers/replay-malformed-body.js +64 -0
  26. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/NOTES.md +98 -0
  27. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/expected.json +46 -0
  28. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/metadata.json +10 -0
  29. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/setup.sh +336 -0
  30. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/spec.md +52 -0
  31. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/task.txt +9 -0
  32. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/NOTES.md +26 -0
  33. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/expected.json +64 -0
  34. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/metadata.json +10 -0
  35. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/setup.sh +32 -0
  36. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/spec.md +58 -0
  37. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/task.txt +7 -0
  38. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/verifiers/exact-success.js +54 -0
  39. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/verifiers/no-hardcoded-pricing.js +47 -0
  40. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/verifiers/stock-error.js +45 -0
  41. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/NOTES.md +27 -0
  42. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/expected.json +62 -0
  43. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/metadata.json +10 -0
  44. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/setup.sh +2 -0
  45. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/spec.md +62 -0
  46. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/task.txt +7 -0
  47. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/verifiers/error-order.js +55 -0
  48. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/verifiers/priority-blocked.js +48 -0
  49. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/NOTES.md +27 -0
  50. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/expected.json +56 -0
  51. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/metadata.json +10 -0
  52. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/setup.sh +2 -0
  53. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/spec.md +65 -0
  54. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/task.txt +7 -0
  55. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/verifiers/conflicting-duplicate.js +34 -0
  56. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/verifiers/idempotent-close.js +41 -0
  57. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/NOTES.md +27 -0
  58. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/expected.json +56 -0
  59. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/metadata.json +10 -0
  60. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/setup.sh +2 -0
  61. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/spec.md +71 -0
  62. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/task.txt +7 -0
  63. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/verifiers/priority-rollback.js +64 -0
  64. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/verifiers/single-warehouse-fefo.js +66 -0
  65. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/NOTES.md +28 -0
  66. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/expected.json +66 -0
  67. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/metadata.json +10 -0
  68. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/setup.sh +36 -0
  69. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/spec.md +65 -0
  70. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/task.txt +7 -0
  71. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/verifiers/catalog-source.js +57 -0
  72. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/verifiers/exact-success.js +63 -0
  73. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/verifiers/stock-error.js +34 -0
  74. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/NOTES.md +25 -0
  75. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/expected.json +68 -0
  76. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/metadata.json +10 -0
  77. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/setup.sh +17 -0
  78. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/spec.md +69 -0
  79. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/task.txt +7 -0
  80. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/conflicting-duplicate.js +29 -0
  81. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/exact-payout.js +58 -0
  82. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/verifiers/rules-source.js +56 -0
  83. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/NOTES.md +24 -0
  84. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/expected.json +66 -0
  85. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/metadata.json +10 -0
  86. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/setup.sh +22 -0
  87. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/spec.md +62 -0
  88. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/task.txt +9 -0
  89. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/verifiers/exact-success.js +48 -0
  90. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/verifiers/insufficient-balance.js +36 -0
  91. package/benchmark/auto-resolve/fixtures/F27-cli-gift-card-redemption/verifiers/rules-source.js +55 -0
  92. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/NOTES.md +20 -0
  93. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/expected.json +66 -0
  94. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/metadata.json +10 -0
  95. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/setup.sh +23 -0
  96. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/spec.md +66 -0
  97. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/task.txt +11 -0
  98. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/verifiers/exact-success.js +44 -0
  99. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/verifiers/rules-source.js +58 -0
  100. package/benchmark/auto-resolve/fixtures/F28-cli-rental-quote-rules/verifiers/unavailable-inventory.js +35 -0
  101. package/benchmark/auto-resolve/fixtures/SCHEMA.md +13 -1
  102. package/benchmark/auto-resolve/scripts/collect-swebench-predictions.py +98 -0
  103. package/benchmark/auto-resolve/scripts/fetch-swebench-instances.py +111 -0
  104. package/benchmark/auto-resolve/scripts/frozen-verify-gate.py +289 -0
  105. package/benchmark/auto-resolve/scripts/full-pipeline-pair-gate.py +250 -0
  106. package/benchmark/auto-resolve/scripts/headroom-gate.py +147 -0
  107. package/benchmark/auto-resolve/scripts/judge.sh +82 -3
  108. package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-case.py +244 -0
  109. package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-corpus.py +118 -0
  110. package/benchmark/auto-resolve/scripts/prepare-swebench-solver-worktree.py +192 -0
  111. package/benchmark/auto-resolve/scripts/run-fixture.sh +234 -40
  112. package/benchmark/auto-resolve/scripts/run-frozen-verify-pair.sh +511 -0
  113. package/benchmark/auto-resolve/scripts/run-full-pipeline-pair-candidate.sh +162 -0
  114. package/benchmark/auto-resolve/scripts/run-headroom-candidate.sh +93 -0
  115. package/benchmark/auto-resolve/scripts/run-swebench-frozen-corpus.sh +209 -0
  116. package/benchmark/auto-resolve/scripts/run-swebench-solver-batch.sh +239 -0
  117. package/benchmark/auto-resolve/scripts/swebench-frozen-matrix.py +265 -0
  118. package/benchmark/auto-resolve/scripts/test-frozen-verify-gate.sh +192 -0
  119. package/benchmark/auto-resolve/scripts/test-full-pipeline-pair-gate.sh +131 -0
  120. package/benchmark/auto-resolve/scripts/test-headroom-gate.sh +84 -0
  121. package/benchmark/auto-resolve/scripts/test-swebench-frozen-case.sh +302 -0
  122. package/bin/devlyn.js +56 -10
  123. package/config/skills/_shared/archive_run.py +3 -0
  124. package/config/skills/_shared/codex-config.md +2 -2
  125. package/config/skills/_shared/codex-monitored.sh +72 -7
  126. package/config/skills/_shared/collect-codex-findings.py +125 -0
  127. package/config/skills/_shared/engine-preflight.md +1 -1
  128. package/config/skills/_shared/expected.schema.json +18 -0
  129. package/config/skills/_shared/spec-verify-check.py +312 -10
  130. package/config/skills/_shared/verify-merge-findings.py +327 -0
  131. package/config/skills/devlyn:ideate/SKILL.md +1 -1
  132. package/config/skills/devlyn:resolve/SKILL.md +62 -8
  133. package/config/skills/devlyn:resolve/references/phases/build-gate.md +1 -1
  134. package/config/skills/devlyn:resolve/references/phases/probe-derive.md +164 -0
  135. package/config/skills/devlyn:resolve/references/phases/verify.md +156 -4
  136. package/config/skills/devlyn:resolve/references/state-schema.md +10 -4
  137. package/package.json +1 -1
  138. package/scripts/lint-skills.sh +32 -0
@@ -0,0 +1,47 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+ const os = require('node:os');
5
+ const path = require('node:path');
6
+ const { spawnSync } = require('node:child_process');
7
+
8
+ const pricingPath = path.join(process.env.BENCH_WORKDIR, 'data', 'pricing.json');
9
+ const pricing = JSON.parse(fs.readFileSync(pricingPath, 'utf8'));
10
+ pricing.products.A.unit_cents = 2111;
11
+ pricing.products.A.stock = 5;
12
+ pricing.shipping_cents = 123;
13
+ fs.writeFileSync(pricingPath, JSON.stringify(pricing, null, 2));
14
+
15
+ const orderPath = path.join(os.tmpdir(), `quote-pricing-${process.pid}.json`);
16
+ fs.writeFileSync(orderPath, JSON.stringify({
17
+ state: 'OR',
18
+ coupon: null,
19
+ items: [{ sku: 'A', qty: 2 }]
20
+ }));
21
+
22
+ const cli = path.join(process.env.BENCH_WORKDIR, 'bin', 'cli.js');
23
+ const result = spawnSync('node', [cli, 'quote', '--input', orderPath], {
24
+ cwd: process.env.BENCH_WORKDIR,
25
+ encoding: 'utf8'
26
+ });
27
+
28
+ let quote;
29
+ try {
30
+ quote = JSON.parse(result.stdout);
31
+ } catch {
32
+ quote = null;
33
+ }
34
+
35
+ const ok = result.status === 0
36
+ && result.stderr === ''
37
+ && quote
38
+ && quote.subtotal_cents === 4222
39
+ && quote.shipping_cents === 123
40
+ && quote.total_cents === 4345;
41
+
42
+ console.log(JSON.stringify({
43
+ ok,
44
+ status: result.status,
45
+ quote
46
+ }));
47
+ process.exit(ok ? 0 : 1);
@@ -0,0 +1,45 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+ const os = require('node:os');
5
+ const path = require('node:path');
6
+ const { spawnSync } = require('node:child_process');
7
+
8
+ const orderPath = path.join(os.tmpdir(), `quote-stock-${process.pid}.json`);
9
+ fs.writeFileSync(orderPath, JSON.stringify({
10
+ state: 'NY',
11
+ coupon: null,
12
+ items: [
13
+ { sku: 'A', qty: 2 },
14
+ { sku: 'A', qty: 2 }
15
+ ]
16
+ }));
17
+
18
+ const cli = path.join(process.env.BENCH_WORKDIR, 'bin', 'cli.js');
19
+ const result = spawnSync('node', [cli, 'quote', '--input', orderPath], {
20
+ cwd: process.env.BENCH_WORKDIR,
21
+ encoding: 'utf8'
22
+ });
23
+
24
+ let err;
25
+ try {
26
+ err = JSON.parse(result.stderr);
27
+ } catch {
28
+ err = null;
29
+ }
30
+
31
+ const ok = result.status === 2
32
+ && result.stdout === ''
33
+ && err
34
+ && err.error === 'invalid_stock'
35
+ && err.sku === 'A'
36
+ && err.available === 3
37
+ && err.requested === 4;
38
+
39
+ console.log(JSON.stringify({
40
+ ok,
41
+ status: result.status,
42
+ stdout: result.stdout,
43
+ err
44
+ }));
45
+ process.exit(ok ? 0 : 1);
@@ -0,0 +1,27 @@
1
+ # F21 CLI scheduler priority
2
+
3
+ ## Failure mode
4
+
5
+ This fixture detects implementations that pass simple scheduling tests while
6
+ missing interaction rules: global priority ordering, earliest-fit placement,
7
+ blocked-interval exclusion, half-open time boundaries, and deterministic output
8
+ ordering.
9
+
10
+ ## Pipeline phase target
11
+
12
+ PLAN must preserve the ordering and interval invariants. IMPLEMENT must build a
13
+ small scheduling engine without adding dependencies. VERIFY should catch
14
+ counterexamples where local request order or naive overlap checks produce a
15
+ plausible but wrong schedule.
16
+
17
+ ## Why existing fixtures do not cover it
18
+
19
+ F16 covers checkout arithmetic. F10/F11/F12/F15 cover server behavior. None
20
+ exercise a CLI algorithm where the correct result depends on sorting,
21
+ interval arithmetic, and output ordering at once.
22
+
23
+ ## Retirement
24
+
25
+ Retire or replace when both bare and solo consistently exceed the headroom
26
+ thresholds, or if future fixtures cover the same interval-scheduling failure
27
+ mode with better discrimination.
@@ -0,0 +1,62 @@
1
+ {
2
+ "verification_commands": [
3
+ {
4
+ "cmd": "node --test tests/cli.test.js",
5
+ "exit_code": 0,
6
+ "stdout_contains": [],
7
+ "stdout_not_contains": ["not ok "]
8
+ },
9
+ {
10
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/priority-blocked.js\"",
11
+ "exit_code": 0,
12
+ "stdout_contains": ["\"ok\":true"],
13
+ "stdout_not_contains": [],
14
+ "contract_refs": [
15
+ "Process requests globally by `priority` descending, then `submitted_at` ascending, then `id` ascending.",
16
+ "For each request, place it on the requested resource at the earliest start minute that is greater than or equal to the request's requested `start`, fits wholly inside one availability window, does not overlap any blocked interval, and does not overlap any already accepted request on that resource.",
17
+ "A request may end exactly at a window end, but any one-minute overlap with a blocked interval is rejected or moved later."
18
+ ]
19
+ },
20
+ {
21
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/error-order.js\"",
22
+ "exit_code": 0,
23
+ "stdout_contains": ["\"ok\":true"],
24
+ "stdout_not_contains": [],
25
+ "contract_refs": [
26
+ "If the request references an unknown resource, reject with `{ \"id\": string, \"reason\": \"unknown_resource\" }`.",
27
+ "`rejected` is ordered in the original request input order.",
28
+ "Duplicate request ids are invalid input: exit `2`, one JSON error to stderr, no stdout."
29
+ ]
30
+ }
31
+ ],
32
+ "forbidden_patterns": [
33
+ {
34
+ "pattern": "catch\\s*\\([^)]*\\)\\s*\\{[^}]*return\\s+(null|undefined|''|\\{\\})",
35
+ "description": "silent catch returning fallback in scheduler path",
36
+ "files": ["bin/cli.js", "tests/cli.test.js"],
37
+ "severity": "disqualifier"
38
+ },
39
+ {
40
+ "pattern": "catch\\s*\\([^)]*\\)\\s*\\{\\s*\\}",
41
+ "description": "empty catch block",
42
+ "files": ["bin/cli.js", "tests/cli.test.js"],
43
+ "severity": "disqualifier"
44
+ },
45
+ {
46
+ "pattern": "catch\\s*(?:\\([^)]*\\))?\\s*\\{[^}]*ignore[^}]*\\}",
47
+ "description": "silent catch-ignore cleanup",
48
+ "files": ["bin/cli.js", "tests/cli.test.js"],
49
+ "severity": "disqualifier"
50
+ },
51
+ {
52
+ "pattern": "/\\*\\s*eslint-disable",
53
+ "description": "eslint-disable without scoped justification",
54
+ "files": ["bin/cli.js"],
55
+ "severity": "disqualifier"
56
+ }
57
+ ],
58
+ "required_files": ["bin/cli.js", "tests/cli.test.js"],
59
+ "forbidden_files": [],
60
+ "max_deps_added": 0,
61
+ "spec_output_files": ["bin/cli.js", "tests/cli.test.js"]
62
+ }
@@ -0,0 +1,10 @@
1
+ {
2
+ "id": "F21-cli-scheduler-priority",
3
+ "category": "high-risk",
4
+ "difficulty": "high",
5
+ "timeout_seconds": 1500,
6
+ "required_tools": ["node"],
7
+ "browser": false,
8
+ "deps_change_expected": false,
9
+ "intent": "Add a bench-cli schedule command that assigns appointment requests to resource availability windows using priority, submitted order, blocked intervals, and earliest-fit placement while producing exact JSON accept/reject results."
10
+ }
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env bash
2
+ set -e
@@ -0,0 +1,62 @@
1
+ ---
2
+ id: "F21-cli-scheduler-priority"
3
+ title: "Priority appointment scheduler"
4
+ status: planned
5
+ complexity: high
6
+ depends-on: []
7
+ ---
8
+
9
+ # F21 Priority appointment scheduler
10
+
11
+ ## Context
12
+
13
+ Add a `bench-cli schedule --input <path>` command that assigns appointment
14
+ requests to resource availability windows using priority, submitted order,
15
+ blocked intervals, and earliest-fit placement while producing exact JSON
16
+ accept/reject results.
17
+
18
+ The scheduler is used by downstream automation, so output shape, ordering, and
19
+ failure reasons must be deterministic.
20
+
21
+ ## Requirements
22
+
23
+ - [ ] `bench-cli schedule --input <path>` reads JSON shaped as `{ "resources": Array<Resource>, "requests": Array<Request> }`.
24
+ - [ ] Each resource has `{ "id": string, "windows": [{ "start": "HH:MM", "end": "HH:MM" }], "blocked": [{ "start": "HH:MM", "end": "HH:MM" }] }`. `blocked` may be empty.
25
+ - [ ] Each request has `{ "id": string, "resource": string, "start": "HH:MM", "duration_min": number, "priority": number, "submitted_at": string }`.
26
+ - [ ] Times are same-day 24-hour clock minutes. A range is half-open: `[start, end)`. A request ending exactly at a window end is allowed; overlapping a blocked range by one minute is not allowed.
27
+ - [ ] Process requests globally by `priority` descending, then `submitted_at` ascending, then `id` ascending.
28
+ - [ ] For each request, place it on the requested resource at the earliest start minute that is greater than or equal to the request's requested `start`, fits wholly inside one availability window, does not overlap any blocked interval, and does not overlap any already accepted request on that resource.
29
+ - [ ] Do not move a request to a different resource.
30
+ - [ ] If no placement exists, reject with `{ "id": string, "reason": "no_slot" }`.
31
+ - [ ] If the request references an unknown resource, reject with `{ "id": string, "reason": "unknown_resource" }`.
32
+ - [ ] Invalid top-level shape, invalid time strings, non-positive or non-integer `duration_min`, or duplicate request ids exits `2`, writes exactly one JSON error object to stderr, and writes nothing to stdout.
33
+ - [ ] On success, write exactly one JSON object to stdout and no stderr. Keys: `scheduled`, `rejected`.
34
+ - [ ] `scheduled` is ordered by actual scheduled start time ascending, then resource id ascending, then request id ascending. Each row has keys `id`, `resource`, `start`, `end`.
35
+ - [ ] `rejected` is ordered in the original request input order. Each row has keys `id`, `reason`.
36
+ - [ ] `tests/cli.test.js` is updated. Existing tests still pass AND at least two scheduler tests cover one success case and one rejection case.
37
+
38
+ ## Constraints
39
+
40
+ - **No new npm dependencies.**
41
+ - **No silent catches in implementation or tests.** Invalid input and file-read failures must surface as JSON errors with exit `2`; test cleanup should use explicit safe primitives such as `fs.rmSync(path, { force: true })`, not `catch { /* ignore */ }`.
42
+ - **No mutation of the input file.**
43
+ - **No extra stdout/stderr text** on the success path; downstream tooling parses stdout as JSON.
44
+ - **Touch only `bin/cli.js` and `tests/cli.test.js`.**
45
+ - **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
46
+
47
+ ## Out of Scope
48
+
49
+ - Multiple-day scheduling.
50
+ - Time zones.
51
+ - Recurring appointments.
52
+ - Persisting scheduled results.
53
+ - Touching `server/`, `web/`, or `tests/server.test.js`.
54
+
55
+ ## Verification
56
+
57
+ - `node --test tests/cli.test.js` exits 0.
58
+ - A higher-priority later-submitted request can take the first slot, forcing a lower-priority earlier-submitted request to the next non-overlapping slot.
59
+ - A request may end exactly at a window end, but any one-minute overlap with a blocked interval is rejected or moved later.
60
+ - Unknown resources are reported in `rejected` without aborting the whole run.
61
+ - Duplicate request ids are invalid input: exit `2`, one JSON error to stderr, no stdout.
62
+ - `git diff --stat` shows only `bin/cli.js` and `tests/cli.test.js` touched.
@@ -0,0 +1,7 @@
1
+ Add a `schedule` command to `bench-cli` so users can run `bench-cli schedule --input <path>` with a JSON file containing resources and appointment requests. It should assign requests to resource availability windows using priority, submitted order, blocked intervals, and earliest-fit placement, then print one exact JSON result with `scheduled` and `rejected` arrays.
2
+
3
+ The input has resources with `id`, `windows`, and `blocked`, and requests with `id`, `resource`, `start`, `duration_min`, `priority`, and `submitted_at`. Times are same-day `HH:MM` values. Process requests by priority descending, then submitted_at ascending, then id ascending. A placement must stay on the requested resource, start no earlier than the requested start, fit inside one window, avoid blocked intervals, and avoid already accepted requests.
4
+
5
+ On success, stdout must be exactly parseable JSON and stderr must be empty. `scheduled` rows must be ordered by actual scheduled start time, then resource id, then request id. `rejected` rows must stay in original request input order. Invalid input, duplicate request ids, invalid times, bad durations, or file-read failures should exit `2`, print exactly one JSON error object to stderr, and print nothing to stdout.
6
+
7
+ Update `tests/cli.test.js` so existing tests still pass and add at least two scheduler tests: one success case and one rejection case. Do not use silent catches in implementation or test cleanup; use explicit safe primitives such as `fs.rmSync(path, { force: true })` instead of `catch { /* ignore */ }`. No new npm dependencies. Only touch `bin/cli.js` and `tests/cli.test.js`.
@@ -0,0 +1,55 @@
1
+ 'use strict';
2
+ const assert = require('node:assert');
3
+ const { execFileSync, spawnSync } = require('node:child_process');
4
+ const fs = require('node:fs');
5
+ const os = require('node:os');
6
+ const path = require('node:path');
7
+
8
+ const work = process.env.BENCH_WORKDIR || process.cwd();
9
+ const cli = path.join(work, 'bin', 'cli.js');
10
+ const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'f21-errors-'));
11
+
12
+ const unknownInput = path.join(tmp, 'unknown.json');
13
+ fs.writeFileSync(unknownInput, JSON.stringify({
14
+ resources: [
15
+ { id: 'r1', windows: [{ start: '10:00', end: '10:30' }], blocked: [] }
16
+ ],
17
+ requests: [
18
+ { id: 'unknown-first', resource: 'missing', start: '10:00', duration_min: 5, priority: 9, submitted_at: '2026-01-01T10:00:00Z' },
19
+ { id: 'too-long', resource: 'r1', start: '10:00', duration_min: 45, priority: 8, submitted_at: '2026-01-01T10:01:00Z' },
20
+ { id: 'ok', resource: 'r1', start: '10:00', duration_min: 30, priority: 7, submitted_at: '2026-01-01T10:02:00Z' }
21
+ ]
22
+ }), 'utf8');
23
+
24
+ const out = execFileSync('node', [cli, 'schedule', '--input', unknownInput], {
25
+ cwd: work,
26
+ encoding: 'utf8',
27
+ stdio: ['ignore', 'pipe', 'pipe']
28
+ });
29
+ const parsed = JSON.parse(out);
30
+ assert.deepStrictEqual(parsed.scheduled, [
31
+ { id: 'ok', resource: 'r1', start: '10:00', end: '10:30' }
32
+ ]);
33
+ assert.deepStrictEqual(parsed.rejected, [
34
+ { id: 'unknown-first', reason: 'unknown_resource' },
35
+ { id: 'too-long', reason: 'no_slot' }
36
+ ]);
37
+
38
+ const dupInput = path.join(tmp, 'dup.json');
39
+ fs.writeFileSync(dupInput, JSON.stringify({
40
+ resources: [
41
+ { id: 'r1', windows: [{ start: '10:00', end: '11:00' }], blocked: [] }
42
+ ],
43
+ requests: [
44
+ { id: 'dup', resource: 'r1', start: '10:00', duration_min: 10, priority: 1, submitted_at: '2026-01-01T10:00:00Z' },
45
+ { id: 'dup', resource: 'r1', start: '10:10', duration_min: 10, priority: 1, submitted_at: '2026-01-01T10:01:00Z' }
46
+ ]
47
+ }), 'utf8');
48
+ const dup = spawnSync('node', [cli, 'schedule', '--input', dupInput], {
49
+ cwd: work,
50
+ encoding: 'utf8'
51
+ });
52
+ assert.strictEqual(dup.status, 2);
53
+ assert.strictEqual(dup.stdout, '');
54
+ assert.doesNotThrow(() => JSON.parse(dup.stderr));
55
+ console.log(JSON.stringify({ ok: true }));
@@ -0,0 +1,48 @@
1
+ 'use strict';
2
+ const assert = require('node:assert');
3
+ const { execFileSync } = require('node:child_process');
4
+ const fs = require('node:fs');
5
+ const os = require('node:os');
6
+ const path = require('node:path');
7
+
8
+ const work = process.env.BENCH_WORKDIR || process.cwd();
9
+ const cli = path.join(work, 'bin', 'cli.js');
10
+ const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'f21-schedule-'));
11
+ const input = path.join(tmp, 'input.json');
12
+
13
+ fs.writeFileSync(input, JSON.stringify({
14
+ resources: [
15
+ {
16
+ id: 'room-a',
17
+ windows: [{ start: '09:00', end: '10:00' }],
18
+ blocked: [{ start: '09:30', end: '09:40' }]
19
+ },
20
+ {
21
+ id: 'room-b',
22
+ windows: [{ start: '09:00', end: '09:45' }],
23
+ blocked: []
24
+ }
25
+ ],
26
+ requests: [
27
+ { id: 'low-first', resource: 'room-a', start: '09:00', duration_min: 30, priority: 1, submitted_at: '2026-01-01T09:00:00Z' },
28
+ { id: 'high-second', resource: 'room-a', start: '09:00', duration_min: 30, priority: 10, submitted_at: '2026-01-01T09:05:00Z' },
29
+ { id: 'edge-ok', resource: 'room-b', start: '09:15', duration_min: 30, priority: 5, submitted_at: '2026-01-01T09:01:00Z' },
30
+ { id: 'blocked-one-minute', resource: 'room-a', start: '09:29', duration_min: 2, priority: 4, submitted_at: '2026-01-01T09:02:00Z' }
31
+ ]
32
+ }), 'utf8');
33
+
34
+ const stdout = execFileSync('node', [cli, 'schedule', '--input', input], {
35
+ cwd: work,
36
+ encoding: 'utf8',
37
+ stdio: ['ignore', 'pipe', 'pipe']
38
+ });
39
+ const parsed = JSON.parse(stdout);
40
+ assert.deepStrictEqual(parsed.scheduled, [
41
+ { id: 'high-second', resource: 'room-a', start: '09:00', end: '09:30' },
42
+ { id: 'edge-ok', resource: 'room-b', start: '09:15', end: '09:45' },
43
+ { id: 'blocked-one-minute', resource: 'room-a', start: '09:40', end: '09:42' }
44
+ ]);
45
+ assert.deepStrictEqual(parsed.rejected, [
46
+ { id: 'low-first', reason: 'no_slot' }
47
+ ]);
48
+ console.log(JSON.stringify({ ok: true }));
@@ -0,0 +1,27 @@
1
+ # F22 CLI ledger close
2
+
3
+ ## Failure mode
4
+
5
+ This fixture detects finance-style reconciliation mistakes: applying duplicate
6
+ transactions twice, silently accepting conflicting duplicate ids, validating
7
+ only while mutating balances, rejecting negative balances that are explicitly
8
+ allowed, or producing nondeterministic account ordering.
9
+
10
+ ## Pipeline phase target
11
+
12
+ PLAN must separate validation, idempotency, chronological application, and
13
+ output formatting. IMPLEMENT must keep cents as integers and avoid fallback
14
+ error handling. VERIFY should catch duplicate-id counterexamples and negative
15
+ balance behavior.
16
+
17
+ ## Why existing fixtures do not cover it
18
+
19
+ F16 covers order quote arithmetic, but not ledger idempotency or full-input
20
+ validation before mutation. F21 covers interval scheduling. Server fixtures
21
+ cover API behavior rather than CLI reconciliation.
22
+
23
+ ## Retirement
24
+
25
+ Retire or replace if both bare and solo consistently score above the headroom
26
+ thresholds, or if a future ledger fixture captures the same duplicate-id and
27
+ validation-before-mutation risks with stronger discrimination.
@@ -0,0 +1,56 @@
1
+ {
2
+ "verification_commands": [
3
+ {
4
+ "cmd": "node --test tests/cli.test.js",
5
+ "exit_code": 0,
6
+ "stdout_contains": [],
7
+ "stdout_not_contains": ["not ok "]
8
+ },
9
+ {
10
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/idempotent-close.js\"",
11
+ "exit_code": 0,
12
+ "stdout_contains": ["\"ok\":true"],
13
+ "stdout_not_contains": [],
14
+ "contract_refs": [
15
+ "Exact duplicate transactions are idempotent: apply the first copy once and count later exact copies in `duplicates_ignored`.",
16
+ "Apply accepted unique transactions in chronological order by `occurred_at` ascending, then `id` ascending.",
17
+ "Negative closing balances are allowed and must be reported, not rejected."
18
+ ]
19
+ },
20
+ {
21
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/conflicting-duplicate.js\"",
22
+ "exit_code": 0,
23
+ "stdout_contains": ["\"ok\":true"],
24
+ "stdout_not_contains": [],
25
+ "contract_refs": [
26
+ "Conflicting duplicate transaction ids are invalid input. A duplicate id is conflicting when any field other than `id` differs from the first transaction with that id.",
27
+ "Invalid input exits `2`, writes exactly one JSON error object to stderr, and writes nothing to stdout.",
28
+ "Conflicting duplicate transaction ids use the exact stderr JSON shape `{ \"error\": \"conflicting_duplicate\", \"id\": string }`."
29
+ ]
30
+ }
31
+ ],
32
+ "forbidden_patterns": [
33
+ {
34
+ "pattern": "catch\\s*\\([^)]*\\)\\s*\\{[^}]*return\\s+(null|undefined|''|\\{\\})",
35
+ "description": "silent catch returning fallback in ledger path",
36
+ "files": ["bin/cli.js"],
37
+ "severity": "disqualifier"
38
+ },
39
+ {
40
+ "pattern": "catch\\s*\\([^)]*\\)\\s*\\{\\s*\\}",
41
+ "description": "empty catch block",
42
+ "files": ["bin/cli.js"],
43
+ "severity": "disqualifier"
44
+ },
45
+ {
46
+ "pattern": "/\\*\\s*eslint-disable",
47
+ "description": "eslint-disable without scoped justification",
48
+ "files": ["bin/cli.js"],
49
+ "severity": "disqualifier"
50
+ }
51
+ ],
52
+ "required_files": ["bin/cli.js", "tests/cli.test.js"],
53
+ "forbidden_files": [],
54
+ "max_deps_added": 0,
55
+ "spec_output_files": ["bin/cli.js", "tests/cli.test.js"]
56
+ }
@@ -0,0 +1,10 @@
1
+ {
2
+ "id": "F22-cli-ledger-close",
3
+ "category": "high-risk",
4
+ "difficulty": "high",
5
+ "timeout_seconds": 1500,
6
+ "required_tools": ["node"],
7
+ "browser": false,
8
+ "deps_change_expected": false,
9
+ "intent": "Add a bench-cli ledger-close command that reads account opening balances and transaction events, applies chronological idempotent ledger rules, rejects conflicting duplicate transaction ids, and prints exact closing balances and audit counts."
10
+ }
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env bash
2
+ set -e
@@ -0,0 +1,65 @@
1
+ ---
2
+ id: "F22-cli-ledger-close"
3
+ title: "Ledger close command"
4
+ status: planned
5
+ complexity: high
6
+ depends-on: []
7
+ ---
8
+
9
+ # F22 Ledger close command
10
+
11
+ ## Context
12
+
13
+ Add a `bench-cli ledger-close --input <path>` command that reads account
14
+ opening balances and transaction events, applies chronological idempotent
15
+ ledger rules, rejects conflicting duplicate transaction ids, and prints exact
16
+ closing balances and audit counts.
17
+
18
+ The command is used in finance-style reconciliation. Determinism matters more
19
+ than convenience: every amount is integer cents, every accepted event is applied
20
+ once, and duplicate ids must not silently corrupt balances.
21
+
22
+ ## Requirements
23
+
24
+ - [ ] `bench-cli ledger-close --input <path>` reads JSON shaped as `{ "accounts": Array<Account>, "transactions": Array<Transaction> }`.
25
+ - [ ] Each account has `{ "id": string, "currency": string, "opening_cents": number }`.
26
+ - [ ] Each transaction has `{ "id": string, "account": string, "currency": string, "kind": "debit" | "credit", "amount_cents": number, "occurred_at": string }`.
27
+ - [ ] Validate before applying balances: account ids are unique, transaction ids are non-empty strings, account references exist, currencies match the referenced account, `amount_cents` is a positive integer, and `kind` is either `debit` or `credit`.
28
+ - [ ] Conflicting duplicate transaction ids are invalid input. A duplicate id is conflicting when any field other than `id` differs from the first transaction with that id.
29
+ - [ ] Exact duplicate transactions are idempotent: apply the first copy once and count later exact copies in `duplicates_ignored`.
30
+ - [ ] Apply accepted unique transactions in chronological order by `occurred_at` ascending, then `id` ascending.
31
+ - [ ] A `debit` subtracts `amount_cents`; a `credit` adds `amount_cents`.
32
+ - [ ] Negative closing balances are allowed and must be reported, not rejected.
33
+ - [ ] Invalid input exits `2`, writes exactly one JSON error object to stderr, and writes nothing to stdout.
34
+ - [ ] Conflicting duplicate transaction ids use the exact stderr JSON shape `{ "error": "conflicting_duplicate", "id": string }`.
35
+ - [ ] On success, write exactly one JSON object to stdout and no stderr. Keys: `accounts`, `applied_count`, `duplicates_ignored`.
36
+ - [ ] Output `accounts` sorted by account id ascending. Each row has keys `id`, `currency`, `closing_cents`.
37
+ - [ ] `tests/cli.test.js` is updated: existing tests still pass, and at least two ledger tests are added, covering one success case and one invalid-input case.
38
+
39
+ ## Constraints
40
+
41
+ - **No new npm dependencies.**
42
+ - **No floating-money output.** All amounts are integer cents.
43
+ - **No silent catches.** Invalid input and file-read failures must surface as JSON errors with exit `2`.
44
+ - **No mutation of the input file.**
45
+ - **No extra stdout/stderr text** on the success path; downstream tooling parses stdout as JSON.
46
+ - **Touch only `bin/cli.js` and `tests/cli.test.js`.**
47
+ - **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
48
+
49
+ ## Out of Scope
50
+
51
+ - Exchange rates.
52
+ - Decimal currency parsing.
53
+ - Persistence or database writes.
54
+ - Account creation from transactions.
55
+ - Touching `server/`, `web/`, or `tests/server.test.js`.
56
+
57
+ ## Verification
58
+
59
+ - `node --test tests/cli.test.js` exits 0.
60
+ - Exact duplicate transactions are counted in `duplicates_ignored` and applied once.
61
+ - Conflicting duplicate transaction ids exit `2`, write one JSON error to stderr, and write no stdout.
62
+ - Conflicting duplicate transaction ids use the exact stderr JSON shape `{ "error": "conflicting_duplicate", "id": string }`.
63
+ - Transactions are applied in chronological order by `occurred_at` ascending, then `id` ascending.
64
+ - Negative closing balances are allowed and appear in output.
65
+ - `git diff --stat` shows only `bin/cli.js` and `tests/cli.test.js` touched.
@@ -0,0 +1,7 @@
1
+ Add a `ledger-close` command to `bench-cli` so users can run `bench-cli ledger-close --input <path>` with a JSON file containing account opening balances and transaction events. It should apply chronological idempotent ledger rules, reject conflicting duplicate transaction ids, and print exact closing balances and audit counts.
2
+
3
+ The input has `accounts` with `id`, `currency`, and `opening_cents`, and `transactions` with `id`, `account`, `currency`, `kind`, `amount_cents`, and `occurred_at`. Validate the full input before applying balances. Exact duplicate transactions should be applied only once and counted in `duplicates_ignored`; duplicate ids with any conflicting field should make the whole input invalid. Apply accepted unique transactions by `occurred_at` ascending, then `id` ascending. Debits subtract, credits add, and negative closing balances are allowed.
4
+
5
+ On success, stdout must contain exactly one parseable JSON object and stderr must be empty. The output has `accounts`, `applied_count`, and `duplicates_ignored`; accounts are sorted by `id` ascending and each row has `id`, `currency`, and `closing_cents`. Invalid input or file-read failures should exit `2`, print exactly one JSON error object to stderr, and print nothing to stdout. Conflicting duplicate transaction ids must use the exact stderr JSON shape `{ "error": "conflicting_duplicate", "id": string }`.
6
+
7
+ Update `tests/cli.test.js` so existing tests still pass and add at least two ledger tests: one success case and one invalid-input case. No new npm dependencies. Only touch `bin/cli.js` and `tests/cli.test.js`.
@@ -0,0 +1,34 @@
1
+ 'use strict';
2
+ const assert = require('node:assert');
3
+ const { spawnSync } = require('node:child_process');
4
+ const fs = require('node:fs');
5
+ const os = require('node:os');
6
+ const path = require('node:path');
7
+
8
+ const work = process.env.BENCH_WORKDIR || process.cwd();
9
+ const cli = path.join(work, 'bin', 'cli.js');
10
+ const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'f22-conflict-'));
11
+ const input = path.join(tmp, 'ledger.json');
12
+
13
+ fs.writeFileSync(input, JSON.stringify({
14
+ accounts: [
15
+ { id: 'cash', currency: 'USD', opening_cents: 1000 }
16
+ ],
17
+ transactions: [
18
+ { id: 'same-id', account: 'cash', currency: 'USD', kind: 'credit', amount_cents: 100, occurred_at: '2026-01-01T00:00:00Z' },
19
+ { id: 'same-id', account: 'cash', currency: 'USD', kind: 'credit', amount_cents: 101, occurred_at: '2026-01-01T00:00:00Z' }
20
+ ]
21
+ }), 'utf8');
22
+
23
+ const proc = spawnSync('node', [cli, 'ledger-close', '--input', input], {
24
+ cwd: work,
25
+ encoding: 'utf8'
26
+ });
27
+ assert.strictEqual(proc.status, 2);
28
+ assert.strictEqual(proc.stdout, '');
29
+ const parsed = JSON.parse(proc.stderr);
30
+ assert.deepStrictEqual(parsed, {
31
+ error: 'conflicting_duplicate',
32
+ id: 'same-id'
33
+ });
34
+ console.log(JSON.stringify({ ok: true }));
@@ -0,0 +1,41 @@
1
+ 'use strict';
2
+ const assert = require('node:assert');
3
+ const { execFileSync } = require('node:child_process');
4
+ const fs = require('node:fs');
5
+ const os = require('node:os');
6
+ const path = require('node:path');
7
+
8
+ const work = process.env.BENCH_WORKDIR || process.cwd();
9
+ const cli = path.join(work, 'bin', 'cli.js');
10
+ const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'f22-ledger-'));
11
+ const input = path.join(tmp, 'ledger.json');
12
+
13
+ const tx = { id: 't-dup', account: 'cash', currency: 'USD', kind: 'debit', amount_cents: 1200, occurred_at: '2026-01-03T00:00:00Z' };
14
+ fs.writeFileSync(input, JSON.stringify({
15
+ accounts: [
16
+ { id: 'cash', currency: 'USD', opening_cents: 1000 },
17
+ { id: 'receivable', currency: 'USD', opening_cents: 200 }
18
+ ],
19
+ transactions: [
20
+ { id: 't-late', account: 'receivable', currency: 'USD', kind: 'credit', amount_cents: 50, occurred_at: '2026-01-05T00:00:00Z' },
21
+ tx,
22
+ { ...tx },
23
+ { id: 't-early', account: 'cash', currency: 'USD', kind: 'credit', amount_cents: 300, occurred_at: '2026-01-01T00:00:00Z' }
24
+ ]
25
+ }), 'utf8');
26
+
27
+ const stdout = execFileSync('node', [cli, 'ledger-close', '--input', input], {
28
+ cwd: work,
29
+ encoding: 'utf8',
30
+ stdio: ['ignore', 'pipe', 'pipe']
31
+ });
32
+ const parsed = JSON.parse(stdout);
33
+ assert.deepStrictEqual(parsed, {
34
+ accounts: [
35
+ { id: 'cash', currency: 'USD', closing_cents: 100 },
36
+ { id: 'receivable', currency: 'USD', closing_cents: 250 }
37
+ ],
38
+ applied_count: 3,
39
+ duplicates_ignored: 1
40
+ });
41
+ console.log(JSON.stringify({ ok: true }));