devlyn-cli 2.3.0 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. package/AGENTS.md +1 -1
  2. package/CLAUDE.md +2 -2
  3. package/README.md +82 -29
  4. package/benchmark/auto-resolve/BENCHMARK-DESIGN.md +61 -44
  5. package/benchmark/auto-resolve/BENCHMARK-RESULTS.md +341 -0
  6. package/benchmark/auto-resolve/README.md +307 -44
  7. package/benchmark/auto-resolve/RUBRIC.md +23 -14
  8. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/NOTES.md +7 -3
  9. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/NOTES.md +8 -3
  10. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/NOTES.md +8 -3
  11. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/NOTES.md +10 -4
  12. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/NOTES.md +10 -4
  13. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/NOTES.md +12 -0
  14. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/spec.md +6 -0
  15. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/NOTES.md +7 -4
  16. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/NOTES.md +12 -0
  17. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/spec.md +6 -0
  18. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/NOTES.md +8 -0
  19. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/NOTES.md +12 -0
  20. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/spec.md +6 -0
  21. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/NOTES.md +16 -4
  22. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/spec.md +7 -0
  23. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/NOTES.md +11 -5
  24. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/NOTES.md +8 -1
  25. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json +4 -2
  26. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md +1 -1
  27. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/NOTES.md +34 -0
  28. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/expected.json +57 -0
  29. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/metadata.json +10 -0
  30. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/setup.sh +2 -0
  31. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/spec.md +67 -0
  32. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/task.txt +7 -0
  33. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/verifiers/duplicate-event-error.js +35 -0
  34. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/verifiers/priority-transfer-rollback.js +53 -0
  35. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/NOTES.md +38 -0
  36. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/expected.json +57 -0
  37. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/metadata.json +10 -0
  38. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/setup.sh +2 -0
  39. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/spec.md +70 -0
  40. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/task.txt +3 -0
  41. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/verifiers/duplicate-renewal-error.js +42 -0
  42. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/verifiers/priority-credit-rollback.js +70 -0
  43. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/NOTES.md +10 -3
  44. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/NOTES.md +7 -0
  45. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/NOTES.md +5 -0
  46. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/NOTES.md +7 -0
  47. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/NOTES.md +3 -0
  48. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/spec.md +1 -1
  49. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/NOTES.md +15 -3
  50. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/spec.md +1 -1
  51. package/benchmark/auto-resolve/fixtures/SCHEMA.md +53 -7
  52. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/NOTES.md +37 -0
  53. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/RETIRED.md +13 -0
  54. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/expected.json +56 -0
  55. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/metadata.json +10 -0
  56. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/setup.sh +18 -0
  57. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/spec.md +69 -0
  58. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/task.txt +7 -0
  59. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/verifiers/exact-proration.js +48 -0
  60. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/verifiers/rules-source-and-conflict.js +79 -0
  61. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/NOTES.md +54 -0
  62. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/RETIRED.md +7 -0
  63. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/expected.json +67 -0
  64. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/metadata.json +10 -0
  65. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/setup.sh +2 -0
  66. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/spec.md +67 -0
  67. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/task.txt +5 -0
  68. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/verifiers/policy-precedence.js +72 -0
  69. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/verifiers/validation-and-immutability.js +43 -0
  70. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/verifiers/validation-boundary.js +116 -0
  71. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/NOTES.md +35 -0
  72. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/RETIRED.md +12 -0
  73. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/expected.json +58 -0
  74. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/metadata.json +10 -0
  75. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/setup.sh +2 -0
  76. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/spec.md +73 -0
  77. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/task.txt +17 -0
  78. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/verifiers/mixed-idempotent-settlement.js +53 -0
  79. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/verifiers/rejection-boundaries.js +74 -0
  80. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/NOTES.md +60 -0
  81. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/RETIRED.md +29 -0
  82. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/expected.json +73 -0
  83. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/metadata.json +10 -0
  84. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/setup.sh +28 -0
  85. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/spec.md +58 -0
  86. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/task.txt +5 -0
  87. package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/full-pipeline-pair-gate.json +82 -0
  88. package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/full-pipeline-pair-gate.md +18 -0
  89. package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/headroom-gate.json +46 -0
  90. package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/headroom-gate.md +17 -0
  91. package/benchmark/auto-resolve/run-real-benchmark.md +303 -0
  92. package/benchmark/auto-resolve/scripts/audit-headroom-rejections.py +441 -0
  93. package/benchmark/auto-resolve/scripts/audit-pair-evidence.py +1256 -0
  94. package/benchmark/auto-resolve/scripts/build-pair-eligible-manifest.py +147 -15
  95. package/benchmark/auto-resolve/scripts/check-f9-artifacts.py +28 -16
  96. package/benchmark/auto-resolve/scripts/collect-swebench-predictions.py +11 -1
  97. package/benchmark/auto-resolve/scripts/compile-report.py +208 -46
  98. package/benchmark/auto-resolve/scripts/fetch-swebench-instances.py +22 -4
  99. package/benchmark/auto-resolve/scripts/frozen-verify-gate.py +175 -30
  100. package/benchmark/auto-resolve/scripts/full-pipeline-pair-gate.py +408 -46
  101. package/benchmark/auto-resolve/scripts/headroom-gate.py +270 -39
  102. package/benchmark/auto-resolve/scripts/iter-0033c-compare.py +164 -33
  103. package/benchmark/auto-resolve/scripts/iter-0033c-l1-summary.py +97 -0
  104. package/benchmark/auto-resolve/scripts/judge-opus-pass.sh +150 -38
  105. package/benchmark/auto-resolve/scripts/judge.sh +153 -26
  106. package/benchmark/auto-resolve/scripts/oracle-scope-tier-a.py +12 -5
  107. package/benchmark/auto-resolve/scripts/oracle-scope-tier-b.py +25 -2
  108. package/benchmark/auto-resolve/scripts/pair-candidate-frontier.py +469 -0
  109. package/benchmark/auto-resolve/scripts/pair-plan-idgen.py +5 -5
  110. package/benchmark/auto-resolve/scripts/pair-plan-lint.py +9 -2
  111. package/benchmark/auto-resolve/scripts/pair-rejected-fixtures.sh +91 -0
  112. package/benchmark/auto-resolve/scripts/pair_evidence_contract.py +269 -0
  113. package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-case.py +39 -10
  114. package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-corpus.py +34 -4
  115. package/benchmark/auto-resolve/scripts/prepare-swebench-solver-worktree.py +23 -5
  116. package/benchmark/auto-resolve/scripts/recent-benchmark-summary.py +232 -0
  117. package/benchmark/auto-resolve/scripts/run-fixture.sh +118 -51
  118. package/benchmark/auto-resolve/scripts/run-frozen-verify-pair.sh +211 -39
  119. package/benchmark/auto-resolve/scripts/run-full-pipeline-pair-candidate.sh +335 -39
  120. package/benchmark/auto-resolve/scripts/run-headroom-candidate.sh +249 -6
  121. package/benchmark/auto-resolve/scripts/run-iter-0033c.sh +22 -48
  122. package/benchmark/auto-resolve/scripts/run-suite.sh +44 -7
  123. package/benchmark/auto-resolve/scripts/run-swebench-frozen-corpus.sh +120 -19
  124. package/benchmark/auto-resolve/scripts/run-swebench-solver-batch.sh +32 -14
  125. package/benchmark/auto-resolve/scripts/ship-gate.py +219 -50
  126. package/benchmark/auto-resolve/scripts/solo-ceiling-avoidance.py +53 -0
  127. package/benchmark/auto-resolve/scripts/solo-headroom-hypothesis.py +77 -0
  128. package/benchmark/auto-resolve/scripts/swebench-frozen-matrix.py +239 -26
  129. package/benchmark/auto-resolve/scripts/test-audit-headroom-rejections.sh +288 -0
  130. package/benchmark/auto-resolve/scripts/test-audit-pair-evidence.sh +1672 -0
  131. package/benchmark/auto-resolve/scripts/test-benchmark-arg-parsing.sh +933 -0
  132. package/benchmark/auto-resolve/scripts/test-build-pair-eligible-manifest.sh +491 -0
  133. package/benchmark/auto-resolve/scripts/test-check-f9-artifacts.sh +91 -0
  134. package/benchmark/auto-resolve/scripts/test-frozen-verify-gate.sh +328 -3
  135. package/benchmark/auto-resolve/scripts/test-full-pipeline-pair-gate.sh +497 -18
  136. package/benchmark/auto-resolve/scripts/test-headroom-gate.sh +331 -14
  137. package/benchmark/auto-resolve/scripts/test-iter-0033c-compare.sh +525 -0
  138. package/benchmark/auto-resolve/scripts/test-iter-0033c-l1-summary.sh +254 -0
  139. package/benchmark/auto-resolve/scripts/test-lint-fixtures.sh +580 -0
  140. package/benchmark/auto-resolve/scripts/test-pair-candidate-frontier.sh +591 -0
  141. package/benchmark/auto-resolve/scripts/test-run-full-pipeline-pair-candidate.sh +497 -0
  142. package/benchmark/auto-resolve/scripts/test-run-headroom-candidate.sh +401 -0
  143. package/benchmark/auto-resolve/scripts/test-run-swebench-solver-batch.sh +111 -0
  144. package/benchmark/auto-resolve/scripts/test-ship-gate.sh +1189 -0
  145. package/benchmark/auto-resolve/scripts/test-swebench-frozen-case.sh +924 -5
  146. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/NOTES.md +28 -0
  147. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/expected.json +63 -0
  148. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/metadata.json +10 -0
  149. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/setup.sh +3 -0
  150. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/spec.md +47 -0
  151. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/task.txt +1 -0
  152. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/NOTES.md +34 -0
  153. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/expected.json +53 -0
  154. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/metadata.json +10 -0
  155. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/setup.sh +3 -0
  156. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/spec.md +50 -0
  157. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/task.txt +1 -0
  158. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/verifiers/duplicate-order-error.js +27 -0
  159. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/verifiers/priority-stock-reservation.js +44 -0
  160. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/NOTES.md +34 -0
  161. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/expected.json +55 -0
  162. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/metadata.json +10 -0
  163. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/setup.sh +3 -0
  164. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/spec.md +52 -0
  165. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/task.txt +1 -0
  166. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/verifiers/duplicate-ticket-error.js +29 -0
  167. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/verifiers/priority-agent-assignment.js +48 -0
  168. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/NOTES.md +34 -0
  169. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/expected.json +55 -0
  170. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/metadata.json +10 -0
  171. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/setup.sh +3 -0
  172. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/spec.md +55 -0
  173. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/task.txt +1 -0
  174. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/verifiers/duplicate-return-error.js +43 -0
  175. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/verifiers/priority-return-routing.js +70 -0
  176. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/NOTES.md +37 -0
  177. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/expected.json +54 -0
  178. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/metadata.json +10 -0
  179. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/setup.sh +3 -0
  180. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/spec.md +59 -0
  181. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/task.txt +1 -0
  182. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/verifiers/credit-ledger-priority.js +98 -0
  183. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/verifiers/duplicate-charge-error.js +38 -0
  184. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/NOTES.md +36 -0
  185. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/expected.json +56 -0
  186. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/metadata.json +10 -0
  187. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/setup.sh +3 -0
  188. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/spec.md +59 -0
  189. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/task.txt +1 -0
  190. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/verifiers/duplicate-refund-error.js +41 -0
  191. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/verifiers/priority-refund-ledger.js +65 -0
  192. package/bin/devlyn.js +211 -18
  193. package/config/skills/_shared/adapters/README.md +3 -0
  194. package/config/skills/_shared/adapters/gpt-5-5.md +5 -1
  195. package/config/skills/_shared/adapters/opus-4-7.md +9 -1
  196. package/config/skills/_shared/archive_run.py +78 -6
  197. package/config/skills/_shared/codex-config.md +3 -2
  198. package/config/skills/_shared/codex-monitored.sh +46 -1
  199. package/config/skills/_shared/collect-codex-findings.py +20 -5
  200. package/config/skills/_shared/engine-preflight.md +1 -1
  201. package/config/skills/_shared/runtime-principles.md +5 -8
  202. package/config/skills/_shared/spec-verify-check.py +2664 -107
  203. package/config/skills/_shared/verify-merge-findings.py +1369 -19
  204. package/config/skills/devlyn:ideate/SKILL.md +7 -4
  205. package/config/skills/devlyn:ideate/references/elicitation.md +50 -4
  206. package/config/skills/devlyn:ideate/references/from-spec-mode.md +26 -4
  207. package/config/skills/devlyn:ideate/references/project-mode.md +20 -1
  208. package/config/skills/devlyn:ideate/references/spec-template.md +10 -1
  209. package/config/skills/devlyn:resolve/SKILL.md +49 -18
  210. package/config/skills/devlyn:resolve/references/free-form-mode.md +15 -0
  211. package/config/skills/devlyn:resolve/references/phases/build-gate.md +2 -2
  212. package/config/skills/devlyn:resolve/references/phases/probe-derive.md +74 -2
  213. package/config/skills/devlyn:resolve/references/phases/verify.md +62 -28
  214. package/config/skills/devlyn:resolve/references/state-schema.md +7 -4
  215. package/package.json +47 -2
  216. package/scripts/lint-fixtures.sh +349 -0
  217. package/scripts/lint-shadow-fixtures.sh +58 -0
  218. package/scripts/lint-skills.sh +3642 -92
  219. /package/{optional-skills → config/skills}/devlyn:design-ui/SKILL.md +0 -0
@@ -0,0 +1,580 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ ROOT="$(cd "$(dirname "$0")/../../.." && pwd)"
5
+ TMP="$(mktemp -d)"
6
+ trap 'rm -rf "$TMP"' EXIT
7
+
8
+ FIXTURES_DIR="$TMP/fixtures"
9
+ mkdir -p "$FIXTURES_DIR/F99-riskless-high-risk"
10
+ fixture="$FIXTURES_DIR/F99-riskless-high-risk"
11
+ CHECKER="$ROOT/benchmark/auto-resolve/scripts/solo-headroom-hypothesis.py"
12
+ CEILING_CHECKER="$ROOT/benchmark/auto-resolve/scripts/solo-ceiling-avoidance.py"
13
+
14
+ cat > "$TMP/weak-hypothesis.md" <<'EOF'
15
+ ## Solo-headroom hypothesis
16
+
17
+ A capable solo_claude baseline is expected to miss duplicate idempotency ordering.
18
+ EOF
19
+ if python3 "$CHECKER" "$TMP/weak-hypothesis.md"; then
20
+ echo "weak hypothesis without observable command must fail" >&2
21
+ exit 1
22
+ fi
23
+ cat > "$TMP/unrelated-backtick-hypothesis.md" <<'EOF'
24
+ ## Solo-headroom hypothesis
25
+
26
+ A capable solo_claude baseline is expected to miss duplicate idempotency ordering; implementation token `idempotency-key`.
27
+ EOF
28
+ if python3 "$CHECKER" "$TMP/unrelated-backtick-hypothesis.md"; then
29
+ echo "hypothesis with unrelated backtick must fail" >&2
30
+ exit 1
31
+ fi
32
+ cat > "$TMP/actionable-hypothesis.md" <<'EOF'
33
+ ## Solo-headroom hypothesis
34
+
35
+ A capable solo_claude baseline is expected to miss duplicate idempotency ordering; `node -e "process.exit(0)"` exposes the miss.
36
+ EOF
37
+ python3 "$CHECKER" "$TMP/actionable-hypothesis.md"
38
+ cat > "$TMP/docs-style-actionable-hypothesis.md" <<'EOF'
39
+ ## Solo-headroom hypothesis
40
+
41
+ Solo-headroom hypothesis: the spec must literally contain `solo_claude`, `miss`, and an observable command; `node -e "process.exit(0)"` exposes the miss.
42
+ EOF
43
+ python3 "$CHECKER" "$TMP/docs-style-actionable-hypothesis.md"
44
+ cat > "$TMP/actionable-expected.json" <<'EOF'
45
+ {
46
+ "verification_commands": [
47
+ {
48
+ "cmd": "node -e \"process.exit(0)\"",
49
+ "exit_code": 0
50
+ }
51
+ ]
52
+ }
53
+ EOF
54
+ python3 "$CHECKER" --expected-json "$TMP/actionable-expected.json" "$TMP/actionable-hypothesis.md"
55
+ python3 "$CHECKER" --expected-json "$TMP/actionable-expected.json" "$TMP/docs-style-actionable-hypothesis.md"
56
+ cat > "$TMP/other-expected.json" <<'EOF'
57
+ {
58
+ "verification_commands": [
59
+ {
60
+ "cmd": "node -e \"process.exit(1)\"",
61
+ "exit_code": 1
62
+ }
63
+ ]
64
+ }
65
+ EOF
66
+ if python3 "$CHECKER" --expected-json "$TMP/other-expected.json" "$TMP/actionable-hypothesis.md"; then
67
+ echo "hypothesis command must match expected.json verification command" >&2
68
+ exit 1
69
+ fi
70
+ printf '\xff\n' > "$TMP/non-utf8-hypothesis.md"
71
+ set +e
72
+ python3 "$CHECKER" "$TMP/non-utf8-hypothesis.md" > "$TMP/non-utf8-hypothesis.out" 2>&1
73
+ status=$?
74
+ set -e
75
+ [ "$status" -ne 0 ]
76
+ grep -Fq 'expected UTF-8 text' "$TMP/non-utf8-hypothesis.out"
77
+
78
+ cat > "$TMP/weak-solo-ceiling.md" <<'EOF'
79
+ ## Solo ceiling avoidance
80
+
81
+ This candidate mentions solo_claude but gives no control comparison.
82
+ EOF
83
+ if python3 "$CEILING_CHECKER" "$TMP/weak-solo-ceiling.md"; then
84
+ echo "weak solo ceiling avoidance must fail" >&2
85
+ exit 1
86
+ fi
87
+ cat > "$TMP/actionable-solo-ceiling.md" <<'EOF'
88
+ ## Solo ceiling avoidance
89
+
90
+ Unlike solo-saturated S2-S6 controls, this fixture should preserve
91
+ solo_claude headroom because it targets a multi-run state dependency.
92
+ EOF
93
+ python3 "$CEILING_CHECKER" "$TMP/actionable-solo-ceiling.md"
94
+ printf '\xff\n' > "$TMP/non-utf8-solo-ceiling.md"
95
+ set +e
96
+ python3 "$CEILING_CHECKER" "$TMP/non-utf8-solo-ceiling.md" > "$TMP/non-utf8-solo-ceiling.out" 2>&1
97
+ status=$?
98
+ set -e
99
+ [ "$status" -ne 0 ]
100
+ grep -Fq 'expected UTF-8 text' "$TMP/non-utf8-solo-ceiling.out"
101
+
102
# write_fixture INTENT
#
# (Re)creates the complete synthetic fixture under "$fixture":
#   metadata.json, spec.md, task.txt, expected.json, setup.sh, NOTES.md.
#
# Globals:
#   fixture (read) - fixture directory; must already exist.
# Arguments:
#   $1 - intent sentence. It is written verbatim to task.txt and expanded
#        into the "intent" field of metadata.json and the "## Context"
#        section of spec.md.
#
# Every file is written with a truncating '>' redirect, so a call also
# discards anything previously appended to spec.md or NOTES.md and returns
# the fixture to its baseline content.
#
# The metadata.json and spec.md here-docs use an unquoted EOF delimiter so
# that $intent is expanded; the expected.json, setup.sh and NOTES.md
# here-docs use a quoted 'EOF' delimiter and are written literally.
#
# NOTE(review): $intent is placed inside a JSON string without escaping, so
# an intent containing a double quote or backslash would yield invalid
# metadata.json. The callers visible in this view pass plain sentences only.
+ write_fixture() {
103
+ local intent="$1"
104
+ cat > "$fixture/metadata.json" <<EOF
105
+ {
106
+ "id": "F99-riskless-high-risk",
107
+ "category": "high-risk",
108
+ "difficulty": "high",
109
+ "timeout_seconds": 900,
110
+ "required_tools": ["node"],
111
+ "browser": false,
112
+ "deps_change_expected": false,
113
+ "intent": "$intent"
114
+ }
115
+ EOF
116
+ cat > "$fixture/spec.md" <<EOF
117
+ ---
118
+ id: F99-riskless-high-risk
119
+ ---
120
+
121
+ # Riskless Fixture
122
+
123
+ ## Context
124
+
125
+ $intent
126
+
127
+ ## Requirements
128
+
129
+ - Add the requested behavior.
130
+ EOF
131
+ printf '%s\n' "$intent" > "$fixture/task.txt"
132
+ cat > "$fixture/expected.json" <<'EOF'
133
+ {
134
+ "verification_commands": [
135
+ {
136
+ "cmd": "node -e \"process.exit(0)\"",
137
+ "exit_code": 0
138
+ }
139
+ ],
140
+ "forbidden_patterns": [],
141
+ "required_files": [],
142
+ "forbidden_files": [],
143
+ "tier_a_waivers": [],
144
+ "spec_output_files": [],
145
+ "max_deps_added": 0
146
+ }
147
+ EOF
148
+ cat > "$fixture/setup.sh" <<'EOF'
149
+ #!/usr/bin/env bash
150
+ set -euo pipefail
151
+ EOF
152
+ chmod +x "$fixture/setup.sh"
153
+ cat > "$fixture/NOTES.md" <<'EOF'
154
+ # Notes
155
+
156
+ Synthetic lint fixture for high-risk trigger validation.
157
+ EOF
158
+ }
159
+
160
+ write_fixture "Add a generic CLI helper with clear output."
161
+ set +e
162
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/fail.out" 2>&1
163
+ status=$?
164
+ set -e
165
+ [ "$status" -ne 0 ]
166
+ grep -Fq 'high-risk fixture must include a resolve risk-trigger term' "$TMP/fail.out"
167
+
168
+ write_fixture "Add idempotency handling for duplicate requests."
169
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/pass.out" 2>&1
170
+ grep -Fq '✓ lint-fixtures: 1 active fixture(s) passed schema + structural checks; 0 retired fixture(s) preserved' "$TMP/pass.out"
171
+
172
+ cat >> "$fixture/NOTES.md" <<'EOF'
173
+
174
+ ## Measurement status
175
+
176
+ Pair evidence passed in `test-pair-run`: bare `33`, solo_claude `66`, pair `99`,
177
+ margin `+33`, wall `1.50x`, arm `l2_risk_probes`, verdict `pair_evidence_passed`.
178
+ EOF
179
+ set +e
180
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/pair-evidence-hypothesis-fail.out" 2>&1
181
+ status=$?
182
+ set -e
183
+ [ "$status" -ne 0 ]
184
+ grep -Fq 'pair_evidence_passed fixture spec.md must document an actionable solo-headroom hypothesis with solo_claude miss and observable command from expected.json' \
185
+ "$TMP/pair-evidence-hypothesis-fail.out"
186
+
187
+ cat >> "$fixture/spec.md" <<'EOF'
188
+
189
+ ## Solo-headroom hypothesis
190
+
191
+ A capable solo_claude baseline is expected to miss duplicate idempotency ordering;
192
+ observable command `node -e "process.exit(0)"` exposes the miss.
193
+ EOF
194
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/pair-evidence-hypothesis-pass.out" 2>&1
195
+ grep -Fq '✓ lint-fixtures: 1 active fixture(s) passed schema + structural checks; 0 retired fixture(s) preserved' \
196
+ "$TMP/pair-evidence-hypothesis-pass.out"
197
+ write_fixture "Add idempotency handling for duplicate requests."
198
+
199
+ python3 - "$fixture/spec.md" <<'PY'
200
+ import pathlib
201
+ path = pathlib.Path(__import__("sys").argv[1])
202
+ text = path.read_text(encoding="utf-8")
203
+ path.write_text(text.replace("---\nid:", "---\ncomplexity: hihg\nid:", 1), encoding="utf-8")
204
+ PY
205
+ set +e
206
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/spec-verify-check-fail.out" 2>&1
207
+ status=$?
208
+ set -e
209
+ [ "$status" -ne 0 ]
210
+ grep -Fq 'spec-verify-check --check failed' "$TMP/spec-verify-check-fail.out"
211
+ grep -Fq 'frontmatter complexity must be one of' "$TMP/spec-verify-check-fail.out"
212
+ write_fixture "Add idempotency handling for duplicate requests."
213
+
214
+ cat > "$TMP/malformed-rejected.sh" <<'EOF'
215
+ #!/usr/bin/env bash
216
+ not_the_registry_function() {
217
+ return 1
218
+ }
219
+ EOF
220
+ set +e
221
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" \
222
+ DEVLYN_REJECTED_FIXTURE_REGISTRY="$TMP/malformed-rejected.sh" \
223
+ bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/malformed-rejected.out" 2>&1
224
+ status=$?
225
+ set -e
226
+ [ "$status" -ne 0 ]
227
+ grep -Fq 'rejected fixture registry must define rejected_pair_fixture_reason' \
228
+ "$TMP/malformed-rejected.out"
229
+
230
+ SHADOW_DIR="$TMP/shadow-fixtures"
231
+ shadow_fixture="$SHADOW_DIR/S99-riskless-high-risk"
232
+ mkdir -p "$shadow_fixture"
233
+ cp -R "$fixture/." "$shadow_fixture/"
234
+ python3 - "$shadow_fixture" <<'PY'
235
+ from pathlib import Path
236
+ import sys
237
+ root = Path(sys.argv[1])
238
+ for name in ("metadata.json", "spec.md"):
239
+ path = root / name
240
+ path.write_text(path.read_text().replace("F99-riskless-high-risk", "S99-riskless-high-risk"))
241
+ PY
242
+ DEVLYN_FIXTURES_DIR="$SHADOW_DIR" DEVLYN_FIXTURE_GLOB="S*" \
243
+ bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/shadow-pass.out" 2>&1
244
+ grep -Fq '✓ lint-fixtures: 1 active fixture(s) passed schema + structural checks; 0 retired fixture(s) preserved' "$TMP/shadow-pass.out"
245
+
246
+ cat > "$TMP/empty-rejected.sh" <<'EOF'
247
+ #!/usr/bin/env bash
248
+ rejected_pair_fixture_reason() {
249
+ return 1
250
+ }
251
+ EOF
252
+ set +e
253
+ DEVLYN_SHADOW_FIXTURES_DIR="$SHADOW_DIR" \
254
+ DEVLYN_REJECTED_FIXTURE_REGISTRY="$TMP/empty-rejected.sh" \
255
+ bash "$ROOT/scripts/lint-shadow-fixtures.sh" > "$TMP/shadow-missing-hypothesis.out" 2>&1
256
+ status=$?
257
+ set -e
258
+ [ "$status" -ne 0 ]
259
+ grep -Fq 'unmeasured high-risk shadow fixture spec.md must document a solo-headroom hypothesis with solo_claude miss and observable command from expected.json before provider spend' \
260
+ "$TMP/shadow-missing-hypothesis.out"
261
+
262
+ cat >> "$shadow_fixture/spec.md" <<'EOF'
263
+
264
+ ## Solo-headroom hypothesis
265
+
266
+ A capable solo_claude baseline is expected to miss duplicate idempotency ordering.
267
+ EOF
268
+ set +e
269
+ DEVLYN_SHADOW_FIXTURES_DIR="$SHADOW_DIR" \
270
+ DEVLYN_REJECTED_FIXTURE_REGISTRY="$TMP/empty-rejected.sh" \
271
+ bash "$ROOT/scripts/lint-shadow-fixtures.sh" > "$TMP/shadow-weak-hypothesis.out" 2>&1
272
+ status=$?
273
+ set -e
274
+ [ "$status" -ne 0 ]
275
+ grep -Fq 'solo-headroom hypothesis must include' "$TMP/shadow-weak-hypothesis.out"
276
+
277
+ cat >> "$shadow_fixture/spec.md" <<'EOF'
278
+
279
+ Observable command: `node -e "process.exit(0)"` exposes the miss.
280
+ EOF
281
+ set +e
282
+ DEVLYN_SHADOW_FIXTURES_DIR="$SHADOW_DIR" \
283
+ DEVLYN_REJECTED_FIXTURE_REGISTRY="$TMP/empty-rejected.sh" \
284
+ bash "$ROOT/scripts/lint-shadow-fixtures.sh" > "$TMP/shadow-missing-solo-ceiling-avoidance.out" 2>&1
285
+ status=$?
286
+ set -e
287
+ [ "$status" -ne 0 ]
288
+ grep -Fq 'unmeasured high-risk shadow fixture NOTES.md must include ## Solo ceiling avoidance' \
289
+ "$TMP/shadow-missing-solo-ceiling-avoidance.out"
290
+
291
+ cat >> "$shadow_fixture/NOTES.md" <<'EOF'
292
+
293
+ ## Solo ceiling avoidance
294
+
295
+ This candidate mentions solo_claude but gives no control comparison.
296
+ EOF
297
+ set +e
298
+ DEVLYN_SHADOW_FIXTURES_DIR="$SHADOW_DIR" \
299
+ DEVLYN_REJECTED_FIXTURE_REGISTRY="$TMP/empty-rejected.sh" \
300
+ bash "$ROOT/scripts/lint-shadow-fixtures.sh" > "$TMP/shadow-weak-solo-ceiling-avoidance.out" 2>&1
301
+ status=$?
302
+ set -e
303
+ [ "$status" -ne 0 ]
304
+ grep -Fq 'unmeasured high-risk shadow fixture NOTES.md must include ## Solo ceiling avoidance' \
305
+ "$TMP/shadow-weak-solo-ceiling-avoidance.out"
306
+
307
+ cat >> "$shadow_fixture/NOTES.md" <<'EOF'
308
+
309
+ This candidate is expected to preserve solo_claude headroom because it differs
310
+ from solo-saturated S2-S6 controls by exercising a synthetic hidden invariant.
311
+ EOF
312
+ DEVLYN_SHADOW_FIXTURES_DIR="$SHADOW_DIR" \
313
+ DEVLYN_REJECTED_FIXTURE_REGISTRY="$TMP/empty-rejected.sh" \
314
+ bash "$ROOT/scripts/lint-shadow-fixtures.sh" > "$TMP/shadow-hypothesis-pass.out" 2>&1
315
+ grep -Fq '✓ lint-fixtures: 1 active fixture(s) passed schema + structural checks; 0 retired fixture(s) preserved' \
316
+ "$TMP/shadow-hypothesis-pass.out"
317
+
318
+ cat >> "$shadow_fixture/NOTES.md" <<'EOF'
319
+
320
+ ## Calibration status
321
+
322
+ - `test-shadow-headroom`: bare `33`, solo_claude `99`, headroom gate FAIL.
323
+ EOF
324
+ set +e
325
+ DEVLYN_SHADOW_FIXTURES_DIR="$SHADOW_DIR" \
326
+ DEVLYN_REJECTED_FIXTURE_REGISTRY="$TMP/empty-rejected.sh" \
327
+ bash "$ROOT/scripts/lint-shadow-fixtures.sh" > "$TMP/shadow-calibration-rejected-missing.out" 2>&1
328
+ status=$?
329
+ set -e
330
+ [ "$status" -ne 0 ]
331
+ grep -Fq 'NOTES.md records pair-candidate rejection but pair-rejected-fixtures.sh has no rejected reason' \
332
+ "$TMP/shadow-calibration-rejected-missing.out"
333
+
334
+ cat > "$TMP/rejected.sh" <<'EOF'
335
+ #!/usr/bin/env bash
336
+ rejected_pair_fixture_reason() {
337
+ case "$1" in
338
+ F99-*|F99) echo "bare 33 / solo_claude 98 in test-active-headroom" ;;
339
+ S99-*|S99) echo "bare 33 / solo_claude 99 in test-shadow-headroom" ;;
340
+ *) return 1 ;;
341
+ esac
342
+ }
343
+ EOF
344
+ DEVLYN_SHADOW_FIXTURES_DIR="$SHADOW_DIR" \
345
+ DEVLYN_REJECTED_FIXTURE_REGISTRY="$TMP/rejected.sh" \
346
+ bash "$ROOT/scripts/lint-shadow-fixtures.sh" > "$TMP/shadow-calibration-rejected-pass.out" 2>&1
347
+ grep -Fq '✓ lint-fixtures: 1 active fixture(s) passed schema + structural checks; 0 retired fixture(s) preserved' \
348
+ "$TMP/shadow-calibration-rejected-pass.out"
349
+
350
+ cat >> "$fixture/NOTES.md" <<'EOF'
351
+
352
+ ## Pair-candidate status
353
+
354
+ Rejected as pair-lift evidence by `test-active-headroom`: bare scored 33, but
355
+ solo_claude scored 98.
356
+ EOF
357
+ set +e
358
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" \
359
+ DEVLYN_REJECTED_FIXTURE_REGISTRY="$TMP/empty-rejected.sh" \
360
+ bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/active-calibration-rejected-missing.out" 2>&1
361
+ status=$?
362
+ set -e
363
+ [ "$status" -ne 0 ]
364
+ grep -Fq 'NOTES.md records pair-candidate rejection but pair-rejected-fixtures.sh has no rejected reason' \
365
+ "$TMP/active-calibration-rejected-missing.out"
366
+
367
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" \
368
+ DEVLYN_REJECTED_FIXTURE_REGISTRY="$TMP/rejected.sh" \
369
+ bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/active-calibration-rejected-pass.out" 2>&1
370
+ grep -Fq '✓ lint-fixtures: 1 active fixture(s) passed schema + structural checks; 0 retired fixture(s) preserved' \
371
+ "$TMP/active-calibration-rejected-pass.out"
372
+
373
+ cat > "$fixture/expected.json" <<'EOF'
374
+ {
375
+ "verification_commands": [],
376
+ "forbidden_patterns": [],
377
+ "required_files": [],
378
+ "forbidden_files": [],
379
+ "tier_a_waivers": [],
380
+ "spec_output_files": [],
381
+ "max_deps_added": 0
382
+ }
383
+ EOF
384
+ set +e
385
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/spec-verify-check-expected-fail.out" 2>&1
386
+ status=$?
387
+ set -e
388
+ [ "$status" -ne 0 ]
389
+ grep -Fq 'spec-verify-check --check-expected failed' "$TMP/spec-verify-check-expected-fail.out"
390
+ grep -Fq 'unless sibling spec.md declares all Requirements are pure-design' "$TMP/spec-verify-check-expected-fail.out"
391
+
392
+ write_fixture "Add idempotency handling for duplicate requests."
393
+
394
+ mkdir -p "$fixture/verifiers"
395
+ printf 'console.log(JSON.stringify({ ok: true }))\n' > "$fixture/verifiers/hidden-oracle.js"
396
+
397
+ cat > "$fixture/expected.json" <<'EOF'
398
+ {
399
+ "verification_commands": [
400
+ {
401
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/hidden-oracle.js\"",
402
+ "exit_code": 0
403
+ }
404
+ ],
405
+ "forbidden_patterns": [],
406
+ "required_files": [],
407
+ "forbidden_files": [],
408
+ "tier_a_waivers": [],
409
+ "spec_output_files": [],
410
+ "max_deps_added": 0
411
+ }
412
+ EOF
413
+ set +e
414
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/missing-contract-refs.out" 2>&1
415
+ status=$?
416
+ set -e
417
+ [ "$status" -ne 0 ]
418
+ grep -Fq 'hidden oracle missing contract_refs' "$TMP/missing-contract-refs.out"
419
+
420
+ cat > "$fixture/expected.json" <<'EOF'
421
+ {
422
+ "verification_commands": [
423
+ {
424
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/hidden-oracle.js\"",
425
+ "exit_code": 0,
426
+ "contract_refs": ["Add the requested behavior."]
427
+ }
428
+ ],
429
+ "forbidden_patterns": [],
430
+ "required_files": [],
431
+ "forbidden_files": [],
432
+ "tier_a_waivers": [],
433
+ "spec_output_files": [],
434
+ "max_deps_added": 0
435
+ }
436
+ EOF
437
+ set +e
438
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/missing-hidden-oracle-sentinel.out" 2>&1
439
+ status=$?
440
+ set -e
441
+ [ "$status" -ne 0 ]
442
+ grep -Fq "hidden oracle must assert stdout_contains includes '\"ok\":true'" "$TMP/missing-hidden-oracle-sentinel.out"
443
+
444
+ cat > "$fixture/expected.json" <<'EOF'
445
+ {
446
+ "verification_commands": [
447
+ {
448
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/hidden-oracle.js\"",
449
+ "exit_code": 0,
450
+ "contract_refs": ["This visible contract is not in the spec."]
451
+ }
452
+ ],
453
+ "forbidden_patterns": [],
454
+ "required_files": [],
455
+ "forbidden_files": [],
456
+ "tier_a_waivers": [],
457
+ "spec_output_files": [],
458
+ "max_deps_added": 0
459
+ }
460
+ EOF
461
+ set +e
462
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/bad-contract-ref.out" 2>&1
463
+ status=$?
464
+ set -e
465
+ [ "$status" -ne 0 ]
466
+ grep -Fq 'contract_ref not found in spec.md' "$TMP/bad-contract-ref.out"
467
+
468
+ printf 'console.log("outside")\n' > "$FIXTURES_DIR/outside-hidden-oracle.js"
469
+ cat > "$fixture/expected.json" <<'EOF'
470
+ {
471
+ "verification_commands": [
472
+ {
473
+ "cmd": "node \"$BENCH_FIXTURE_DIR/../outside-hidden-oracle.js\"",
474
+ "exit_code": 0,
475
+ "contract_refs": ["Add the requested behavior."]
476
+ }
477
+ ],
478
+ "forbidden_patterns": [],
479
+ "required_files": [],
480
+ "forbidden_files": [],
481
+ "tier_a_waivers": [],
482
+ "spec_output_files": [],
483
+ "max_deps_added": 0
484
+ }
485
+ EOF
486
+ set +e
487
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/escaping-hidden-oracle-file.out" 2>&1
488
+ status=$?
489
+ set -e
490
+ [ "$status" -ne 0 ]
491
+ grep -Fq 'BENCH_FIXTURE_DIR file escapes fixture dir' "$TMP/escaping-hidden-oracle-file.out"
492
+
493
+ cat > "$fixture/expected.json" <<'EOF'
494
+ {
495
+ "verification_commands": [
496
+ {
497
+ "cmd": "cd \"$BENCH_FIXTURE_DIR\" && node verifiers/hidden-oracle.js",
498
+ "exit_code": 0,
499
+ "contract_refs": ["Add the requested behavior."]
500
+ }
501
+ ],
502
+ "forbidden_patterns": [],
503
+ "required_files": [],
504
+ "forbidden_files": [],
505
+ "tier_a_waivers": [],
506
+ "spec_output_files": [],
507
+ "max_deps_added": 0
508
+ }
509
+ EOF
510
+ set +e
511
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/implicit-hidden-oracle-file.out" 2>&1
512
+ status=$?
513
+ set -e
514
+ [ "$status" -ne 0 ]
515
+ grep -Fq 'hidden oracle must reference an explicit $BENCH_FIXTURE_DIR/... file' "$TMP/implicit-hidden-oracle-file.out"
516
+
517
+ cat > "$fixture/expected.json" <<'EOF'
518
+ {
519
+ "verification_commands": [
520
+ {
521
+ "cmd": "node \"$BENCH_FIXTURE_DIR/verifiers/missing-hidden-oracle.js\"",
522
+ "exit_code": 0,
523
+ "contract_refs": ["Add the requested behavior."]
524
+ }
525
+ ],
526
+ "forbidden_patterns": [],
527
+ "required_files": [],
528
+ "forbidden_files": [],
529
+ "tier_a_waivers": [],
530
+ "spec_output_files": [],
531
+ "max_deps_added": 0
532
+ }
533
+ EOF
534
+ set +e
535
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/missing-hidden-oracle-file.out" 2>&1
536
+ status=$?
537
+ set -e
538
+ [ "$status" -ne 0 ]
539
+ grep -Fq 'BENCH_FIXTURE_DIR file not found' "$TMP/missing-hidden-oracle-file.out"
540
+
541
+ cat > "$fixture/expected.json" <<'EOF'
542
+ {
543
+ "verification_commands": "node -e \"process.exit(0)\"",
544
+ "forbidden_patterns": [],
545
+ "required_files": [],
546
+ "forbidden_files": [],
547
+ "tier_a_waivers": [],
548
+ "spec_output_files": [],
549
+ "max_deps_added": 0
550
+ }
551
+ EOF
552
+ set +e
553
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" DEVLYN_LINT_FIXTURES_NO_JSONSCHEMA=1 \
554
+ bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/fallback-fail.out" 2>&1
555
+ status=$?
556
+ set -e
557
+ [ "$status" -ne 0 ]
558
+ grep -Fq 'verification_commands must be an array' "$TMP/fallback-fail.out"
559
+ if grep -Fq 'Traceback' "$TMP/fallback-fail.out"; then
560
+ echo "fallback schema failure must not continue into traceback-prone checks" >&2
561
+ cat "$TMP/fallback-fail.out" >&2
562
+ exit 1
563
+ fi
564
+
565
+ cat > "$fixture/expected.json" <<'EOF'
566
+ []
567
+ EOF
568
+ set +e
569
+ DEVLYN_FIXTURES_DIR="$FIXTURES_DIR" bash "$ROOT/scripts/lint-fixtures.sh" > "$TMP/non-object-fail.out" 2>&1
570
+ status=$?
571
+ set -e
572
+ [ "$status" -ne 0 ]
573
+ grep -Fq 'expected.json must be an object' "$TMP/non-object-fail.out"
574
+ if grep -Fq 'Traceback' "$TMP/non-object-fail.out"; then
575
+ echo "non-object expected.json failure must not emit Traceback" >&2
576
+ cat "$TMP/non-object-fail.out" >&2
577
+ exit 1
578
+ fi
579
+
580
+ echo "PASS test-lint-fixtures"