devlyn-cli 2.2.2 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. package/AGENTS.md +2 -2
  2. package/CLAUDE.md +4 -4
  3. package/README.md +85 -34
  4. package/benchmark/auto-resolve/BENCHMARK-DESIGN.md +61 -44
  5. package/benchmark/auto-resolve/BENCHMARK-RESULTS.md +341 -0
  6. package/benchmark/auto-resolve/README.md +307 -44
  7. package/benchmark/auto-resolve/RUBRIC.md +23 -14
  8. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/NOTES.md +7 -3
  9. package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/NOTES.md +8 -3
  10. package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/NOTES.md +8 -3
  11. package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/NOTES.md +10 -4
  12. package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/NOTES.md +10 -4
  13. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/NOTES.md +12 -0
  14. package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/spec.md +6 -0
  15. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/NOTES.md +7 -4
  16. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/NOTES.md +12 -0
  17. package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/spec.md +6 -0
  18. package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/NOTES.md +8 -0
  19. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/NOTES.md +12 -0
  20. package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/spec.md +6 -0
  21. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/NOTES.md +16 -4
  22. package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/spec.md +7 -0
  23. package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/NOTES.md +11 -5
  24. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/NOTES.md +8 -1
  25. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json +4 -2
  26. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md +1 -1
  27. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/NOTES.md +34 -0
  28. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/expected.json +57 -0
  29. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/metadata.json +10 -0
  30. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/setup.sh +2 -0
  31. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/spec.md +67 -0
  32. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/task.txt +7 -0
  33. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/verifiers/duplicate-event-error.js +35 -0
  34. package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/verifiers/priority-transfer-rollback.js +53 -0
  35. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/NOTES.md +38 -0
  36. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/expected.json +57 -0
  37. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/metadata.json +10 -0
  38. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/setup.sh +2 -0
  39. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/spec.md +70 -0
  40. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/task.txt +3 -0
  41. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/verifiers/duplicate-renewal-error.js +42 -0
  42. package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/verifiers/priority-credit-rollback.js +70 -0
  43. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/NOTES.md +10 -3
  44. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/NOTES.md +7 -0
  45. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/NOTES.md +5 -0
  46. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/NOTES.md +7 -0
  47. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/NOTES.md +3 -0
  48. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/spec.md +1 -1
  49. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/NOTES.md +15 -3
  50. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/spec.md +1 -1
  51. package/benchmark/auto-resolve/fixtures/SCHEMA.md +53 -7
  52. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/NOTES.md +37 -0
  53. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/RETIRED.md +13 -0
  54. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/expected.json +56 -0
  55. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/metadata.json +10 -0
  56. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/setup.sh +18 -0
  57. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/spec.md +69 -0
  58. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/task.txt +7 -0
  59. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/verifiers/exact-proration.js +48 -0
  60. package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/verifiers/rules-source-and-conflict.js +79 -0
  61. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/NOTES.md +54 -0
  62. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/RETIRED.md +7 -0
  63. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/expected.json +67 -0
  64. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/metadata.json +10 -0
  65. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/setup.sh +2 -0
  66. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/spec.md +67 -0
  67. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/task.txt +5 -0
  68. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/verifiers/policy-precedence.js +72 -0
  69. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/verifiers/validation-and-immutability.js +43 -0
  70. package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/verifiers/validation-boundary.js +116 -0
  71. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/NOTES.md +35 -0
  72. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/RETIRED.md +12 -0
  73. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/expected.json +58 -0
  74. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/metadata.json +10 -0
  75. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/setup.sh +2 -0
  76. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/spec.md +73 -0
  77. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/task.txt +17 -0
  78. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/verifiers/mixed-idempotent-settlement.js +53 -0
  79. package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/verifiers/rejection-boundaries.js +74 -0
  80. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/NOTES.md +60 -0
  81. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/RETIRED.md +29 -0
  82. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/expected.json +73 -0
  83. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/metadata.json +10 -0
  84. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/setup.sh +28 -0
  85. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/spec.md +58 -0
  86. package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/task.txt +5 -0
  87. package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/full-pipeline-pair-gate.json +82 -0
  88. package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/full-pipeline-pair-gate.md +18 -0
  89. package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/headroom-gate.json +46 -0
  90. package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/headroom-gate.md +17 -0
  91. package/benchmark/auto-resolve/run-real-benchmark.md +303 -0
  92. package/benchmark/auto-resolve/scripts/audit-headroom-rejections.py +441 -0
  93. package/benchmark/auto-resolve/scripts/audit-pair-evidence.py +1256 -0
  94. package/benchmark/auto-resolve/scripts/build-pair-eligible-manifest.py +147 -15
  95. package/benchmark/auto-resolve/scripts/check-f9-artifacts.py +28 -16
  96. package/benchmark/auto-resolve/scripts/collect-swebench-predictions.py +11 -1
  97. package/benchmark/auto-resolve/scripts/compile-report.py +208 -46
  98. package/benchmark/auto-resolve/scripts/fetch-swebench-instances.py +22 -4
  99. package/benchmark/auto-resolve/scripts/frozen-verify-gate.py +175 -30
  100. package/benchmark/auto-resolve/scripts/full-pipeline-pair-gate.py +408 -46
  101. package/benchmark/auto-resolve/scripts/headroom-gate.py +270 -39
  102. package/benchmark/auto-resolve/scripts/iter-0033c-compare.py +164 -33
  103. package/benchmark/auto-resolve/scripts/iter-0033c-l1-summary.py +97 -0
  104. package/benchmark/auto-resolve/scripts/judge-opus-pass.sh +150 -38
  105. package/benchmark/auto-resolve/scripts/judge.sh +153 -26
  106. package/benchmark/auto-resolve/scripts/oracle-scope-tier-a.py +12 -5
  107. package/benchmark/auto-resolve/scripts/oracle-scope-tier-b.py +25 -2
  108. package/benchmark/auto-resolve/scripts/pair-candidate-frontier.py +469 -0
  109. package/benchmark/auto-resolve/scripts/pair-plan-idgen.py +5 -5
  110. package/benchmark/auto-resolve/scripts/pair-plan-lint.py +9 -2
  111. package/benchmark/auto-resolve/scripts/pair-rejected-fixtures.sh +91 -0
  112. package/benchmark/auto-resolve/scripts/pair_evidence_contract.py +269 -0
  113. package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-case.py +39 -10
  114. package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-corpus.py +34 -4
  115. package/benchmark/auto-resolve/scripts/prepare-swebench-solver-worktree.py +23 -5
  116. package/benchmark/auto-resolve/scripts/recent-benchmark-summary.py +232 -0
  117. package/benchmark/auto-resolve/scripts/run-fixture.sh +118 -51
  118. package/benchmark/auto-resolve/scripts/run-frozen-verify-pair.sh +211 -39
  119. package/benchmark/auto-resolve/scripts/run-full-pipeline-pair-candidate.sh +335 -39
  120. package/benchmark/auto-resolve/scripts/run-headroom-candidate.sh +249 -6
  121. package/benchmark/auto-resolve/scripts/run-iter-0033c.sh +22 -48
  122. package/benchmark/auto-resolve/scripts/run-suite.sh +44 -7
  123. package/benchmark/auto-resolve/scripts/run-swebench-frozen-corpus.sh +120 -19
  124. package/benchmark/auto-resolve/scripts/run-swebench-solver-batch.sh +32 -14
  125. package/benchmark/auto-resolve/scripts/ship-gate.py +219 -50
  126. package/benchmark/auto-resolve/scripts/solo-ceiling-avoidance.py +53 -0
  127. package/benchmark/auto-resolve/scripts/solo-headroom-hypothesis.py +77 -0
  128. package/benchmark/auto-resolve/scripts/swebench-frozen-matrix.py +239 -26
  129. package/benchmark/auto-resolve/scripts/test-audit-headroom-rejections.sh +288 -0
  130. package/benchmark/auto-resolve/scripts/test-audit-pair-evidence.sh +1672 -0
  131. package/benchmark/auto-resolve/scripts/test-benchmark-arg-parsing.sh +933 -0
  132. package/benchmark/auto-resolve/scripts/test-build-pair-eligible-manifest.sh +491 -0
  133. package/benchmark/auto-resolve/scripts/test-check-f9-artifacts.sh +91 -0
  134. package/benchmark/auto-resolve/scripts/test-frozen-verify-gate.sh +328 -3
  135. package/benchmark/auto-resolve/scripts/test-full-pipeline-pair-gate.sh +497 -18
  136. package/benchmark/auto-resolve/scripts/test-headroom-gate.sh +331 -14
  137. package/benchmark/auto-resolve/scripts/test-iter-0033c-compare.sh +525 -0
  138. package/benchmark/auto-resolve/scripts/test-iter-0033c-l1-summary.sh +254 -0
  139. package/benchmark/auto-resolve/scripts/test-lint-fixtures.sh +580 -0
  140. package/benchmark/auto-resolve/scripts/test-pair-candidate-frontier.sh +591 -0
  141. package/benchmark/auto-resolve/scripts/test-run-full-pipeline-pair-candidate.sh +497 -0
  142. package/benchmark/auto-resolve/scripts/test-run-headroom-candidate.sh +401 -0
  143. package/benchmark/auto-resolve/scripts/test-run-swebench-solver-batch.sh +111 -0
  144. package/benchmark/auto-resolve/scripts/test-ship-gate.sh +1189 -0
  145. package/benchmark/auto-resolve/scripts/test-swebench-frozen-case.sh +924 -5
  146. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/NOTES.md +28 -0
  147. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/expected.json +63 -0
  148. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/metadata.json +10 -0
  149. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/setup.sh +3 -0
  150. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/spec.md +47 -0
  151. package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/task.txt +1 -0
  152. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/NOTES.md +34 -0
  153. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/expected.json +53 -0
  154. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/metadata.json +10 -0
  155. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/setup.sh +3 -0
  156. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/spec.md +50 -0
  157. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/task.txt +1 -0
  158. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/verifiers/duplicate-order-error.js +27 -0
  159. package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/verifiers/priority-stock-reservation.js +44 -0
  160. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/NOTES.md +34 -0
  161. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/expected.json +55 -0
  162. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/metadata.json +10 -0
  163. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/setup.sh +3 -0
  164. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/spec.md +52 -0
  165. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/task.txt +1 -0
  166. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/verifiers/duplicate-ticket-error.js +29 -0
  167. package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/verifiers/priority-agent-assignment.js +48 -0
  168. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/NOTES.md +34 -0
  169. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/expected.json +55 -0
  170. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/metadata.json +10 -0
  171. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/setup.sh +3 -0
  172. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/spec.md +55 -0
  173. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/task.txt +1 -0
  174. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/verifiers/duplicate-return-error.js +43 -0
  175. package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/verifiers/priority-return-routing.js +70 -0
  176. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/NOTES.md +37 -0
  177. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/expected.json +54 -0
  178. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/metadata.json +10 -0
  179. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/setup.sh +3 -0
  180. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/spec.md +59 -0
  181. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/task.txt +1 -0
  182. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/verifiers/credit-ledger-priority.js +98 -0
  183. package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/verifiers/duplicate-charge-error.js +38 -0
  184. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/NOTES.md +36 -0
  185. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/expected.json +56 -0
  186. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/metadata.json +10 -0
  187. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/setup.sh +3 -0
  188. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/spec.md +59 -0
  189. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/task.txt +1 -0
  190. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/verifiers/duplicate-refund-error.js +41 -0
  191. package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/verifiers/priority-refund-ledger.js +65 -0
  192. package/bin/devlyn.js +221 -17
  193. package/config/skills/_shared/adapters/README.md +3 -0
  194. package/config/skills/_shared/adapters/gpt-5-5.md +5 -1
  195. package/config/skills/_shared/adapters/opus-4-7.md +9 -1
  196. package/config/skills/_shared/archive_run.py +78 -6
  197. package/config/skills/_shared/codex-config.md +5 -4
  198. package/config/skills/_shared/codex-monitored.sh +46 -1
  199. package/config/skills/_shared/collect-codex-findings.py +20 -5
  200. package/config/skills/_shared/engine-preflight.md +17 -13
  201. package/config/skills/_shared/runtime-principles.md +6 -9
  202. package/config/skills/_shared/spec-verify-check.py +2664 -107
  203. package/config/skills/_shared/verify-merge-findings.py +1369 -19
  204. package/config/skills/devlyn:design-ui/SKILL.md +364 -0
  205. package/config/skills/devlyn:ideate/SKILL.md +7 -4
  206. package/config/skills/devlyn:ideate/references/elicitation.md +50 -4
  207. package/config/skills/devlyn:ideate/references/from-spec-mode.md +26 -4
  208. package/config/skills/devlyn:ideate/references/project-mode.md +20 -1
  209. package/config/skills/devlyn:ideate/references/spec-template.md +10 -1
  210. package/config/skills/devlyn:resolve/SKILL.md +78 -26
  211. package/config/skills/devlyn:resolve/references/free-form-mode.md +15 -0
  212. package/config/skills/devlyn:resolve/references/phases/build-gate.md +2 -2
  213. package/config/skills/devlyn:resolve/references/phases/implement.md +1 -1
  214. package/config/skills/devlyn:resolve/references/phases/probe-derive.md +74 -2
  215. package/config/skills/devlyn:resolve/references/phases/verify.md +80 -29
  216. package/config/skills/devlyn:resolve/references/state-schema.md +9 -4
  217. package/package.json +47 -2
  218. package/scripts/lint-fixtures.sh +349 -0
  219. package/scripts/lint-shadow-fixtures.sh +58 -0
  220. package/scripts/lint-skills.sh +3645 -95
@@ -12,6 +12,7 @@ from __future__ import annotations
12
12
  import argparse
13
13
  import json
14
14
  import pathlib
15
+ import re
15
16
  import sys
16
17
  import tempfile
17
18
  from typing import Any
@@ -32,6 +33,54 @@ VERDICT_RANK = {
32
33
  "BLOCKED": 3,
33
34
  }
34
35
  RANK_VERDICT = {0: "PASS", 1: "PASS_WITH_ISSUES", 2: "NEEDS_WORK", 3: "BLOCKED"}
36
# Values that pair_trigger.skipped_reason may carry (besides null).
ALLOWED_PAIR_SKIP_REASONS = {
    "user_no_pair",
    "mechanical_blocker",
    "primary_judge_blocker",
}

# Reason codes accepted inside pair_trigger.reasons.
KNOWN_PAIR_TRIGGER_REASONS = {
    # run mode
    "mode.verify-only",
    "mode.pair-verify",
    # complexity recorded in state or in the spec frontmatter
    "complexity.high",
    "complexity.large",
    "spec.complexity.high",
    "spec.complexity.large",
    "spec.solo_headroom_hypothesis",
    # risk profile and risk probes
    "risk.high",
    "risk_probes.enabled",
    "risk_probes.present",
    # verification outcomes
    "coverage.failed",
    "mechanical.warning",
    "judge.warning",
}

# Words that, next to "miss", mark a spec line as describing an observable command.
OBSERVABLE_COMMAND_MARKERS = ("command", "observable", "expose")

# A single-line span wrapped in backticks.
BACKTICKED_TEXT_RE = re.compile(r"`[^`\n]+`")

# Backticked terms that are vocabulary, not commands.
RESERVED_BACKTICK_TERMS = {"solo-headroom hypothesis", "solo_claude", "miss"}

# First words that identify a backticked span as a shell command.
COMMAND_PREFIXES = {
    "bash", "bun", "cargo", "git", "go",
    "jest", "make", "node", "npm", "pnpm",
    "printf", "pytest", "python", "python3", "ruff",
    "sh", "uv", "vitest", "yarn",
}
76
+
77
+
78
def reject_json_constant(token: str) -> None:
    """Always raise ``ValueError`` naming the offending JSON constant.

    Intended as a ``parse_constant`` hook for ``json.loads``, which invokes
    the hook for the non-standard literals ``NaN``, ``Infinity`` and
    ``-Infinity``.

    Raises:
        ValueError: unconditionally; the message embeds ``token``.
    """
    message = f"invalid JSON numeric constant: {token}"
    raise ValueError(message)
80
+
81
+
82
def loads_strict_json(text: str) -> Any:
    """Parse ``text`` as JSON, rejecting ``NaN`` / ``Infinity`` / ``-Infinity``.

    Returns:
        The decoded Python value.

    Raises:
        ValueError: for syntactically invalid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass) or for a non-standard numeric
            constant, via ``reject_json_constant``.
    """
    strict_options = {"parse_constant": reject_json_constant}
    return json.loads(text, **strict_options)
35
84
 
36
85
 
37
86
  def rank(verdict: str | None) -> int:
@@ -42,6 +91,18 @@ def worse(a: str | None, b: str | None) -> str:
42
91
  return RANK_VERDICT[max(rank(a), rank(b))]
43
92
 
44
93
 
94
def is_known_pair_trigger_reason(reason: str) -> bool:
    """Return True when ``reason`` is one of ``KNOWN_PAIR_TRIGGER_REASONS``."""
    known_reasons = KNOWN_PAIR_TRIGGER_REASONS
    return reason in known_reasons
96
+
97
+
98
def has_known_pair_trigger_reason(reasons: list[str]) -> bool:
    """Return True when at least one entry of ``reasons`` is a known reason code.

    An empty list yields False.
    """
    for candidate in reasons:
        if is_known_pair_trigger_reason(candidate):
            return True
    return False
100
+
101
+
102
def all_known_pair_trigger_reasons(reasons: list[str]) -> bool:
    """Return True when every entry of ``reasons`` is a known reason code.

    An empty list yields True (vacuously), so callers that need a non-empty
    list must check that separately.
    """
    for candidate in reasons:
        if not is_known_pair_trigger_reason(candidate):
            return False
    return True
104
+
105
+
45
106
  def finding_rank(finding: dict[str, Any]) -> int:
46
107
  severity = str(finding.get("severity") or "").upper()
47
108
  if severity in {"CRITICAL", "HIGH"}:
@@ -66,8 +127,8 @@ def read_findings(devlyn: pathlib.Path) -> tuple[list[dict[str, Any]], dict[str,
66
127
  if not raw:
67
128
  continue
68
129
  try:
69
- item = json.loads(raw)
70
- except json.JSONDecodeError as exc:
130
+ item = loads_strict_json(raw)
131
+ except ValueError as exc:
71
132
  blocked = {
72
133
  "id": f"verify-merge-invalid-json-{name}-{line_no}",
73
134
  "rule_id": "verify.findings.invalid-json",
@@ -102,16 +163,318 @@ def has_pair_findings(devlyn: pathlib.Path) -> bool:
102
163
  return False
103
164
 
104
165
 
166
def pair_trigger_status(devlyn: pathlib.Path) -> tuple[bool, dict[str, Any] | None]:
    """Validate the recorded ``pair_trigger`` and report whether pairing is required.

    The trigger is read from ``pipeline.state.json`` under ``devlyn``, first at
    ``phases.verify.pair_trigger`` and, if that is absent or null, at
    ``verify.pair_trigger``.

    Returns:
        ``(required, violation)``.

        * ``(False, None)`` when the state file does not exist or no
          ``pair_trigger`` is recorded.
        * ``(False, {...})`` when the state file cannot be parsed or the
          trigger breaks a contract rule; the dict carries ``id``,
          ``message`` and ``file``.
        * ``(eligible, None)`` for a well-formed trigger: True only when it
          is eligible with a non-empty reasons list.
    """

    def violation(finding_id: str, message: str) -> tuple[bool, dict[str, Any]]:
        # Every contract failure has the same shape and points at the state file.
        return False, {
            "id": finding_id,
            "message": message,
            "file": "pipeline.state.json",
        }

    state_file = devlyn / "pipeline.state.json"
    if not state_file.is_file():
        return False, None
    try:
        state = loads_strict_json(state_file.read_text(encoding="utf-8"))
    except ValueError:
        return violation(
            "verify-pair-trigger-state-malformed",
            "pipeline.state.json is malformed; cannot verify pair_trigger contract.",
        )

    # Locate the trigger: the phase-scoped location wins, top-level "verify" is the fallback.
    trigger = None
    if isinstance(state, dict):
        phases = state.get("phases")
        verify_phase = phases.get("verify") if isinstance(phases, dict) else None
        if isinstance(verify_phase, dict):
            trigger = verify_phase.get("pair_trigger")
        if trigger is None:
            verify_state = state.get("verify")
            if isinstance(verify_state, dict):
                trigger = verify_state.get("pair_trigger")
    if trigger is None:
        return False, None

    # Shape checks.
    if not isinstance(trigger, dict):
        return violation("verify-pair-trigger-malformed", "pair_trigger must be an object.")
    eligible = trigger.get("eligible")
    if not isinstance(eligible, bool):
        return violation(
            "verify-pair-trigger-eligible-malformed",
            "pair_trigger.eligible must be a boolean.",
        )
    reasons = trigger.get("reasons")
    if not (isinstance(reasons, list) and all(isinstance(item, str) for item in reasons)):
        return violation(
            "verify-pair-trigger-reasons-malformed",
            "pair_trigger.reasons must be a list of strings.",
        )
    skipped_reason = trigger.get("skipped_reason")
    if skipped_reason is not None and not isinstance(skipped_reason, str):
        return violation(
            "verify-pair-trigger-skipped-reason-malformed",
            "pair_trigger.skipped_reason must be a string or null.",
        )

    # Consistency checks between eligible, reasons and skipped_reason.
    if eligible:
        if not reasons:
            return violation(
                "verify-pair-trigger-reasons-empty",
                "pair_trigger.eligible cannot be true with an empty reasons list.",
            )
        if not has_known_pair_trigger_reason(reasons):
            return violation(
                "verify-pair-trigger-reasons-unknown",
                "pair_trigger.reasons must include a known pair-trigger reason.",
            )
        if not all_known_pair_trigger_reasons(reasons):
            return violation(
                "verify-pair-trigger-reasons-unknown",
                "pair_trigger.reasons must only include known pair-trigger reasons.",
            )
        if skipped_reason is not None:
            return violation(
                "verify-pair-trigger-skip-contradiction",
                "pair_trigger.eligible cannot be true while skipped_reason is set.",
            )
    elif reasons:
        return violation(
            "verify-pair-trigger-ineligible-reasons",
            "pair_trigger.reasons must be empty when pair_trigger.eligible is false.",
        )
    return eligible and len(reasons) > 0, None
247
+
248
+
105
249
def pair_trigger_required(devlyn: pathlib.Path) -> bool:
    """Return the ``required`` half of ``pair_trigger_status(devlyn)``.

    Any contract violation reported alongside it is discarded here.
    """
    status = pair_trigger_status(devlyn)
    return status[0]
252
+
253
+
254
def pair_trigger_present(devlyn: pathlib.Path) -> bool:
    """Report whether ``pipeline.state.json`` records a ``pair_trigger`` key at all.

    The key is looked for under ``phases.verify`` and under the top-level
    ``verify`` object; its value is not inspected. A missing or unparsable
    state file, or a state that is not a JSON object, yields False.
    """
    state_file = devlyn / "pipeline.state.json"
    if not state_file.is_file():
        return False
    try:
        state = loads_strict_json(state_file.read_text(encoding="utf-8"))
    except ValueError:
        return False
    if not isinstance(state, dict):
        return False
    phases = state.get("phases")
    verify_phase = phases.get("verify") if isinstance(phases, dict) else None
    sections = (verify_phase, state.get("verify"))
    return any(
        isinstance(section, dict) and "pair_trigger" in section
        for section in sections
    )
271
+
272
+
273
def pair_flag_contract_violation(devlyn: pathlib.Path) -> dict[str, Any] | None:
    """Detect a state that records both a pair-verify request and a pair opt-out.

    Returns:
        A violation dict (``id``, ``message``, ``file``) when ``pair_verify``
        is True while ``risk_profile.pair_default_enabled`` is False;
        otherwise None. A missing or unparsable state file also yields None.
    """
    state_file = devlyn / "pipeline.state.json"
    if not state_file.is_file():
        return None
    try:
        state = loads_strict_json(state_file.read_text(encoding="utf-8"))
    except ValueError:
        return None
    if not (isinstance(state, dict) and state.get("pair_verify") is True):
        return None
    risk_profile = state.get("risk_profile")
    opted_out = (
        isinstance(risk_profile, dict)
        and risk_profile.get("pair_default_enabled") is False
    )
    if not opted_out:
        return None
    return {
        "id": "verify-pair-trigger-conflicting-pair-flags",
        "message": "--pair-verify and --no-pair are mutually exclusive.",
        "file": "pipeline.state.json",
    }
291
+
292
+
293
def risk_profile_contract_violation(devlyn: pathlib.Path) -> dict[str, Any] | None:
    """Check the shape of ``risk_profile`` in ``pipeline.state.json``.

    Returns:
        None when the state file is missing or unparsable, when the state has
        no ``risk_profile`` key, or when the profile is well-formed. Otherwise
        a ``verify-risk-profile-malformed`` violation dict describing the
        first problem found: the profile is not an object, one of the
        boolean fields is present with a non-boolean value, or ``reasons``
        is present but is not a list of strings.
    """

    def malformed(message: str) -> dict[str, Any]:
        return {
            "id": "verify-risk-profile-malformed",
            "message": message,
            "file": "pipeline.state.json",
        }

    state_file = devlyn / "pipeline.state.json"
    if not state_file.is_file():
        return None
    try:
        state = loads_strict_json(state_file.read_text(encoding="utf-8"))
    except ValueError:
        return None
    if not isinstance(state, dict) or "risk_profile" not in state:
        return None

    risk_profile = state["risk_profile"]
    if not isinstance(risk_profile, dict):
        return malformed("risk_profile must be an object.")

    # Optional flags: only validated when present.
    for field in ("high_risk", "risk_probes_enabled", "pair_default_enabled"):
        if field in risk_profile and not isinstance(risk_profile[field], bool):
            return malformed(f"risk_profile.{field} must be a boolean.")

    if "reasons" in risk_profile:
        reasons = risk_profile["reasons"]
        if not (isinstance(reasons, list) and all(isinstance(item, str) for item in reasons)):
            return malformed("risk_profile.reasons must be a list of strings.")
    return None
327
+
328
+
329
def source_spec_text(state: dict[str, Any]) -> str | None:
    """Return the text of the first readable spec/criteria file named in ``state``.

    ``state["source"]["spec_path"]`` is tried first, then
    ``state["source"]["criteria_path"]``. Relative paths are resolved against
    the current working directory. Entries that are missing, empty, or not
    strings are skipped.

    Returns:
        The file contents decoded as UTF-8, or None when ``source`` is not an
        object or no candidate path could be read.
    """
    source = state.get("source") if isinstance(state.get("source"), dict) else {}
    for key in ("spec_path", "criteria_path"):
        raw_path = source.get(key)
        if not isinstance(raw_path, str) or not raw_path:
            continue
        path = pathlib.Path(raw_path)
        if not path.is_absolute():
            path = pathlib.Path.cwd() / path
        try:
            return path.read_text(encoding="utf-8")
        except (OSError, ValueError):
            # ValueError covers UnicodeDecodeError (file is not valid UTF-8)
            # and "embedded null byte" paths; treat both like an unreadable
            # file and fall through to the next candidate instead of crashing.
            continue
    return None
343
+
344
+
345
def spec_frontmatter_complexity(state: dict[str, Any]) -> str | None:
    """Extract the ``complexity`` value from the spec's leading ``---`` block.

    Returns:
        The lower-cased value of the first ``complexity:`` line found between
        the opening ``---`` and the next line starting with ``---``. None
        when no spec text is available, the text does not start with ``---``,
        the block is never closed, or no such line exists.
    """
    text = source_spec_text(state)
    if text is None or not text.startswith("---"):
        return None
    closing = text.find("\n---", 3)
    if closing == -1:
        return None
    complexity_line = re.compile(r"\s*complexity\s*:\s*[\"']?([A-Za-z_-]+)")
    for line in text[3:closing].splitlines():
        found = complexity_line.match(line)
        if found is not None:
            return found.group(1).lower()
    return None
359
+
360
+
361
def spec_has_solo_headroom_hypothesis(state: dict[str, Any]) -> bool:
    """Report whether the spec text states a solo-headroom hypothesis.

    All of the following must hold: the text contains (case-insensitively)
    "solo-headroom hypothesis", "solo_claude" and "miss", and
    ``has_backticked_observable_command`` accepts the text. Returns False
    when no spec text is available.
    """
    text = source_spec_text(state)
    if text is None:
        return False
    lowered = text.lower()
    for required in ("solo-headroom hypothesis", "solo_claude", "miss"):
        if required not in lowered:
            return False
    return has_backticked_observable_command(text)
372
+
373
+
374
def has_backticked_observable_command(text: str) -> bool:
    """Return True when some line pairs a "miss" statement with a backticked command.

    A line qualifies when, case-insensitively, it contains "miss" and at
    least one of ``OBSERVABLE_COMMAND_MARKERS``, and at least one of its
    backticked spans is command-like per ``is_command_like_backtick``.
    """
    for line in text.splitlines():
        lowered = line.lower()
        if "miss" not in lowered:
            continue
        if not any(marker in lowered for marker in OBSERVABLE_COMMAND_MARKERS):
            continue
        for found in BACKTICKED_TEXT_RE.finditer(line):
            if is_command_like_backtick(found.group(0).strip("`")):
                return True
    return False
382
+
383
+
384
def is_command_like_backtick(value: str) -> bool:
    """Heuristically decide whether backticked text looks like a command.

    Blank text and the reserved vocabulary in ``RESERVED_BACKTICK_TERMS``
    are rejected. Otherwise the text is accepted when its first word
    (lower-cased) is in ``COMMAND_PREFIXES``, when it contains one of
    ``/ $ = | && ;``, or when it ends in ``.js``, ``.py`` or ``.sh``.
    """
    candidate = value.strip()
    lowered = candidate.lower()
    if not candidate or lowered in RESERVED_BACKTICK_TERMS:
        return False
    leading_word = lowered.split(maxsplit=1)[0]
    if leading_word in COMMAND_PREFIXES:
        return True
    shell_markers = ("/", "$", "=", "|", "&&", ";")
    if any(marker in candidate for marker in shell_markers):
        return True
    return candidate.endswith((".js", ".py", ".sh"))
395
+
396
+
397
def state_pair_trigger_reasons(
    devlyn: pathlib.Path,
    source_verdicts: dict[str, str],
) -> list[str]:
    """Derive the pair-trigger reason codes implied by the pipeline state.

    Reads ``pipeline.state.json`` under ``devlyn`` and collects, in a fixed
    order, a reason code for each condition that holds: verify-only mode,
    an explicit ``pair_verify`` flag, high/large complexity in the state or
    the spec frontmatter, a solo-headroom hypothesis in the spec, a
    high-risk profile, risk probes enabled, a ``risk-probes.jsonl`` file
    next to the state, a failed coverage flag, and a rank-1 mechanical or
    judge verdict in ``source_verdicts``.

    Returns:
        The list of reason codes; empty when the state file is missing,
        unparsable, or not a JSON object.
    """
    state_path = devlyn / "pipeline.state.json"
    if not state_path.is_file():
        return []
    try:
        state = loads_strict_json(state_path.read_text(encoding="utf-8"))
    except ValueError:
        return []
    if not isinstance(state, dict):
        return []
    phases = state.get("phases") if isinstance(state.get("phases"), dict) else {}
    verify_phase = phases.get("verify") if isinstance(phases, dict) else {}
    verify_state = state.get("verify") if isinstance(state.get("verify"), dict) else {}
    risk_profile = state.get("risk_profile") if isinstance(state.get("risk_profile"), dict) else {}
    reasons: list[str] = []
    if state.get("mode") == "verify-only":
        reasons.append("mode.verify-only")
    if state.get("pair_verify") is True:
        reasons.append("mode.pair-verify")
    complexity = state.get("complexity")
    # Require a string before the set lookup: a malformed state may store a
    # list or dict here, and an unhashable value would raise TypeError.
    if isinstance(complexity, str) and complexity in {"high", "large"}:
        reasons.append(f"complexity.{complexity}")
    spec_complexity = spec_frontmatter_complexity(state)
    if spec_complexity in {"high", "large"}:
        reasons.append(f"spec.complexity.{spec_complexity}")
    if spec_has_solo_headroom_hypothesis(state):
        reasons.append("spec.solo_headroom_hypothesis")
    if risk_profile.get("high_risk") is True:
        reasons.append("risk.high")
    if risk_profile.get("risk_probes_enabled") is True:
        reasons.append("risk_probes.enabled")
    if (devlyn / "risk-probes.jsonl").is_file():
        reasons.append("risk_probes.present")
    # coverage_failed may be recorded in either verify location; one hit is enough.
    coverage_failed = False
    if isinstance(verify_state, dict) and verify_state.get("coverage_failed") is True:
        coverage_failed = True
    if isinstance(verify_phase, dict) and verify_phase.get("coverage_failed") is True:
        coverage_failed = True
    if coverage_failed:
        reasons.append("coverage.failed")
    # Rank 1 maps to PASS_WITH_ISSUES in RANK_VERDICT.
    if rank(source_verdicts.get("mechanical")) == 1:
        reasons.append("mechanical.warning")
    if rank(source_verdicts.get("judge")) == 1:
        reasons.append("judge.warning")
    return reasons
444
+
445
+
446
def pair_trigger_missing_contract_violation(
    devlyn: pathlib.Path,
    source_verdicts: dict[str, str],
) -> dict[str, Any] | None:
    """Build the "pair_trigger missing" violation when the state calls for a pair decision.

    This function does not itself check whether ``pair_trigger`` is present;
    it only decides whether its absence would be a violation.

    Returns:
        None when the mechanical or judge verdict ranks 2 or higher
        (NEEDS_WORK / BLOCKED in ``RANK_VERDICT``) or when
        ``state_pair_trigger_reasons`` finds no reason. Otherwise a violation
        dict whose message lists the reasons.
    """
    for source in ("mechanical", "judge"):
        if rank(source_verdicts.get(source)) >= 2:
            return None
    natural_reasons = state_pair_trigger_reasons(devlyn, source_verdicts)
    if natural_reasons:
        listed = ", ".join(natural_reasons)
        return {
            "id": "verify-pair-trigger-required-missing",
            "message": (
                "pair_trigger is missing even though VERIFY state requires a pair decision: "
                + listed
            ),
            "file": "pipeline.state.json",
        }
    return None
463
+
464
+
465
+ def pair_trigger_skip_contract_violation(
466
+ devlyn: pathlib.Path,
467
+ source_verdicts: dict[str, str],
468
+ ) -> dict[str, Any] | None:
469
+ state_path = devlyn / "pipeline.state.json"
470
+ if not state_path.is_file():
471
+ return None
472
+ try:
473
+ state = loads_strict_json(state_path.read_text(encoding="utf-8"))
474
+ except ValueError:
475
+ return None
476
+ phases = state.get("phases") if isinstance(state, dict) else {}
477
+ verify_phase = phases.get("verify") if isinstance(phases, dict) else None
115
478
  trigger = None
116
479
  if isinstance(verify_phase, dict):
117
480
  trigger = verify_phase.get("pair_trigger")
@@ -119,11 +482,106 @@ def pair_trigger_required(devlyn: pathlib.Path) -> bool:
119
482
  verify_state = state.get("verify")
120
483
  if isinstance(verify_state, dict):
121
484
  trigger = verify_state.get("pair_trigger")
122
- return bool(
123
- isinstance(trigger, dict)
124
- and trigger.get("eligible") is True
125
- and trigger.get("reasons")
126
- )
485
+ if not isinstance(trigger, dict):
486
+ return None
487
+ skipped_reason = trigger.get("skipped_reason")
488
+ if trigger.get("eligible") is False and skipped_reason is None:
489
+ natural_reasons = state_pair_trigger_reasons(devlyn, source_verdicts)
490
+ if natural_reasons:
491
+ return {
492
+ "id": "verify-pair-trigger-ineligible-unjustified",
493
+ "message": (
494
+ "pair_trigger is ineligible without a skip reason even though "
495
+ "VERIFY state requires a pair decision: "
496
+ + ", ".join(natural_reasons)
497
+ ),
498
+ "file": "pipeline.state.json",
499
+ }
500
+ if skipped_reason is None:
501
+ return None
502
+ if skipped_reason not in ALLOWED_PAIR_SKIP_REASONS:
503
+ return {
504
+ "id": "verify-pair-trigger-skipped-reason-unsupported",
505
+ "message": (
506
+ "pair_trigger.skipped_reason must be user_no_pair, "
507
+ "mechanical_blocker, primary_judge_blocker, or null."
508
+ ),
509
+ "file": "pipeline.state.json",
510
+ }
511
+ if skipped_reason == "user_no_pair":
512
+ risk_profile = state.get("risk_profile") if isinstance(state, dict) else {}
513
+ if not isinstance(risk_profile, dict) or risk_profile.get("pair_default_enabled") is not False:
514
+ return {
515
+ "id": "verify-pair-trigger-user-no-pair-unsupported",
516
+ "message": (
517
+ "pair_trigger skipped_reason user_no_pair requires "
518
+ "risk_profile.pair_default_enabled false from an explicit --no-pair opt-out."
519
+ ),
520
+ "file": "pipeline.state.json",
521
+ }
522
+ if skipped_reason == "mechanical_blocker" and rank(source_verdicts.get("mechanical")) < 2:
523
+ return {
524
+ "id": "verify-pair-trigger-mechanical-blocker-unsupported",
525
+ "message": (
526
+ "pair_trigger skipped_reason mechanical_blocker requires a "
527
+ "verdict-binding MECHANICAL finding."
528
+ ),
529
+ "file": "pipeline.state.json",
530
+ }
531
+ if skipped_reason == "primary_judge_blocker" and rank(source_verdicts.get("judge")) < 2:
532
+ return {
533
+ "id": "verify-pair-trigger-primary-judge-blocker-unsupported",
534
+ "message": (
535
+ "pair_trigger skipped_reason primary_judge_blocker requires a "
536
+ "verdict-binding primary JUDGE finding."
537
+ ),
538
+ "file": "pipeline.state.json",
539
+ }
540
+ return None
541
+
542
+
543
+ def pair_trigger_reason_completeness_violation(
544
+ devlyn: pathlib.Path,
545
+ source_verdicts: dict[str, str],
546
+ ) -> dict[str, Any] | None:
547
+ if rank(source_verdicts.get("mechanical")) >= 2 or rank(source_verdicts.get("judge")) >= 2:
548
+ return None
549
+ state_path = devlyn / "pipeline.state.json"
550
+ if not state_path.is_file():
551
+ return None
552
+ try:
553
+ state = loads_strict_json(state_path.read_text(encoding="utf-8"))
554
+ except ValueError:
555
+ return None
556
+ phases = state.get("phases") if isinstance(state, dict) else {}
557
+ verify_phase = phases.get("verify") if isinstance(phases, dict) else None
558
+ trigger = None
559
+ if isinstance(verify_phase, dict):
560
+ trigger = verify_phase.get("pair_trigger")
561
+ if trigger is None and isinstance(state, dict):
562
+ verify_state = state.get("verify")
563
+ if isinstance(verify_state, dict):
564
+ trigger = verify_state.get("pair_trigger")
565
+ if not isinstance(trigger, dict) or trigger.get("eligible") is not True:
566
+ return None
567
+ reasons = trigger.get("reasons")
568
+ if not isinstance(reasons, list) or not all(isinstance(item, str) for item in reasons):
569
+ return None
570
+ missing = [
571
+ reason
572
+ for reason in state_pair_trigger_reasons(devlyn, source_verdicts)
573
+ if reason not in reasons
574
+ ]
575
+ if not missing:
576
+ return None
577
+ return {
578
+ "id": "verify-pair-trigger-reasons-incomplete",
579
+ "message": (
580
+ "pair_trigger.reasons is missing applicable canonical reason(s): "
581
+ + ", ".join(missing)
582
+ ),
583
+ "file": "pipeline.state.json",
584
+ }
127
585
 
128
586
 
129
587
  def pair_blocker(id_: str, message: str, file_: str | None = None) -> dict[str, Any]:
@@ -145,10 +603,71 @@ def detect_pair_stdout_contract_violations(
145
603
  source_verdicts: dict[str, str],
146
604
  ) -> list[dict[str, Any]]:
147
605
  stdout_path = devlyn / "codex-judge.stdout"
606
+ flag_violation = pair_flag_contract_violation(devlyn)
607
+ if flag_violation is not None:
608
+ source_verdicts["pair_judge"] = "BLOCKED"
609
+ return [
610
+ pair_blocker(
611
+ flag_violation["id"],
612
+ flag_violation["message"],
613
+ flag_violation["file"],
614
+ )
615
+ ]
616
+ required, malformed_trigger = pair_trigger_status(devlyn)
617
+ if malformed_trigger is not None:
618
+ source_verdicts["pair_judge"] = "BLOCKED"
619
+ return [
620
+ pair_blocker(
621
+ malformed_trigger["id"],
622
+ malformed_trigger["message"],
623
+ malformed_trigger["file"],
624
+ )
625
+ ]
626
+ risk_profile_violation = risk_profile_contract_violation(devlyn)
627
+ if risk_profile_violation is not None:
628
+ source_verdicts["pair_judge"] = "BLOCKED"
629
+ return [
630
+ pair_blocker(
631
+ risk_profile_violation["id"],
632
+ risk_profile_violation["message"],
633
+ risk_profile_violation["file"],
634
+ )
635
+ ]
636
+ if not required and not pair_trigger_present(devlyn):
637
+ missing_violation = pair_trigger_missing_contract_violation(devlyn, source_verdicts)
638
+ if missing_violation is not None:
639
+ source_verdicts["pair_judge"] = "BLOCKED"
640
+ return [
641
+ pair_blocker(
642
+ missing_violation["id"],
643
+ missing_violation["message"],
644
+ missing_violation["file"],
645
+ )
646
+ ]
647
+ skip_violation = pair_trigger_skip_contract_violation(devlyn, source_verdicts)
648
+ if skip_violation is not None:
649
+ source_verdicts["pair_judge"] = "BLOCKED"
650
+ return [
651
+ pair_blocker(
652
+ skip_violation["id"],
653
+ skip_violation["message"],
654
+ skip_violation["file"],
655
+ )
656
+ ]
657
+ reason_violation = pair_trigger_reason_completeness_violation(devlyn, source_verdicts)
658
+ if reason_violation is not None:
659
+ source_verdicts["pair_judge"] = "BLOCKED"
660
+ return [
661
+ pair_blocker(
662
+ reason_violation["id"],
663
+ reason_violation["message"],
664
+ reason_violation["file"],
665
+ )
666
+ ]
148
667
  if has_pair_findings(devlyn):
149
668
  return []
150
669
  if not stdout_path.is_file():
151
- if pair_trigger_required(devlyn):
670
+ if required:
152
671
  source_verdicts["pair_judge"] = "BLOCKED"
153
672
  return [
154
673
  pair_blocker(
@@ -176,8 +695,8 @@ def detect_pair_stdout_contract_violations(
176
695
  continue
177
696
  if raw.startswith("# SUMMARY "):
178
697
  try:
179
- summary = json.loads(raw.removeprefix("# SUMMARY ").strip())
180
- except json.JSONDecodeError:
698
+ summary = loads_strict_json(raw.removeprefix("# SUMMARY ").strip())
699
+ except ValueError:
181
700
  continue
182
701
  if summary.get("verdict") in {"NEEDS_WORK", "FAIL", "BLOCKED"}:
183
702
  has_nonpass_summary = True
@@ -185,8 +704,8 @@ def detect_pair_stdout_contract_violations(
185
704
  if raw.startswith("#"):
186
705
  continue
187
706
  try:
188
- item = json.loads(raw)
189
- except json.JSONDecodeError:
707
+ item = loads_strict_json(raw)
708
+ except ValueError:
190
709
  continue
191
710
  if isinstance(item, dict) and str(item.get("severity") or "").upper() in {
192
711
  "CRITICAL",
@@ -237,7 +756,7 @@ def write_state(devlyn: pathlib.Path, summary: dict[str, Any]) -> None:
237
756
  state_path = devlyn / "pipeline.state.json"
238
757
  if not state_path.is_file():
239
758
  raise SystemExit(f"error: {state_path} not found")
240
- state = json.loads(state_path.read_text(encoding="utf-8"))
759
+ state = loads_strict_json(state_path.read_text(encoding="utf-8"))
241
760
  phases = state.setdefault("phases", {})
242
761
  verify = phases.get("verify")
243
762
  if not isinstance(verify, dict):
@@ -260,7 +779,19 @@ def self_test() -> int:
260
779
  with tempfile.TemporaryDirectory() as tmp:
261
780
  devlyn = pathlib.Path(tmp)
262
781
  (devlyn / "pipeline.state.json").write_text(
263
- json.dumps({"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}}}),
782
+ json.dumps({
783
+ "phases": {
784
+ "verify": {
785
+ "verdict": "PASS",
786
+ "sub_verdicts": {},
787
+ "pair_trigger": {
788
+ "eligible": True,
789
+ "reasons": ["risk.high", "judge.warning"],
790
+ "skipped_reason": None,
791
+ },
792
+ }
793
+ }
794
+ }),
264
795
  encoding="utf-8",
265
796
  )
266
797
  (devlyn / "verify.findings.jsonl").write_text(
@@ -274,17 +805,33 @@ def self_test() -> int:
274
805
  findings, source_verdicts = read_findings(devlyn)
275
806
  summary = write_outputs(devlyn, findings, source_verdicts)
276
807
  write_state(devlyn, summary)
277
- state = json.loads((devlyn / "pipeline.state.json").read_text(encoding="utf-8"))
808
+ state = loads_strict_json((devlyn / "pipeline.state.json").read_text(encoding="utf-8"))
278
809
  assert summary["verdict"] == "NEEDS_WORK", summary
279
810
  assert state["phases"]["verify"]["verdict"] == "NEEDS_WORK", state
280
811
  assert state["phases"]["verify"]["sub_verdicts"]["pair_judge"] == "NEEDS_WORK", state
281
812
  assert (devlyn / "verify-merged.findings.jsonl").read_text(encoding="utf-8")
813
+ (devlyn / "verify.findings.jsonl").write_text(
814
+ '{"id":"nan","severity":NaN}\n',
815
+ encoding="utf-8",
816
+ )
817
+ (devlyn / "verify.pair.findings.jsonl").write_text("", encoding="utf-8")
818
+ findings, source_verdicts = read_findings(devlyn)
819
+ assert source_verdicts["judge"] == "BLOCKED", source_verdicts
820
+ assert any(
821
+ finding.get("id") == "verify-merge-invalid-json-verify.findings.jsonl-1"
822
+ and "invalid JSON numeric constant: NaN" in finding.get("message", "")
823
+ for finding in findings
824
+ ), findings
825
+ (devlyn / "pipeline.state.json").write_text(
826
+ json.dumps({"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}}}),
827
+ encoding="utf-8",
828
+ )
282
829
  (devlyn / "verify.findings.jsonl").write_text("", encoding="utf-8")
283
830
  (devlyn / "verify.pair.findings.jsonl").write_text("", encoding="utf-8")
284
831
  findings, source_verdicts = read_findings(devlyn)
285
832
  summary = write_outputs(devlyn, findings, source_verdicts)
286
833
  write_state(devlyn, summary)
287
- state = json.loads((devlyn / "pipeline.state.json").read_text(encoding="utf-8"))
834
+ state = loads_strict_json((devlyn / "pipeline.state.json").read_text(encoding="utf-8"))
288
835
  assert summary["verdict"] == "PASS", summary
289
836
  assert state["phases"]["verify"]["verdict"] == "PASS", state
290
837
  assert state["phases"]["verify"]["sub_verdicts"]["pair_judge"] == "PASS", state
@@ -296,9 +843,812 @@ def self_test() -> int:
296
843
  findings, source_verdicts = read_findings(devlyn)
297
844
  summary = write_outputs(devlyn, findings, source_verdicts)
298
845
  write_state(devlyn, summary)
299
- state = json.loads((devlyn / "pipeline.state.json").read_text(encoding="utf-8"))
846
+ state = loads_strict_json((devlyn / "pipeline.state.json").read_text(encoding="utf-8"))
847
+ assert summary["verdict"] == "BLOCKED", summary
848
+ assert state["phases"]["verify"]["sub_verdicts"]["pair_judge"] == "BLOCKED", state
849
+
850
+ (devlyn / "codex-judge.stdout").unlink()
851
+ (devlyn / "pipeline.state.json").write_text(
852
+ json.dumps({
853
+ "phases": {
854
+ "verify": {
855
+ "verdict": "PASS",
856
+ "sub_verdicts": {},
857
+ "pair_trigger": {
858
+ "eligible": True,
859
+ "reasons": ["risk.high"],
860
+ "skipped_reason": None,
861
+ },
862
+ }
863
+ }
864
+ }),
865
+ encoding="utf-8",
866
+ )
867
+ findings, source_verdicts = read_findings(devlyn)
868
+ summary = write_outputs(devlyn, findings, source_verdicts)
869
+ write_state(devlyn, summary)
870
+ state = loads_strict_json((devlyn / "pipeline.state.json").read_text(encoding="utf-8"))
300
871
  assert summary["verdict"] == "BLOCKED", summary
301
872
  assert state["phases"]["verify"]["sub_verdicts"]["pair_judge"] == "BLOCKED", state
873
+ assert any(
874
+ finding.get("id") == "verify-pair-required-output-missing"
875
+ for finding in findings
876
+ ), findings
877
+
878
+ (devlyn / "pipeline.state.json").write_text(
879
+ json.dumps({
880
+ "mode": "spec",
881
+ "risk_profile": {
882
+ "high_risk": True,
883
+ "risk_probes_enabled": True,
884
+ "pair_default_enabled": True,
885
+ },
886
+ "phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
887
+ }),
888
+ encoding="utf-8",
889
+ )
890
+ findings, source_verdicts = read_findings(devlyn)
891
+ summary = write_outputs(devlyn, findings, source_verdicts)
892
+ assert summary["verdict"] == "BLOCKED", summary
893
+ assert any(
894
+ finding.get("id") == "verify-pair-trigger-required-missing"
895
+ for finding in findings
896
+ ), findings
897
+
898
+ (devlyn / "pipeline.state.json").write_text(
899
+ json.dumps({
900
+ "mode": "spec",
901
+ "risk_profile": "enabled",
902
+ "phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
903
+ }),
904
+ encoding="utf-8",
905
+ )
906
+ findings, source_verdicts = read_findings(devlyn)
907
+ summary = write_outputs(devlyn, findings, source_verdicts)
908
+ assert summary["verdict"] == "BLOCKED", summary
909
+ assert any(
910
+ finding.get("id") == "verify-risk-profile-malformed"
911
+ and "risk_profile must be an object" in str(finding.get("message"))
912
+ for finding in findings
913
+ ), findings
914
+
915
+ (devlyn / "pipeline.state.json").write_text(
916
+ json.dumps({
917
+ "mode": "spec",
918
+ "risk_profile": {
919
+ "high_risk": True,
920
+ "risk_probes_enabled": "true",
921
+ "pair_default_enabled": True,
922
+ },
923
+ "phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
924
+ }),
925
+ encoding="utf-8",
926
+ )
927
+ findings, source_verdicts = read_findings(devlyn)
928
+ summary = write_outputs(devlyn, findings, source_verdicts)
929
+ assert summary["verdict"] == "BLOCKED", summary
930
+ assert any(
931
+ finding.get("id") == "verify-risk-profile-malformed"
932
+ and "risk_profile.risk_probes_enabled must be a boolean" in str(finding.get("message"))
933
+ for finding in findings
934
+ ), findings
935
+
936
+ (devlyn / "pipeline.state.json").write_text(
937
+ json.dumps({
938
+ "mode": "spec",
939
+ "risk_profile": {
940
+ "high_risk": True,
941
+ "risk_probes_enabled": False,
942
+ "pair_default_enabled": True,
943
+ "reasons": ["explicit", 3],
944
+ },
945
+ "phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
946
+ }),
947
+ encoding="utf-8",
948
+ )
949
+ findings, source_verdicts = read_findings(devlyn)
950
+ summary = write_outputs(devlyn, findings, source_verdicts)
951
+ assert summary["verdict"] == "BLOCKED", summary
952
+ assert any(
953
+ finding.get("id") == "verify-risk-profile-malformed"
954
+ and "risk_profile.reasons must be a list of strings" in str(finding.get("message"))
955
+ for finding in findings
956
+ ), findings
957
+
958
+ (devlyn / "pipeline.state.json").write_text(
959
+ json.dumps({
960
+ "mode": "spec",
961
+ "pair_verify": True,
962
+ "phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
963
+ }),
964
+ encoding="utf-8",
965
+ )
966
+ findings, source_verdicts = read_findings(devlyn)
967
+ summary = write_outputs(devlyn, findings, source_verdicts)
968
+ assert summary["verdict"] == "BLOCKED", summary
969
+ assert any(
970
+ finding.get("id") == "verify-pair-trigger-required-missing"
971
+ and "mode.pair-verify" in finding.get("message", "")
972
+ for finding in findings
973
+ ), findings
974
+
975
+ (devlyn / "pipeline.state.json").write_text(
976
+ json.dumps({
977
+ "mode": "spec",
978
+ "complexity": "large",
979
+ "phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
980
+ }),
981
+ encoding="utf-8",
982
+ )
983
+ findings, source_verdicts = read_findings(devlyn)
984
+ summary = write_outputs(devlyn, findings, source_verdicts)
985
+ assert summary["verdict"] == "BLOCKED", summary
986
+ assert any(
987
+ finding.get("id") == "verify-pair-trigger-required-missing"
988
+ and "complexity.large" in str(finding.get("message"))
989
+ for finding in findings
990
+ ), findings
991
+
992
+ spec_path = devlyn / "spec.md"
993
+ spec_path.write_text(
994
+ '---\nid: "spec-high"\ncomplexity: high\n---\n\n# Spec\n',
995
+ encoding="utf-8",
996
+ )
997
+ (devlyn / "pipeline.state.json").write_text(
998
+ json.dumps({
999
+ "mode": "spec",
1000
+ "source": {"spec_path": str(spec_path)},
1001
+ "phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
1002
+ }),
1003
+ encoding="utf-8",
1004
+ )
1005
+ findings, source_verdicts = read_findings(devlyn)
1006
+ summary = write_outputs(devlyn, findings, source_verdicts)
1007
+ assert summary["verdict"] == "BLOCKED", summary
1008
+ assert any(
1009
+ finding.get("id") == "verify-pair-trigger-required-missing"
1010
+ and "spec.complexity.high" in str(finding.get("message"))
1011
+ for finding in findings
1012
+ ), findings
1013
+
1014
+ spec_path.write_text(
1015
+ '---\nid: "spec-large"\ncomplexity: large\n---\n\n# Spec\n',
1016
+ encoding="utf-8",
1017
+ )
1018
+ findings, source_verdicts = read_findings(devlyn)
1019
+ summary = write_outputs(devlyn, findings, source_verdicts)
1020
+ assert summary["verdict"] == "BLOCKED", summary
1021
+ assert any(
1022
+ finding.get("id") == "verify-pair-trigger-required-missing"
1023
+ and "spec.complexity.large" in str(finding.get("message"))
1024
+ for finding in findings
1025
+ ), findings
1026
+
1027
+ spec_path.write_text(
1028
+ "# Spec\n\n## Context\n\nsolo-headroom hypothesis: `SOLO_CLAUDE` should miss the priority rollback behavior; implementation token `rollback`.\n",
1029
+ encoding="utf-8",
1030
+ )
1031
+ assert spec_has_solo_headroom_hypothesis(
1032
+ {"source": {"spec_path": str(spec_path)}}
1033
+ ) is False
1034
+ (devlyn / "pipeline.state.json").write_text(
1035
+ json.dumps({
1036
+ "mode": "spec",
1037
+ "source": {"spec_path": str(spec_path)},
1038
+ "phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
1039
+ }),
1040
+ encoding="utf-8",
1041
+ )
1042
+ findings, source_verdicts = read_findings(devlyn)
1043
+ summary = write_outputs(devlyn, findings, source_verdicts)
1044
+ assert summary["verdict"] == "PASS", summary
1045
+ assert not any(
1046
+ finding.get("id") == "verify-pair-trigger-required-missing"
1047
+ and "spec.solo_headroom_hypothesis" in str(finding.get("message"))
1048
+ for finding in findings
1049
+ ), findings
1050
+
1051
+ spec_path.write_text(
1052
+ "# Spec\n\n## Context\n\nsolo-headroom hypothesis: solo_claude should miss the priority rollback behavior.\nObservable command: `node check.js` exposes behavior.\n",
1053
+ encoding="utf-8",
1054
+ )
1055
+ assert spec_has_solo_headroom_hypothesis(
1056
+ {"source": {"spec_path": str(spec_path)}}
1057
+ ) is False
1058
+
1059
+ spec_path.write_text(
1060
+ "# Spec\n\n## Context\n\nsolo-headroom hypothesis: `SOLO_CLAUDE` should miss the priority rollback behavior; observable `SOLO_CLAUDE` exposes the miss.\n",
1061
+ encoding="utf-8",
1062
+ )
1063
+ assert spec_has_solo_headroom_hypothesis(
1064
+ {"source": {"spec_path": str(spec_path)}}
1065
+ ) is False
1066
+
1067
+ spec_path.write_text(
1068
+ "# Spec\n\n## Context\n\nsolo-headroom hypothesis: solo_claude should miss behavior where observable `priority rollback` exposes the miss.\n",
1069
+ encoding="utf-8",
1070
+ )
1071
+ assert spec_has_solo_headroom_hypothesis(
1072
+ {"source": {"spec_path": str(spec_path)}}
1073
+ ) is False
1074
+
1075
+ spec_path.write_text(
1076
+ "# Spec\n\n## Context\n\nsolo-headroom hypothesis: `SOLO_CLAUDE` should miss the priority rollback behavior exposed by `node check.js`.\n",
1077
+ encoding="utf-8",
1078
+ )
1079
+ assert spec_has_solo_headroom_hypothesis(
1080
+ {"source": {"spec_path": str(spec_path)}}
1081
+ ) is True
1082
+ (devlyn / "pipeline.state.json").write_text(
1083
+ json.dumps({
1084
+ "mode": "spec",
1085
+ "source": {"spec_path": str(spec_path)},
1086
+ "phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
1087
+ }),
1088
+ encoding="utf-8",
1089
+ )
1090
+ findings, source_verdicts = read_findings(devlyn)
1091
+ summary = write_outputs(devlyn, findings, source_verdicts)
1092
+ assert summary["verdict"] == "BLOCKED", summary
1093
+ assert any(
1094
+ finding.get("id") == "verify-pair-trigger-required-missing"
1095
+ and "spec.solo_headroom_hypothesis" in str(finding.get("message"))
1096
+ for finding in findings
1097
+ ), findings
1098
+
1099
+ (devlyn / "pipeline.state.json").write_text(
1100
+ json.dumps({
1101
+ "mode": "spec",
1102
+ "source": {"spec_path": str(spec_path)},
1103
+ "risk_profile": {
1104
+ "high_risk": True,
1105
+ "risk_probes_enabled": False,
1106
+ "pair_default_enabled": True,
1107
+ },
1108
+ "phases": {
1109
+ "verify": {
1110
+ "verdict": "PASS",
1111
+ "sub_verdicts": {},
1112
+ "pair_trigger": {
1113
+ "eligible": True,
1114
+ "reasons": ["risk.high"],
1115
+ "skipped_reason": None,
1116
+ },
1117
+ }
1118
+ },
1119
+ }),
1120
+ encoding="utf-8",
1121
+ )
1122
+ findings, source_verdicts = read_findings(devlyn)
1123
+ summary = write_outputs(devlyn, findings, source_verdicts)
1124
+ assert summary["verdict"] == "BLOCKED", summary
1125
+ assert any(
1126
+ finding.get("id") == "verify-pair-trigger-reasons-incomplete"
1127
+ and "spec.solo_headroom_hypothesis" in str(finding.get("message"))
1128
+ for finding in findings
1129
+ ), findings
1130
+
1131
+ criteria_path = devlyn / "criteria.generated.md"
1132
+ criteria_path.write_text(
1133
+ "# Criteria\n\nsolo-headroom hypothesis: `SOLO_CLAUDE` should miss the priority rollback behavior exposed by `node check.js`.\n",
1134
+ encoding="utf-8",
1135
+ )
1136
+ assert spec_has_solo_headroom_hypothesis(
1137
+ {"source": {"criteria_path": str(criteria_path)}}
1138
+ ) is True
1139
+ (devlyn / "pipeline.state.json").write_text(
1140
+ json.dumps({
1141
+ "mode": "free-form",
1142
+ "source": {"criteria_path": str(criteria_path)},
1143
+ "phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
1144
+ }),
1145
+ encoding="utf-8",
1146
+ )
1147
+ findings, source_verdicts = read_findings(devlyn)
1148
+ summary = write_outputs(devlyn, findings, source_verdicts)
1149
+ assert summary["verdict"] == "BLOCKED", summary
1150
+ assert any(
1151
+ finding.get("id") == "verify-pair-trigger-required-missing"
1152
+ and "spec.solo_headroom_hypothesis" in str(finding.get("message"))
1153
+ for finding in findings
1154
+ ), findings
1155
+
1156
+ (devlyn / "verify-mechanical.findings.jsonl").write_text(
1157
+ json.dumps({"id": "m0", "severity": "HIGH"}) + "\n",
1158
+ encoding="utf-8",
1159
+ )
1160
+ findings, source_verdicts = read_findings(devlyn)
1161
+ summary = write_outputs(devlyn, findings, source_verdicts)
1162
+ assert summary["verdict"] == "NEEDS_WORK", summary
1163
+ assert not any(
1164
+ finding.get("id") == "verify-pair-trigger-required-missing"
1165
+ for finding in findings
1166
+ ), findings
1167
+ (devlyn / "verify-mechanical.findings.jsonl").write_text("", encoding="utf-8")
1168
+
1169
+ (devlyn / "pipeline.state.json").write_text(
1170
+ json.dumps({
1171
+ "phases": {
1172
+ "verify": {
1173
+ "verdict": "PASS",
1174
+ "sub_verdicts": {},
1175
+ "pair_trigger": {
1176
+ "eligible": "true",
1177
+ "reasons": ["risk.high"],
1178
+ "skipped_reason": None,
1179
+ },
1180
+ }
1181
+ }
1182
+ }),
1183
+ encoding="utf-8",
1184
+ )
1185
+ findings, source_verdicts = read_findings(devlyn)
1186
+ summary = write_outputs(devlyn, findings, source_verdicts)
1187
+ assert summary["verdict"] == "BLOCKED", summary
1188
+ assert any(
1189
+ finding.get("id") == "verify-pair-trigger-eligible-malformed"
1190
+ for finding in findings
1191
+ ), findings
1192
+
1193
+ (devlyn / "pipeline.state.json").write_text(
1194
+ json.dumps({
1195
+ "phases": {
1196
+ "verify": {
1197
+ "verdict": "PASS",
1198
+ "sub_verdicts": {},
1199
+ "pair_trigger": {
1200
+ "eligible": True,
1201
+ "reasons": "risk.high",
1202
+ "skipped_reason": None,
1203
+ },
1204
+ }
1205
+ }
1206
+ }),
1207
+ encoding="utf-8",
1208
+ )
1209
+ findings, source_verdicts = read_findings(devlyn)
1210
+ summary = write_outputs(devlyn, findings, source_verdicts)
1211
+ assert summary["verdict"] == "BLOCKED", summary
1212
+ assert any(
1213
+ finding.get("id") == "verify-pair-trigger-reasons-malformed"
1214
+ for finding in findings
1215
+ ), findings
1216
+
1217
+ (devlyn / "pipeline.state.json").write_text(
1218
+ json.dumps({
1219
+ "phases": {
1220
+ "verify": {
1221
+ "verdict": "PASS",
1222
+ "sub_verdicts": {},
1223
+ "pair_trigger": {
1224
+ "eligible": True,
1225
+ "reasons": ["risk.high", "looks-hard"],
1226
+ "skipped_reason": None,
1227
+ },
1228
+ }
1229
+ }
1230
+ }),
1231
+ encoding="utf-8",
1232
+ )
1233
+ findings, source_verdicts = read_findings(devlyn)
1234
+ summary = write_outputs(devlyn, findings, source_verdicts)
1235
+ assert summary["verdict"] == "BLOCKED", summary
1236
+ assert any(
1237
+ finding.get("id") == "verify-pair-trigger-reasons-unknown"
1238
+ and "only include known" in finding.get("message", "")
1239
+ for finding in findings
1240
+ ), findings
1241
+
1242
+ (devlyn / "pipeline.state.json").write_text(
1243
+ json.dumps({
1244
+ "phases": {
1245
+ "verify": {
1246
+ "verdict": "PASS",
1247
+ "sub_verdicts": {},
1248
+ "pair_trigger": {
1249
+ "eligible": True,
1250
+ "reasons": ["risk high"],
1251
+ "skipped_reason": None,
1252
+ },
1253
+ }
1254
+ }
1255
+ }),
1256
+ encoding="utf-8",
1257
+ )
1258
+ findings, source_verdicts = read_findings(devlyn)
1259
+ summary = write_outputs(devlyn, findings, source_verdicts)
1260
+ assert summary["verdict"] == "BLOCKED", summary
1261
+ assert any(
1262
+ finding.get("id") == "verify-pair-trigger-reasons-unknown"
1263
+ and "include a known" in finding.get("message", "")
1264
+ for finding in findings
1265
+ ), findings
1266
+
1267
+ (devlyn / "pipeline.state.json").write_text(
1268
+ json.dumps({
1269
+ "phases": {
1270
+ "verify": {
1271
+ "verdict": "PASS",
1272
+ "sub_verdicts": {},
1273
+ "pair_trigger": {
1274
+ "eligible": True,
1275
+ "reasons": ["risk_profile.high_risk", "risk_probes_enabled"],
1276
+ "skipped_reason": None,
1277
+ },
1278
+ }
1279
+ }
1280
+ }),
1281
+ encoding="utf-8",
1282
+ )
1283
+ findings, source_verdicts = read_findings(devlyn)
1284
+ summary = write_outputs(devlyn, findings, source_verdicts)
1285
+ assert summary["verdict"] == "BLOCKED", summary
1286
+ assert any(
1287
+ finding.get("id") == "verify-pair-trigger-reasons-unknown"
1288
+ and "include a known" in finding.get("message", "")
1289
+ for finding in findings
1290
+ ), findings
1291
+
1292
+ (devlyn / "pipeline.state.json").write_text(
1293
+ json.dumps({
1294
+ "phases": {
1295
+ "verify": {
1296
+ "verdict": "PASS",
1297
+ "sub_verdicts": {},
1298
+ "pair_trigger": {
1299
+ "eligible": True,
1300
+ "reasons": ["risk.high", 3],
1301
+ "skipped_reason": None,
1302
+ },
1303
+ }
1304
+ }
1305
+ }),
1306
+ encoding="utf-8",
1307
+ )
1308
+ findings, source_verdicts = read_findings(devlyn)
1309
+ summary = write_outputs(devlyn, findings, source_verdicts)
1310
+ assert summary["verdict"] == "BLOCKED", summary
1311
+ assert any(
1312
+ finding.get("id") == "verify-pair-trigger-reasons-malformed"
1313
+ for finding in findings
1314
+ ), findings
1315
+
1316
+ (devlyn / "pipeline.state.json").write_text(
1317
+ json.dumps({
1318
+ "phases": {
1319
+ "verify": {
1320
+ "verdict": "PASS",
1321
+ "sub_verdicts": {},
1322
+ "pair_trigger": {
1323
+ "eligible": True,
1324
+ "reasons": [],
1325
+ "skipped_reason": None,
1326
+ },
1327
+ }
1328
+ }
1329
+ }),
1330
+ encoding="utf-8",
1331
+ )
1332
+ findings, source_verdicts = read_findings(devlyn)
1333
+ summary = write_outputs(devlyn, findings, source_verdicts)
1334
+ assert summary["verdict"] == "BLOCKED", summary
1335
+ assert any(
1336
+ finding.get("id") == "verify-pair-trigger-reasons-empty"
1337
+ for finding in findings
1338
+ ), findings
1339
+
1340
+ (devlyn / "pipeline.state.json").write_text(
1341
+ json.dumps({
1342
+ "phases": {
1343
+ "verify": {
1344
+ "verdict": "PASS",
1345
+ "sub_verdicts": {},
1346
+ "pair_trigger": {
1347
+ "eligible": True,
1348
+ "reasons": ["risk.high"],
1349
+ "skipped_reason": "user_no_pair",
1350
+ },
1351
+ }
1352
+ }
1353
+ }),
1354
+ encoding="utf-8",
1355
+ )
1356
+ findings, source_verdicts = read_findings(devlyn)
1357
+ summary = write_outputs(devlyn, findings, source_verdicts)
1358
+ assert summary["verdict"] == "BLOCKED", summary
1359
+ assert any(
1360
+ finding.get("id") == "verify-pair-trigger-skip-contradiction"
1361
+ for finding in findings
1362
+ ), findings
1363
+
1364
+ (devlyn / "pipeline.state.json").write_text(
1365
+ json.dumps({
1366
+ "phases": {
1367
+ "verify": {
1368
+ "verdict": "PASS",
1369
+ "sub_verdicts": {},
1370
+ "pair_trigger": {
1371
+ "eligible": False,
1372
+ "reasons": ["risk.high"],
1373
+ "skipped_reason": "user_no_pair",
1374
+ },
1375
+ }
1376
+ }
1377
+ }),
1378
+ encoding="utf-8",
1379
+ )
1380
+ findings, source_verdicts = read_findings(devlyn)
1381
+ summary = write_outputs(devlyn, findings, source_verdicts)
1382
+ assert summary["verdict"] == "BLOCKED", summary
1383
+ assert any(
1384
+ finding.get("id") == "verify-pair-trigger-ineligible-reasons"
1385
+ for finding in findings
1386
+ ), findings
1387
+
1388
+ (devlyn / "pipeline.state.json").write_text(
1389
+ json.dumps({
1390
+ "mode": "spec",
1391
+ "risk_profile": {
1392
+ "high_risk": True,
1393
+ "risk_probes_enabled": False,
1394
+ "pair_default_enabled": True,
1395
+ },
1396
+ "phases": {
1397
+ "verify": {
1398
+ "verdict": "PASS",
1399
+ "sub_verdicts": {},
1400
+ "pair_trigger": {
1401
+ "eligible": False,
1402
+ "reasons": [],
1403
+ "skipped_reason": None,
1404
+ },
1405
+ }
1406
+ },
1407
+ }),
1408
+ encoding="utf-8",
1409
+ )
1410
+ findings, source_verdicts = read_findings(devlyn)
1411
+ summary = write_outputs(devlyn, findings, source_verdicts)
1412
+ assert summary["verdict"] == "BLOCKED", summary
1413
+ assert any(
1414
+ finding.get("id") == "verify-pair-trigger-ineligible-unjustified"
1415
+ and "risk.high" in str(finding.get("message"))
1416
+ for finding in findings
1417
+ ), findings
1418
+
1419
+ (devlyn / "pipeline.state.json").write_text(
1420
+ json.dumps({
1421
+ "mode": "spec",
1422
+ "risk_profile": {
1423
+ "high_risk": True,
1424
+ "risk_probes_enabled": True,
1425
+ "pair_default_enabled": True,
1426
+ },
1427
+ "phases": {
1428
+ "verify": {
1429
+ "verdict": "PASS",
1430
+ "sub_verdicts": {},
1431
+ "pair_trigger": {
1432
+ "eligible": False,
1433
+ "reasons": [],
1434
+ "skipped_reason": "user_no_pair",
1435
+ },
1436
+ }
1437
+ },
1438
+ }),
1439
+ encoding="utf-8",
1440
+ )
1441
+ findings, source_verdicts = read_findings(devlyn)
1442
+ summary = write_outputs(devlyn, findings, source_verdicts)
1443
+ assert summary["verdict"] == "BLOCKED", summary
1444
+ assert any(
1445
+ finding.get("id") == "verify-pair-trigger-user-no-pair-unsupported"
1446
+ and "pair_default_enabled false" in str(finding.get("message"))
1447
+ for finding in findings
1448
+ ), findings
1449
+
1450
+ (devlyn / "pipeline.state.json").write_text(
1451
+ json.dumps({
1452
+ "pair_verify": True,
1453
+ "risk_profile": {
1454
+ "high_risk": True,
1455
+ "risk_probes_enabled": False,
1456
+ "pair_default_enabled": False,
1457
+ },
1458
+ "phases": {
1459
+ "verify": {
1460
+ "verdict": "PASS",
1461
+ "sub_verdicts": {},
1462
+ "pair_trigger": {
1463
+ "eligible": False,
1464
+ "reasons": [],
1465
+ "skipped_reason": "user_no_pair",
1466
+ },
1467
+ }
1468
+ },
1469
+ }),
1470
+ encoding="utf-8",
1471
+ )
1472
+ findings, source_verdicts = read_findings(devlyn)
1473
+ summary = write_outputs(devlyn, findings, source_verdicts)
1474
+ assert summary["verdict"] == "BLOCKED", summary
1475
+ assert any(
1476
+ finding.get("id") == "verify-pair-trigger-conflicting-pair-flags"
1477
+ for finding in findings
1478
+ ), findings
1479
+
1480
+ (devlyn / "pipeline.state.json").write_text(
1481
+ json.dumps({
1482
+ "mode": "spec",
1483
+ "risk_profile": {
1484
+ "high_risk": True,
1485
+ "risk_probes_enabled": True,
1486
+ "pair_default_enabled": False,
1487
+ },
1488
+ "phases": {
1489
+ "verify": {
1490
+ "verdict": "PASS",
1491
+ "sub_verdicts": {},
1492
+ "pair_trigger": {
1493
+ "eligible": False,
1494
+ "reasons": [],
1495
+ "skipped_reason": "user_no_pair",
1496
+ },
1497
+ }
1498
+ },
1499
+ }),
1500
+ encoding="utf-8",
1501
+ )
1502
+ findings, source_verdicts = read_findings(devlyn)
1503
+ summary = write_outputs(devlyn, findings, source_verdicts)
1504
+ assert summary["verdict"] == "PASS", summary
1505
+ assert not any(
1506
+ finding.get("id") == "verify-pair-trigger-required-missing"
1507
+ for finding in findings
1508
+ ), findings
1509
+
1510
+ (devlyn / "pipeline.state.json").write_text(
1511
+ json.dumps({
1512
+ "phases": {
1513
+ "verify": {
1514
+ "verdict": "PASS",
1515
+ "sub_verdicts": {},
1516
+ "pair_trigger": {
1517
+ "eligible": False,
1518
+ "reasons": [],
1519
+ "skipped_reason": ["user_no_pair"],
1520
+ },
1521
+ }
1522
+ }
1523
+ }),
1524
+ encoding="utf-8",
1525
+ )
1526
+ findings, source_verdicts = read_findings(devlyn)
1527
+ summary = write_outputs(devlyn, findings, source_verdicts)
1528
+ assert summary["verdict"] == "BLOCKED", summary
1529
+ assert any(
1530
+ finding.get("id") == "verify-pair-trigger-skipped-reason-malformed"
1531
+ for finding in findings
1532
+ ), findings
1533
+
1534
+ (devlyn / "pipeline.state.json").write_text(
1535
+ json.dumps({
1536
+ "phases": {
1537
+ "verify": {
1538
+ "verdict": "PASS",
1539
+ "sub_verdicts": {},
1540
+ "pair_trigger": {
1541
+ "eligible": False,
1542
+ "reasons": [],
1543
+ "skipped_reason": "codex_unavailable",
1544
+ },
1545
+ }
1546
+ }
1547
+ }),
1548
+ encoding="utf-8",
1549
+ )
1550
+ findings, source_verdicts = read_findings(devlyn)
1551
+ summary = write_outputs(devlyn, findings, source_verdicts)
1552
+ assert summary["verdict"] == "BLOCKED", summary
1553
+ assert any(
1554
+ finding.get("id") == "verify-pair-trigger-skipped-reason-unsupported"
1555
+ for finding in findings
1556
+ ), findings
1557
+
1558
+ (devlyn / "pipeline.state.json").write_text(
1559
+ json.dumps({
1560
+ "phases": {
1561
+ "verify": {
1562
+ "verdict": "PASS",
1563
+ "sub_verdicts": {},
1564
+ "pair_trigger": {
1565
+ "eligible": False,
1566
+ "reasons": [],
1567
+ "skipped_reason": "mechanical_blocker",
1568
+ },
1569
+ }
1570
+ }
1571
+ }),
1572
+ encoding="utf-8",
1573
+ )
1574
+ findings, source_verdicts = read_findings(devlyn)
1575
+ summary = write_outputs(devlyn, findings, source_verdicts)
1576
+ assert summary["verdict"] == "BLOCKED", summary
1577
+ assert any(
1578
+ finding.get("id") == "verify-pair-trigger-mechanical-blocker-unsupported"
1579
+ for finding in findings
1580
+ ), findings
1581
+
1582
+ (devlyn / "verify-mechanical.findings.jsonl").write_text(
1583
+ json.dumps({"id": "m1", "severity": "HIGH"}) + "\n",
1584
+ encoding="utf-8",
1585
+ )
1586
+ findings, source_verdicts = read_findings(devlyn)
1587
+ summary = write_outputs(devlyn, findings, source_verdicts)
1588
+ assert summary["verdict"] == "NEEDS_WORK", summary
1589
+ assert not any(
1590
+ finding.get("id") == "verify-pair-trigger-mechanical-blocker-unsupported"
1591
+ for finding in findings
1592
+ ), findings
1593
+ (devlyn / "verify-mechanical.findings.jsonl").write_text("", encoding="utf-8")
1594
+
1595
+ (devlyn / "pipeline.state.json").write_text(
1596
+ json.dumps({
1597
+ "phases": {
1598
+ "verify": {
1599
+ "verdict": "PASS",
1600
+ "sub_verdicts": {},
1601
+ "pair_trigger": {
1602
+ "eligible": False,
1603
+ "reasons": [],
1604
+ "skipped_reason": "primary_judge_blocker",
1605
+ },
1606
+ }
1607
+ }
1608
+ }),
1609
+ encoding="utf-8",
1610
+ )
1611
+ findings, source_verdicts = read_findings(devlyn)
1612
+ summary = write_outputs(devlyn, findings, source_verdicts)
1613
+ assert summary["verdict"] == "BLOCKED", summary
1614
+ assert any(
1615
+ finding.get("id") == "verify-pair-trigger-primary-judge-blocker-unsupported"
1616
+ for finding in findings
1617
+ ), findings
1618
+
1619
+ (devlyn / "verify.findings.jsonl").write_text(
1620
+ json.dumps({"id": "j2", "severity": "HIGH"}) + "\n",
1621
+ encoding="utf-8",
1622
+ )
1623
+ findings, source_verdicts = read_findings(devlyn)
1624
+ summary = write_outputs(devlyn, findings, source_verdicts)
1625
+ assert summary["verdict"] == "NEEDS_WORK", summary
1626
+ assert not any(
1627
+ finding.get("id") == "verify-pair-trigger-primary-judge-blocker-unsupported"
1628
+ for finding in findings
1629
+ ), findings
1630
+ (devlyn / "verify.findings.jsonl").write_text("", encoding="utf-8")
1631
+
1632
+ (devlyn / "pipeline.state.json").write_text(
1633
+ json.dumps({
1634
+ "phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
1635
+ "verify": {
1636
+ "pair_trigger": {
1637
+ "eligible": True,
1638
+ "reasons": ["looks-hard"],
1639
+ "skipped_reason": None,
1640
+ }
1641
+ },
1642
+ }),
1643
+ encoding="utf-8",
1644
+ )
1645
+ findings, source_verdicts = read_findings(devlyn)
1646
+ summary = write_outputs(devlyn, findings, source_verdicts)
1647
+ assert summary["verdict"] == "BLOCKED", summary
1648
+ assert any(
1649
+ finding.get("id") == "verify-pair-trigger-reasons-unknown"
1650
+ for finding in findings
1651
+ ), findings
302
1652
  return 0
303
1653
 
304
1654